Commit 195fb4e4 authored by Xiaodong Liu's avatar Xiaodong Liu Committed by Tomasz Zawadzki
Browse files

bdev/aio: assign new io-ctx to each io-ch



It aims to avoid bdev starvation by assigning a separate
io_context to each bdev_aio io channel and linking the
channels into the group channel.

Previously, each channel of an aio_bdev on the same spdk_thread
shared the same io_context. If one of them submitted too many
I/Os to the io_context, the other channels would be starved.
Moreover, if the first bdev_io got a NOMEM error, the
following I/Os of this bdev had no chance to be processed.
The bdev_aio starvation can be reproduced by bdevperf in
v20.04, by:
./bdevperf -q 100 -w randwrite -t 5 -c hdd.conf -o 1048576
(hdd.conf defines 2 aio_bdevs of HDD)

Change-Id: Ic709323f5baeb2f8f7250b75ad872ec4156b5e78
Signed-off-by: default avatarXiaodong Liu <xiaodong.liu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3808


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarZiye Yang <ziye.yang@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarPaul Luse <paul.e.luse@intel.com>
Community-CI: Broadcom CI
parent 4e06de69
Loading
Loading
Loading
Loading
+37 −12
Original line number Diff line number Diff line
@@ -53,12 +53,14 @@

struct bdev_aio_io_channel {
	uint64_t				io_inflight;
	io_context_t				io_ctx;
	struct bdev_aio_group_channel		*group_ch;
	TAILQ_ENTRY(bdev_aio_io_channel)	link;
};

struct bdev_aio_group_channel {
	struct spdk_poller			*poller;
	io_context_t				io_ctx;
	TAILQ_HEAD(, bdev_aio_io_channel)	io_ch_head;
};

struct bdev_aio_task {
@@ -178,7 +180,7 @@ bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
	SPDK_DEBUGLOG(SPDK_LOG_AIO, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	rc = io_submit(aio_ch->group_ch->io_ctx, 1, &iocb);
	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (rc < 0) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
@@ -209,7 +211,7 @@ bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
	SPDK_DEBUGLOG(SPDK_LOG_AIO, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, len, offset);

	rc = io_submit(aio_ch->group_ch->io_ctx, 1, &iocb);
	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (rc < 0) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
@@ -312,18 +314,17 @@ bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *u
}

static int
bdev_aio_group_poll(void *arg)
bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
{
	struct bdev_aio_group_channel *group_ch = arg;
	int nr, i = 0;
	enum spdk_bdev_io_status status;
	struct bdev_aio_task *aio_task;
	struct io_event events[SPDK_AIO_QUEUE_DEPTH];

	nr = bdev_user_io_getevents(group_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);

	if (nr < 0) {
		return SPDK_POLLER_IDLE;
		return 0;
	}

	for (i = 0; i < nr; i++) {
@@ -338,6 +339,20 @@ bdev_aio_group_poll(void *arg)
		aio_task->ch->io_inflight--;
	}

	return nr;
}

static int
bdev_aio_group_poll(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	struct bdev_aio_io_channel *io_ch;
	int nr = 0;

	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
		nr += bdev_aio_io_channel_poll(io_ch);
	}

	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

@@ -481,7 +496,13 @@ bdev_aio_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
		SPDK_ERRLOG("async I/O context setup failure\n");
		return -1;
	}

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);

	return 0;
}
@@ -491,6 +512,11 @@ bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	io_destroy(ch->io_ctx);

	assert(ch->group_ch);
	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);

	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
}

@@ -561,10 +587,7 @@ bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
		SPDK_ERRLOG("async I/O context setup failure\n");
		return -1;
	}
	TAILQ_INIT(&ch->io_ch_head);

	ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
	return 0;
@@ -575,7 +598,9 @@ bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	io_destroy(ch->io_ctx);
	if (!TAILQ_EMPTY(&ch->io_ch_head)) {
		SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n");
	}

	spdk_poller_unregister(&ch->poller);
}