Commit 6ff8f60e authored by Ziye Yang's avatar Ziye Yang Committed by Changpeng Liu
Browse files

bdev/aio: Add the epoll support



This patch is used to add the epoll suport and make
the group polling more efficient.

Change-Id: I93416514a53dc45471e375b39aa051e6500de412
Signed-off-by: default avatarZiye Yang <optimistyzy@gmail.com>
Reviewed-on: https://review.gerrithub.io/421432


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarGangCao <gang.cao@intel.com>
parent 0c0d200b
Loading
Loading
Loading
Loading
+43 −7
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ static void bdev_aio_get_spdk_running_config(FILE *fp);
static TAILQ_HEAD(, file_disk) g_aio_disk_head;

#define SPDK_AIO_QUEUE_DEPTH 128
#define MAX_EVENTS_PER_POLL 32

static int
bdev_aio_get_ctx_size(void)
@@ -70,7 +71,7 @@ static struct spdk_bdev_module aio_if = {

struct bdev_aio_group_channel {
	struct spdk_poller	*poller;
	TAILQ_HEAD(, bdev_aio_io_channel)	aio_channel_head;
	int			epfd;
};

SPDK_BDEV_MODULE_REGISTER(&aio_if)
@@ -130,6 +131,7 @@ bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
	iocb->data = aio_task;
	aio_task->len = nbytes;
	io_set_eventfd(iocb, aio_ch->efd);

	SPDK_DEBUGLOG(SPDK_LOG_AIO, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);
@@ -160,6 +162,7 @@ bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
	io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
	iocb->data = aio_task;
	aio_task->len = len;
	io_set_eventfd(iocb, aio_ch->efd);

	SPDK_DEBUGLOG(SPDK_LOG_AIO, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, len, offset);
@@ -204,7 +207,14 @@ bdev_aio_destruct(void *ctx)
static int
bdev_aio_initialize_io_channel(struct bdev_aio_io_channel *ch)
{
	ch->efd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
	if (ch->efd == -1) {
		SPDK_ERRLOG("Cannot create efd\n");
		return -1;
	}

	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
		close(ch->efd);
		SPDK_ERRLOG("async I/O context setup failure\n");
		return -1;
	}
@@ -217,15 +227,23 @@ bdev_aio_group_poll(void *arg)
{
	struct bdev_aio_group_channel *group_ch = arg;
	struct bdev_aio_io_channel *ch;
	int nr, i, total_nr = 0;
	int nr, i, j, rc, total_nr = 0;
	enum spdk_bdev_io_status status;
	struct bdev_aio_task *aio_task;
	struct timespec timeout;
	struct io_event events[SPDK_AIO_QUEUE_DEPTH];
	struct epoll_event epevents[MAX_EVENTS_PER_POLL];

	timeout.tv_sec = 0;
	timeout.tv_nsec = 0;
	TAILQ_FOREACH(ch, &group_ch->aio_channel_head, link) {
	rc = epoll_wait(group_ch->epfd, epevents, MAX_EVENTS_PER_POLL, 0);
	if (rc == -1) {
		SPDK_ERRLOG("epoll_wait error(%d): %s on ch=%p\n", errno, spdk_strerror(errno), group_ch);
		return -1;
	}

	for (j = 0; j < rc; j++) {
		ch = epevents[j].data.ptr;
		nr = io_getevents(ch->io_ctx, 1, SPDK_AIO_QUEUE_DEPTH,
				  events, &timeout);

@@ -374,6 +392,7 @@ bdev_aio_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;
	struct bdev_aio_group_channel *group_ch_ctx;
	struct epoll_event epevent;

	if (bdev_aio_initialize_io_channel(ch) != 0) {
		return -1;
@@ -381,7 +400,16 @@ bdev_aio_create_cb(void *io_device, void *ctx_buf)

	ch->group_ch = spdk_get_io_channel(&aio_if);
	group_ch_ctx = spdk_io_channel_get_ctx(ch->group_ch);
	TAILQ_INSERT_TAIL(&group_ch_ctx->aio_channel_head, ch, link);

	epevent.events = EPOLLIN | EPOLLET;
	epevent.data.ptr = ch;
	if (epoll_ctl(group_ch_ctx->epfd, EPOLL_CTL_ADD, ch->efd, &epevent)) {
		close(ch->efd);
		io_destroy(ch->io_ctx);
		spdk_put_io_channel(ch->group_ch);
		SPDK_ERRLOG("epoll_ctl error\n");
		return -1;
	}
	return 0;
}

@@ -390,10 +418,12 @@ bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *io_channel = ctx_buf;
	struct bdev_aio_group_channel *group_ch_ctx;
	struct epoll_event event;

	group_ch_ctx = spdk_io_channel_get_ctx(io_channel->group_ch);
	TAILQ_REMOVE(&group_ch_ctx->aio_channel_head, io_channel, link);
	epoll_ctl(group_ch_ctx->epfd, EPOLL_CTL_DEL, io_channel->efd, &event);
	spdk_put_io_channel(io_channel->group_ch);
	close(io_channel->efd);
	io_destroy(io_channel->io_ctx);

}
@@ -467,7 +497,12 @@ bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	TAILQ_INIT(&ch->aio_channel_head);
	ch->epfd = epoll_create1(0);
	if (ch->epfd == -1) {
		SPDK_ERRLOG("cannot create epoll fd\n");
		return -1;
	}

	ch->poller = spdk_poller_register(bdev_aio_group_poll, ch, 0);
	return 0;
}
@@ -477,6 +512,7 @@ bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_group_channel *ch = ctx_buf;

	close(ch->epfd);
	spdk_poller_unregister(&ch->poller);
}

+3 −0
Original line number Diff line number Diff line
@@ -37,6 +37,8 @@
#include "spdk/stdinc.h"

#include <libaio.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>

#include "spdk/queue.h"
#include "spdk/bdev.h"
@@ -54,6 +56,7 @@ struct bdev_aio_io_channel {
	uint64_t				io_inflight;
	struct spdk_io_channel			*group_ch;
	TAILQ_ENTRY(bdev_aio_io_channel)	link;
	int					efd;
};

typedef void (*spdk_delete_aio_complete)(void *cb_arg, int bdeverrno);