Commit 9e06b192 authored by Fedor Uporov, committed by Tomasz Zawadzki
Browse files

bdev_aio: Add FreeBSD support



FreeBSD's kernel AIO is built around the kevent() system call.
Use kevent() for polling, just as io_getevents() is used on the Linux side.

Change-Id: I558236081b8d6870f9a4a3a937df60710045bce9
Signed-off-by: Fedor Uporov <fuporov.vstack@gmail.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/18498


Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent 14c83787
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -73,6 +73,11 @@ extern "C" {
#include <sched.h>
#endif

/* FreeBSD */
#ifdef __FreeBSD__
#include <aio.h>
#endif

#ifdef __cplusplus
}
#endif
+4 −0
Original line number Diff line number Diff line
@@ -65,6 +65,10 @@ BLOCKDEV_MODULES_PRIVATE_LIBS += -L/usr/lib64/iscsi -liscsi
endif
endif

ifeq ($(OS),FreeBSD)
BLOCKDEV_MODULES_LIST += bdev_aio
endif

ifeq ($(CONFIG_URING),y)
BLOCKDEV_MODULES_LIST += bdev_uring
BLOCKDEV_MODULES_PRIVATE_LIBS += -luring
+4 −0
Original line number Diff line number Diff line
@@ -24,6 +24,10 @@ DIRS-$(CONFIG_ISCSI_INITIATOR) += iscsi
DIRS-$(CONFIG_VIRTIO) += virtio
endif

ifeq ($(OS),FreeBSD)
DIRS-y += aio
endif

DIRS-$(CONFIG_RBD) += rbd

DIRS-$(CONFIG_DAOS) += daos
+3 −0
Original line number Diff line number Diff line
@@ -11,7 +11,10 @@ SO_MINOR := 0

C_SRCS = bdev_aio.c bdev_aio_rpc.c
LIBNAME = bdev_aio

ifeq ($(OS),Linux)
LOCAL_SYS_LIBS = -laio
endif

SPDK_MAP_FILE = $(SPDK_ROOT_DIR)/mk/spdk_blank.map

+164 −52
Original line number Diff line number Diff line
@@ -22,11 +22,18 @@
#include "spdk/log.h"

#include <sys/eventfd.h>

#ifndef __FreeBSD__
#include <libaio.h>
#endif

/*
 * Per-I/O-channel AIO state: the platform-specific completion handle plus
 * bookkeeping that ties the channel to its group channel.
 */
struct bdev_aio_io_channel {
	/* Count of I/Os on this channel; decremented as each completion is polled. */
	uint64_t				io_inflight;
#ifdef __FreeBSD__
	/* kqueue descriptor; submitted aiocbs post their SIGEV_KEVENT completions here. */
	int					kqfd;
#else
	/* libaio context created by io_setup() and passed to io_submit()/io_destroy(). */
	io_context_t				io_ctx;
#endif
	/* Group channel this I/O channel is attached to. */
	struct bdev_aio_group_channel		*group_ch;
	/* Linkage on the group channel's io_ch_head list. */
	TAILQ_ENTRY(bdev_aio_io_channel)	link;
};
@@ -42,7 +49,11 @@ struct bdev_aio_group_channel {
};

/*
 * Per-I/O driver context. The platform control block is embedded so that a
 * completion event can be mapped straight back to the task.
 */
struct bdev_aio_task {
#ifdef __FreeBSD__
	/* POSIX AIO control block handed to aio_readv()/aio_writev(). */
	struct aiocb			aiocb;
#else
	/* libaio control block handed to io_submit(). */
	struct iocb			iocb;
#endif
	/* Requested transfer size in bytes; compared with the completed byte count. */
	uint64_t			len;
	/* Channel the I/O was submitted on; its io_inflight is updated on completion. */
	struct bdev_aio_io_channel	*ch;
};
@@ -140,60 +151,76 @@ bdev_aio_close(struct file_disk *disk)
	return 0;
}

static void
bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
	       struct bdev_aio_task *aio_task,
#ifdef __FreeBSD__
static int
bdev_aio_submit_io(enum spdk_bdev_io_type type, struct file_disk *fdisk,
		   struct spdk_io_channel *ch, struct bdev_aio_task *aio_task,
		   struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct aiocb *aiocb = &aio_task->aiocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
	if (aio_ch->group_ch->efd >= 0) {
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	memset(aiocb, 0, sizeof(struct aiocb));
	aiocb->aio_fildes = fdisk->fd;
	aiocb->aio_iov = iov;
	aiocb->aio_iovcnt = iovcnt;
	aiocb->aio_offset = offset;
	aiocb->aio_sigevent.sigev_notify_kqueue = aio_ch->kqfd;
	aiocb->aio_sigevent.sigev_value.sival_ptr = aio_task;
	aiocb->aio_sigevent.sigev_notify = SIGEV_KEVENT;

	aio_task->len = nbytes;
	aio_task->ch = aio_ch;

	SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
		      iovcnt, nbytes, offset);

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	if (spdk_unlikely(rc < 0)) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), rc);
			SPDK_ERRLOG("%s: io_submit returned %d\n", __func__, rc);
		}
	} else {
		aio_ch->io_inflight++;
	}
	if (type == SPDK_BDEV_IO_TYPE_READ) {
		return aio_readv(aiocb);
	}

static void
bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
		struct bdev_aio_task *aio_task,
		struct iovec *iov, int iovcnt, size_t len, uint64_t offset)
	return aio_writev(aiocb);
}
#else
static int
bdev_aio_submit_io(enum spdk_bdev_io_type type, struct file_disk *fdisk,
		   struct spdk_io_channel *ch, struct bdev_aio_task *aio_task,
		   struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct iocb *iocb = &aio_task->iocb;
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	if (type == SPDK_BDEV_IO_TYPE_READ) {
		io_prep_preadv(iocb, fdisk->fd, iov, iovcnt, offset);
	} else {
		io_prep_pwritev(iocb, fdisk->fd, iov, iovcnt, offset);
	}

	if (aio_ch->group_ch->efd >= 0) {
		io_set_eventfd(iocb, aio_ch->group_ch->efd);
	}
	iocb->data = aio_task;
	aio_task->len = len;
	aio_task->len = nbytes;
	aio_task->ch = aio_ch;

	return io_submit(aio_ch->io_ctx, 1, &iocb);
}
#endif

static void
bdev_aio_rw(enum spdk_bdev_io_type type, struct file_disk *fdisk,
	    struct spdk_io_channel *ch, struct bdev_aio_task *aio_task,
	    struct iovec *iov, int iovcnt, uint64_t nbytes, uint64_t offset)
{
	struct bdev_aio_io_channel *aio_ch = spdk_io_channel_get_ctx(ch);
	int rc;

	if (type == SPDK_BDEV_IO_TYPE_READ) {
		SPDK_DEBUGLOG(aio, "read %d iovs size %lu to off: %#lx\n",
			      iovcnt, nbytes, offset);
	} else {
		SPDK_DEBUGLOG(aio, "write %d iovs size %lu from off: %#lx\n",
		      iovcnt, len, offset);
			      iovcnt, nbytes, offset);
	}

	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
	rc = bdev_aio_submit_io(type, fdisk, ch, aio_task, iov, iovcnt, nbytes, offset);
	if (spdk_unlikely(rc < 0)) {
		if (rc == -EAGAIN) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
@@ -242,6 +269,59 @@ bdev_aio_destruct(void *ctx)
	return 0;
}

#ifdef __FreeBSD__
/*
 * Reap pending events from a kqueue without blocking.
 *
 * kq:     kqueue descriptor to poll.
 * max:    capacity of the events array.
 * events: output array; it is zeroed before kevent() is called.
 *
 * Returns the number of events stored in events (possibly 0), or a negative
 * errno value if kevent() fails.
 */
static int
bdev_user_io_getevents(int kq, unsigned int max, struct kevent *events)
{
	struct timespec ts;
	int count;
	int saved_errno;

	memset(events, 0, max * sizeof(struct kevent));
	/* An all-zero timeout makes kevent() return immediately. */
	memset(&ts, 0, sizeof(ts));

	count = kevent(kq, NULL, 0, events, max, &ts);
	if (count < 0) {
		/* Capture errno before logging: the log path may overwrite it. */
		saved_errno = errno;
		SPDK_ERRLOG("failed to get kevents: %s.\n", spdk_strerror(saved_errno));
		return -saved_errno;
	}

	return count;
}

/*
 * Poll one I/O channel's kqueue and complete every bdev I/O whose AIO
 * request has finished.
 *
 * An I/O completes successfully only when aio_return() reports exactly the
 * number of bytes that were requested (aio_task->len). Otherwise it is
 * completed with the aio_error() status when that is non-zero, or with a
 * generic failure status.
 *
 * Returns the number of kevents reaped, or 0 if the kqueue could not be
 * polled.
 */
static int
bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
{
	int nr, i, res = 0;
	struct bdev_aio_task *aio_task;
	struct kevent events[SPDK_AIO_QUEUE_DEPTH];

	nr = bdev_user_io_getevents(io_ch->kqfd, SPDK_AIO_QUEUE_DEPTH, events);
	if (nr < 0) {
		return 0;
	}

	for (i = 0; i < nr; i++) {
		aio_task = events[i].udata;
		if (aio_task == NULL) {
			/*
			 * Without a task there is no bdev_io to complete and no
			 * channel counter to adjust. Skip this event but keep
			 * going: the remaining events have already been taken
			 * off the kqueue and would otherwise be lost.
			 */
			SPDK_ERRLOG("kevent %d carries no aio task, skipping\n", i);
			continue;
		}

		aio_task->ch->io_inflight--;

		if ((uint64_t)aio_return(&aio_task->aiocb) == aio_task->len) {
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_SUCCESS);
			continue;
		}

		/* Query the error status once and use it for both logging and completion. */
		res = aio_error(&aio_task->aiocb);
		SPDK_ERRLOG("failed to complete aio: rc %d\n", res);
		if (res != 0) {
			/*
			 * NOTE(review): aio_error() yields a positive errno value, while
			 * the submit path hands negative values to this helper - confirm
			 * which sign spdk_bdev_io_complete_aio_status() expects.
			 */
			spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(aio_task), res);
		} else {
			/* Short transfer with no error code reported. */
			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_FAILED);
		}
	}

	return nr;
}
#else
static int
bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *uevents)
{
@@ -340,6 +420,7 @@ bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)

	return nr;
}
#endif

static int
bdev_aio_group_poll(void *arg)
@@ -449,16 +530,9 @@ bdev_aio_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		bdev_aio_readv((struct file_disk *)bdev_io->bdev->ctxt,
			       ch,
			       (struct bdev_aio_task *)bdev_io->driver_ctx,
			       bdev_io->u.bdev.iovs,
			       bdev_io->u.bdev.iovcnt,
			       bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen,
			       bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		bdev_aio_writev((struct file_disk *)bdev_io->bdev->ctxt,
		bdev_aio_rw(bdev_io->type,
			    (struct file_disk *)bdev_io->bdev->ctxt,
			    ch,
			    (struct bdev_aio_task *)bdev_io->driver_ctx,
			    bdev_io->u.bdev.iovs,
@@ -531,17 +605,55 @@ bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
	}
}

#ifdef __FreeBSD__
static int
bdev_aio_create_cb(void *io_device, void *ctx_buf)
bdev_aio_create_io(struct bdev_aio_io_channel *ch)
{
	struct bdev_aio_io_channel *ch = ctx_buf;
	ch->kqfd = kqueue();
	if (ch->kqfd < 0) {
		SPDK_ERRLOG("async I/O context setup failure: %s.\n", spdk_strerror(errno));
		return -1;
	}

	return 0;
}

/* Release the channel's kqueue descriptor (FreeBSD counterpart of io_destroy()). */
static void
bdev_aio_destroy_io(struct bdev_aio_io_channel *ch)
{
	close(ch->kqfd);
}
#else
/*
 * Set up the libaio context for an I/O channel, sized for
 * SPDK_AIO_QUEUE_DEPTH requests.
 *
 * Returns 0 on success, or -1 (after logging a hint about the kernel AIO
 * limit) when io_setup() fails.
 */
static int
bdev_aio_create_io(struct bdev_aio_io_channel *ch)
{
	int rc;

	rc = io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx);
	if (rc >= 0) {
		return 0;
	}

	SPDK_ERRLOG("Async I/O context setup failure, likely due to exceeding kernel limit.\n");
	SPDK_ERRLOG("This limit may be increased using 'sysctl -w fs.aio-max-nr'.\n");

	return -1;
}

/* Tear down the libaio context that bdev_aio_create_io() set up for this channel. */
static void
bdev_aio_destroy_io(struct bdev_aio_io_channel *ch)
{
	io_destroy(ch->io_ctx);
}
#endif

static int
bdev_aio_create_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;
	int rc;

	rc = bdev_aio_create_io(ch);
	if (rc < 0) {
		return rc;
	}

	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);

@@ -553,7 +665,7 @@ bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
{
	struct bdev_aio_io_channel *ch = ctx_buf;

	io_destroy(ch->io_ctx);
	bdev_aio_destroy_io(ch);

	assert(ch->group_ch);
	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);
Loading