Commit c03985a0 authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Tomasz Zawadzki
Browse files

bdev: Copy data asynchronously when bounce buffer is used



This patch is a preparation for enabling of memory domains
pull/psuh functionality. Since memory domains API is
asynchronous, this patch makes asynchronous operations
with bounce buffers.

Signed-off-by: default avatarAlexey Marchuk <alexeymar@mellanox.com>
Change-Id: Ieb1f2a0c151149af58cfd7607dbde4c76c3c288d
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10420


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
parent cbda5664
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -746,6 +746,7 @@ struct spdk_bdev_io {

		/** if the request is double buffered, store original request iovs here */
		struct iovec  bounce_iov;
		struct iovec  bounce_md_iov;
		struct iovec  orig_md_iov;
		struct iovec *orig_iovs;
		int           orig_iovcnt;
@@ -770,6 +771,9 @@ struct spdk_bdev_io {

		/** Pointer to a structure passed by the user in ext API */
		struct spdk_bdev_ext_io_opts *ext_opts;

		/** Data transfer completion callback */
		void (*data_transfer_cpl)(void *ctx, int rc);
	} internal;

	/**
+111 −22
Original line number Diff line number Diff line
@@ -133,6 +133,8 @@ static struct spdk_bdev_mgr g_bdev_mgr = {

typedef void (*lock_range_cb)(void *ctx, int status);

typedef void (*bdev_copy_bounce_buffer_cpl)(void *ctx, int rc);

struct lba_range {
	uint64_t			offset;
	uint64_t			length;
@@ -348,6 +350,8 @@ struct set_qos_limit_ctx {
#define __bdev_to_io_dev(bdev)		(((char *)bdev) + 1)
#define __bdev_from_io_dev(io_dev)	((struct spdk_bdev *)(((char *)io_dev) - 1))

static inline void bdev_io_complete(void *ctx);

static void bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
static void bdev_write_zero_buffer_next(void *_bdev_io);

@@ -851,17 +855,34 @@ bdev_io_get_buf_complete(struct spdk_bdev_io *bdev_io, bool status)
}

static void
_bdev_io_set_bounce_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
_bdev_io_pull_buffer_cpl(void *ctx, int rc)
{
	struct spdk_bdev_io *bdev_io = ctx;

	if (rc) {
		SPDK_ERRLOG("Set bounce buffer failed with rc %d\n", rc);
		bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
	}
	bdev_io_get_buf_complete(bdev_io, !rc);
}

static void
_bdev_io_pull_bounce_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
{
	/* save original md_buf */
	bdev_io->internal.orig_md_iov.iov_base = bdev_io->u.bdev.md_buf;
	bdev_io->internal.orig_md_iov.iov_len = len;
	bdev_io->internal.bounce_md_iov.iov_base = md_buf;
	bdev_io->internal.bounce_md_iov.iov_len = len;
	/* set bounce md_buf */
	bdev_io->u.bdev.md_buf = md_buf;

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		memcpy(md_buf, bdev_io->internal.orig_md_iov.iov_base, bdev_io->internal.orig_md_iov.iov_len);
	}

	assert(bdev_io->internal.data_transfer_cpl);
	bdev_io->internal.data_transfer_cpl(bdev_io, 0);
}

static void
@@ -878,7 +899,7 @@ _bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io)
		assert(((uintptr_t)buf & (spdk_bdev_get_buf_align(bdev) - 1)) == 0);

		if (bdev_io->u.bdev.md_buf != NULL) {
			_bdev_io_set_bounce_md_buf(bdev_io, buf, md_len);
			_bdev_io_pull_bounce_md_buf(bdev_io, buf, md_len);
			return;
		} else {
			spdk_bdev_io_set_md_buf(bdev_io, buf, md_len);
@@ -889,8 +910,25 @@ _bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io)
}

static void
_bdev_io_set_bounce_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
_bdev_io_pull_bounce_data_buf_done(void *ctx, int rc)
{
	struct spdk_bdev_io *bdev_io = ctx;

	if (rc) {
		SPDK_ERRLOG("Failed to get data buffer\n");
		assert(bdev_io->internal.data_transfer_cpl);
		bdev_io->internal.data_transfer_cpl(bdev_io, rc);
		return;
	}

	_bdev_io_set_md_buf(bdev_io);
}

static void
_bdev_io_pull_bounce_data_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len,
			      bdev_copy_bounce_buffer_cpl cpl_cb)
{
	bdev_io->internal.data_transfer_cpl = cpl_cb;
	/* save original iovec */
	bdev_io->internal.orig_iovs = bdev_io->u.bdev.iovs;
	bdev_io->internal.orig_iovcnt = bdev_io->u.bdev.iovcnt;
@@ -904,6 +942,8 @@ _bdev_io_set_bounce_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		_copy_iovs_to_buf(buf, len, bdev_io->internal.orig_iovs, bdev_io->internal.orig_iovcnt);
	}

	_bdev_io_pull_bounce_data_buf_done(bdev_io, 0);
}

static void
@@ -926,7 +966,9 @@ _bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len)
	aligned_buf = (void *)(((uintptr_t)buf + (alignment - 1)) & ~(alignment - 1));

	if (buf_allocated) {
		_bdev_io_set_bounce_buf(bdev_io, aligned_buf, len);
		_bdev_io_pull_bounce_data_buf(bdev_io, aligned_buf, len, _bdev_io_pull_buffer_cpl);
		/* Continue in completion callback */
		return;
	} else {
		spdk_bdev_io_set_buf(bdev_io, aligned_buf, len);
	}
@@ -1053,6 +1095,30 @@ _bdev_io_handle_no_mem(struct spdk_bdev_io *bdev_io)
	return false;
}

static void
_bdev_io_complete_push_bounce_done(void *ctx, int rc)
{
	struct spdk_bdev_io *bdev_io = ctx;
	struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
	struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;

	if (rc) {
		bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
	}
	/* We want to free the bounce buffer here since we know we're done with it (as opposed
	 * to waiting for the conditional free of internal.buf in spdk_bdev_free_io()).
	 */
	bdev_io_put_buf(bdev_io);

	/* Continue with IO completion flow */
	_bdev_io_decrement_outstanding(bdev_ch, shared_resource);
	if (spdk_unlikely(_bdev_io_handle_no_mem(bdev_io))) {
		return;
	}

	bdev_io_complete(bdev_io);
}

static inline void
_bdev_io_push_bounce_md_buffer(struct spdk_bdev_io *bdev_io)
{
@@ -1065,22 +1131,24 @@ _bdev_io_push_bounce_md_buffer(struct spdk_bdev_io *bdev_io)
			memcpy(bdev_io->internal.orig_md_iov.iov_base, bdev_io->u.bdev.md_buf,
			       bdev_io->internal.orig_md_iov.iov_len);
		}

		bdev_io->u.bdev.md_buf = bdev_io->internal.orig_md_iov.iov_base;
		bdev_io->internal.orig_md_iov.iov_base = NULL;
	}

	/* We want to free the bounce buffer here since we know we're done with it (as opposed
	 * to waiting for the conditional free of internal.buf in spdk_bdev_free_io()).
	 */
	bdev_io_put_buf(bdev_io);
	assert(bdev_io->internal.data_transfer_cpl);
	bdev_io->internal.data_transfer_cpl(bdev_io, 0);
}

static void
_bdev_io_push_bounce_data_buffer_done(void *ctx)
_bdev_io_push_bounce_data_buffer_done(void *ctx, int rc)
{
	struct spdk_bdev_io *bdev_io = ctx;

	assert(bdev_io->internal.data_transfer_cpl);

	if (rc) {
		bdev_io->internal.data_transfer_cpl(bdev_io, rc);
		return;
	}

	/* set original buffer for this io */
	bdev_io->u.bdev.iovcnt = bdev_io->internal.orig_iovcnt;
	bdev_io->u.bdev.iovs = bdev_io->internal.orig_iovs;
@@ -1091,13 +1159,10 @@ _bdev_io_push_bounce_data_buffer_done(void *ctx)
	_bdev_io_push_bounce_md_buffer(bdev_io);
}

static void
_bdev_io_unset_bounce_buf(struct spdk_bdev_io *bdev_io)
static inline void
_bdev_io_push_bounce_data_buffer(struct spdk_bdev_io *bdev_io, bdev_copy_bounce_buffer_cpl cpl_cb)
{
	if (spdk_likely(bdev_io->internal.orig_iovcnt == 0)) {
		assert(bdev_io->internal.orig_md_iov.iov_base == NULL);
		return;
	}
	bdev_io->internal.data_transfer_cpl = cpl_cb;

	/* if this is read path, copy data from bounce buffer to original buffer */
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
@@ -1108,7 +1173,7 @@ _bdev_io_unset_bounce_buf(struct spdk_bdev_io *bdev_io)
				  bdev_io->internal.bounce_iov.iov_len);
	}

	_bdev_io_push_bounce_data_buffer_done(bdev_io);
	_bdev_io_push_bounce_data_buffer_done(bdev_io, 0);
}

static void
@@ -2496,6 +2561,19 @@ bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io)
	}
}

static void
parent_bdev_io_complete(void *ctx, int rc)
{
	struct spdk_bdev_io *parent_io = ctx;

	if (rc) {
		parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	parent_io->internal.cb(parent_io, parent_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
			       parent_io->internal.caller_ctx);
}

static void
bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
@@ -2521,8 +2599,13 @@ bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
		assert(parent_io->internal.cb != bdev_io_split_done);
		spdk_trace_record(TRACE_BDEV_IO_DONE, 0, 0, (uintptr_t)parent_io, bdev_io->internal.caller_ctx);
		TAILQ_REMOVE(&parent_io->internal.ch->io_submitted, parent_io, internal.ch_link);
		parent_io->internal.cb(parent_io, parent_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
				       parent_io->internal.caller_ctx);

		if (parent_io->internal.orig_iovcnt != 0) {
			_bdev_io_push_bounce_data_buffer(parent_io, parent_bdev_io_complete);
			/* bdev IO will be completed in the callback */
		} else {
			parent_bdev_io_complete(parent_io, 0);
		}
		return;
	}

@@ -2766,6 +2849,7 @@ bdev_io_init(struct spdk_bdev_io *bdev_io,
	bdev_io->internal.get_buf_cb = NULL;
	bdev_io->internal.get_aux_buf_cb = NULL;
	bdev_io->internal.ext_opts = NULL;
	bdev_io->internal.data_transfer_cpl = NULL;
}

static bool
@@ -5555,7 +5639,12 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta
			return;
		}
	} else {
		_bdev_io_unset_bounce_buf(bdev_io);
		if (spdk_unlikely(bdev_io->internal.orig_iovcnt != 0)) {
			_bdev_io_push_bounce_data_buffer(bdev_io, _bdev_io_complete_push_bounce_done);
			/* bdev IO will be completed in the callback */
			return;
		}

		_bdev_io_decrement_outstanding(bdev_ch, shared_resource);
		if (spdk_unlikely(_bdev_io_handle_no_mem(bdev_io))) {
			return;
+5 −1
Original line number Diff line number Diff line
@@ -256,8 +256,12 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)

	CU_ASSERT(expected_io->iovcnt == bdev_io->u.bdev.iovcnt);
	for (i = 0; i < expected_io->iovcnt; i++) {
		iov = &bdev_io->u.bdev.iovs[i];
		expected_iov = &expected_io->iov[i];
		if (bdev_io->internal.orig_iovcnt == 0) {
			iov = &bdev_io->u.bdev.iovs[i];
		} else {
			iov = bdev_io->internal.orig_iovs;
		}
		CU_ASSERT(iov->iov_len == expected_iov->iov_len);
		CU_ASSERT(iov->iov_base == expected_iov->iov_base);
	}