Commit 8e7688c5 authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Tomasz Zawadzki
Browse files

bdev: Copy ext_opts when request is split



That is done to correctly handle metadata pointer which is part
of ext_opts structure. It will also be used by the next patch to
remove memory_domain pointer if request which uses local buffers
is split

Force the user to set correct ext_opts size, update API functions
description.

Signed-off-by: default avatarAlexey Marchuk <alexeymar@mellanox.com>
Change-Id: I77517d70df34a998d718cc6474fb4c538a42918f
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11349


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarTomasz Zawadzki <tomasz.zawadzki@intel.com>
parent c20dd8af
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -934,7 +934,8 @@ int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_c
 * \param cb Called when the request is complete.
 * \param cb_arg Argument passed to cb.
 * \param opts Optional structure with extended IO request options. If set, this structure must be
 * valid until the IO is completed.
 * valid until the IO is completed. `size` member of this structure is used for ABI compatibility and
 * must be set to sizeof(struct spdk_bdev_ext_io_opts).
 *
 * \return 0 on success. On success, the callback will always
 * be called (even if the request ultimately failed). Return
@@ -1136,7 +1137,8 @@ int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_
 * \param cb Called when the request is complete.
 * \param cb_arg Argument passed to cb.
 * \param opts Optional structure with extended IO request options. If set, this structure must be
 * valid until the IO is completed.
 * valid until the IO is completed. `size` member of this structure is used for ABI compatibility and
 * must be set to sizeof(struct spdk_bdev_ext_io_opts).
 *
 * \return 0 on success. On success, the callback will always
 * be called (even if the request ultimately failed). Return
+3 −0
Original line number Diff line number Diff line
@@ -775,6 +775,9 @@ struct spdk_bdev_io {
		/** Pointer to a structure passed by the user in ext API */
		struct spdk_bdev_ext_io_opts *ext_opts;

		/** Copy of user's opts, used when I/O is split */
		struct spdk_bdev_ext_io_opts ext_opts_copy;

		/** Data transfer completion callback */
		void (*data_transfer_cpl)(void *ctx, int rc);
	} internal;
+44 −12
Original line number Diff line number Diff line
@@ -362,13 +362,13 @@ static int
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			  struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
			  uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
			  struct spdk_bdev_ext_io_opts *opts);
			  struct spdk_bdev_ext_io_opts *opts, bool copy_opts);
static int
bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   struct iovec *iov, int iovcnt, void *md_buf,
			   uint64_t offset_blocks, uint64_t num_blocks,
			   spdk_bdev_io_completion_cb cb, void *cb_arg,
			   struct spdk_bdev_ext_io_opts *opts);
			   struct spdk_bdev_ext_io_opts *opts, bool copy_opts);

static int
bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
@@ -1876,6 +1876,10 @@ spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
		bdev_io_put_buf(bdev_io);
	}

	if (spdk_unlikely(bdev_io->internal.ext_opts == &bdev_io->internal.ext_opts_copy)) {
		memset(&bdev_io->internal.ext_opts_copy, 0, sizeof(bdev_io->internal.ext_opts_copy));
	}

	if (ch->per_thread_cache_count < ch->bdev_io_cache_size) {
		ch->per_thread_cache_count++;
		STAILQ_INSERT_HEAD(&ch->per_thread_cache, bdev_io, internal.buf_link);
@@ -2321,7 +2325,7 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
					       iov, iovcnt, md_buf, current_offset,
					       num_blocks,
					       bdev_io_split_done, bdev_io,
					       bdev_io->internal.ext_opts);
					       bdev_io->internal.ext_opts, true);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		rc = bdev_writev_blocks_with_md(bdev_io->internal.desc,
@@ -2329,7 +2333,7 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
						iov, iovcnt, md_buf, current_offset,
						num_blocks,
						bdev_io_split_done, bdev_io,
						bdev_io->internal.ext_opts);
						bdev_io->internal.ext_opts, true);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		io_wait_fn = _bdev_unmap_split;
@@ -4084,11 +4088,12 @@ static int
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			  struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
			  uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
			  struct spdk_bdev_ext_io_opts *opts)
			  struct spdk_bdev_ext_io_opts *opts, bool copy_opts)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	struct spdk_bdev_ext_io_opts *opts_copy;

	if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
@@ -4111,7 +4116,17 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
	bdev_io->internal.ext_opts = opts;
	bdev_io->u.bdev.ext_opts = opts;

	if (opts && copy_opts) {
		assert(opts->size <= sizeof(*opts));
		opts_copy = &bdev_io->internal.ext_opts_copy;
		memcpy(opts_copy, opts, opts->size);
		bdev_io->internal.ext_opts_copy.metadata = md_buf;
		bdev_io->internal.ext_opts = opts_copy;
		bdev_io->u.bdev.ext_opts = opts_copy;
	}

	bdev_io_submit(bdev_io);

	return 0;
}

@@ -4121,7 +4136,7 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *
			   spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
					 num_blocks, cb, cb_arg, NULL);
					 num_blocks, cb, cb_arg, NULL, false);
}

int
@@ -4139,7 +4154,7 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann
	}

	return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
					 num_blocks, cb, cb_arg, NULL);
					 num_blocks, cb, cb_arg, NULL, false);
}

int
@@ -4152,6 +4167,9 @@ spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *
	void *md = NULL;

	if (opts) {
		if (spdk_unlikely(!opts->size || opts->size > sizeof(struct spdk_bdev_ext_io_opts))) {
			return -EINVAL;
		}
		md = opts->metadata;
	}

@@ -4164,7 +4182,7 @@ spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *
	}

	return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks,
					 num_blocks, cb, cb_arg, opts);
					 num_blocks, cb, cb_arg, opts, false);
}

static int
@@ -4256,11 +4274,12 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *
			   struct iovec *iov, int iovcnt, void *md_buf,
			   uint64_t offset_blocks, uint64_t num_blocks,
			   spdk_bdev_io_completion_cb cb, void *cb_arg,
			   struct spdk_bdev_ext_io_opts *opts)
			   struct spdk_bdev_ext_io_opts *opts, bool copy_opts)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	struct spdk_bdev_ext_io_opts *opts_copy;

	if (!desc->write) {
		return -EBADF;
@@ -4287,7 +4306,17 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *
	bdev_io->internal.ext_opts = opts;
	bdev_io->u.bdev.ext_opts = opts;

	if (opts && copy_opts) {
		assert(opts->size <= sizeof(*opts));
		opts_copy = &bdev_io->internal.ext_opts_copy;
		memcpy(opts_copy, opts, opts->size);
		bdev_io->internal.ext_opts_copy.metadata = md_buf;
		bdev_io->internal.ext_opts = opts_copy;
		bdev_io->u.bdev.ext_opts = opts_copy;
	}

	bdev_io_submit(bdev_io);

	return 0;
}

@@ -4314,7 +4343,7 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
					  num_blocks, cb, cb_arg, NULL);
					  num_blocks, cb, cb_arg, NULL, false);
}

int
@@ -4332,7 +4361,7 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan
	}

	return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
					  num_blocks, cb, cb_arg, NULL);
					  num_blocks, cb, cb_arg, NULL, false);
}

int
@@ -4345,6 +4374,9 @@ spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel
	void *md = NULL;

	if (opts) {
		if (spdk_unlikely(!opts->size || opts->size > sizeof(struct spdk_bdev_ext_io_opts))) {
			return -EINVAL;
		}
		md = opts->metadata;
	}

@@ -4357,7 +4389,7 @@ spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel
	}

	return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks,
					  num_blocks, cb, cb_arg, opts);
					  num_blocks, cb, cb_arg, opts, false);
}

static void
+133 −1
Original line number Diff line number Diff line
@@ -238,6 +238,9 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)

	if (expected_io->md_buf != NULL) {
		CU_ASSERT(expected_io->md_buf == bdev_io->u.bdev.md_buf);
		if (bdev_io->internal.ext_opts) {
			CU_ASSERT(expected_io->md_buf == bdev_io->internal.ext_opts->metadata);
		}
	}

	if (expected_io->length == 0) {
@@ -4900,7 +4903,8 @@ bdev_writev_readv_ext(void)
	struct iovec iov = { .iov_base = (void *)0xbaaddead, .iov_len = 0x1000 };
	struct ut_expected_io *expected_io;
	struct spdk_bdev_ext_io_opts ext_io_opts = {
		.metadata = (void *)0xFF000000
		.metadata = (void *)0xFF000000,
		.size = sizeof(ext_io_opts)
	};
	int rc;

@@ -4947,6 +4951,134 @@ bdev_writev_readv_ext(void)
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);

	/* Test invalid ext_opts size */
	ext_io_opts.size = 0;
	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
	CU_ASSERT(rc != 0);
	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
	CU_ASSERT(rc != 0);

	ext_io_opts.size = sizeof(ext_io_opts) * 2;
	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
	CU_ASSERT(rc != 0);
	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
	CU_ASSERT(rc != 0);

	/* Check that IO request with ext_opts and metadata is split correctly
	 * Offset 14, length 8, payload 0xF000
	 *  Child - Offset 14, length 2, payload 0xF000
	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
	 */
	bdev->optimal_io_boundary = 16;
	bdev->split_on_optimal_io_boundary = true;
	bdev->md_interleave = false;
	bdev->md_len = 8;

	iov.iov_base = (void *)0xF000;
	iov.iov_len = 4096;
	memset(&ext_io_opts, 0, sizeof(ext_io_opts));
	ext_io_opts.metadata = (void *)0xFF000000;
	ext_io_opts.size = sizeof(ext_io_opts);
	g_io_done = false;

	/* read */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
	expected_io->md_buf = ext_io_opts.metadata;
	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
	expected_io->md_buf = ext_io_opts.metadata + 2 * 8;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* write */
	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
	expected_io->md_buf = ext_io_opts.metadata;
	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 6, 1);
	expected_io->md_buf = ext_io_opts.metadata + 2 * 8;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* ext_opts contains metadata but ext_opts::size doesn't cover metadata pointer.
	 * Check that IO request with ext_opts and metadata is split correctly - metadata pointer is not copied, so split
	 * requests do not have metadata
	 * Offset 14, length 8, payload 0xF000
	 *  Child - Offset 14, length 2, payload 0xF000
	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
	 */
	iov.iov_base = (void *)0xF000;
	iov.iov_len = 4096;
	memset(&ext_io_opts, 0, sizeof(ext_io_opts));
	ext_io_opts.metadata = (void *)0xFF000000;
	ext_io_opts.size = offsetof(struct spdk_bdev_ext_io_opts, metadata);;
	g_io_done = false;

	/* read */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
	/* Split request must not contain metadata pointer */
	expected_io->md_buf = NULL;
	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
	expected_io->md_buf = NULL;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* write */
	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
	expected_io->md_buf = NULL;
	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 6, 1);
	/* Split request must not contain metadata pointer */
	expected_io->md_buf = NULL;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	spdk_put_io_channel(io_ch);
	spdk_bdev_close(desc);
	free_bdev(bdev);