Commit ae43c81a authored by Jim Harris's avatar Jim Harris Committed by Tomasz Zawadzki
Browse files

bdev: add spdk_bdev_comparev_and_writev_blocks



We will only support a vectored variant of
compare-and-write for now.

This does no locking for now.  It will be added
in a separate patch.

Signed-off-by: Maciej Szwed <maciej.szwed@intel.com>
Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I5bd075c912de60090e19cf8fced19c4879fcc900
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/475941


Community-CI: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom SPDK FC-NVMe CI <spdk-ci.pdl@broadcom.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent d9afeb15
Loading
Loading
Loading
Loading
+41 −0
Original line number Diff line number Diff line
@@ -138,6 +138,7 @@ enum spdk_bdev_io_type {
	SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT,
	SPDK_BDEV_IO_TYPE_ZONE_APPEND,
	SPDK_BDEV_IO_TYPE_COMPARE,
	SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE,
	SPDK_BDEV_NUM_IO_TYPES /* Keep last */
};

@@ -1125,6 +1126,46 @@ int spdk_bdev_comparev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_i
				      uint64_t offset_blocks, uint64_t num_blocks,
				      spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
 * Submit an atomic compare-and-write request to the bdev on the given channel.
 * For bdevs that do not natively support atomic compare-and-write, the bdev layer
 * will quiesce I/O to the specified LBA range, before performing the read,
 * compare and write operations.
 *
 * NOTE: the quiescing described above is not implemented yet. For bdevs without
 * native support, the request is currently emulated as a compare followed by a
 * write with no locking of the LBA range, so the emulated operation is not atomic
 * with respect to other I/O to the same blocks. Locking is planned as a follow-up
 * change.
 *
 * Currently this supports compare-and-write of only one block.
 *
 * The data buffers for both the compare and write operations are described in a
 * scatter gather list. Some physical devices place memory alignment requirements on
 * data and may not be able to directly transfer out of the buffers provided. In
 * this case, the request may fail.
 *
 * On the emulated path, if the compare step does not succeed the request completes
 * with status SPDK_BDEV_IO_STATUS_MISCOMPARE and the write is not issued.
 *
 * \ingroup bdev_io_submit_functions
 *
 * \param desc Block device descriptor.
 * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
 * \param compare_iov A scatter gather list of buffers to be compared.
 * \param compare_iovcnt The number of elements in compare_iov.
 * \param write_iov A scatter gather list of buffers to be written if the compare is
 *                  successful.
 * \param write_iovcnt The number of elements in write_iov.
 * \param offset_blocks The offset, in blocks, from the start of the block device.
 * \param num_blocks The number of blocks to compare-and-write.
 * \param cb Called when the request is complete.
 * \param cb_arg Argument passed to cb.
 *
 * \return 0 on success. On success, the callback will always
 * be called (even if the request ultimately failed). Return
 * negated errno on failure, in which case the callback will not be called.
 *   * -EINVAL - offset_blocks and/or num_blocks are out of range
 *   * -ENOMEM - spdk_bdev_io buffer cannot be allocated
 *   * -EBADF - desc not open for writing
 */
int spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		struct iovec *compare_iov, int compare_iovcnt,
		struct iovec *write_iov, int write_iovcnt,
		uint64_t offset_blocks, uint64_t num_blocks,
		spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
 * Submit a request to acquire a data buffer that represents the given
 * range of blocks. The data buffer is placed in the spdk_bdev_io structure
+8 −0
Original line number Diff line number Diff line
@@ -476,6 +476,14 @@ struct spdk_bdev_io {
			/** For SG buffer cases, number of iovecs in iovec array. */
			int iovcnt;

			/** For fused operations such as COMPARE_AND_WRITE, array of iovecs
			 *  for the second operation.
			 */
			struct iovec *fused_iovs;

			/** Number of iovecs in fused_iovs. */
			int fused_iovcnt;

			/* Metadata buffer */
			void *md_buf;

+117 −0
Original line number Diff line number Diff line
@@ -3565,6 +3565,123 @@ spdk_bdev_compare_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_cha
					   cb, cb_arg);
}

/*
 * Completion callback for the write phase of an emulated compare-and-write.
 *
 * bdev_io is the child write I/O and cb_arg is the parent COMPARE_AND_WRITE I/O.
 * The child is released, the parent's status is set from the write result, and
 * the parent's completion callback is invoked with that same result.
 */
static void
bdev_compare_and_write_do_write_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *parent_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	if (success) {
		parent_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
	}

	/* Forward the real outcome: a hard-coded false here would report a
	 * successful compare-and-write to the caller as a failure.
	 */
	parent_io->internal.cb(parent_io, success, parent_io->internal.caller_ctx);
}

/*
 * Write phase of an emulated compare-and-write.
 *
 * _bdev_io is the parent COMPARE_AND_WRITE I/O. Its fused iovecs are written to
 * the same block range that was compared. The function takes a void * so it can
 * also be handed to bdev_queue_io_wait_with_cb() as the retry callback.
 */
static void
bdev_compare_and_write_do_write(void *_bdev_io)
{
	struct spdk_bdev_io *parent_io = _bdev_io;
	int rc;

	rc = spdk_bdev_writev_blocks(parent_io->internal.desc,
				     spdk_io_channel_from_ctx(parent_io->internal.ch),
				     parent_io->u.bdev.fused_iovs, parent_io->u.bdev.fused_iovcnt,
				     parent_io->u.bdev.offset_blocks, parent_io->u.bdev.num_blocks,
				     bdev_compare_and_write_do_write_done, parent_io);

	switch (rc) {
	case 0:
		/* Submitted; bdev_compare_and_write_do_write_done() finishes the request. */
		break;
	case -ENOMEM:
		/* Could not submit the child write right now; queue this function to be retried. */
		bdev_queue_io_wait_with_cb(parent_io, bdev_compare_and_write_do_write);
		break;
	default:
		/* Any other submission error fails the parent request immediately. */
		parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
		parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
		break;
	}
}

/*
 * Completion callback for the compare phase of an emulated compare-and-write.
 *
 * bdev_io is the child compare I/O and cb_arg is the parent COMPARE_AND_WRITE I/O.
 * A successful compare moves on to the write phase; an unsuccessful one completes
 * the parent with MISCOMPARE status and no write is issued.
 */
static void
bdev_compare_and_write_do_compare_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *parent_io = cb_arg;

	/* The child I/O is no longer needed once its result is known. */
	spdk_bdev_free_io(bdev_io);

	if (success) {
		bdev_compare_and_write_do_write(parent_io);
	} else {
		parent_io->internal.status = SPDK_BDEV_IO_STATUS_MISCOMPARE;
		parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
	}
}

/*
 * Compare phase of an emulated compare-and-write.
 *
 * _bdev_io is the parent COMPARE_AND_WRITE I/O. Its primary iovecs are compared
 * against the target block range. The function takes a void * so it can also be
 * handed to bdev_queue_io_wait_with_cb() as the retry callback.
 */
static void
bdev_compare_and_write_do_compare(void *_bdev_io)
{
	struct spdk_bdev_io *parent_io = _bdev_io;
	int rc;

	rc = spdk_bdev_comparev_blocks(parent_io->internal.desc,
				       spdk_io_channel_from_ctx(parent_io->internal.ch),
				       parent_io->u.bdev.iovs, parent_io->u.bdev.iovcnt,
				       parent_io->u.bdev.offset_blocks, parent_io->u.bdev.num_blocks,
				       bdev_compare_and_write_do_compare_done, parent_io);
	if (rc == 0) {
		/* Submitted; bdev_compare_and_write_do_compare_done() continues the request. */
		return;
	}

	if (rc == -ENOMEM) {
		/* Could not submit the child compare right now; queue this function to be retried. */
		bdev_queue_io_wait_with_cb(parent_io, bdev_compare_and_write_do_compare);
		return;
	}

	/* Any other submission error fails the parent request immediately. */
	parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
	parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
}

/*
 * Submit a compare-and-write request.
 *
 * compare_iov/compare_iovcnt describe the data to compare, and
 * write_iov/write_iovcnt describe the data to write if the compare succeeds.
 * If the bdev reports support for SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE the request
 * is submitted as a single I/O. Otherwise it is emulated as a compare followed
 * by a write, with no locking of the block range.
 *
 * Returns 0 once the request has been accepted, -EBADF if desc is not open for
 * writing, -EINVAL if the block range is invalid, or -ENOMEM if no spdk_bdev_io
 * could be obtained from the channel.
 */
int
spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
				     struct iovec *compare_iov, int compare_iovcnt,
				     struct iovec *write_iov, int write_iovcnt,
				     uint64_t offset_blocks, uint64_t num_blocks,
				     spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	struct spdk_bdev_io *bdev_io;

	if (!desc->write) {
		return -EBADF;
	}

	if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = bdev_channel_get_io(channel);
	if (bdev_io == NULL) {
		return -ENOMEM;
	}

	/* Request identity. */
	bdev_io->internal.ch = channel;
	bdev_io->internal.desc = desc;
	bdev_io->type = SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE;

	/* Block range. */
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	bdev_io->u.bdev.num_blocks = num_blocks;

	/* Compare payload goes in the primary iovecs, write payload in the fused ones. */
	bdev_io->u.bdev.iovs = compare_iov;
	bdev_io->u.bdev.iovcnt = compare_iovcnt;
	bdev_io->u.bdev.fused_iovs = write_iov;
	bdev_io->u.bdev.fused_iovcnt = write_iovcnt;
	bdev_io->u.bdev.md_buf = NULL;

	bdev_io_init(bdev_io, bdev, cb_arg, cb);

	if (!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE)) {
		/* No native support: emulate with a compare followed by a write. */
		bdev_compare_and_write_do_compare(bdev_io);
	} else {
		bdev_io_submit(bdev_io);
	}

	return 0;
}

static void
bdev_zcopy_get_buf(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
{
+117 −0
Original line number Diff line number Diff line
@@ -116,6 +116,10 @@ static enum spdk_bdev_io_status g_io_status;
static enum spdk_bdev_io_status g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
static uint32_t g_bdev_ut_io_device;
static struct bdev_ut_channel *g_bdev_ut_channel;
/* When non-NULL, stub_submit_request() copies this buffer into the first iovec
 * of every READ I/O it sees, simulating the data currently on the device.
 */
static void *g_compare_read_buf;
/* Length in bytes that the READ I/O's first iovec is asserted to have. */
static uint32_t g_compare_read_buf_len;
/* When non-NULL, stub_submit_request() copies the first iovec of every WRITE I/O
 * it sees into this buffer, capturing what was written.
 */
static void *g_compare_write_buf;
/* Length in bytes that the WRITE I/O's first iovec is asserted to have. */
static uint32_t g_compare_write_buf_len;

static struct ut_expected_io *
ut_alloc_expected_io(uint8_t type, uint64_t offset, uint64_t length, int iovcnt)
@@ -150,6 +154,22 @@ stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)

	g_bdev_io = bdev_io;

	if (g_compare_read_buf && bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		uint32_t len = bdev_io->u.bdev.iovs[0].iov_len;

		CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
		CU_ASSERT(g_compare_read_buf_len == len);
		memcpy(bdev_io->u.bdev.iovs[0].iov_base, g_compare_read_buf, len);
	}

	if (g_compare_write_buf && bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		uint32_t len = bdev_io->u.bdev.iovs[0].iov_len;

		CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
		CU_ASSERT(g_compare_write_buf_len == len);
		memcpy(g_compare_write_buf, bdev_io->u.bdev.iovs[0].iov_base, len);
	}

	TAILQ_INSERT_TAIL(&ch->outstanding_io, bdev_io, module_link);
	ch->outstanding_io_count++;

@@ -1917,6 +1937,7 @@ bdev_io_alignment(void)
	spdk_put_io_channel(io_ch);
	spdk_bdev_close(desc);
	free_bdev(bdev);
	fn_table.submit_request = stub_submit_request;
	spdk_bdev_finish(bdev_fini_cb, NULL);
	poll_threads();

@@ -2054,6 +2075,7 @@ bdev_io_alignment_with_boundary(void)
	spdk_put_io_channel(io_ch);
	spdk_bdev_close(desc);
	free_bdev(bdev);
	fn_table.submit_request = stub_submit_request;
	spdk_bdev_finish(bdev_fini_cb, NULL);
	poll_threads();

@@ -2168,6 +2190,100 @@ bdev_histograms(void)
	poll_threads();
}

/*
 * Unit test for spdk_bdev_comparev_and_writev_blocks().
 *
 * Two single-block requests are issued at the same offset:
 *   1. The data served for the READ (0xaa) matches the compare buffer, so a READ
 *      followed by a WRITE is expected and the written data must equal bb_buf.
 *   2. The data served for the READ (0xcc) differs from the compare buffer, so
 *      only a READ is expected and the request must complete with MISCOMPARE.
 */
static void
bdev_compare_and_write(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc = NULL;
	struct spdk_io_channel *ioch;
	struct ut_expected_io *expected_io;
	uint64_t offset, num_blocks;
	uint32_t num_completed;
	char aa_buf[512];
	char bb_buf[512];
	char cc_buf[512];
	char write_buf[512];
	struct iovec compare_iov;
	struct iovec write_iov;
	int rc;

	/* Distinct fill patterns make matches and mismatches unambiguous. */
	memset(aa_buf, 0xaa, sizeof(aa_buf));
	memset(bb_buf, 0xbb, sizeof(bb_buf));
	memset(cc_buf, 0xcc, sizeof(cc_buf));

	spdk_bdev_initialize(bdev_init_cb, NULL);
	fn_table.submit_request = stub_submit_request_get_buf;
	bdev = allocate_bdev("bdev");

	/* Open for writing; compare-and-write returns -EBADF on a read-only descriptor. */
	rc = spdk_bdev_open(bdev, true, NULL, NULL, &desc);
	CU_ASSERT_EQUAL(rc, 0);
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	ioch = spdk_bdev_get_io_channel(desc);
	SPDK_CU_ASSERT_FATAL(ioch != NULL);

	fn_table.submit_request = stub_submit_request_get_buf;
	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;

	/* Compare against 0xaa and, on a match, write 0xbb. */
	offset = 50;
	num_blocks = 1;
	compare_iov.iov_base = aa_buf;
	compare_iov.iov_len = sizeof(aa_buf);
	write_iov.iov_base = bb_buf;
	write_iov.iov_len = sizeof(bb_buf);

	/* Case 1: matching data. Expect a READ and then a WRITE to reach the module. */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset, num_blocks, 0);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, offset, num_blocks, 0);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	g_io_done = false;
	/* The READ is served 0xaa, which equals the compare buffer. */
	g_compare_read_buf = aa_buf;
	g_compare_read_buf_len = sizeof(aa_buf);
	/* Zero the capture buffer so a missing write cannot pass the memcmp below. */
	memset(write_buf, 0, sizeof(write_buf));
	g_compare_write_buf = write_buf;
	g_compare_write_buf_len = sizeof(write_buf);
	rc = spdk_bdev_comparev_and_writev_blocks(desc, ioch, &compare_iov, 1, &write_iov, 1,
			offset, num_blocks, io_done, NULL);
	CU_ASSERT_EQUAL(rc, 0);
	/* First completion is the READ; the overall request must still be pending. */
	num_completed = stub_complete_io(1);
	CU_ASSERT_EQUAL(num_completed, 1);
	CU_ASSERT(g_io_done == false);
	/* Second completion is the WRITE; the overall request is now done. */
	num_completed = stub_complete_io(1);
	CU_ASSERT_EQUAL(num_completed, 1);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
	CU_ASSERT(memcmp(write_buf, bb_buf, sizeof(write_buf)) == 0);

	/* Case 2: mismatching data. Only a READ is expected; no WRITE may be issued. */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset, num_blocks, 0);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	g_io_done = false;
	/* The READ is served 0xcc, which differs from the 0xaa compare buffer. */
	g_compare_read_buf = cc_buf;
	g_compare_read_buf_len = sizeof(cc_buf);
	memset(write_buf, 0, sizeof(write_buf));
	g_compare_write_buf = write_buf;
	g_compare_write_buf_len = sizeof(write_buf);
	rc = spdk_bdev_comparev_and_writev_blocks(desc, ioch, &compare_iov, 1, &write_iov, 1,
			offset, num_blocks, io_done, NULL);
	CU_ASSERT_EQUAL(rc, 0);
	/* Completing the READ finishes the whole request with MISCOMPARE. */
	num_completed = stub_complete_io(1);
	CU_ASSERT_EQUAL(num_completed, 1);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
	/* Nothing else should be outstanding, i.e. no WRITE was submitted. */
	num_completed = stub_complete_io(1);
	CU_ASSERT_EQUAL(num_completed, 0);

	spdk_put_io_channel(ioch);
	spdk_bdev_close(desc);
	free_bdev(bdev);
	/* Restore the default submit handler for subsequent tests. */
	fn_table.submit_request = stub_submit_request;
	spdk_bdev_finish(bdev_fini_cb, NULL);
	poll_threads();

	/* Disable the read/write hooks in stub_submit_request() for subsequent tests. */
	g_compare_read_buf = NULL;
	g_compare_write_buf = NULL;
}

static void
bdev_write_zeroes(void)
{
@@ -2921,6 +3037,7 @@ main(int argc, char **argv)
		CU_add_test(suite, "bdev_io_alignment", bdev_io_alignment) == NULL ||
		CU_add_test(suite, "bdev_histograms", bdev_histograms) == NULL ||
		CU_add_test(suite, "bdev_write_zeroes", bdev_write_zeroes) == NULL ||
		CU_add_test(suite, "bdev_compare_and_write", bdev_compare_and_write) == NULL ||
		CU_add_test(suite, "bdev_open_while_hotremove", bdev_open_while_hotremove) == NULL ||
		CU_add_test(suite, "bdev_close_while_hotremove", bdev_close_while_hotremove) == NULL ||
		CU_add_test(suite, "bdev_open_ext", bdev_open_ext) == NULL ||