Commit d14afd50 authored by Evgeniy Kochetov, committed by Tomasz Zawadzki
Browse files

bdev: Add copy IO type



Copy operation is defined by source and destination LBAs and LBA count
to copy. For destination LBA and LBA count we reuse existing fields
`offset_blocks` and `num_blocks` in `struct spdk_bdev_io`. For source
LBA new field `src_offset_blocks` was added.

The `spdk_bdev_get_max_copy()` function can be used to retrieve the maximum
possible unsplit copy size. A zero value means unlimited. It is allowed
to submit a larger copy size, but it will be split into several bdev IOs.

Signed-off-by: Evgeniy Kochetov <evgeniik@nvidia.com>
Change-Id: I2ad56294b6c062595c026ffcf9b435f0100d3d7e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14344


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Community-CI: Mellanox Build Bot
parent e28e2479
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -21,6 +21,8 @@ associated function pointers were added to iterate each channel of the required
The RPC `bdev_get_iostat` now allows a user to query the per channel IO statistics for
required bdev.

New `spdk_bdev_copy_blocks` and `spdk_bdev_get_max_copy` APIs to support copy commands.

## v22.09

### accel
+34 −0
Original line number Diff line number Diff line
@@ -119,6 +119,7 @@ enum spdk_bdev_io_type {
	SPDK_BDEV_IO_TYPE_ABORT,
	SPDK_BDEV_IO_TYPE_SEEK_HOLE,
	SPDK_BDEV_IO_TYPE_SEEK_DATA,
	SPDK_BDEV_IO_TYPE_COPY,
	SPDK_BDEV_NUM_IO_TYPES /* Keep last */
};

@@ -668,6 +669,14 @@ bool spdk_bdev_is_dif_head_of_md(const struct spdk_bdev *bdev);
bool spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev,
				    enum spdk_dif_check_type check_type);

/**
 * Get block device max copy size.
 *
 * This is the largest copy, in blocks, that is submitted to the block device
 * as a single I/O. Copy requests larger than this value may still be submitted;
 * they are split into several child I/Os.
 *
 * \param bdev Block device to query.
 * \return Max copy size for this bdev in blocks. 0 means unlimited.
 */
uint32_t spdk_bdev_get_max_copy(const struct spdk_bdev *bdev);

/**
 * Get the most recently measured queue depth from a bdev.
 *
@@ -1709,6 +1718,31 @@ int spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *bdev_desc,
				  void *buf, size_t nbytes, void *md_buf, size_t md_len,
				  spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
 * Submit a copy request to the block device.
 *
 * Source and destination ranges are both located on the block device referred
 * to by desc. If the block device reports a non-zero max copy size and
 * num_blocks exceeds it, the request is split into several child I/Os.
 *
 * \ingroup bdev_io_submit_functions
 *
 * \param desc Block device descriptor.
 * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
 * \param dst_offset_blocks The destination offset, in blocks, from the start of the block device.
 * \param src_offset_blocks The source offset, in blocks, from the start of the block device.
 * \param num_blocks The number of blocks to copy. Must not be 0.
 * \param cb Called when the request is complete.
 * \param cb_arg Argument passed to cb.
 *
 * \return 0 on success. On success, the callback will always
 * be called (even if the request ultimately failed). Return
 * negated errno on failure, in which case the callback will not be called.
 *   * -EINVAL - num_blocks is 0, or dst_offset_blocks, src_offset_blocks and/or
 *               num_blocks are out of range
 *   * -ENOMEM - spdk_bdev_io buffer cannot be allocated
 *   * -EBADF - desc not open for writing
 *   * -ENOTSUP - copy operation is not supported
 */
int spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			  uint64_t dst_offset_blocks, uint64_t src_offset_blocks,
			  uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
 * Free an I/O request. This should only be called after the completion callback
 * for the I/O has been called and notifies the bdev layer that memory may now
+8 −0
Original line number Diff line number Diff line
@@ -374,6 +374,9 @@ struct spdk_bdev {
	/* Maximum write zeroes in unit of logical block */
	uint32_t max_write_zeroes;

	/* Maximum copy size in unit of logical block */
	uint32_t max_copy;

	/**
	 * UUID for this bdev.
	 *
@@ -663,6 +666,11 @@ struct spdk_bdev_io {
				/** The offset of next data/hole.  */
				uint64_t offset;
			} seek;

			struct {
				/** Starting source offset (in blocks) of the bdev for copy I/O. */
				uint64_t src_offset_blocks;
			} copy;
		} bdev;
		struct {
			/** Channel reference held while messages for this reset are in progress. */
+2 −2
Original line number Diff line number Diff line
@@ -6,8 +6,8 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

SO_VER := 10
SO_MINOR := 1
SO_VER := 11
SO_MINOR := 0

ifeq ($(CONFIG_VTUNE),y)
CFLAGS += -I$(CONFIG_VTUNE_DIR)/include -I$(CONFIG_VTUNE_DIR)/sdk/src/ittnotify
+123 −1
Original line number Diff line number Diff line
@@ -56,6 +56,11 @@ int __itt_init_ittlib(const char *, __itt_group_id);
#define SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS (8)
#define BDEV_RESET_CHECK_OUTSTANDING_IO_PERIOD 1000000

/* The maximum number of child requests submitted at a time
 * when splitting a COPY command.
 */
#define SPDK_BDEV_MAX_CHILDREN_COPY_REQS (8)

static const char *qos_rpc_type[] = {"rw_ios_per_sec",
				     "rw_mbytes_per_sec", "r_mbytes_per_sec", "w_mbytes_per_sec"
				    };
@@ -2345,6 +2350,17 @@ bdev_write_zeroes_should_split(struct spdk_bdev_io *bdev_io)
	return false;
}

/*
 * Tell whether a COPY request is larger than the bdev's max_copy limit and
 * therefore has to be split into child requests.
 */
static bool
bdev_copy_should_split(struct spdk_bdev_io *bdev_io)
{
	const uint32_t copy_limit = bdev_io->bdev->max_copy;

	/* A limit of 0 means "unlimited": such a bdev never needs a split. */
	return copy_limit != 0 && bdev_io->u.bdev.num_blocks > copy_limit;
}

static bool
bdev_io_should_split(struct spdk_bdev_io *bdev_io)
{
@@ -2356,6 +2372,8 @@ bdev_io_should_split(struct spdk_bdev_io *bdev_io)
		return bdev_unmap_should_split(bdev_io);
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		return bdev_write_zeroes_should_split(bdev_io);
	case SPDK_BDEV_IO_TYPE_COPY:
		return bdev_copy_should_split(bdev_io);
	default:
		return false;
	}
@@ -2387,12 +2405,20 @@ _bdev_write_zeroes_split(void *_bdev_io)
	return bdev_write_zeroes_split((struct spdk_bdev_io *)_bdev_io);
}

static void bdev_copy_split(struct spdk_bdev_io *bdev_io);

/*
 * Untyped-argument wrapper around bdev_copy_split(). bdev_io_split_submit()
 * stores it in an spdk_bdev_io_wait_cb for COPY requests, so it has to take
 * a void pointer.
 *
 * \param _bdev_io The parent struct spdk_bdev_io being split.
 */
static void
_bdev_copy_split(void *_bdev_io)
{
	/* No "return <expr>;" here: ISO C forbids it in a function returning void. */
	bdev_copy_split((struct spdk_bdev_io *)_bdev_io);
}

static int
bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt, void *md_buf,
		     uint64_t num_blocks, uint64_t *offset, uint64_t *remaining)
{
	int rc;
	uint64_t current_offset, current_remaining;
	uint64_t current_offset, current_remaining, current_src_offset;
	spdk_bdev_io_wait_cb io_wait_fn;

	current_offset = *offset;
@@ -2432,6 +2458,15 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
						   current_offset, num_blocks,
						   bdev_io_split_done, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_COPY:
		io_wait_fn = _bdev_copy_split;
		current_src_offset = bdev_io->u.bdev.copy.src_offset_blocks +
				     (current_offset - bdev_io->u.bdev.offset_blocks);
		rc = spdk_bdev_copy_blocks(bdev_io->internal.desc,
					   spdk_io_channel_from_ctx(bdev_io->internal.ch),
					   current_offset, current_src_offset, num_blocks,
					   bdev_io_split_done, bdev_io);
		break;
	default:
		assert(false);
		rc = -EINVAL;
@@ -2655,6 +2690,30 @@ bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io)
	}
}

/*
 * Submit the next batch of child requests for a parent COPY I/O.
 *
 * Starting from the split progress recorded in the parent, at most
 * SPDK_BDEV_MAX_CHILDREN_COPY_REQS children are submitted per call, each
 * covering no more than the bdev's max_copy blocks. bdev_io_split_done()
 * calls this function again for COPY parents.
 *
 * NOTE(review): the loop relies on bdev_io_split_submit() advancing the
 * offset and remaining counters it is handed, and on it dealing with its own
 * failures - confirm against that function.
 */
static void
bdev_copy_split(struct spdk_bdev_io *bdev_io)
{
	uint64_t cur_offset = bdev_io->u.bdev.split_current_offset_blocks;
	uint64_t blocks_left = bdev_io->u.bdev.split_remaining_num_blocks;
	uint64_t chunk_blocks;
	uint32_t submitted;
	int rc;

	/* Splitting only makes sense for a bdev with a finite copy limit. */
	assert(bdev_io->bdev->max_copy != 0);

	for (submitted = 0;
	     blocks_left && (submitted < SPDK_BDEV_MAX_CHILDREN_COPY_REQS);
	     submitted++) {
		chunk_blocks = spdk_min(blocks_left, bdev_io->bdev->max_copy);

		rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, chunk_blocks,
					  &cur_offset, &blocks_left);
		/* Stop at the first child that could not be submitted. */
		if (!spdk_likely(rc == 0)) {
			return;
		}
	}
}

static void
parent_bdev_io_complete(void *ctx, int rc)
{
@@ -2718,6 +2777,9 @@ bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		bdev_write_zeroes_split(parent_io);
		break;
	case SPDK_BDEV_IO_TYPE_COPY:
		bdev_copy_split(parent_io);
		break;
	default:
		assert(false);
		break;
@@ -2752,6 +2814,9 @@ bdev_io_split(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		bdev_write_zeroes_split(bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_COPY:
		bdev_copy_split(bdev_io);
		break;
	default:
		assert(false);
		break;
@@ -2845,6 +2910,7 @@ bdev_io_range_is_locked(struct spdk_bdev_io *bdev_io, struct lba_range *range)
	case SPDK_BDEV_IO_TYPE_UNMAP:
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
	case SPDK_BDEV_IO_TYPE_ZCOPY:
	case SPDK_BDEV_IO_TYPE_COPY:
		r.offset = bdev_io->u.bdev.offset_blocks;
		r.length = bdev_io->u.bdev.num_blocks;
		if (!bdev_lba_range_overlapped(range, &r)) {
@@ -3960,6 +4026,12 @@ spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev,
	}
}

/* Report the bdev's maximum copy size in logical blocks, as stored in max_copy. */
uint32_t
spdk_bdev_get_max_copy(const struct spdk_bdev *bdev)
{
	const uint32_t max_copy_blocks = bdev->max_copy;

	return max_copy_blocks;
}

uint64_t
spdk_bdev_get_qd(const struct spdk_bdev *bdev)
{
@@ -8100,6 +8172,56 @@ spdk_bdev_for_each_channel(struct spdk_bdev *bdev, spdk_bdev_for_each_channel_ms
			      iter, bdev_each_channel_cpl);
}

/*
 * Build and submit a COPY I/O that copies num_blocks blocks from
 * src_offset_blocks to dst_offset_blocks on the bdev behind desc.
 *
 * The destination range is stored in the generic offset_blocks/num_blocks
 * fields of the bdev_io; the source offset goes into copy.src_offset_blocks.
 *
 * Returns 0 once the I/O has been submitted, or a negated errno:
 *   -EBADF   desc is not open for writing
 *   -ENOTSUP the bdev does not support the COPY I/O type
 *   -EINVAL  num_blocks is 0, or the source or destination range is invalid
 *   -ENOMEM  no spdk_bdev_io could be obtained from the channel
 */
int
spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		      uint64_t dst_offset_blocks, uint64_t src_offset_blocks, uint64_t num_blocks,
		      spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	/* A copy modifies the destination range, so write access is required. */
	if (!desc->write) {
		return -EBADF;
	}

	if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY))) {
		SPDK_DEBUGLOG(bdev, "Copy IO type is not supported\n");
		return -ENOTSUP;
	}

	if (num_blocks == 0) {
		SPDK_ERRLOG("Can't copy 0 blocks\n");
		return -EINVAL;
	}

	/* Both the destination and the source range must be valid for this bdev. */
	if (!bdev_io_valid_blocks(bdev, dst_offset_blocks, num_blocks) ||
	    !bdev_io_valid_blocks(bdev, src_offset_blocks, num_blocks)) {
		/* uint64_t is not unsigned long on every platform; cast so that the
		 * arguments always match the conversion specifiers.
		 */
		SPDK_DEBUGLOG(bdev,
			      "Invalid offset or number of blocks: dst %llu, src %llu, count %llu\n",
			      (unsigned long long)dst_offset_blocks,
			      (unsigned long long)src_offset_blocks,
			      (unsigned long long)num_blocks);
		return -EINVAL;
	}

	bdev_io = bdev_channel_get_io(channel);
	if (!bdev_io) {
		return -ENOMEM;
	}

	bdev_io->internal.ch = channel;
	bdev_io->internal.desc = desc;
	bdev_io->type = SPDK_BDEV_IO_TYPE_COPY;

	bdev_io->u.bdev.offset_blocks = dst_offset_blocks;
	bdev_io->u.bdev.copy.src_offset_blocks = src_offset_blocks;
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.ext_opts = NULL;
	bdev_io_init(bdev_io, bdev, cb_arg, cb);

	bdev_io_submit(bdev_io);
	return 0;
}

SPDK_LOG_REGISTER_COMPONENT(bdev)

SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV)
Loading