Commit ac6f2bdd authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Tomasz Zawadzki
Browse files

bdev: Add extended versions of readv/writev



New functions accept extendable structure of IO options

Change-Id: If6864df151a3c0ad722785cb26d1f5d4309cd733
Signed-off-by: default avatarAlexey Marchuk <alexeymar@mellanox.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6269


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: default avatarZiye Yang <ziye.yang@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
parent c3a58489
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -6,6 +6,11 @@

New API `spdk_bdev_get_memory_domains` has been added; it allows getting the SPDK memory domains used by a bdev.

New API functions `spdk_bdev_readv_blocks_ext` and `spdk_bdev_writev_blocks_ext` have been added.
These functions accept an `spdk_bdev_ext_io_opts` structure with extended IO request
options, e.g. a DMA memory domain that describes data which may belong to another memory domain and
can't be accessed directly.

### dma

A new library, lib/dma, has been added. This library provides the necessary infrastructure for
+87 −3
Original line number Diff line number Diff line
@@ -81,9 +81,6 @@ struct spdk_bdev_media_event {
 */
struct spdk_bdev;

/** Forward declaration of spdk memory domain */
struct spdk_memory_domain;

/**
 * Block device remove callback.
 *
@@ -203,6 +200,24 @@ struct spdk_bdev_opts {
	uint32_t large_buf_pool_size;
};

/**
 * Structure with optional IO request parameters.
 * The content of this structure must be valid until the IO request is completed.
 */
struct spdk_bdev_ext_io_opts {
	/** Size of this structure in bytes */
	size_t size;
	/** Memory domain which describes payload in this IO request. bdev must support DMA device type that
	 * can access this memory domain, refer to \ref spdk_bdev_get_memory_domains and
	 * \ref spdk_memory_domain_get_dma_device_type.
	 * If set, that means that data buffers can't be accessed directly and the memory domain must
	 * be used to fetch data to local buffers or to translate data to another memory domain */
	struct spdk_memory_domain *memory_domain;
	/** Context to be passed to memory domain operations */
	void *memory_domain_ctx;
	/** Metadata buffer, optional */
	void *metadata;
};

/**
 * Get the options for the bdev module.
 *
@@ -900,6 +915,40 @@ int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_c
				   uint64_t offset_blocks, uint64_t num_blocks,
				   spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
 * Submit a read request to the bdev on the given channel. This differs from
 * spdk_bdev_read by allowing the data buffer to be described in a scatter
 * gather list. Some physical devices place memory alignment requirements on
 * data or metadata and may not be able to directly transfer into the buffers
 * provided. In this case, the request may fail. A separate buffer for metadata
 * transfer may be passed in opts->metadata (valid only if bdev supports this mode).
 *
 * \ingroup bdev_io_submit_functions
 *
 * \param desc Block device descriptor.
 * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
 * \param iov A scatter gather list of buffers to be read into.
 * \param iovcnt The number of elements in iov.
 * \param offset_blocks The offset, in blocks, from the start of the block device.
 * \param num_blocks The number of blocks to read.
 * \param cb Called when the request is complete.
 * \param cb_arg Argument passed to cb.
 * \param opts Optional structure with extended IO request options. If set, this structure must be
 * valid until the IO is completed. The optional metadata buffer is taken from opts->metadata.
 *
 * \return 0 on success. On success, the callback will always
 * be called (even if the request ultimately failed). Return
 * negated errno on failure, in which case the callback will not be called.
 *   * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
 *               metadata is not supported or opts_size is incorrect
 *   * -ENOMEM - spdk_bdev_io buffer cannot be allocated
 */
int spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			       struct iovec *iov, int iovcnt, uint64_t offset_blocks,
			       uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
			       struct spdk_bdev_ext_io_opts *opts);

/**
 * Submit a write request to the bdev on the given channel.
 *
@@ -1069,6 +1118,41 @@ int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_
				    uint64_t offset_blocks, uint64_t num_blocks,
				    spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
 * Submit a write request to the bdev on the given channel. This differs from
 * spdk_bdev_write by allowing the data buffer to be described in a scatter
 * gather list. Some physical devices place memory alignment requirements on
 * data or metadata and may not be able to directly transfer out of the buffers
 * provided. In this case, the request may fail. A separate buffer for metadata
 * transfer may be passed in opts->metadata (valid only if bdev supports this mode).
 *
 * \ingroup bdev_io_submit_functions
 *
 * \param desc Block device descriptor.
 * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
 * \param iov A scatter gather list of buffers to be written from.
 * \param iovcnt The number of elements in iov.
 * \param offset_blocks The offset, in blocks, from the start of the block device.
 * \param num_blocks The number of blocks to write.
 * \param cb Called when the request is complete.
 * \param cb_arg Argument passed to cb.
 * \param opts Optional structure with extended IO request options. If set, this structure must be
 * valid until the IO is completed. The optional metadata buffer is taken from opts->metadata.
 *
 * \return 0 on success. On success, the callback will always
 * be called (even if the request ultimately failed). Return
 * negated errno on failure, in which case the callback will not be called.
 *   * -EINVAL - offset_blocks and/or num_blocks are out of range or separate
 *               metadata is not supported or opts_size is incorrect
 *   * -ENOMEM - spdk_bdev_io buffer cannot be allocated
 *   * -EBADF - desc not open for writing
 */
int spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
				struct iovec *iov, int iovcnt, uint64_t offset_blocks,
				uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
				struct spdk_bdev_ext_io_opts *opts);

/**
 * Submit a compare request to the bdev on the given channel.
 *
+3 −0
Original line number Diff line number Diff line
@@ -750,6 +750,9 @@ struct spdk_bdev_io {

		/** Enables queuing parent I/O when no bdev_ios available for split children. */
		struct spdk_bdev_io_wait_entry waitq_entry;

		/** Pointer to a structure passed by the user in ext API */
		struct spdk_bdev_ext_io_opts *ext_opts;
	} internal;

	/**
+69 −10
Original line number Diff line number Diff line
@@ -364,12 +364,14 @@ static void bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status);
static int
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			  struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
			  uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg);
			  uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
			  struct spdk_bdev_ext_io_opts *opts);
static int
bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   struct iovec *iov, int iovcnt, void *md_buf,
			   uint64_t offset_blocks, uint64_t num_blocks,
			   spdk_bdev_io_completion_cb cb, void *cb_arg);
			   spdk_bdev_io_completion_cb cb, void *cb_arg,
			   struct spdk_bdev_ext_io_opts *opts);

static int
bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
@@ -2119,14 +2121,16 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
					       spdk_io_channel_from_ctx(bdev_io->internal.ch),
					       iov, iovcnt, md_buf, current_offset,
					       num_blocks,
					       bdev_io_split_done, bdev_io);
					       bdev_io_split_done, bdev_io,
					       bdev_io->internal.ext_opts);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		rc = bdev_writev_blocks_with_md(bdev_io->internal.desc,
						spdk_io_channel_from_ctx(bdev_io->internal.ch),
						iov, iovcnt, md_buf, current_offset,
						num_blocks,
						bdev_io_split_done, bdev_io);
						bdev_io_split_done, bdev_io,
						bdev_io->internal.ext_opts);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		io_wait_fn = _bdev_unmap_split;
@@ -2624,6 +2628,7 @@ bdev_io_init(struct spdk_bdev_io *bdev_io,
	bdev_io->num_retries = 0;
	bdev_io->internal.get_buf_cb = NULL;
	bdev_io->internal.get_aux_buf_cb = NULL;
	bdev_io->internal.ext_opts = NULL;
}

static bool
@@ -3834,7 +3839,8 @@ spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
static int
bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			  struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
			  uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg)
			  uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
			  struct spdk_bdev_ext_io_opts *opts)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *bdev_io;
@@ -3858,6 +3864,7 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	bdev_io_init(bdev_io, bdev, cb_arg, cb);
	bdev_io->internal.ext_opts = opts;

	bdev_io_submit(bdev_io);
	return 0;
@@ -3869,7 +3876,7 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *
			   spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
					 num_blocks, cb, cb_arg);
					 num_blocks, cb, cb_arg, NULL);
}

int
@@ -3887,7 +3894,32 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann
	}

	return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
					 num_blocks, cb, cb_arg);
					 num_blocks, cb, cb_arg, NULL);
}

/*
 * Extended vectored read. The optional metadata buffer is taken from
 * opts->metadata; when one is present it is validated before the request is
 * handed to bdev_readv_blocks_with_md() together with the opts pointer.
 * Returns -EINVAL if a metadata buffer is supplied but the bdev does not use
 * separate metadata, or if _bdev_io_check_md_buf() rejects the buffer.
 */
int
spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   struct iovec *iov, int iovcnt,
			   uint64_t offset_blocks, uint64_t num_blocks,
			   spdk_bdev_io_completion_cb cb, void *cb_arg,
			   struct spdk_bdev_ext_io_opts *opts)
{
	/* Without opts there is no way to pass metadata, so treat it as absent. */
	void *md_buf = (opts != NULL) ? opts->metadata : NULL;

	if (md_buf != NULL) {
		if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
			return -EINVAL;
		}

		if (!_bdev_io_check_md_buf(iov, md_buf)) {
			return -EINVAL;
		}
	}

	return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf,
					 offset_blocks, num_blocks,
					 cb, cb_arg, opts);
}

static int
@@ -3977,7 +4009,8 @@ static int
bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   struct iovec *iov, int iovcnt, void *md_buf,
			   uint64_t offset_blocks, uint64_t num_blocks,
			   spdk_bdev_io_completion_cb cb, void *cb_arg)
			   spdk_bdev_io_completion_cb cb, void *cb_arg,
			   struct spdk_bdev_ext_io_opts *opts)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *bdev_io;
@@ -4005,6 +4038,7 @@ bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	bdev_io_init(bdev_io, bdev, cb_arg, cb);
	bdev_io->internal.ext_opts = opts;

	bdev_io_submit(bdev_io);
	return 0;
@@ -4033,7 +4067,7 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
					  num_blocks, cb, cb_arg);
					  num_blocks, cb, cb_arg, NULL);
}

int
@@ -4051,7 +4085,32 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan
	}

	return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
					  num_blocks, cb, cb_arg);
					  num_blocks, cb, cb_arg, NULL);
}

/*
 * Extended vectored write. The optional metadata buffer is taken from
 * opts->metadata; when one is present it is validated before the request is
 * handed to bdev_writev_blocks_with_md() together with the opts pointer.
 * Returns -EINVAL if a metadata buffer is supplied but the bdev does not use
 * separate metadata, or if _bdev_io_check_md_buf() rejects the buffer.
 */
int
spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			    struct iovec *iov, int iovcnt,
			    uint64_t offset_blocks, uint64_t num_blocks,
			    spdk_bdev_io_completion_cb cb, void *cb_arg,
			    struct spdk_bdev_ext_io_opts *opts)
{
	/* Without opts there is no way to pass metadata, so treat it as absent. */
	void *md_buf = (opts != NULL) ? opts->metadata : NULL;

	if (md_buf != NULL) {
		if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
			return -EINVAL;
		}

		if (!_bdev_io_check_md_buf(iov, md_buf)) {
			return -EINVAL;
		}
	}

	return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf,
					  offset_blocks, num_blocks,
					  cb, cb_arg, opts);
}

static void
+2 −0
Original line number Diff line number Diff line
@@ -95,6 +95,8 @@
	spdk_bdev_histogram_get;
	spdk_bdev_get_media_events;
	spdk_bdev_get_memory_domains;
	spdk_bdev_readv_blocks_ext;
	spdk_bdev_writev_blocks_ext;

	# Public functions in bdev_module.h
	spdk_bdev_register;
Loading