Commit 04a428f5 authored by Karl Bonde Torp's avatar Karl Bonde Torp Committed by Tomasz Zawadzki
Browse files

nvme: add iovec passthru



This is used for sending big passthru commands, like Report Zones, over nvmf.

Change-Id: I83188367e0266e093faadd49cdb2e051eae71829
Signed-off-by: default avatarKarl Bonde Torp <k.torp@samsung.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/20498


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarBen Walker <ben@nvidia.com>
parent d736ac40
Loading
Loading
Loading
Loading
+34 −0
Original line number Diff line number Diff line
@@ -121,6 +121,7 @@ enum spdk_bdev_io_type {
	SPDK_BDEV_IO_TYPE_SEEK_HOLE,
	SPDK_BDEV_IO_TYPE_SEEK_DATA,
	SPDK_BDEV_IO_TYPE_COPY,
	SPDK_BDEV_IO_TYPE_NVME_IOV_MD,
	SPDK_BDEV_NUM_IO_TYPES /* Keep last */
};

@@ -1791,6 +1792,39 @@ int spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *bdev_desc,
				  void *buf, size_t nbytes, void *md_buf, size_t md_len,
				  spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
 * Submit an NVMe I/O command to the bdev. This passes directly through
 * the block layer to the device. Support for NVMe passthru is optional,
 * indicated by calling spdk_bdev_io_type_supported().
 *
 * \ingroup bdev_io_submit_functions
 *
 * The namespace id (nsid) will be populated automatically.
 *
 * \param desc Block device descriptor
 * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel().
 * \param cmd The raw NVMe command. Must be in the NVM command set.
 * \param iov A scatter gather list of buffers for the command to use.
 * \param iovcnt The number of elements in iov.
 * \param nbytes The number of bytes to transfer. The total size of the buffers in iov must be greater than or equal to this size.
 * \param md_buf Metadata buffer to transfer from. May be NULL if the command carries no metadata.
 * \param md_len Number of metadata bytes to transfer. md_buf must be at least this large.
 * \param cb Called when the request is complete.
 * \param cb_arg Argument passed to cb.
 *
 * \return 0 on success. On success, the callback will always
 * be called (even if the request ultimately failed). Return
 * negated errno on failure, in which case the callback will not be called.
 *   * -ENOMEM - spdk_bdev_io buffer cannot be allocated
 *   * -EBADF - desc not open for writing
 */
int spdk_bdev_nvme_iov_passthru_md(struct spdk_bdev_desc *desc,
				   struct spdk_io_channel *ch,
				   const struct spdk_nvme_cmd *cmd,
				   struct iovec *iov, int iovcnt,
				   size_t nbytes, void *md_buf, size_t md_len,
				   spdk_bdev_io_completion_cb cb, void *cb_arg);

/**
 * Submit a copy request to the block device.
 *
+6 −0
Original line number Diff line number Diff line
@@ -873,6 +873,12 @@ struct spdk_bdev_io {
			/* The NVMe command to execute */
			struct spdk_nvme_cmd cmd;

			/* For SG buffer cases, array of iovecs to transfer. */
			struct iovec *iovs;

			/* For SG buffer cases, number of iovecs in iovec array. */
			int iovcnt;

			/* The data buffer to transfer */
			void *buf;

+57 −21
Original line number Diff line number Diff line
@@ -1851,6 +1851,63 @@ int spdk_nvme_ctrlr_cmd_io_raw_with_md(struct spdk_nvme_ctrlr *ctrlr,
				       void *buf, uint32_t len, void *md_buf,
				       spdk_nvme_cmd_cb cb_fn, void *cb_arg);

/**
 * Restart the SGL walk to the specified offset when the command has scattered
 * payloads.
 *
 * \param cb_arg Argument passed to readv/writev.
 * \param offset Offset for SGL (presumably a byte offset into the scattered
 * payload from which the walk resumes — confirm against the transport callers).
 */
typedef void (*spdk_nvme_req_reset_sgl_cb)(void *cb_arg, uint32_t offset);

/**
 * Fill out *address and *length with the current SGL entry and advance to the
 * next entry for the next time the callback is invoked.
 *
 * The described segment must be physically contiguous.
 *
 * \param cb_arg Argument passed to readv/writev.
 * \param address Virtual address of this segment, a value of UINT64_MAX
 * means the segment should be described via Bit Bucket SGL.
 * \param length Length of this physical segment.
 *
 * NOTE(review): the int return value is undocumented here; by convention in
 * this API 0 appears to indicate success — confirm with the driver sources.
 */
typedef int (*spdk_nvme_req_next_sge_cb)(void *cb_arg, void **address,
		uint32_t *length);

/**
 * Send the given NVM I/O command with metadata to the NVMe controller.
 *
 * This is a low level interface for submitting I/O commands directly. Prefer
 * the spdk_nvme_ns_cmd_* functions instead. The validity of the command will
 * not be checked!
 *
 * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
 * The user must ensure that only one thread submits I/O on a given qpair at any
 * given time.
 *
 * \param ctrlr Opaque handle to NVMe controller.
 * \param qpair I/O qpair to submit command.
 * \param cmd NVM I/O command to submit.
 * \param len Size of the payload described by the SGL callbacks, in bytes.
 * \param md_buf Virtual memory address of a single physically contiguous metadata buffer.
 * \param cb_fn Callback function invoked when the I/O command completes.
 * \param cb_arg Argument passed to callback function.
 * \param reset_sgl_fn Callback function to reset scattered payload.
 * \param next_sge_fn Callback function to iterate each scattered payload memory segment.
 *
 * \return 0 if successfully submitted, negated errnos on the following error
 * conditions:
 * -ENOMEM: The request cannot be allocated.
 * -ENXIO: The qpair is failed at the transport level.
 */
int spdk_nvme_ctrlr_cmd_iov_raw_with_md(struct spdk_nvme_ctrlr *ctrlr,
					struct spdk_nvme_qpair *qpair,
					struct spdk_nvme_cmd *cmd, uint32_t len,
					void *md_buf, spdk_nvme_cmd_cb cb_fn,
					void *cb_arg,
					spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
					spdk_nvme_req_next_sge_cb next_sge_fn);

/**
 * Process any outstanding completions for I/O submitted on a queue pair.
 *
@@ -3054,27 +3111,6 @@ uint32_t spdk_nvme_ns_get_ana_group_id(const struct spdk_nvme_ns *ns);
 */
enum spdk_nvme_ana_state spdk_nvme_ns_get_ana_state(const struct spdk_nvme_ns *ns);

/**
 * Restart the SGL walk to the specified offset when the command has scattered payloads.
 *
 * \param cb_arg Argument passed to readv/writev.
 * \param offset Offset for SGL (presumably a byte offset into the scattered
 * payload from which the walk resumes — confirm against the transport callers).
 */
typedef void (*spdk_nvme_req_reset_sgl_cb)(void *cb_arg, uint32_t offset);

/**
 * Fill out *address and *length with the current SGL entry and advance to the next
 * entry for the next time the callback is invoked.
 *
 * The described segment must be physically contiguous.
 *
 * \param cb_arg Argument passed to readv/writev.
 * \param address Virtual address of this segment, a value of UINT64_MAX
 * means the segment should be described via Bit Bucket SGL.
 * \param length Length of this physical segment.
 */
typedef int (*spdk_nvme_req_next_sge_cb)(void *cb_arg, void **address, uint32_t *length);

/**
 * Submit a write I/O to the specified NVMe namespace.
 *
+48 −0
Original line number Diff line number Diff line
@@ -6690,6 +6690,54 @@ spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channe
	return 0;
}

int
spdk_bdev_nvme_iov_passthru_md(struct spdk_bdev_desc *desc,
			       struct spdk_io_channel *ch,
			       const struct spdk_nvme_cmd *cmd,
			       struct iovec *iov, int iovcnt, size_t nbytes,
			       void *md_buf, size_t md_len,
			       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(ch);
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *io;
	enum spdk_bdev_io_type required;

	/*
	 * Do not try to parse the NVMe command - we could maybe use bits in the opcode
	 * to easily determine if the command is a read or write, but for now just
	 * do not allow io_passthru with a read-only descriptor.
	 */
	if (!desc->write) {
		return -EBADF;
	}

	/* With metadata the bdev must support MD passthru; otherwise plain passthru suffices. */
	required = (md_buf != NULL) ? SPDK_BDEV_IO_TYPE_NVME_IO_MD : SPDK_BDEV_IO_TYPE_NVME_IO;
	if (spdk_unlikely(!bdev_io_type_supported(bdev, required))) {
		return -ENOTSUP;
	}

	io = bdev_channel_get_io(bdev_ch);
	if (io == NULL) {
		return -ENOMEM;
	}

	/* Fill in the passthru request: raw command plus the SG payload and metadata. */
	io->internal.ch = bdev_ch;
	io->internal.desc = desc;
	io->type = SPDK_BDEV_IO_TYPE_NVME_IOV_MD;
	io->u.nvme_passthru.cmd = *cmd;
	io->u.nvme_passthru.iovs = iov;
	io->u.nvme_passthru.iovcnt = iovcnt;
	io->u.nvme_passthru.nbytes = nbytes;
	io->u.nvme_passthru.md_buf = md_buf;
	io->u.nvme_passthru.md_len = md_len;

	bdev_io_init(io, bdev, cb_arg, cb);
	bdev_io_submit(io);

	return 0;
}

static void bdev_abort_retry(void *ctx);
static void bdev_abort(struct spdk_bdev_io *parent_io);

+1 −0
Original line number Diff line number Diff line
@@ -86,6 +86,7 @@
	spdk_bdev_nvme_admin_passthru;
	spdk_bdev_nvme_io_passthru;
	spdk_bdev_nvme_io_passthru_md;
	spdk_bdev_nvme_iov_passthru_md;
	spdk_bdev_free_io;
	spdk_bdev_queue_io_wait;
	spdk_bdev_get_io_stat;
Loading