Commit 4f1a0b26 authored by Jacek Kalwas's avatar Jacek Kalwas Committed by Jim Harris
Browse files

nvme: add contig version of rd/wr with ext opts



The flow is simplified for the case when there is only a single iovec (iovcnt == 1).

In the past, some measurements showed better results for the contig
vs the sgl version (w/o ext opts being used).

Change-Id: I5315703b2814e1f61bdf7b991d6a82853f27ec22
Signed-off-by: default avatarJacek Kalwas <jacek.kalwas@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/21913


Community-CI: Mellanox Build Bot
Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
parent d7886cf3
Loading
Loading
Loading
Loading
+55 −0
Original line number Diff line number Diff line
@@ -3245,6 +3245,34 @@ int spdk_nvme_ns_cmd_writev_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair
				spdk_nvme_req_next_sge_cb next_sge_fn,
				struct spdk_nvme_ns_cmd_ext_io_opts *opts);

/**
 * Submit a write I/O to the specified NVMe namespace.
 *
 * Contiguous-buffer counterpart of spdk_nvme_ns_cmd_writev_ext(): the payload is a
 * single virtually contiguous buffer instead of an SGL described by callbacks.
 *
 * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
 * The user must ensure that only one thread submits I/O on a given qpair at any
 * given time.
 *
 * \param ns NVMe namespace to submit the write I/O.
 * \param qpair I/O queue pair to submit the request.
 * \param payload Virtual address pointer to the contiguous data payload.
 * \param lba Starting LBA to write the data.
 * \param lba_count Length (in sectors) for the write operation.
 * \param cb_fn Callback function to invoke when the I/O is completed.
 * \param cb_arg Argument to pass to the callback function.
 * \param opts Structure with extended IO request options (io_flags, metadata,
 * apptag, accel sequence, ...). The caller must guarantee that this structure
 * remains accessible until the IO completes.
 *
 * \return 0 if successfully submitted, negated errnos on the following error conditions:
 * -EINVAL: The request is malformed.
 * -ENOMEM: The request cannot be allocated.
 * -ENXIO: The qpair is failed at the transport level.
 * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
 *          with error status including dnr=1 in this case.
 */
int spdk_nvme_ns_cmd_write_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
			       void *payload, uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
			       struct spdk_nvme_ns_cmd_ext_io_opts *opts);

/**
 * Submit a write I/O to the specified NVMe namespace.
 *
@@ -3470,6 +3498,33 @@ int spdk_nvme_ns_cmd_readv_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *
			       spdk_nvme_req_next_sge_cb next_sge_fn,
			       struct spdk_nvme_ns_cmd_ext_io_opts *opts);

/**
 * Submit a read I/O to the specified NVMe namespace.
 *
 * Contiguous-buffer counterpart of spdk_nvme_ns_cmd_readv_ext(): the payload is a
 * single virtually contiguous buffer instead of an SGL described by callbacks.
 *
 * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair().
 * The user must ensure that only one thread submits I/O on a given qpair at any given time.
 *
 * \param ns NVMe namespace to submit the read I/O.
 * \param qpair I/O queue pair to submit the request.
 * \param payload Virtual address pointer to the contiguous data payload.
 * \param lba Starting LBA to read the data.
 * \param lba_count Length (in sectors) for the read operation.
 * \param cb_fn Callback function to invoke when the I/O is completed.
 * \param cb_arg Argument to pass to the callback function.
 * \param opts Structure with extended IO request options (io_flags, metadata,
 * apptag, accel sequence, ...). The caller must guarantee that this structure
 * remains accessible until the IO completes.
 *
 * \return 0 if successfully submitted, negated errnos on the following error conditions:
 * -EINVAL: The request is malformed.
 * -ENOMEM: The request cannot be allocated.
 * -ENXIO: The qpair is failed at the transport level.
 * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called
 *          with error status including dnr=1 in this case.
 */
int spdk_nvme_ns_cmd_read_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *payload,
			      uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
			      struct spdk_nvme_ns_cmd_ext_io_opts *opts);

/**
 * Submits a read I/O to the specified NVMe namespace.
 *
+59 −0
Original line number Diff line number Diff line
@@ -689,6 +689,55 @@ spdk_nvme_ns_cmd_read_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *q
	}
}

/* Shared submission path for the contiguous-buffer read/write ext variants.
 * Validates the extended options, builds a contig payload and hands the
 * request off to the qpair. Returns 0 on success or a negated errno.
 */
static int
nvme_ns_cmd_rw_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer,
		   uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
		   struct spdk_nvme_ns_cmd_ext_io_opts *opts, enum spdk_nvme_nvm_opcode opc)
{
	struct nvme_request *request;
	struct nvme_payload contig_payload;
	void *accel_seq;
	int sub_rc = 0;

	assert(opc == SPDK_NVME_OPC_READ || opc == SPDK_NVME_OPC_WRITE);
	assert(opts);

	/* Contiguous-buffer payload; the metadata pointer comes from the ext opts. */
	contig_payload = NVME_PAYLOAD_CONTIG(buffer, opts->metadata);

	if (spdk_unlikely(!_is_io_flags_valid(opts->io_flags))) {
		return -EINVAL;
	}

	/* accel_sequence is an optional field of the ext opts; default to NULL. */
	accel_seq = nvme_ns_cmd_get_ext_io_opt(opts, accel_sequence, NULL);
	if (spdk_unlikely(!_is_accel_sequence_valid(qpair, accel_seq))) {
		return -EINVAL;
	}

	contig_payload.opts = opts;

	request = _nvme_ns_cmd_rw(ns, qpair, &contig_payload, 0, 0, lba, lba_count, cb_fn, cb_arg,
				  opc, opts->io_flags, opts->apptag_mask, opts->apptag, 0, false,
				  accel_seq, &sub_rc);
	if (spdk_unlikely(request == NULL)) {
		/* Allocation failed - translate into the appropriate negated errno. */
		return nvme_ns_map_failure_rc(lba_count,
					      ns->sectors_per_max_io,
					      ns->sectors_per_stripe,
					      qpair->ctrlr->opts.io_queue_requests,
					      sub_rc);
	}

	return nvme_qpair_submit_request(qpair, request);
}

/* Contiguous-buffer read with extended IO options: thin wrapper over the
 * shared read/write submission helper.
 */
int
spdk_nvme_ns_cmd_read_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer,
			  uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
			  struct spdk_nvme_ns_cmd_ext_io_opts *opts)
{
	return nvme_ns_cmd_rw_ext(ns, qpair, buffer, lba, lba_count, cb_fn, cb_arg,
				  opts, SPDK_NVME_OPC_READ);
}

int
spdk_nvme_ns_cmd_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
		       uint64_t lba, uint32_t lba_count,
@@ -1002,6 +1051,16 @@ spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *
	}
}

/* Contiguous-buffer write with extended IO options: thin wrapper over the
 * shared read/write submission helper.
 */
int
spdk_nvme_ns_cmd_write_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer,
			   uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
			   struct spdk_nvme_ns_cmd_ext_io_opts *opts)
{
	return nvme_ns_cmd_rw_ext(ns, qpair, buffer, lba, lba_count, cb_fn, cb_arg,
				  opts, SPDK_NVME_OPC_WRITE);
}

int
spdk_nvme_ns_cmd_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
			uint64_t lba, uint32_t lba_count,
+2 −0
Original line number Diff line number Diff line
@@ -173,7 +173,9 @@
	spdk_nvme_ns_cmd_comparev_with_md;
	spdk_nvme_ns_cmd_compare_with_md;
	spdk_nvme_ns_cmd_writev_ext;
	spdk_nvme_ns_cmd_write_ext;
	spdk_nvme_ns_cmd_readv_ext;
	spdk_nvme_ns_cmd_read_ext;
	spdk_nvme_ns_cmd_verify;

	spdk_nvme_qpair_get_optimal_poll_group;
+20 −10
Original line number Diff line number Diff line
@@ -7525,11 +7525,16 @@ bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
		bio->ext_opts.metadata = md;
		bio->ext_opts.accel_sequence = seq;

		if (iovcnt == 1) {
			rc = spdk_nvme_ns_cmd_read_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_readv_done,
						       bio, &bio->ext_opts);
		} else {
			rc = spdk_nvme_ns_cmd_readv_ext(ns, qpair, lba, lba_count,
							bdev_nvme_readv_done, bio,
							bdev_nvme_queued_reset_sgl,
							bdev_nvme_queued_next_sge,
							&bio->ext_opts);
		}
	} else if (iovcnt == 1) {
		rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base,
						   md, lba, lba_count, bdev_nvme_readv_done,
@@ -7573,11 +7578,16 @@ bdev_nvme_writev(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
		bio->ext_opts.metadata = md;
		bio->ext_opts.accel_sequence = seq;

		if (iovcnt == 1) {
			rc = spdk_nvme_ns_cmd_write_ext(ns, qpair, iov[0].iov_base, lba, lba_count, bdev_nvme_writev_done,
							bio, &bio->ext_opts);
		} else {
			rc = spdk_nvme_ns_cmd_writev_ext(ns, qpair, lba, lba_count,
							 bdev_nvme_writev_done, bio,
							 bdev_nvme_queued_reset_sgl,
							 bdev_nvme_queued_next_sge,
							 &bio->ext_opts);
		}
	} else if (iovcnt == 1) {
		rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base,
						    md, lba, lba_count, bdev_nvme_writev_done,
+23 −3
Original line number Diff line number Diff line
@@ -1062,6 +1062,16 @@ spdk_nvme_ns_cmd_readv_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpai
	return ut_submit_nvme_request(ns, qpair, SPDK_NVME_OPC_READ, cb_fn, cb_arg);
}

/* Set when the unit under test invokes the contig read ext API. */
static bool g_ut_read_ext_called;
int
spdk_nvme_ns_cmd_read_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer,
			  uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
			  struct spdk_nvme_ns_cmd_ext_io_opts *opts)
{
	/* Stub: record the invocation, then queue a fake READ completion. */
	(void)buffer;
	(void)lba;
	(void)lba_count;
	(void)opts;
	g_ut_read_ext_called = true;
	return ut_submit_nvme_request(ns, qpair, SPDK_NVME_OPC_READ, cb_fn, cb_arg);
}

static bool g_ut_writev_ext_called;
int
spdk_nvme_ns_cmd_writev_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
@@ -1075,6 +1085,16 @@ spdk_nvme_ns_cmd_writev_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpa
	return ut_submit_nvme_request(ns, qpair, SPDK_NVME_OPC_WRITE, cb_fn, cb_arg);
}

/* Set when the unit under test invokes the contig write ext API. */
static bool g_ut_write_ext_called;
int
spdk_nvme_ns_cmd_write_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer,
			   uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg,
			   struct spdk_nvme_ns_cmd_ext_io_opts *opts)
{
	/* Stub: record the invocation, then queue a fake WRITE completion. */
	(void)buffer;
	(void)lba;
	(void)lba_count;
	(void)opts;
	g_ut_write_ext_called = true;
	return ut_submit_nvme_request(ns, qpair, SPDK_NVME_OPC_WRITE, cb_fn, cb_arg);
}

int
spdk_nvme_ns_cmd_comparev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
				  uint64_t lba, uint32_t lba_count,
@@ -2395,11 +2415,11 @@ test_submit_nvme_cmd(void)
	ut_test_submit_fused_nvme_cmd(ch, bdev_io);

	/* Verify that ext NVME API is called when data is described by memory domain  */
	g_ut_readv_ext_called = false;
	g_ut_read_ext_called = false;
	bdev_io->u.bdev.memory_domain = (void *)0xdeadbeef;
	ut_test_submit_nvme_cmd(ch, bdev_io, SPDK_BDEV_IO_TYPE_READ);
	CU_ASSERT(g_ut_readv_ext_called == true);
	g_ut_readv_ext_called = false;
	CU_ASSERT(g_ut_read_ext_called == true);
	g_ut_read_ext_called = false;
	bdev_io->u.bdev.memory_domain = NULL;

	ut_test_submit_admin_cmd(ch, bdev_io, ctrlr);