Commit 17cb940f authored by Ben Walker, committed by Konrad Sztyber
Browse files

nvme: Fix bug in SGL splitting when elements do not align to blocks



The scattered memory elements do not have to end on block boundaries. We can
split them into pieces when this occurs.

Change-Id: I7d203c50dd6ded786abdd7072ad79b739828a1d5
Signed-off-by: Ben Walker <ben@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/24654


Reviewed-by: Jim Harris <jim.harris@samsung.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
parent c0b7e26c
Loading
Loading
Loading
Loading
+22 −12
Original line number Diff line number Diff line
@@ -343,7 +343,7 @@ _nvme_ns_cmd_split_request_sgl(struct spdk_nvme_ns *ns,
	void *sgl_cb_arg = req->payload.contig_or_cb_arg;
	uint64_t child_lba = lba;
	uint32_t req_current_length = 0;
	uint32_t child_length = 0;
	uint32_t accumulated_length = 0;
	uint32_t sge_length;
	uint16_t max_sges, num_sges;
	uintptr_t address;
@@ -360,7 +360,7 @@ _nvme_ns_cmd_split_request_sgl(struct spdk_nvme_ns *ns,
			sge_length = req->payload_size - req_current_length;
		}

		child_length += sge_length;
		accumulated_length += sge_length;
		req_current_length += sge_length;
		num_sges++;

@@ -374,23 +374,33 @@ _nvme_ns_cmd_split_request_sgl(struct spdk_nvme_ns *ns,
		 *  create a child request when no splitting is required - in that case we will
		 *  fall-through and just create a single request with no children for the entire I/O.
		 */
		if (child_length != req->payload_size) {
		if (accumulated_length != req->payload_size) {
			struct nvme_request *child;
			uint32_t child_lba_count;
			uint32_t child_length;
			uint32_t extra_length;

			if ((child_length % ns->extended_lba_size) != 0) {
				SPDK_ERRLOG("child_length %u not even multiple of lba_size %u\n",
					    child_length, ns->extended_lba_size);
			child_length = accumulated_length;
			/* Child length may not be a multiple of the block size! */
			child_lba_count = child_length / ns->extended_lba_size;
			extra_length = child_length - (child_lba_count * ns->extended_lba_size);
			if (extra_length != 0) {
				/* The last SGE does not end on a block boundary. We need to cut it off. */
				if (extra_length >= child_length) {
					SPDK_ERRLOG("Unable to send I/O. Would require more than the supported number of "
						    "SGL Elements.");
					*rc = -EINVAL;
					return NULL;
				}
				child_length -= extra_length;
			}

			if (spdk_unlikely(accel_sequence != NULL)) {
				SPDK_ERRLOG("Splitting requests with accel sequence is unsupported\n");
				*rc = -EINVAL;
				return NULL;
			}

			child_lba_count = child_length / ns->extended_lba_size;
			/*
			 * Note the last parameter is set to "false" - this tells the recursive
			 *  call to _nvme_ns_cmd_rw() to not bother with checking for SGL splitting
@@ -406,12 +416,12 @@ _nvme_ns_cmd_split_request_sgl(struct spdk_nvme_ns *ns,
			payload_offset += child_length;
			md_offset += child_lba_count * ns->md_size;
			child_lba += child_lba_count;
			child_length = 0;
			num_sges = 0;
			accumulated_length -= child_length;
			num_sges = accumulated_length > 0;
		}
	}

	if (child_length == req->payload_size) {
	if (accumulated_length == req->payload_size) {
		/* No splitting was required, so setup the whole payload as one request. */
		_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag, cdw13);
	}
+96 −0
Original line number Diff line number Diff line
@@ -90,6 +90,8 @@ nvme_request_next_sge(void *cb_arg, void **address, uint32_t *length)
	*address = (void *)ctx->iovs[ctx->cur_idx].iov_base;
	*length = ctx->iovs[ctx->cur_idx].iov_len;

	ctx->cur_idx++;

	return 0;
}

@@ -763,6 +765,99 @@ test_nvme_ns_cmd_readv(void)
	cleanup_after_test(&qpair);
}

/* Like test_nvme_ns_cmd_readv, but the underlying controller has SGL support. */
static void
test_nvme_ns_cmd_readv_sgl(void)
{
	struct spdk_nvme_ns		ns;
	struct spdk_nvme_ctrlr		ctrlr;
	struct spdk_nvme_qpair		qpair;
	int				rc = 0;
	uint32_t			lba_count = 256;
	uint32_t			sector_size = 512;
	struct nvme_request_sgl_ctx	sgl_ctx = {};
	struct iovec			iov[3] = {};
	struct nvme_request		*child;
	uint64_t			cmd_lba;
	uint32_t			cmd_lba_count;

	/* Start with a single SGE covering the whole (block-aligned) payload. */
	iov[0].iov_base = (void *)(uintptr_t)0x10000000;
	iov[0].iov_len = sector_size * lba_count;
	sgl_ctx.iovs = iov;
	sgl_ctx.iovcnt = 1;

	prepare_for_test(&ns, &ctrlr, &qpair, sector_size, 0, 128 * 1024, 0, false);
	/* Advertise native SGL support so the SGL splitting path is exercised. */
	ctrlr.flags |= SPDK_NVME_CTRLR_SGL_SUPPORTED;

	/* A NULL next_sge_fn is invalid for an SGL payload - the call must fail. */
	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, 256, NULL, &sgl_ctx, 0, nvme_request_reset_sgl,
				    NULL);
	CU_ASSERT(rc != 0);


	/* Valid callbacks: the aligned single-SGE I/O should succeed with no split. */
	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, lba_count, NULL, &sgl_ctx, 0,
				    nvme_request_reset_sgl, nvme_request_next_sge);

	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(g_request != NULL);
	CU_ASSERT(g_request->cmd.opc == SPDK_NVME_OPC_READ);
	CU_ASSERT(nvme_payload_type(&g_request->payload) == NVME_PAYLOAD_TYPE_SGL);
	CU_ASSERT(g_request->payload.reset_sgl_fn == nvme_request_reset_sgl);
	CU_ASSERT(g_request->payload.next_sge_fn == nvme_request_next_sge);
	CU_ASSERT(g_request->payload.contig_or_cb_arg == &sgl_ctx);
	CU_ASSERT(g_request->cmd.nsid == ns.id);

	/* Set the controller to only support 1 sge per request. Then do a 2 sector I/O with
	 * 3 unaligned sges. This will fail! */
	ctrlr.max_sges = 1;
	lba_count = 2;
	/* 300 + 300 + 424 = 1024 bytes total; SGE boundaries do not fall on the
	 * 512-byte block boundary, forcing the driver to cut an SGE in two. */
	iov[0].iov_base = (void *)(uintptr_t)0x10000000;
	iov[0].iov_len = 300;
	iov[1].iov_base = iov[0].iov_base + iov[0].iov_len;
	iov[1].iov_len = 300;
	iov[2].iov_base = iov[1].iov_base + iov[1].iov_len;
	iov[2].iov_len = (sector_size * lba_count) - iov[0].iov_len - iov[1].iov_len;
	sgl_ctx.iovs = iov;
	sgl_ctx.iovcnt = 3;

	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, lba_count, NULL, &sgl_ctx, 0,
				    nvme_request_reset_sgl, nvme_request_next_sge);

	/* One block needs at least two SGEs here, but max_sges is 1 - must fail. */
	CU_ASSERT(rc != 0);

	/* Let the controller support 2 sges per request and repeat. This should
	 * succeed. */
	ctrlr.max_sges = 2;
	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, lba_count, NULL, &sgl_ctx, 0,
				    nvme_request_reset_sgl, nvme_request_next_sge);

	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(g_request != NULL);
	/* Expect a split into two one-block child requests. */
	CU_ASSERT(g_request->num_children == 2);

	/* First child: one 512-byte block starting at the original LBA. */
	child = TAILQ_FIRST(&g_request->children);
	nvme_request_remove_child(g_request, child);
	nvme_cmd_interpret_rw(&child->cmd, &cmd_lba, &cmd_lba_count);
	CU_ASSERT(child->num_children == 0);
	CU_ASSERT(child->payload_size == 512);
	CU_ASSERT(cmd_lba == 0x1000);
	CU_ASSERT(cmd_lba_count == 1);
	nvme_free_request(child);

	/* Second child: the remaining block at the next LBA. */
	child = TAILQ_FIRST(&g_request->children);
	nvme_request_remove_child(g_request, child);
	nvme_cmd_interpret_rw(&child->cmd, &cmd_lba, &cmd_lba_count);
	CU_ASSERT(child->num_children == 0);
	CU_ASSERT(child->payload_size == 512);
	CU_ASSERT(cmd_lba == 0x1001);
	CU_ASSERT(cmd_lba_count == 1);
	nvme_free_request(child);

	CU_ASSERT(TAILQ_EMPTY(&g_request->children));

	nvme_free_request(g_request);
	cleanup_after_test(&qpair);
}

static void
test_nvme_ns_cmd_writev(void)
{
@@ -2369,6 +2464,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, test_nvme_ns_cmd_reservation_report);
	CU_ADD_TEST(suite, test_cmd_child_request);
	CU_ADD_TEST(suite, test_nvme_ns_cmd_readv);
	CU_ADD_TEST(suite, test_nvme_ns_cmd_readv_sgl);
	CU_ADD_TEST(suite, test_nvme_ns_cmd_read_with_md);
	CU_ADD_TEST(suite, test_nvme_ns_cmd_writev);
	CU_ADD_TEST(suite, test_nvme_ns_cmd_write_with_md);