Commit 86e8a920 authored by Jim Harris

nvme: split non-compliant SGLs into multiple requests



Since nvme_ns_cmd.c now walks the SGL, some of the test code
needs to also be updated to initialize and return correct values
such as ctrlr->flags and sge_length.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I521213695def35d0897aabf57a0638a6c347632e
parent 38c09e5e
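
For background, the PRP constraint this commit enforces on scattered payloads is: within a single request, every SGE except the first must start on a page boundary, and every SGE except the last must end on one. The standalone sketch below illustrates just that check; it is not SPDK code, and sge_t, sgl_is_prp_compatible(), and the 4 KiB PAGE_SIZE are illustrative assumptions.

/*
 * Standalone sketch of the PRP-compatibility rule (not SPDK code).
 * Assumes 4 KiB pages; sge_t and sgl_is_prp_compatible() are
 * hypothetical names used for illustration only.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

typedef struct {
	uintptr_t addr;	/* address of this scatter-gather element */
	uint32_t len;	/* length in bytes */
} sge_t;

static bool is_page_aligned(uintptr_t a)
{
	return (a & (PAGE_SIZE - 1)) == 0;
}

/*
 * A payload can be described by a single PRP list only if every SGE
 * except the first starts on a page boundary and every SGE except the
 * last ends on one; otherwise it must be split into child requests.
 */
static bool sgl_is_prp_compatible(const sge_t *sges, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (i != 0 && !is_page_aligned(sges[i].addr)) {
			return false;	/* interior SGE starts mid-page */
		}
		if (i != n - 1 && !is_page_aligned(sges[i].addr + sges[i].len)) {
			return false;	/* non-final SGE ends mid-page */
		}
	}
	return true;
}

The splitting routine added in this commit applies the same rule incrementally while walking the SGL, flushing a child request whenever the rule would otherwise be violated.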
+7 −1
@@ -133,7 +133,7 @@ struct __attribute__((packed)) nvme_payload {
		/**
		 * Functions for retrieving physical addresses for scattered payloads.
		 */
		struct {
		struct nvme_sgl_args {
			spdk_nvme_req_reset_sgl_cb reset_sgl_fn;
			spdk_nvme_req_next_sge_cb next_sge_fn;
			void *cb_arg;
@@ -603,4 +603,10 @@ void nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr);
void	nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr);
int	nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr);

static inline bool
_is_page_aligned(uint64_t address)
{
	return (address & (PAGE_SIZE - 1)) == 0;
}

#endif /* __NVME_INTERNAL_H__ */
+121 −11
@@ -37,7 +37,7 @@ static struct nvme_request *_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns,
		const struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset,
		uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
		void *cb_arg, uint32_t opc, uint32_t io_flags,
		uint16_t apptag_mask, uint16_t apptag);
		uint16_t apptag_mask, uint16_t apptag, bool check_sgl);

static void
nvme_cb_complete_child(void *child_arg, const struct spdk_nvme_cpl *cpl)
@@ -114,12 +114,12 @@ _nvme_add_child_request(struct spdk_nvme_ns *ns, const struct nvme_payload *payl
			uint32_t payload_offset, uint32_t md_offset,
			uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
			uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag,
			struct nvme_request *parent)
			struct nvme_request *parent, bool check_sgl)
{
	struct nvme_request	*child;

	child = _nvme_ns_cmd_rw(ns, payload, payload_offset, md_offset, lba, lba_count, cb_fn,
				cb_arg, opc, io_flags, apptag_mask, apptag);
				cb_arg, opc, io_flags, apptag_mask, apptag, check_sgl);
	if (child == NULL) {
		nvme_request_free_children(parent);
		nvme_free_request(parent);
@@ -157,7 +157,7 @@ _nvme_ns_cmd_split_request(struct spdk_nvme_ns *ns,

		child = _nvme_add_child_request(ns, payload, payload_offset, md_offset,
						lba, lba_count, cb_fn, cb_arg, opc,
						io_flags, apptag_mask, apptag, req);
						io_flags, apptag_mask, apptag, req, true);
		if (child == NULL) {
			return NULL;
		}
@@ -200,11 +200,117 @@ _nvme_ns_cmd_setup_request(struct spdk_nvme_ns *ns, struct nvme_request *req,
	cmd->cdw15 = (cmd->cdw15 << 16 | apptag);
}

static struct nvme_request *
_nvme_ns_cmd_split_sgl_request(struct spdk_nvme_ns *ns,
			       const struct nvme_payload *payload,
			       uint32_t payload_offset, uint32_t md_offset,
			       uint64_t lba, uint32_t lba_count,
			       spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
			       uint32_t io_flags, struct nvme_request *req,
			       uint16_t apptag_mask, uint16_t apptag)
{
	struct nvme_sgl_args *args;
	bool start_valid, end_valid, last_sge, child_equals_parent;
	uint64_t child_lba = lba;
	uint32_t req_current_length = 0;
	uint32_t child_length = 0;
	uint32_t sge_length;
	uintptr_t address;

	args = &req->payload.u.sgl;

	args->reset_sgl_fn(args->cb_arg, payload_offset);
	args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
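	/*
	 * Walk the parent request's SGL, accumulating SGEs until adding another
	 *  would violate PRP alignment; at that point, send what has been
	 *  accumulated so far as a child request and start a new one.
	 */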
	while (req_current_length < req->payload_size) {

		if (sge_length == 0) {
			/*
			 * Skip zero-length SGEs, fetching the next one first so the
			 *  loop cannot spin forever on an empty element.
			 */
			args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
			continue;
		} else if (req_current_length + sge_length > req->payload_size) {
			sge_length = req->payload_size - req_current_length;
		}

		/*
		 * The start of the SGE is invalid if the start address is not page aligned,
		 *  unless it is the first SGE in the child request.
		 */
		start_valid = child_length == 0 || _is_page_aligned(address);

		/* Boolean for whether this is the last SGE in the parent request. */
		last_sge = (req_current_length + sge_length == req->payload_size);

		/*
		 * The end of the SGE is invalid if the end address is not page aligned,
		 *  unless it is the last SGE in the parent request.
		 */
		end_valid = last_sge || _is_page_aligned(address + sge_length);

		/*
		 * This child request equals the parent request, meaning that no splitting
		 *  was required for the parent request (the one passed into this function).
		 *  In this case, we do not create a child request at all - we just send
		 *  the original request as a single request at the end of this function.
		 */
		child_equals_parent = (child_length + sge_length == req->payload_size);

		if (start_valid) {
			/*
			 * The start of the SGE is valid, so advance the length parameters,
			 *  to include this SGE with previous SGEs for this child request
			 *  (if any).  If it is not valid, we do not advance the length
			 *  parameters nor get the next SGE, because we must send what has
			 *  been collected before this SGE as a child request.
			 */
			child_length += sge_length;
			req_current_length += sge_length;
			if (req_current_length < req->payload_size) {
				args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
			}
		}

		/*
		 * If any of these conditions is not met, send what we have accumulated
		 *  so far as a child request.
		 */
		if (!start_valid || !end_valid || !child_equals_parent) {
			struct nvme_request *child;
			uint32_t child_lba_count;

			if ((child_length % ns->sector_size) != 0) {
				return NULL;
			}
			child_lba_count = child_length / ns->sector_size;
			/*
			 * Note the last parameter is set to "false" - this tells the recursive
			 *  call to _nvme_ns_cmd_rw() to not bother with checking for SGL splitting
			 *  since we have already verified it here.
			 */
			child = _nvme_add_child_request(ns, payload, payload_offset, md_offset,
							child_lba, child_lba_count,
							cb_fn, cb_arg, opc, io_flags,
							apptag_mask, apptag, req, false);
			if (child == NULL) {
				return NULL;
			}
			payload_offset += child_length;
			md_offset += child_lba_count * ns->md_size;
			child_lba += child_lba_count;
			child_length = 0;
		}
	}

	if (child_length == req->payload_size) {
		/* No splitting was required, so set up the whole payload as one request. */
		_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag);
	}

	return req;
}

static struct nvme_request *
_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, const struct nvme_payload *payload,
		uint32_t payload_offset, uint32_t md_offset,
		uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
		uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag)
		uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag, bool check_sgl)
{
	struct nvme_request	*req;
	uint32_t		sector_size;
@@ -250,6 +356,10 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, const struct nvme_payload *payload,
		return _nvme_ns_cmd_split_request(ns, payload, payload_offset, md_offset, lba, lba_count, cb_fn,
						  cb_arg, opc,
						  io_flags, req, sectors_per_max_io, 0, apptag_mask, apptag);
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_SGL && check_sgl &&
		   !(ns->ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED)) {
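		/*
		 * The controller has no native SGL support, so the payload must be
		 *  expressible as PRPs; split any SGL that is not PRP compatible.
		 */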
		return _nvme_ns_cmd_split_sgl_request(ns, payload, payload_offset, md_offset, lba, lba_count,
						      cb_fn, cb_arg, opc, io_flags, req, apptag_mask, apptag);
	}

	_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag);
@@ -271,7 +381,7 @@ spdk_nvme_ns_cmd_read(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, vo

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
			      io_flags, 0,
			      0);
			      0, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -295,7 +405,7 @@ spdk_nvme_ns_cmd_read_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *q

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
			      io_flags,
			      apptag_mask, apptag);
			      apptag_mask, apptag, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -323,7 +433,7 @@ spdk_nvme_ns_cmd_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
	payload.u.sgl.cb_arg = cb_arg;

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
			      io_flags, 0, 0);
			      io_flags, 0, 0, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -345,7 +455,7 @@ spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
	payload.md = NULL;

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
			      io_flags, 0, 0);
			      io_flags, 0, 0, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -367,7 +477,7 @@ spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *
	payload.md = metadata;

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
			      io_flags, apptag_mask, apptag);
			      io_flags, apptag_mask, apptag, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -395,7 +505,7 @@ spdk_nvme_ns_cmd_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
	payload.u.sgl.cb_arg = cb_arg;

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
			      io_flags, 0, 0);
			      io_flags, 0, 0, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
+9 −6
@@ -1666,12 +1666,15 @@ nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvm
			return -1;
		}

		/* Confirm that this sge is prp compatible. */
		if (phys_addr & 0x3 ||
		    (length < remaining_transfer_len && ((phys_addr + length) & (PAGE_SIZE - 1)))) {
			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
			return -1;
		}
		/*
		 * Any incompatible sges should have been handled up in the splitting routine,
		 *  but assert here as an additional check.
		 */
		assert((phys_addr & 0x3) == 0); /* Address must be dword aligned. */
		/* All SGEs except the last must end on a page boundary. */
		assert((length >= remaining_transfer_len) || _is_page_aligned(phys_addr + length));
		/* All SGEs except the first must start on a page boundary. */
		assert((sge_count == 0) || _is_page_aligned(phys_addr));

		data_transferred = nvme_min(remaining_transfer_len, length);

+45 −1
@@ -232,6 +232,48 @@ static void build_io_request_7(struct io_request *req)
	req->iovs[0].len = 0x10000;
}

static void build_io_request_8(struct io_request *req)
{
	req->nseg = 2;

	/*
	 * 1KB for the 1st sge; make sure the iov address does not start or end
	 * on a 0x1000 boundary.
	 */
	req->iovs[0].base = spdk_zmalloc(0x1000, 0x1000, NULL);
	req->iovs[0].offset = 0x400;
	req->iovs[0].len = 0x400;

	/*
	 * 1KB for the 2nd sge; make sure the iov address does not start or end
	 * on a 0x1000 boundary.
	 */
	req->iovs[1].base = spdk_zmalloc(0x1000, 0x1000, NULL);
	req->iovs[1].offset = 0x400;
	req->iovs[1].len = 0x400;
}

static void build_io_request_9(struct io_request *req)
{
	/*
	 * Check that mixed PRP-compliant and non-compliant requests are handled
	 * properly by splitting them into subrequests.
	 * Construct buffers with the following offsets and lengths:
	 */
	const size_t req_len[] = {  2048, 4096, 2048,  4096,  2048,  1024 };
	const size_t req_off[] = { 0x800,  0x0,  0x0, 0x100, 0x800, 0x800 };
	struct sgl_element *iovs = req->iovs;
	uint32_t i;
	req->nseg = sizeof(req_len) / sizeof(req_len[0]);
	assert(sizeof(req_len) / sizeof(req_len[0]) == sizeof(req_off) / sizeof(req_off[0]));

	for (i = 0; i < req->nseg; i++) {
		iovs[i].base = spdk_zmalloc(req_off[i] + req_len[i], 0x4000, NULL);
		iovs[i].offset = req_off[i];
		iovs[i].len = req_len[i];
	}
}

typedef void (*nvme_build_io_req_fn_t)(struct io_request *req);

static void
@@ -442,7 +484,9 @@ int main(int argc, char **argv)
		    || TEST(build_io_request_4)
		    || TEST(build_io_request_5)
		    || TEST(build_io_request_6)
		    || TEST(build_io_request_7)) {
		    || TEST(build_io_request_7)
		    || TEST(build_io_request_8)
		    || TEST(build_io_request_9)) {
#undef TEST
			rc = 1;
			printf("%s: failed sgl tests\n", iter->name);
+29 −9
@@ -58,6 +58,16 @@ static void nvme_request_reset_sgl(void *cb_arg, uint32_t sgl_offset)

static int nvme_request_next_sge(void *cb_arg, void **address, uint32_t *length)
{
	uint64_t *sge_length = cb_arg;

	/*
	 * We need to set address to something here, since the SGL splitting code will
	 *  use it to determine PRP compatibility.  Just use a rather arbitrary address
	 *  for now - these tests will not actually cause data to be read from or written
	 *  to this address.
	 */
	*address = (void *)(uintptr_t)0x10000000;
	*length = *sge_length;
	return 0;
}

@@ -187,6 +197,11 @@ prepare_for_test(struct spdk_nvme_ns *ns, struct spdk_nvme_ctrlr *ctrlr,
		 uint32_t stripe_size)
{
	ctrlr->max_xfer_size = max_xfer_size;
	/*
	 * Clear the flags field - we especially want to make sure the SGL_SUPPORTED flag is not set
	 *  so that we test the SGL splitting path.
	 */
	ctrlr->flags = 0;
	memset(ns, 0, sizeof(*ns));
	ns->ctrlr = ctrlr;
	ns->sector_size = sector_size;
@@ -583,11 +598,14 @@ test_nvme_ns_cmd_readv(void)
	struct spdk_nvme_qpair		qpair;
	int				rc = 0;
	void				*cb_arg;
	uint32_t			lba_count = 256;
	uint32_t			sector_size = 512;
	uint64_t			sge_length = lba_count * sector_size;

	cb_arg = malloc(512);
	prepare_for_test(&ns, &ctrlr, &qpair, 512, 128 * 1024, 0);
	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0, nvme_request_reset_sgl,
				    nvme_request_next_sge);
	prepare_for_test(&ns, &ctrlr, &qpair, sector_size, 128 * 1024, 0);
	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, lba_count, NULL, &sge_length, 0,
				    nvme_request_reset_sgl, nvme_request_next_sge);

	SPDK_CU_ASSERT_FATAL(rc == 0);
	SPDK_CU_ASSERT_FATAL(g_request != NULL);
@@ -595,7 +613,7 @@ test_nvme_ns_cmd_readv(void)
	CU_ASSERT(g_request->payload.type == NVME_PAYLOAD_TYPE_SGL);
	CU_ASSERT(g_request->payload.u.sgl.reset_sgl_fn == nvme_request_reset_sgl);
	CU_ASSERT(g_request->payload.u.sgl.next_sge_fn == nvme_request_next_sge);
	CU_ASSERT(g_request->payload.u.sgl.cb_arg == cb_arg);
	CU_ASSERT(g_request->payload.u.sgl.cb_arg == &sge_length);
	CU_ASSERT(g_request->cmd.nsid == ns.id);

	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0, nvme_request_reset_sgl,
@@ -614,12 +632,14 @@ test_nvme_ns_cmd_writev(void)
	struct spdk_nvme_qpair		qpair;
	int				rc = 0;
	void				*cb_arg;
	uint32_t			lba_count = 256;
	uint32_t			sector_size = 512;
	uint64_t			sge_length = lba_count * sector_size;

	cb_arg = malloc(512);
	prepare_for_test(&ns, &ctrlr, &qpair, 512, 128 * 1024, 0);
	rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0,
				     nvme_request_reset_sgl,
				     nvme_request_next_sge);
	prepare_for_test(&ns, &ctrlr, &qpair, sector_size, 128 * 1024, 0);
	rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, lba_count, NULL, &sge_length, 0,
				     nvme_request_reset_sgl, nvme_request_next_sge);

	SPDK_CU_ASSERT_FATAL(rc == 0);
	SPDK_CU_ASSERT_FATAL(g_request != NULL);
@@ -627,7 +647,7 @@ test_nvme_ns_cmd_writev(void)
	CU_ASSERT(g_request->payload.type == NVME_PAYLOAD_TYPE_SGL);
	CU_ASSERT(g_request->payload.u.sgl.reset_sgl_fn == nvme_request_reset_sgl);
	CU_ASSERT(g_request->payload.u.sgl.next_sge_fn == nvme_request_next_sge);
	CU_ASSERT(g_request->payload.u.sgl.cb_arg == cb_arg);
	CU_ASSERT(g_request->payload.u.sgl.cb_arg == &sge_length);
	CU_ASSERT(g_request->cmd.nsid == ns.id);

	rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0,