Commit 86e8a920 authored by Jim Harris

nvme: split non-compliant SGLs into multiple requests



Since nvme_ns_cmd.c now walks the SGL, some of the test code
needs to also be updated to initialize and return correct values
such as ctrlr->flags and sge_length.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I521213695def35d0897aabf57a0638a6c347632e
parent 38c09e5e
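
For background, the PRP constraint this commit enforces on scattered payloads is: within a single request, every SGE except the first must start on a page boundary, and every SGE except the last must end on one. The standalone sketch below illustrates just that check; it is not SPDK code, and sge_t, sgl_is_prp_compatible(), and the 4 KiB PAGE_SIZE are illustrative assumptions.

/*
 * Standalone sketch of the PRP-compatibility rule (not SPDK code).
 * Assumes 4 KiB pages; sge_t and sgl_is_prp_compatible() are
 * hypothetical names used for illustration only.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

typedef struct {
	uintptr_t addr;	/* address of this scatter-gather element */
	uint32_t len;	/* length in bytes */
} sge_t;

static bool is_page_aligned(uintptr_t a)
{
	return (a & (PAGE_SIZE - 1)) == 0;
}

/*
 * A payload can be described by a single PRP list only if every SGE
 * except the first starts on a page boundary and every SGE except the
 * last ends on one; otherwise it must be split into child requests.
 */
static bool sgl_is_prp_compatible(const sge_t *sges, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (i != 0 && !is_page_aligned(sges[i].addr)) {
			return false;	/* interior SGE starts mid-page */
		}
		if (i != n - 1 && !is_page_aligned(sges[i].addr + sges[i].len)) {
			return false;	/* non-final SGE ends mid-page */
		}
	}
	return true;
}

The splitting routine added in this commit applies the same rule incrementally while walking the SGL, flushing a child request whenever the rule would otherwise be violated.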
+7 −1
@@ -133,7 +133,7 @@ struct __attribute__((packed)) nvme_payload {
		/**
		 * Functions for retrieving physical addresses for scattered payloads.
		 */
		struct {
		struct nvme_sgl_args {
			spdk_nvme_req_reset_sgl_cb reset_sgl_fn;
			spdk_nvme_req_next_sge_cb next_sge_fn;
			void *cb_arg;
@@ -603,4 +603,10 @@ void nvme_ctrlr_proc_get_ref(struct spdk_nvme_ctrlr *ctrlr);
void	nvme_ctrlr_proc_put_ref(struct spdk_nvme_ctrlr *ctrlr);
int	nvme_ctrlr_get_ref_count(struct spdk_nvme_ctrlr *ctrlr);

static inline bool
_is_page_aligned(uint64_t address)
{
	return (address & (PAGE_SIZE - 1)) == 0;
}

#endif /* __NVME_INTERNAL_H__ */
+121 −11
@@ -37,7 +37,7 @@ static struct nvme_request *_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns,
		const struct nvme_payload *payload, uint32_t payload_offset, uint32_t md_offset,
		uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn,
		void *cb_arg, uint32_t opc, uint32_t io_flags,
		uint16_t apptag_mask, uint16_t apptag);
		uint16_t apptag_mask, uint16_t apptag, bool check_sgl);

static void
nvme_cb_complete_child(void *child_arg, const struct spdk_nvme_cpl *cpl)
@@ -114,12 +114,12 @@ _nvme_add_child_request(struct spdk_nvme_ns *ns, const struct nvme_payload *payl
			uint32_t payload_offset, uint32_t md_offset,
			uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
			uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag,
			struct nvme_request *parent)
			struct nvme_request *parent, bool check_sgl)
{
	struct nvme_request	*child;

	child = _nvme_ns_cmd_rw(ns, payload, payload_offset, md_offset, lba, lba_count, cb_fn,
				cb_arg, opc, io_flags, apptag_mask, apptag);
				cb_arg, opc, io_flags, apptag_mask, apptag, check_sgl);
	if (child == NULL) {
		nvme_request_free_children(parent);
		nvme_free_request(parent);
@@ -157,7 +157,7 @@ _nvme_ns_cmd_split_request(struct spdk_nvme_ns *ns,

		child = _nvme_add_child_request(ns, payload, payload_offset, md_offset,
						lba, lba_count, cb_fn, cb_arg, opc,
						io_flags, apptag_mask, apptag, req);
						io_flags, apptag_mask, apptag, req, true);
		if (child == NULL) {
			return NULL;
		}
@@ -200,11 +200,117 @@ _nvme_ns_cmd_setup_request(struct spdk_nvme_ns *ns, struct nvme_request *req,
	cmd->cdw15 = (cmd->cdw15 << 16 | apptag);
}

static struct nvme_request *
_nvme_ns_cmd_split_sgl_request(struct spdk_nvme_ns *ns,
			       const struct nvme_payload *payload,
			       uint32_t payload_offset, uint32_t md_offset,
			       uint64_t lba, uint32_t lba_count,
			       spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
			       uint32_t io_flags, struct nvme_request *req,
			       uint16_t apptag_mask, uint16_t apptag)
{
	struct nvme_sgl_args *args;
	bool start_valid, end_valid, last_sge, child_equals_parent;
	uint64_t child_lba = lba;
	uint32_t req_current_length = 0;
	uint32_t child_length = 0;
	uint32_t sge_length;
	uintptr_t address;

	args = &req->payload.u.sgl;

	args->reset_sgl_fn(args->cb_arg, payload_offset);
	args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
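	/*
	 * Walk the parent request's SGL, accumulating SGEs until adding another
	 *  would violate PRP alignment; at that point, send what has been
	 *  accumulated so far as a child request and start a new one.
	 */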
	while (req_current_length < req->payload_size) {

		if (sge_length == 0) {
			/*
			 * Skip zero-length SGEs, fetching the next one first so the
			 *  loop cannot spin forever on an empty element.
			 */
			args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
			continue;
		} else if (req_current_length + sge_length > req->payload_size) {
			sge_length = req->payload_size - req_current_length;
		}

		/*
		 * The start of the SGE is invalid if the start address is not page aligned,
		 *  unless it is the first SGE in the child request.
		 */
		start_valid = child_length == 0 || _is_page_aligned(address);

		/* Boolean for whether this is the last SGE in the parent request. */
		last_sge = (req_current_length + sge_length == req->payload_size);

		/*
		 * The end of the SGE is invalid if the end address is not page aligned,
		 *  unless it is the last SGE in the parent request.
		 */
		end_valid = last_sge || _is_page_aligned(address + sge_length);

		/*
		 * This child request equals the parent request, meaning that no splitting
		 *  was required for the parent request (the one passed into this function).
		 *  In this case, we do not create a child request at all - we just send
		 *  the original request as a single request at the end of this function.
		 */
		child_equals_parent = (child_length + sge_length == req->payload_size);

		if (start_valid) {
			/*
			 * The start of the SGE is valid, so advance the length parameters,
			 *  to include this SGE with previous SGEs for this child request
			 *  (if any).  If it is not valid, we do not advance the length
			 *  parameters nor get the next SGE, because we must send what has
			 *  been collected before this SGE as a child request.
			 */
			child_length += sge_length;
			req_current_length += sge_length;
			if (req_current_length < req->payload_size) {
				args->next_sge_fn(args->cb_arg, (void **)&address, &sge_length);
			}
		}

		/*
		 * If any of these conditions is not met, send what we have accumulated
		 *  so far as a child request.
		 */
		if (!start_valid || !end_valid || !child_equals_parent) {
			struct nvme_request *child;
			uint32_t child_lba_count;

			if ((child_length % ns->sector_size) != 0) {
				return NULL;
			}
			child_lba_count = child_length / ns->sector_size;
			/*
			 * Note the last parameter is set to "false" - this tells the recursive
			 *  call to _nvme_ns_cmd_rw() to not bother with checking for SGL splitting
			 *  since we have already verified it here.
			 */
			child = _nvme_add_child_request(ns, payload, payload_offset, md_offset,
							child_lba, child_lba_count,
							cb_fn, cb_arg, opc, io_flags,
							apptag_mask, apptag, req, false);
			if (child == NULL) {
				return NULL;
			}
			payload_offset += child_length;
			md_offset += child_lba_count * ns->md_size;
			child_lba += child_lba_count;
			child_length = 0;
		}
	}

	if (child_length == req->payload_size) {
		/* No splitting was required, so set up the whole payload as one request. */
		_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag);
	}

	return req;
}

static struct nvme_request *
_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, const struct nvme_payload *payload,
		uint32_t payload_offset, uint32_t md_offset,
		uint64_t lba, uint32_t lba_count, spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t opc,
		uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag)
		uint32_t io_flags, uint16_t apptag_mask, uint16_t apptag, bool check_sgl)
{
	struct nvme_request	*req;
	uint32_t		sector_size;
@@ -250,6 +356,10 @@ _nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, const struct nvme_payload *payload,
		return _nvme_ns_cmd_split_request(ns, payload, payload_offset, md_offset, lba, lba_count, cb_fn,
						  cb_arg, opc,
						  io_flags, req, sectors_per_max_io, 0, apptag_mask, apptag);
	} else if (req->payload.type == NVME_PAYLOAD_TYPE_SGL && check_sgl &&
		   !(ns->ctrlr->flags & SPDK_NVME_CTRLR_SGL_SUPPORTED)) {
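		/*
		 * The controller has no native SGL support, so the payload must be
		 *  expressible as PRPs; split any SGL that is not PRP compatible.
		 */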
		return _nvme_ns_cmd_split_sgl_request(ns, payload, payload_offset, md_offset, lba, lba_count,
						      cb_fn, cb_arg, opc, io_flags, req, apptag_mask, apptag);
	}

	_nvme_ns_cmd_setup_request(ns, req, opc, lba, lba_count, io_flags, apptag_mask, apptag);
@@ -271,7 +381,7 @@ spdk_nvme_ns_cmd_read(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, vo

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
			      io_flags, 0,
			      0);
			      0, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -295,7 +405,7 @@ spdk_nvme_ns_cmd_read_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *q

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
			      io_flags,
			      apptag_mask, apptag);
			      apptag_mask, apptag, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -323,7 +433,7 @@ spdk_nvme_ns_cmd_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
	payload.u.sgl.cb_arg = cb_arg;

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ,
			      io_flags, 0, 0);
			      io_flags, 0, 0, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -345,7 +455,7 @@ spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
	payload.md = NULL;

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
			      io_flags, 0, 0);
			      io_flags, 0, 0, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -367,7 +477,7 @@ spdk_nvme_ns_cmd_write_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *
	payload.md = metadata;

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
			      io_flags, apptag_mask, apptag);
			      io_flags, apptag_mask, apptag, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
@@ -395,7 +505,7 @@ spdk_nvme_ns_cmd_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
	payload.u.sgl.cb_arg = cb_arg;

	req = _nvme_ns_cmd_rw(ns, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE,
			      io_flags, 0, 0);
			      io_flags, 0, 0, true);
	if (req != NULL) {
		return nvme_qpair_submit_request(qpair, req);
	} else {
+9 −6
@@ -1666,12 +1666,15 @@ nvme_pcie_qpair_build_prps_sgl_request(struct spdk_nvme_qpair *qpair, struct nvm
			return -1;
		}

		/* Confirm that this sge is prp compatible. */
		if (phys_addr & 0x3 ||
		    (length < remaining_transfer_len && ((phys_addr + length) & (PAGE_SIZE - 1)))) {
			nvme_pcie_fail_request_bad_vtophys(qpair, tr);
			return -1;
		}
		/*
		 * Any incompatible sges should have been handled up in the splitting routine,
		 *  but assert here as an additional check.
		 */
		assert((phys_addr & 0x3) == 0); /* Address must be dword aligned. */
		/* All SGEs except the last must end on a page boundary. */
		assert((length >= remaining_transfer_len) || _is_page_aligned(phys_addr + length));
		/* All SGEs except the first must start on a page boundary. */
		assert((sge_count == 0) || _is_page_aligned(phys_addr));

		data_transferred = nvme_min(remaining_transfer_len, length);

+45 −1
@@ -232,6 +232,48 @@ static void build_io_request_7(struct io_request *req)
	req->iovs[0].len = 0x10000;
}

static void build_io_request_8(struct io_request *req)
{
	req->nseg = 2;

	/*
	 * 1KB for the 1st sge; make sure the iov address does not start or end
	 * on a 0x1000 boundary.
	 */
	req->iovs[0].base = spdk_zmalloc(0x1000, 0x1000, NULL);
	req->iovs[0].offset = 0x400;
	req->iovs[0].len = 0x400;

	/*
	 * 1KB for the 2nd sge; make sure the iov address does not start or end
	 * on a 0x1000 boundary.
	 */
	req->iovs[1].base = spdk_zmalloc(0x1000, 0x1000, NULL);
	req->iovs[1].offset = 0x400;
	req->iovs[1].len = 0x400;
}

static void build_io_request_9(struct io_request *req)
{
	/*
	 * Check that mixed PRP-compliant and non-compliant requests are handled
	 * properly by splitting them into subrequests.
	 * Construct buffers with the following offsets and lengths:
	 */
	const size_t req_len[] = {  2048, 4096, 2048,  4096,  2048,  1024 };
	const size_t req_off[] = { 0x800,  0x0,  0x0, 0x100, 0x800, 0x800 };
	struct sgl_element *iovs = req->iovs;
	uint32_t i;
	req->nseg = sizeof(req_len) / sizeof(req_len[0]);
	assert(sizeof(req_len) / sizeof(req_len[0]) == sizeof(req_off) / sizeof(req_off[0]));

	for (i = 0; i < req->nseg; i++) {
		iovs[i].base = spdk_zmalloc(req_off[i] + req_len[i], 0x4000, NULL);
		iovs[i].offset = req_off[i];
		iovs[i].len = req_len[i];
	}
}

typedef void (*nvme_build_io_req_fn_t)(struct io_request *req);

static void
@@ -442,7 +484,9 @@ int main(int argc, char **argv)
		    || TEST(build_io_request_4)
		    || TEST(build_io_request_5)
		    || TEST(build_io_request_6)
		    || TEST(build_io_request_7)) {
		    || TEST(build_io_request_7)
		    || TEST(build_io_request_8)
		    || TEST(build_io_request_9)) {
#undef TEST
			rc = 1;
			printf("%s: failed sgl tests\n", iter->name);
+29 −9
@@ -58,6 +58,16 @@ static void nvme_request_reset_sgl(void *cb_arg, uint32_t sgl_offset)

static int nvme_request_next_sge(void *cb_arg, void **address, uint32_t *length)
{
	uint64_t *sge_length = cb_arg;

	/*
	 * We need to set address to something here, since the SGL splitting code will
	 *  use it to determine PRP compatibility.  Just use a rather arbitrary address
	 *  for now - these tests will not actually cause data to be read from or written
	 *  to this address.
	 */
	*address = (void *)(uintptr_t)0x10000000;
	*length = *sge_length;
	return 0;
}

@@ -187,6 +197,11 @@ prepare_for_test(struct spdk_nvme_ns *ns, struct spdk_nvme_ctrlr *ctrlr,
		 uint32_t stripe_size)
{
	ctrlr->max_xfer_size = max_xfer_size;
	/*
	 * Clear the flags field - we especially want to make sure the SGL_SUPPORTED flag is not set
	 *  so that we test the SGL splitting path.
	 */
	ctrlr->flags = 0;
	memset(ns, 0, sizeof(*ns));
	ns->ctrlr = ctrlr;
	ns->sector_size = sector_size;
@@ -583,11 +598,14 @@ test_nvme_ns_cmd_readv(void)
	struct spdk_nvme_qpair		qpair;
	int				rc = 0;
	void				*cb_arg;
	uint32_t			lba_count = 256;
	uint32_t			sector_size = 512;
	uint64_t			sge_length = lba_count * sector_size;

	cb_arg = malloc(512);
	prepare_for_test(&ns, &ctrlr, &qpair, 512, 128 * 1024, 0);
	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0, nvme_request_reset_sgl,
				    nvme_request_next_sge);
	prepare_for_test(&ns, &ctrlr, &qpair, sector_size, 128 * 1024, 0);
	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, lba_count, NULL, &sge_length, 0,
				    nvme_request_reset_sgl, nvme_request_next_sge);

	SPDK_CU_ASSERT_FATAL(rc == 0);
	SPDK_CU_ASSERT_FATAL(g_request != NULL);
@@ -595,7 +613,7 @@ test_nvme_ns_cmd_readv(void)
	CU_ASSERT(g_request->payload.type == NVME_PAYLOAD_TYPE_SGL);
	CU_ASSERT(g_request->payload.u.sgl.reset_sgl_fn == nvme_request_reset_sgl);
	CU_ASSERT(g_request->payload.u.sgl.next_sge_fn == nvme_request_next_sge);
	CU_ASSERT(g_request->payload.u.sgl.cb_arg == cb_arg);
	CU_ASSERT(g_request->payload.u.sgl.cb_arg == &sge_length);
	CU_ASSERT(g_request->cmd.nsid == ns.id);

	rc = spdk_nvme_ns_cmd_readv(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0, nvme_request_reset_sgl,
@@ -614,12 +632,14 @@ test_nvme_ns_cmd_writev(void)
	struct spdk_nvme_qpair		qpair;
	int				rc = 0;
	void				*cb_arg;
	uint32_t			lba_count = 256;
	uint32_t			sector_size = 512;
	uint64_t			sge_length = lba_count * sector_size;

	cb_arg = malloc(512);
	prepare_for_test(&ns, &ctrlr, &qpair, 512, 128 * 1024, 0);
	rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0,
				     nvme_request_reset_sgl,
				     nvme_request_next_sge);
	prepare_for_test(&ns, &ctrlr, &qpair, sector_size, 128 * 1024, 0);
	rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, lba_count, NULL, &sge_length, 0,
				     nvme_request_reset_sgl, nvme_request_next_sge);

	SPDK_CU_ASSERT_FATAL(rc == 0);
	SPDK_CU_ASSERT_FATAL(g_request != NULL);
@@ -627,7 +647,7 @@ test_nvme_ns_cmd_writev(void)
	CU_ASSERT(g_request->payload.type == NVME_PAYLOAD_TYPE_SGL);
	CU_ASSERT(g_request->payload.u.sgl.reset_sgl_fn == nvme_request_reset_sgl);
	CU_ASSERT(g_request->payload.u.sgl.next_sge_fn == nvme_request_next_sge);
	CU_ASSERT(g_request->payload.u.sgl.cb_arg == cb_arg);
	CU_ASSERT(g_request->payload.u.sgl.cb_arg == &sge_length);
	CU_ASSERT(g_request->cmd.nsid == ns.id);

	rc = spdk_nvme_ns_cmd_writev(&ns, &qpair, 0x1000, 256, NULL, cb_arg, 0,