Commit f4c10e1a authored by Aleksey Marchuk, committed by Jim Harris

nvmf/rdma: Bypass NEED_BUFFER state for reqs with ICD



Requests with in-capsule data already have a data buffer, so there is
no sense in putting them into the common pending_buf queue. This allows
e.g. admin/abort commands to be processed when the system is running
low on buffers.
This change also aligns TCP and RDMA behaviour in the handling of ICD.

Last fix for #3646
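
Background note (not part of the patch): a command carries in-capsule data (ICD) when its first SGL descriptor is an unkeyed Data Block descriptor with the Offset subtype, meaning the payload already arrived inside the received capsule and no transport buffer needs to be allocated. A minimal sketch of that check, assuming the standard SPDK NVMe spec definitions; the helper name is illustrative only:

#include <stdbool.h>

#include "spdk/nvme_spec.h"

/* Illustrative sketch, not SPDK code: detect in-capsule data by inspecting
 * the command's first SGL descriptor. This is the same condition the patch
 * checks in RDMA_REQUEST_STATE_NEW before deciding whether a buffer is needed. */
static inline bool
cmd_has_in_capsule_data(const struct spdk_nvme_cmd *cmd)
{
	const struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;

	return sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
	       sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET;
}

When this condition holds, the patch parses the ICD in place and moves the request straight to READY_TO_EXECUTE instead of queueing it in NEED_BUFFER.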

Change-Id: I9a3086b738499135a63483088c9954039d6e4ce7
Signed-off-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-on: https://review.spdk.io/c/spdk/spdk/+/26249


Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <jim.harris@nvidia.com>
Tested-by: SPDK Automated Test System <spdkbot@gmail.com>
Reviewed-by: Konrad Sztyber <ksztyber@nvidia.com>
parent 2ae1eca1
+21 −3
@@ -1851,6 +1851,8 @@ nvmf_rdma_request_parse_icd(struct spdk_nvmf_rdma_transport *rtransport,
 	uint64_t offset = sgl->address;
 	uint32_t max_len = rtransport->transport.opts.in_capsule_data_size;

+	assert(sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
+	       sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET);
 	SPDK_DEBUGLOG(nvmf, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
 		      offset, sgl->unkeyed.length);

@@ -1933,9 +1935,6 @@ nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
 			      req->iovcnt);

 		return 0;
-	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
-		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
-		return nvmf_rdma_request_parse_icd(rtransport, rdma_req);
 	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT &&
 		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {

@@ -1953,6 +1952,11 @@ nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
 			      req->iovcnt);

 		return 0;
+	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
+		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+		SPDK_ERRLOG("Request %p with ICD called in wrong place\n", rdma_req);
+		assert(0);
+		return -EINVAL;
 	}

 	SPDK_ERRLOG("Invalid NVMf I/O Command SGL:  Type 0x%x, Subtype 0x%x\n",
@@ -2105,6 +2109,7 @@ nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 	struct spdk_nvme_cpl		*rsp = &rdma_req->req.rsp->nvme_cpl;
 	int				rc;
 	struct spdk_nvmf_rdma_recv	*rdma_recv;
+	struct spdk_nvme_sgl_descriptor *sgl;
 	enum spdk_nvmf_rdma_request_state prev_state;
 	bool				progress = false;
 	int				data_posted;
@@ -2201,6 +2206,19 @@ nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
 				break;
 			}

+			sgl = &rdma_req->req.cmd->nvme_cmd.dptr.sgl1;
+			if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
+			    sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
+				/* In-capsule data, no need to get a buffer */
+				rc = nvmf_rdma_request_parse_icd(rtransport, rdma_req);
+				if (spdk_unlikely(rc < 0)) {
+					STAILQ_INSERT_TAIL(&rqpair->pending_rdma_send_queue, rdma_req, state_link);
+					rdma_req->state = RDMA_REQUEST_STATE_READY_TO_COMPLETE_PENDING;
+					break;
+				}
+				rdma_req->state = RDMA_REQUEST_STATE_READY_TO_EXECUTE;
+				break;
+			}
 			rdma_req->state = RDMA_REQUEST_STATE_NEED_BUFFER;
 			nvmf_rdma_poll_group_insert_need_buffer_req(rgroup, rdma_req);
 			break;
+53 −5
@@ -269,7 +269,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
 	reset_nvmf_rdma_request(&rdma_req);
 	sgl->address = 0;
 	sgl->unkeyed.length = rtransport.transport.opts.in_capsule_data_size;
-	rc = nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
+	rc = nvmf_rdma_request_parse_icd(&rtransport, &rdma_req);

 	CU_ASSERT(rc == 0);
 	CU_ASSERT(rdma_req.req.iovcnt == 1);
@@ -281,7 +281,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
 	reset_nvmf_rdma_request(&rdma_req);
 	sgl->address = rtransport.transport.opts.in_capsule_data_size;
 	sgl->unkeyed.length = rtransport.transport.opts.in_capsule_data_size;
-	rc = nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
+	rc = nvmf_rdma_request_parse_icd(&rtransport, &rdma_req);

 	CU_ASSERT(rc == -1);

@@ -289,7 +289,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
 	reset_nvmf_rdma_request(&rdma_req);
 	sgl->address = 0;
 	sgl->unkeyed.length = rtransport.transport.opts.in_capsule_data_size * 2;
-	rc = nvmf_rdma_request_parse_sgl(&rtransport, &device, &rdma_req);
+	rc = nvmf_rdma_request_parse_icd(&rtransport, &rdma_req);

 	CU_ASSERT(rc == -1);

@@ -526,6 +526,7 @@ test_spdk_nvmf_rdma_request_process(void)
 	struct spdk_nvmf_rdma_qpair rqpair = {};
 	struct spdk_nvmf_rdma_recv *rdma_recv;
 	struct spdk_nvmf_rdma_request *rdma_req;
+	union nvmf_h2c_msg *cmd;
 	struct spdk_iobuf_channel ch = {};
 	bool progress;

@@ -571,11 +572,22 @@ test_spdk_nvmf_rdma_request_process(void)
 	poller_reset(&poller, &group);
 	qpair_reset(&rqpair, &poller, &device, &resources, &rtransport.transport);

-	/* Test 2: single SGL WRITE request */
+	/* Test 2.1: single SGL WRITE request */
 	rdma_recv = create_recv(&rqpair, SPDK_NVME_OPC_WRITE);
 	rdma_req = create_req(&rqpair, rdma_recv);
 	rqpair.current_recv_depth = 1;
-	/* NEW -> TRANSFERRING_H2C */
+	MOCK_SET(spdk_iobuf_get, NULL);
+	/* NEW -> NEED_BUFFER */
+	progress = nvmf_rdma_request_process(&rtransport, rdma_req);
+	CU_ASSERT(progress == true);
+	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_NEED_BUFFER);
+	CU_ASSERT(rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER);
+	/* NEED_BUFFER -> NEED_BUFFER */
+	progress = nvmf_rdma_request_process(&rtransport, rdma_req);
+	CU_ASSERT(progress == false);
+	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_NEED_BUFFER);
+	MOCK_CLEAR(spdk_iobuf_get);
+	/* NEED_BUFFER -> TRANSFERRING_H2C */
 	progress = nvmf_rdma_request_process(&rtransport, rdma_req);
 	CU_ASSERT(progress == true);
 	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
@@ -603,6 +615,42 @@ test_spdk_nvmf_rdma_request_process(void)
 	poller_reset(&poller, &group);
 	qpair_reset(&rqpair, &poller, &device, &resources, &rtransport.transport);

+	/* Test 2.2: single ICD WRITE request */
+	rdma_recv = create_recv(&rqpair, SPDK_NVME_OPC_WRITE);
+	rdma_req = create_req(&rqpair, rdma_recv);
+	cmd = (union nvmf_h2c_msg *)rdma_recv->sgl[0].addr;
+	cmd->nvme_cmd.dptr.sgl1.generic.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
+	cmd->nvme_cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
+	cmd->nvme_cmd.dptr.sgl1.unkeyed.length = 1024;
+	cmd->nvme_cmd.dptr.sgl1.address = 0;
+
+	rqpair.current_recv_depth = 1;
+	/* A request with ICD must bypass NEED_BUFFER state */
+	MOCK_SET(spdk_iobuf_get, NULL);
+	/* NEW -> RDMA_REQUEST_STATE_EXECUTING */
+	STAILQ_INIT(&poller.qpairs_pending_send);
+	progress = nvmf_rdma_request_process(&rtransport, rdma_req);
+	CU_ASSERT(progress == true);
+	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_EXECUTING);
+	/* EXECUTED -> COMPLETING */
+	rdma_req->state = RDMA_REQUEST_STATE_EXECUTED;
+	MOCK_CLEAR(spdk_iobuf_get);
+	progress = nvmf_rdma_request_process(&rtransport, rdma_req);
+	CU_ASSERT(progress == true);
+	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_COMPLETING);
+	CU_ASSERT(rdma_req->recv == NULL);
+	/* COMPLETED -> FREE */
+	rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
+	progress = nvmf_rdma_request_process(&rtransport, rdma_req);
+	CU_ASSERT(progress == true);
+	CU_ASSERT(rdma_req->state == RDMA_REQUEST_STATE_FREE);
+
+	free_recv(rdma_recv);
+	free_req(rdma_req);
+	poller_reset(&poller, &group);
+	qpair_reset(&rqpair, &poller, &device, &resources, &rtransport.transport);
+
 	/* Test 3: WRITE+WRITE ibv_send batching */
 	{
 		struct spdk_nvmf_rdma_recv *recv1, *recv2;