Commit 834e3c5a authored by Evgeniy Kochetov's avatar Evgeniy Kochetov Committed by Jim Harris
Browse files

nvme: Fix submission queue overflow



SPDK can submit more commands to a remote NVMf target than allowed by
the negotiated queue size. SPDK submits up to SQSIZE commands, but only
SQSIZE-1 are allowed.

Here is a relevant quote from NVMe over Fabrics rev.1.1a ch.2.4.1
“Submission Queue Flow Control Negotiation”:

If SQ flow control is disabled, then the host should limit the number
of outstanding commands for a queue pair to be less than the size of
the Submission Queue. If the controller detects that the number of
outstanding commands for a queue pair is greater than or equal to the
size of the Submission Queue, then the controller shall:

a) stop processing commands and set the Controller Fatal
Status (CSTS.CFS) bit to ‘1’ (refer to section 10.5 in the NVMe Base
specification); and

b) terminate the NVMe Transport connection and end the association
between the host and the controller.

Signed-off-by: default avatarEvgeniy Kochetov <evgeniik@nvidia.com>
Change-Id: Ifbcf5d51911fc4ddcea1f7cde3135571648606f3
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11413


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
parent 48642652
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -55,10 +55,12 @@ extern "C" {

#define SPDK_NVME_MAX_IO_QUEUES		(65535)

#define SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES	2
#define SPDK_NVME_QUEUE_MIN_ENTRIES		(2)

#define SPDK_NVME_ADMIN_QUEUE_MIN_ENTRIES	SPDK_NVME_QUEUE_MIN_ENTRIES
#define SPDK_NVME_ADMIN_QUEUE_MAX_ENTRIES	4096

#define SPDK_NVME_IO_QUEUE_MIN_ENTRIES		2
#define SPDK_NVME_IO_QUEUE_MIN_ENTRIES		SPDK_NVME_QUEUE_MIN_ENTRIES
#define SPDK_NVME_IO_QUEUE_MAX_ENTRIES		65536

/**
+15 −5
Original line number Diff line number Diff line
@@ -508,7 +508,7 @@ nvme_rdma_qpair_process_cm_event(struct nvme_rdma_qpair *rqpair)
				rc = -1;
			} else {
				SPDK_DEBUGLOG(nvme, "Requested queue depth %d. Target receive queue depth %d.\n",
					      rqpair->num_entries, accept_data->crqsize);
					      rqpair->num_entries + 1, accept_data->crqsize);
			}
			break;
		case RDMA_CM_EVENT_DISCONNECTED:
@@ -1151,8 +1151,8 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
	assert(rctrlr != NULL);

	request_data.qid = rqpair->qpair.id;
	request_data.hrqsize = rqpair->num_entries;
	request_data.hsqsize = rqpair->num_entries - 1;
	request_data.hrqsize = rqpair->num_entries + 1;
	request_data.hsqsize = rqpair->num_entries;
	request_data.cntlid = ctrlr->cntlid;

	param.private_data = &request_data;
@@ -1311,7 +1311,7 @@ _nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_q
		return -1;
	}

	rc = nvme_fabric_qpair_connect(&rqpair->qpair, rqpair->num_entries);
	rc = nvme_fabric_qpair_connect(&rqpair->qpair, rqpair->num_entries + 1);
	if (rc < 0) {
		rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
		SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
@@ -1750,13 +1750,23 @@ nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
	struct spdk_nvme_qpair *qpair;
	int rc;

	if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
		SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
			    qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
		return NULL;
	}

	rqpair = nvme_rdma_calloc(1, sizeof(struct nvme_rdma_qpair));
	if (!rqpair) {
		SPDK_ERRLOG("failed to get create rqpair\n");
		return NULL;
	}

	rqpair->num_entries = qsize;
	/* Set num_entries one less than queue size. According to NVMe
	 * and NVMe-oF specs we can not submit queue size requests,
	 * one slot shall always remain empty.
	 */
	rqpair->num_entries = qsize - 1;
	rqpair->delay_cmd_submit = delay_cmd_submit;
	qpair = &rqpair->qpair;
	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, false);
+12 −2
Original line number Diff line number Diff line
@@ -1964,7 +1964,7 @@ nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvm
		rc = -EAGAIN;
		break;
	case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
		rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries);
		rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1);
		if (rc < 0) {
			SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
			break;
@@ -2052,13 +2052,23 @@ nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
	struct spdk_nvme_qpair *qpair;
	int rc;

	if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
		SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
			    qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
		return NULL;
	}

	tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
	if (!tqpair) {
		SPDK_ERRLOG("failed to get create tqpair\n");
		return NULL;
	}

	tqpair->num_entries = qsize;
	/* Set num_entries one less than queue size. According to NVMe
	 * and NVMe-oF specs we can not submit queue size requests,
	 * one slot shall always remain empty.
	 */
	tqpair->num_entries = qsize - 1;
	qpair = &tqpair->qpair;
	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
	if (rc != 0) {
+27 −3
Original line number Diff line number Diff line
@@ -569,7 +569,7 @@ test_nvme_rdma_ctrlr_create_qpair(void)
	CU_ASSERT(qpair != NULL);
	rqpair = SPDK_CONTAINEROF(qpair, struct nvme_rdma_qpair, qpair);
	CU_ASSERT(qpair == &rqpair->qpair);
	CU_ASSERT(rqpair->num_entries == qsize);
	CU_ASSERT(rqpair->num_entries == qsize - 1);
	CU_ASSERT(rqpair->delay_cmd_submit == false);
	CU_ASSERT(rqpair->rsp_sgls != NULL);
	CU_ASSERT(rqpair->rsp_recv_wrs != NULL);
@@ -580,13 +580,37 @@ test_nvme_rdma_ctrlr_create_qpair(void)
	nvme_rdma_free(rqpair);
	rqpair = NULL;

	/* Test case 2: queue qsize zero. ExpectL FAIL */
	/* Test case 2: queue size 2. Expect: PASS */
	qsize = 2;
	qpair = nvme_rdma_ctrlr_create_qpair(&ctrlr, qid, qsize,
					     SPDK_NVME_QPRIO_URGENT, 1,
					     false);
	CU_ASSERT(qpair != NULL);
	rqpair = SPDK_CONTAINEROF(qpair, struct nvme_rdma_qpair, qpair);
	CU_ASSERT(rqpair->num_entries == qsize - 1);
	CU_ASSERT(rqpair->rsp_sgls != NULL);
	CU_ASSERT(rqpair->rsp_recv_wrs != NULL);
	CU_ASSERT(rqpair->rsps != NULL);

	nvme_rdma_free_reqs(rqpair);
	nvme_rdma_free_rsps(rqpair);
	nvme_rdma_free(rqpair);
	rqpair = NULL;

	/* Test case 3: queue size zero. Expect: FAIL */
	qsize = 0;

	qpair = nvme_rdma_ctrlr_create_qpair(&ctrlr, qid, qsize,
					     SPDK_NVME_QPRIO_URGENT, 1,
					     false);
	SPDK_CU_ASSERT_FATAL(qpair == NULL);

	/* Test case 4: queue size 1. Expect: FAIL */
	qsize = 1;
	qpair = nvme_rdma_ctrlr_create_qpair(&ctrlr, qid, qsize,
					     SPDK_NVME_QPRIO_URGENT, 1,
					     false);
	SPDK_CU_ASSERT_FATAL(qpair == NULL);
}

DEFINE_STUB(ibv_create_cq, struct ibv_cq *, (struct ibv_context *context, int cqe, void *cq_context,
@@ -764,7 +788,7 @@ test_nvme_rdma_ctrlr_construct(void)

	SPDK_CU_ASSERT_FATAL(ctrlr->adminq != NULL);
	rqpair = SPDK_CONTAINEROF(ctrlr->adminq, struct nvme_rdma_qpair, qpair);
	CU_ASSERT(rqpair->num_entries == opts.admin_queue_size);
	CU_ASSERT(rqpair->num_entries == opts.admin_queue_size - 1);
	CU_ASSERT(rqpair->delay_cmd_submit == false);
	CU_ASSERT(rqpair->rsp_sgls != NULL);
	CU_ASSERT(rqpair->rsp_recv_wrs != NULL);
+26 −4
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *   Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
@@ -1493,8 +1493,8 @@ test_nvme_tcp_ctrlr_create_io_qpair(void)
	struct spdk_nvme_qpair *qpair = NULL;
	struct spdk_nvme_ctrlr ctrlr = {};
	uint16_t qid = 1;
	const struct spdk_nvme_io_qpair_opts opts = {
		.io_queue_size = 1,
	struct spdk_nvme_io_qpair_opts opts = {
		.io_queue_size = 2,
		.qprio = SPDK_NVME_QPRIO_URGENT,
		.io_queue_requests = 1,
	};
@@ -1516,11 +1516,33 @@ test_nvme_tcp_ctrlr_create_io_qpair(void)
	CU_ASSERT(qpair->qprio == SPDK_NVME_QPRIO_URGENT);
	CU_ASSERT(qpair->trtype == SPDK_NVME_TRANSPORT_TCP);
	CU_ASSERT(qpair->poll_group == (void *)0xDEADBEEF);
	CU_ASSERT(tqpair->num_entries = 1);
	CU_ASSERT(tqpair->num_entries == 1);

	free(tqpair->tcp_reqs);
	spdk_free(tqpair->send_pdus);
	free(tqpair);

	/* Max queue size shall pass */
	opts.io_queue_size = 0xffff;
	qpair = nvme_tcp_ctrlr_create_io_qpair(&ctrlr, qid, &opts);
	tqpair = nvme_tcp_qpair(qpair);

	CU_ASSERT(qpair != NULL);
	CU_ASSERT(tqpair->num_entries == 0xfffe);

	free(tqpair->tcp_reqs);
	spdk_free(tqpair->send_pdus);
	free(tqpair);

	/* Queue size 0 shall fail */
	opts.io_queue_size = 0;
	qpair = nvme_tcp_ctrlr_create_io_qpair(&ctrlr, qid, &opts);
	CU_ASSERT(qpair == NULL);

	/* Queue size 1 shall fail */
	opts.io_queue_size = 1;
	qpair = nvme_tcp_ctrlr_create_io_qpair(&ctrlr, qid, &opts);
	CU_ASSERT(qpair == NULL);
}

static void