Commit b1a23196 authored by Jim Harris's avatar Jim Harris
Browse files

nvmf: retry QID check if duplicate detected



A host will consider a QID as reusable once it disconnects
from the target.  But our target does not immediately
free the QID's bit from the ctrlr->qpair_mask - it waits
until after a message is sent to the ctrlr's thread.

So this opens up a small window where the host makes
a valid connection with a recently free QID, but the
target rejects it.

When this happens, we will now start a 100us poller, and
recheck again.  This will give those messages time to
execute in this case, and avoid unnecessarily rejecting
the CONNECT command.

Tested with local patch that injects 10us delay before
clearing bit in qpair_mask, along with fused_ordering
test that allocates and frees qpair in quick succession.
Also tested with unit tests added in this patch.

Fixes issue #2955.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I850b895c29d86be9c5070a0e6126657e7a0578fe
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17362


Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
parent aaba5d9c
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -126,7 +126,10 @@ struct spdk_nvmf_qpair {
	bool					connect_received;
	bool					disconnect_started;

	union {
		struct spdk_nvmf_request	*first_fused_req;
		struct spdk_nvmf_request	*connect_req;
	};

	TAILQ_HEAD(, spdk_nvmf_request)		outstanding;
	TAILQ_ENTRY(spdk_nvmf_qpair)		link;
+32 −4
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@

#define NVMF_CTRLR_RESET_SHN_TIMEOUT_IN_MS	(NVMF_CC_RESET_SHN_TIMEOUT_IN_MS + 5000)

#define DUPLICATE_QID_RETRY_US 100

/*
 * Report the SPDK version as the firmware revision.
 * SPDK_VERSION_STRING won't fit into FR (only 8 bytes), so try to fit the most important parts.
@@ -209,6 +211,8 @@ nvmf_ctrlr_start_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
	}
}

static int _retry_qid_check(void *ctx);

static void
ctrlr_add_qpair_and_send_rsp(struct spdk_nvmf_qpair *qpair,
			     struct spdk_nvmf_ctrlr *ctrlr,
@@ -219,10 +223,22 @@ ctrlr_add_qpair_and_send_rsp(struct spdk_nvmf_qpair *qpair,
	assert(ctrlr->admin_qpair->group->thread == spdk_get_thread());

	if (spdk_bit_array_get(ctrlr->qpair_mask, qpair->qid)) {
		if (qpair->connect_req != NULL) {
			SPDK_ERRLOG("Got I/O connect with duplicate QID %u\n", qpair->qid);
			rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
			rsp->status.sc = SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER;
			qpair->connect_req = NULL;
			qpair->ctrlr = NULL;
			spdk_nvmf_request_complete(req);
		} else {
			SPDK_WARNLOG("Duplicate QID detected, re-check in %dus\n",
				     DUPLICATE_QID_RETRY_US);
			qpair->connect_req = req;
			/* Set qpair->ctrlr here so that we'll have it when the poller expires. */
			qpair->ctrlr = ctrlr;
			req->poller = SPDK_POLLER_REGISTER(_retry_qid_check, qpair,
							   DUPLICATE_QID_RETRY_US);
		}
		return;
	}

@@ -239,6 +255,18 @@ ctrlr_add_qpair_and_send_rsp(struct spdk_nvmf_qpair *qpair,
			   ctrlr->hostnqn);
}

/* Poller callback armed when a CONNECT arrived while its QID bit was
 * still set in ctrlr->qpair_mask (see issue #2955).  Fires once after
 * DUPLICATE_QID_RETRY_US: tear down the one-shot poller, then re-run
 * the add-qpair path, which either succeeds (bit now clear) or fails
 * the CONNECT for real (connect_req still set on re-entry).
 *
 * ctx is the spdk_nvmf_qpair whose CONNECT was deferred.
 */
static int
_retry_qid_check(void *ctx)
{
	struct spdk_nvmf_qpair *qpair = ctx;
	struct spdk_nvmf_request *connect_req = qpair->connect_req;

	/* One-shot: drop the poller before retrying the check. */
	spdk_poller_unregister(&connect_req->poller);
	ctrlr_add_qpair_and_send_rsp(qpair, connect_req->qpair->ctrlr, connect_req);
	return SPDK_POLLER_BUSY;
}

static void
_nvmf_ctrlr_add_admin_qpair(void *ctx)
{
+34 −1
Original line number Diff line number Diff line
@@ -811,13 +811,46 @@ test_connect(void)
	sgroups[subsystem.id].mgmt_io_outstanding++;
	TAILQ_INSERT_TAIL(&qpair.outstanding, &req, link);
	rc = nvmf_ctrlr_cmd_connect(&req);
	poll_threads();
	CU_ASSERT(rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS);
	poll_threads();
	/* First time, it will detect duplicate QID and schedule a retry.  So for
	 * now we should expect the response to still be all zeroes.
	 */
	CU_ASSERT(spdk_mem_all_zero(&rsp, sizeof(rsp)));
	CU_ASSERT(sgroups[subsystem.id].mgmt_io_outstanding == 1);

	/* Now advance the clock, so that the retry poller executes. */
	spdk_delay_us(DUPLICATE_QID_RETRY_US * 2);
	poll_threads();
	CU_ASSERT(rsp.nvme_cpl.status.sct == SPDK_NVME_SCT_COMMAND_SPECIFIC);
	CU_ASSERT(rsp.nvme_cpl.status.sc == SPDK_NVME_SC_INVALID_QUEUE_IDENTIFIER);
	CU_ASSERT(qpair.ctrlr == NULL);
	CU_ASSERT(sgroups[subsystem.id].mgmt_io_outstanding == 0);

	/* I/O connect with temporarily duplicate queue ID. This covers race
	 * where qpair_mask bit may not yet be cleared, even though initiator
	 * has closed the connection.  See issue #2955. */
	memset(&rsp, 0, sizeof(rsp));
	sgroups[subsystem.id].mgmt_io_outstanding++;
	TAILQ_INSERT_TAIL(&qpair.outstanding, &req, link);
	rc = nvmf_ctrlr_cmd_connect(&req);
	CU_ASSERT(rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS);
	poll_threads();
	/* First time, it will detect duplicate QID and schedule a retry.  So for
	 * now we should expect the response to still be all zeroes.
	 */
	CU_ASSERT(spdk_mem_all_zero(&rsp, sizeof(rsp)));
	CU_ASSERT(sgroups[subsystem.id].mgmt_io_outstanding == 1);

	/* Now advance the clock, so that the retry poller executes. */
	spdk_bit_array_clear(ctrlr.qpair_mask, 1);
	spdk_delay_us(DUPLICATE_QID_RETRY_US * 2);
	poll_threads();
	CU_ASSERT(nvme_status_success(&rsp.nvme_cpl.status));
	CU_ASSERT(qpair.ctrlr == &ctrlr);
	CU_ASSERT(sgroups[subsystem.id].mgmt_io_outstanding == 0);
	qpair.ctrlr = NULL;

	/* I/O connect when admin qpair is being destroyed */
	admin_qpair.group = NULL;
	admin_qpair.state = SPDK_NVMF_QPAIR_DEACTIVATING;