Commit 92180eed authored by Jim Harris, committed by Tomasz Zawadzki
Browse files

nvme: add new RESET failure reason



We can use this to differentiate between disconnects
due to a reset vs. real transport failures.

This is needed because when using nvme multi-process,
a process calling spdk_nvme_ctrlr_reset() cannot
try to reinitialize foreign IO qpairs from other
processes. This patch handles this correctly now.

This required adding a new value to the transport
failure reason enum, which extended the required bit-field
width from 2 to 3 bits. So this patch also does some rearranging
to account for the new bits, including shrinking qprio
to 2 bits, which is all that is needed based on the spec.

Signed-off-by: Jim Harris <jim.harris@samsung.com>
Change-Id: I64dfd06c7b0c274ec8f6735bb837eccee658a34d
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/21212


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
parent 0f826dbb
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -383,6 +383,7 @@ enum spdk_nvme_qp_failure_reason {
	SPDK_NVME_QPAIR_FAILURE_LOCAL,
	SPDK_NVME_QPAIR_FAILURE_REMOTE,
	SPDK_NVME_QPAIR_FAILURE_UNKNOWN,
	SPDK_NVME_QPAIR_FAILURE_RESET,
};

typedef enum spdk_nvme_qp_failure_reason spdk_nvme_qp_failure_reason;
+15 −1
Original line number Diff line number Diff line
@@ -1702,7 +1702,8 @@ nvme_ctrlr_reinitialize_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme
	bool async;
	int rc;

	if (spdk_nvme_ctrlr_is_fabrics(ctrlr) || nvme_qpair_is_admin_queue(qpair)) {
	if (nvme_ctrlr_get_current_process(ctrlr) != qpair->active_proc ||
	    spdk_nvme_ctrlr_is_fabrics(ctrlr) || nvme_qpair_is_admin_queue(qpair)) {
		assert(false);
		return -EINVAL;
	}
@@ -1749,8 +1750,21 @@ spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr)
	if (rc == 0 && !spdk_nvme_ctrlr_is_fabrics(ctrlr)) {
		/* Reinitialize qpairs */
		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
			/* Always clear the qid bit here, even for a foreign qpair. We need
			 * to make sure another process doesn't get the chance to grab that
			 * qid.
			 */
			assert(spdk_bit_array_get(ctrlr->free_io_qids, qpair->id));
			spdk_bit_array_clear(ctrlr->free_io_qids, qpair->id);
			if (nvme_ctrlr_get_current_process(ctrlr) != qpair->active_proc) {
				/*
				 * We cannot reinitialize a foreign qpair. The qpair's owning
				 * process will take care of it. Set failure reason to FAILURE_RESET
				 * to ensure that happens.
				 */
				qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_RESET;
				continue;
			}
			rc_tmp = nvme_ctrlr_reinitialize_io_qpair(ctrlr, qpair);
			if (rc_tmp != 0) {
				rc = rc_tmp;
+6 −6
Original line number Diff line number Diff line
@@ -415,7 +415,7 @@ struct spdk_nvme_qpair {

	uint16_t				id;

	uint8_t					qprio;
	uint8_t					qprio: 2;

	uint8_t					state: 3;

@@ -423,6 +423,7 @@ struct spdk_nvme_qpair {

	uint8_t					is_new_qpair: 1;

	uint8_t					abort_dnr: 1;
	/*
	 * Members for handling IO qpair deletion inside of a completion context.
	 * These are specifically defined as single bits, so that they do not
@@ -439,10 +440,9 @@ struct spdk_nvme_qpair {

	uint8_t					last_fuse: 2;

	uint8_t					transport_failure_reason: 2;
	uint8_t					last_transport_failure_reason: 2;
	uint8_t					transport_failure_reason: 3;
	uint8_t					last_transport_failure_reason: 3;

	uint8_t					abort_dnr: 1;
	/* The user is destroying qpair */
	uint8_t					destroy_in_progress: 1;

+7 −0
Original line number Diff line number Diff line
@@ -681,6 +681,13 @@ nvme_qpair_check_enabled(struct spdk_nvme_qpair *qpair)
		if (qpair->ctrlr->trid.trtype != SPDK_NVME_TRANSPORT_PCIE) {
			nvme_ctrlr_disconnect_qpair(qpair);
		}
		if (qpair->transport_failure_reason == SPDK_NVME_QPAIR_FAILURE_RESET) {
			/*
			 * For multi-process, a synchronous reset may not reconnect
			 * foreign IO qpairs. So we will reconnect them here instead.
			 */
			nvme_ctrlr_reinitialize_io_qpair(qpair->ctrlr, qpair);
		}
		return false;
	}

+2 −0
Original line number Diff line number Diff line
@@ -33,6 +33,8 @@ DEFINE_STUB_V(nvme_ctrlr_disconnect_qpair, (struct spdk_nvme_qpair *qpair));

DEFINE_STUB_V(nvme_ctrlr_complete_queued_async_events, (struct spdk_nvme_ctrlr *ctrlr));
DEFINE_STUB_V(nvme_ctrlr_abort_queued_aborts, (struct spdk_nvme_ctrlr *ctrlr));
DEFINE_STUB(nvme_ctrlr_reinitialize_io_qpair, int,
	    (struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair), 0);

void
nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr, bool hot_remove)