Commit 24bca2ea authored by Seth Howell, committed by Tomasz Zawadzki

nvme: add an enum for why a qpair disconnected



Change-Id: I1a9517d9673051615942c873416505704740691a
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/475805
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent 39119220
include/spdk/nvme.h  +34 −0
@@ -235,6 +235,21 @@ bool spdk_nvme_ctrlr_is_discovery(struct spdk_nvme_ctrlr *ctrlr);
void spdk_nvme_ctrlr_get_default_ctrlr_opts(struct spdk_nvme_ctrlr_opts *opts,
		size_t opts_size);

/**
 * Reason for qpair disconnect at the transport layer.
 *
 * NONE implies that the qpair is still connected while UNKNOWN means that the
 * qpair is disconnected, but the cause was not apparent.
 */
enum spdk_nvme_qp_failure_reason {
	SPDK_NVME_QPAIR_FAILURE_NONE = 0,
	SPDK_NVME_QPAIR_FAILURE_LOCAL,
	SPDK_NVME_QPAIR_FAILURE_REMOTE,
	SPDK_NVME_QPAIR_FAILURE_UNKNOWN,
};

typedef enum spdk_nvme_qp_failure_reason spdk_nvme_qp_failure_reason;

/**
 * NVMe library transports
 *
@@ -1106,6 +1121,16 @@ struct spdk_nvme_qpair *spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *c
 */
int spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair);

/**
 * Returns the reason the admin qpair for a given controller is disconnected.
 *
 * \param ctrlr The controller to check.
 *
 * \return a valid spdk_nvme_qp_failure_reason.
 */
spdk_nvme_qp_failure_reason spdk_nvme_ctrlr_get_admin_qp_failure_reason(
	struct spdk_nvme_ctrlr *ctrlr);

/**
 * Free an I/O queue pair that was allocated by spdk_nvme_ctrlr_alloc_io_qpair().
 *
@@ -1252,6 +1277,15 @@ int spdk_nvme_ctrlr_cmd_io_raw_with_md(struct spdk_nvme_ctrlr *ctrlr,
int32_t spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair,
		uint32_t max_completions);

/**
 * Returns the reason the qpair is disconnected.
 *
 * \param qpair The qpair to check.
 *
 * \return a valid spdk_nvme_qp_failure_reason.
 */
spdk_nvme_qp_failure_reason spdk_nvme_qpair_get_failure_reason(struct spdk_nvme_qpair *qpair);

/**
 * Send the given admin command to the NVMe controller.
 *
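
For context, a consumer of the two new public getters might translate the failure reason into a log message once a qpair stops processing completions. The helper below is a hypothetical sketch written against the declarations above; it is not part of this commit.

#include "spdk/stdinc.h"
#include "spdk/nvme.h"

/* Hypothetical helper: translate the enum added above into a printable string. */
static const char *
qp_failure_reason_str(spdk_nvme_qp_failure_reason reason)
{
	switch (reason) {
	case SPDK_NVME_QPAIR_FAILURE_NONE:
		return "none (still connected)";
	case SPDK_NVME_QPAIR_FAILURE_LOCAL:
		return "local";
	case SPDK_NVME_QPAIR_FAILURE_REMOTE:
		return "remote";
	case SPDK_NVME_QPAIR_FAILURE_UNKNOWN:
	default:
		return "unknown";
	}
}

/* Hypothetical caller: report why an I/O qpair and the admin qpair disconnected. */
static void
report_disconnect(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	printf("io qpair failure: %s, admin qpair failure: %s\n",
	       qp_failure_reason_str(spdk_nvme_qpair_get_failure_reason(qpair)),
	       qp_failure_reason_str(spdk_nvme_ctrlr_get_admin_qp_failure_reason(ctrlr)));
}
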
lib/nvme/nvme_ctrlr.c  +12 −0
@@ -422,6 +422,7 @@ spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
		rc = -EAGAIN;
		goto out;
	}
	qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_NONE;
	nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);

out:
@@ -429,6 +430,12 @@ out:
	return rc;
}

spdk_nvme_qp_failure_reason
spdk_nvme_ctrlr_get_admin_qp_failure_reason(struct spdk_nvme_ctrlr *ctrlr)
{
	return ctrlr->adminq->transport_failure_reason;
}

/*
 * This internal function will attempt to take the controller
 * lock before calling disconnect on a controller qpair.
@@ -1076,11 +1083,13 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)

	/* Disable all queues before disabling the controller hardware. */
	TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
		qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
		nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED);
	}
	nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_DISABLED);
	nvme_qpair_complete_error_reqs(ctrlr->adminq);
	nvme_transport_qpair_abort_reqs(ctrlr->adminq, 0 /* retry */);
	ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
	nvme_transport_ctrlr_disconnect_qpair(ctrlr, ctrlr->adminq);
	if (nvme_transport_ctrlr_connect_qpair(ctrlr, ctrlr->adminq) != 0) {
		SPDK_ERRLOG("Controller reinitialization failed.\n");
@@ -1088,6 +1097,7 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
		rc = -1;
		goto out;
	}
	ctrlr->adminq->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_NONE;
	nvme_qpair_set_state(ctrlr->adminq, NVME_QPAIR_CONNECTED);

	/* Doorbell buffer config is invalid during reset */
@@ -1116,10 +1126,12 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
		/* Reinitialize qpairs */
		TAILQ_FOREACH(qpair, &ctrlr->active_io_qpairs, tailq) {
			if (nvme_transport_ctrlr_connect_qpair(ctrlr, qpair) != 0) {
				qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
				nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED);
				rc = -1;
				continue;
			}
			qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_NONE;
			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
		}
	}
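
The reset path above marks every qpair SPDK_NVME_QPAIR_FAILURE_LOCAL before tearing it down and clears the reason back to NONE only when reconnection succeeds, so a reason other than NONE after spdk_nvme_ctrlr_reset() signals an unrecovered queue. A minimal, hypothetical caller (not part of this commit) could surface that via the new admin-queue getter:

#include "spdk/stdinc.h"
#include "spdk/nvme.h"

/* Hypothetical wrapper: report why the admin qpair is still down after a failed reset. */
static int
reset_and_report(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc = spdk_nvme_ctrlr_reset(ctrlr);

	if (rc != 0) {
		fprintf(stderr, "reset failed, admin qpair failure reason=%d\n",
			(int)spdk_nvme_ctrlr_get_admin_qp_failure_reason(ctrlr));
	}
	return rc;
}
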
lib/nvme/nvme_internal.h  +2 −0
@@ -381,6 +381,8 @@ struct spdk_nvme_qpair {
	struct spdk_nvme_ctrlr_process	*active_proc;

	void				*req_buf;

	uint8_t				transport_failure_reason: 2;
};

struct spdk_nvme_ns {
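
The reason is stored in a 2-bit bitfield, which is exactly wide enough for the four enum values (SPDK_NVME_QPAIR_FAILURE_UNKNOWN, the largest, is 3). An illustrative compile-time guard, not part of the commit, would catch a future enum value overflowing the field; SPDK's SPDK_STATIC_ASSERT from spdk/assert.h could be used the same way if preferred.

#include "spdk/nvme.h"

/* Illustrative guard only: the widest failure reason must fit in the 2-bit
 * transport_failure_reason field declared above.
 */
_Static_assert(SPDK_NVME_QPAIR_FAILURE_UNKNOWN <= 0x3,
	       "spdk_nvme_qp_failure_reason no longer fits in 2 bits");
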
lib/nvme/nvme_qpair.c  +6 −0
@@ -506,6 +506,12 @@ spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
	return ret;
}

spdk_nvme_qp_failure_reason
spdk_nvme_qpair_get_failure_reason(struct spdk_nvme_qpair *qpair)
{
	return qpair->transport_failure_reason;
}

int
nvme_qpair_init(struct spdk_nvme_qpair *qpair, uint16_t id,
		struct spdk_nvme_ctrlr *ctrlr,
lib/nvme/nvme_rdma.c  +13 −1
@@ -287,13 +287,18 @@ nvme_rdma_qpair_process_cm_event(struct nvme_rdma_qpair *rqpair)
			}
			break;
		case RDMA_CM_EVENT_DISCONNECTED:
			rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_REMOTE;
			nvme_qpair_set_state(&rqpair->qpair, NVME_QPAIR_DISABLED);
			break;
		case RDMA_CM_EVENT_DEVICE_REMOVAL:
			rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
			nvme_qpair_set_state(&rqpair->qpair, NVME_QPAIR_DISABLED);
			break;
		case RDMA_CM_EVENT_MULTICAST_JOIN:
		case RDMA_CM_EVENT_MULTICAST_ERROR:
			break;
		case RDMA_CM_EVENT_ADDR_CHANGE:
			rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_LOCAL;
			nvme_qpair_set_state(&rqpair->qpair, NVME_QPAIR_DISABLED);
			break;
		case RDMA_CM_EVENT_TIMEWAIT_EXIT:
@@ -1060,6 +1065,7 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)

	rc = nvme_fabric_qpair_connect(&rqpair->qpair, rqpair->num_entries);
	if (rc < 0) {
		rqpair->qpair.transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
		nvme_qpair_set_state(&rqpair->qpair, NVME_QPAIR_DISABLED);
		SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
		return -1;
@@ -1876,7 +1882,7 @@ nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair,
{
	struct nvme_rdma_qpair		*rqpair = nvme_rdma_qpair(qpair);
	struct ibv_wc			wc[MAX_COMPLETIONS_PER_POLL];
-	int				i, rc, batch_size;
+	int				i, rc = 0, batch_size;
	uint32_t			reaped;
	struct ibv_cq			*cq;
	struct spdk_nvme_rdma_req	*rdma_req;
@@ -1967,6 +1973,12 @@ fail:
	 * we can call nvme_rdma_qpair_disconnect. For other qpairs we need
	 * to call the generic function which will take the lock for us.
	 */
	if (rc == IBV_WC_RETRY_EXC_ERR) {
		qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_REMOTE;
	} else if (qpair->transport_failure_reason == SPDK_NVME_QPAIR_FAILURE_NONE) {
		qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
	}

	if (nvme_qpair_is_admin_queue(qpair)) {
		nvme_rdma_qpair_disconnect(qpair);
	} else {
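
The RDMA changes classify a peer-initiated disconnect (RDMA_CM_EVENT_DISCONNECTED, or a completion failing with IBV_WC_RETRY_EXC_ERR) as REMOTE and device removal or an address change as LOCAL. A host-side polling loop could use that distinction to choose a recovery path. The sketch below is hypothetical, not part of this commit, and assumes spdk_nvme_qpair_process_completions() returns a negative value once the qpair has been disconnected.

#include "spdk/stdinc.h"
#include "spdk/nvme.h"

/* Hypothetical recovery policy driven by the new per-qpair failure reason. */
static void
poll_and_recover(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	if (spdk_nvme_qpair_process_completions(qpair, 0) < 0) {
		switch (spdk_nvme_qpair_get_failure_reason(qpair)) {
		case SPDK_NVME_QPAIR_FAILURE_REMOTE:
			/* The target dropped the connection; retry just this qpair. */
			spdk_nvme_ctrlr_reconnect_io_qpair(qpair);
			break;
		default:
			/* Local or unexplained failure; fall back to a full controller reset. */
			spdk_nvme_ctrlr_reset(ctrlr);
			break;
		}
	}
}
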