Commit e45b619c authored by Seth Howell, committed by Jim Harris
Browse files

nvme: add function to reconnect qpairs.



While it is unlikely that a single qpair will be failed, it is important
to make it possible to reconnect a single qpair.

This function is also handy at the application layer when going through
a reconnect workflow. If we get -ENXIO from a qpair when we poll, we
will turn around and call this function. If we get -ENXIO from this
function, then we know the whole controller is failed and we need to do
a reset.

Change-Id: I6a8ea0ce27fce2f5fc0a5b3db05834acd68e6a39
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/471417


Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Alexey Marchuk <alexeymar@mellanox.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
parent efc0a864
Loading
Loading
Loading
Loading
+19 −0
Original line number Diff line number Diff line
@@ -1041,6 +1041,25 @@ struct spdk_nvme_qpair *spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *c
		const struct spdk_nvme_io_qpair_opts *opts,
		size_t opts_size);

/**
 * Attempt to reconnect the given I/O qpair.
 *
 * This function is intended to be called on qpairs that have already been connected,
 * but have since entered a failed state as indicated by a return value of -ENXIO from
 * either spdk_nvme_qpair_process_completions() or one of the spdk_nvme_ns_cmd_* functions.
 * It must not be called on the admin qpair.
 *
 * The state of the owning controller is examined before the qpair itself, in the order
 * removed, resetting, failed. The first state that matches determines the return value,
 * so a controller that is both resetting and failed yields -EAGAIN rather than -ENXIO.
 *
 * \param qpair The qpair to reconnect. Must not be NULL.
 *
 * \return 0 on success, or if the qpair was already connected.
 * \return -EAGAIN if the driver was unable to reconnect during this call,
 * but the controller is still connected and is either resetting or enabled.
 * The call may be retried later.
 * \return -ENODEV if the controller is removed. In this case, the controller cannot be
 * recovered and the application will have to destroy it and the associated qpairs.
 * \return -ENXIO if the controller is in a failed state but is not yet resetting. In this
 * case, the application should call spdk_nvme_ctrlr_reset() to reset the entire controller.
 */
int spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair);

/**
 * Free an I/O queue pair that was allocated by spdk_nvme_ctrlr_alloc_io_qpair().
 *
+46 −0
Original line number Diff line number Diff line
@@ -379,6 +379,52 @@ spdk_nvme_ctrlr_alloc_io_qpair(struct spdk_nvme_ctrlr *ctrlr,
	return qpair;
}

/*
 * Try to bring a failed I/O qpair back to the connected state.
 *
 * The controller lock is held for the whole decision. Controller-level
 * conditions win over the qpair's own state and are tested in a fixed order:
 * removed (-ENODEV), resetting (-EAGAIN), failed (-ENXIO). Only when none of
 * them applies and the qpair's transport is flagged as failed is the
 * transport asked to reconnect; a transport error is reported as -EAGAIN and
 * leaves the qpair disabled. A qpair whose transport is not flagged as failed
 * is left untouched and 0 is returned.
 */
int
spdk_nvme_ctrlr_reconnect_io_qpair(struct spdk_nvme_qpair *qpair)
{
	struct spdk_nvme_ctrlr *ctrlr;
	int rc = 0;

	assert(qpair != NULL);
	assert(nvme_qpair_is_admin_queue(qpair) == false);
	assert(qpair->ctrlr != NULL);

	ctrlr = qpair->ctrlr;
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	if (ctrlr->is_removed) {
		/* Nothing can be recovered once the controller is gone. */
		rc = -ENODEV;
	} else if (ctrlr->is_resetting) {
		/* A reset is already in flight; the caller may retry later. */
		rc = -EAGAIN;
	} else if (ctrlr->is_failed) {
		/* The whole controller needs a reset, not just this qpair. */
		rc = -ENXIO;
	} else if (qpair->transport_qp_is_failed) {
		if (nvme_transport_ctrlr_connect_qpair(ctrlr, qpair) != 0) {
			/* Hide the transport's own error code behind a retryable one. */
			nvme_qpair_set_state(qpair, NVME_QPAIR_DISABLED);
			rc = -EAGAIN;
		} else {
			nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
		}
	}

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return rc;
}

int
spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
{
+65 −6
Original line number Diff line number Diff line
@@ -168,12 +168,6 @@ nvme_transport_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_
	return 0;
}

/*
 * Stub for the transport-level qpair connect hook: both arguments are ignored
 * and success (0) is always reported.
 */
int
nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	(void)ctrlr;
	(void)qpair;

	return 0;
}

void
nvme_transport_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
@@ -1424,6 +1418,69 @@ test_alloc_io_qpair_wrr_2(void)
	cleanup_qpairs(&ctrlr);
}

/* Set to true every time the mocked transport connect hook below is invoked. */
bool g_connect_qpair_called = false;
/* Value the mocked transport connect hook below hands back to its caller. */
int g_connect_qpair_return_code = 0;

/*
 * Mock of the transport-level qpair connect hook. It records that it was
 * called and returns whatever g_connect_qpair_return_code currently holds;
 * neither argument is inspected.
 */
int
nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	const int configured_rc = g_connect_qpair_return_code;

	g_connect_qpair_called = true;

	return configured_rc;
}

static void
test_spdk_nvme_ctrlr_reconnect_io_qpair(void)
{
	struct spdk_nvme_ctrlr	ctrlr = {};
	struct spdk_nvme_qpair	qpair = {};
	int rc;

	/* Various states of controller disconnect. */
	qpair.id = 1;
	qpair.ctrlr = &ctrlr;
	ctrlr.is_removed = 1;
	ctrlr.is_failed = 0;
	ctrlr.is_resetting = 0;
	rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
	CU_ASSERT(rc == -ENODEV)

	ctrlr.is_removed = 0;
	ctrlr.is_failed = 1;
	rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
	CU_ASSERT(rc == -ENXIO)

	ctrlr.is_failed = 0;
	ctrlr.is_resetting = 1;
	rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
	CU_ASSERT(rc == -EAGAIN)

	/* Confirm precedence for controller states: removed > resetting > failed */
	ctrlr.is_removed = 1;
	ctrlr.is_failed = 1;
	rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
	CU_ASSERT(rc == -ENODEV)

	ctrlr.is_removed = 0;
	rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
	CU_ASSERT(rc == -EAGAIN)

	ctrlr.is_resetting = 0;
	rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
	CU_ASSERT(rc == -ENXIO)

	/* qpair not failed. Make sure we don't call down to the transport */
	ctrlr.is_failed = 0;
	qpair.transport_qp_is_failed = false;
	g_connect_qpair_called = false;
	rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
	CU_ASSERT(g_connect_qpair_called == false);
	CU_ASSERT(rc == 0)

	/* transport qpair is failed. make sure we call down to the transport */
	qpair.transport_qp_is_failed = true;
	rc = spdk_nvme_ctrlr_reconnect_io_qpair(&qpair);
	CU_ASSERT(g_connect_qpair_called == true);
	CU_ASSERT(rc == 0)
}

static void
test_nvme_ctrlr_fail(void)
{
@@ -1885,6 +1942,8 @@ int main(int argc, char **argv)
#endif
		|| CU_add_test(suite, "test nvme ctrlr function test_nvme_ctrlr_test_active_ns",
			       test_nvme_ctrlr_test_active_ns) == NULL
		|| CU_add_test(suite, "test_spdk_nvme_ctrlr_reconnect_io_qpair",
			       test_spdk_nvme_ctrlr_reconnect_io_qpair) == NULL
	) {
		CU_cleanup_registry();
		return CU_get_error();