Commit a76bbe35 authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Tomasz Zawadzki
Browse files

bdev/nvme: Disconnect and then free I/O qpair in a ctrlr reset sequence



As we do when deleting ctrlr_channel, disconnect and then free I/O
qpair in a ctrlr reset sequence.

Deleting ctrlr_channel and resetting ctrlr_channel may cause conflicts.
This patch processes such conflicts correctly.

If destroy_ctrlr_channel_cb() is executed between pending and executing
reset_destroy_qpair(), reset_destroy_qpair() is not executed because
ctrlr_channel is not found. In this case, destroy_qpair_channel()
starts disconnecting qpair and deletes ctrlr_channel. Then
disconnected_qpair_cb() releases a reference to poll group.

If destroy_ctrlr_channel_cb() is excuted between executing reset_destroy_qpair()
and disconnected_qpair_cb(), destroy_ctrlr_channel_cb() skips
ctrlr_channel for a reset sequence.

Change-Id: I1f49f74b94aefbea178680aa53ded3a12876c676
Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10766


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
parent 586d1d65
Loading
Loading
Loading
Loading
+43 −8
Original line number Diff line number Diff line
@@ -735,7 +735,15 @@ nvme_ns_is_accessible(struct nvme_ns *nvme_ns)
static inline bool
nvme_io_path_is_connected(struct nvme_io_path *io_path)
{
	return io_path->qpair->qpair != NULL;
	if (spdk_unlikely(io_path->qpair->qpair == NULL)) {
		return false;
	}

	if (spdk_unlikely(io_path->qpair->ctrlr_ch->reset_iter != NULL)) {
		return false;
	}

	return true;
}

static inline bool
@@ -1104,6 +1112,7 @@ bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_
{
	struct nvme_poll_group *group = poll_group_ctx;
	struct nvme_qpair *nvme_qpair;
	struct nvme_ctrlr_channel *ctrlr_ch;

	SPDK_NOTICELOG("qpair %p is disconnected, free the qpair and reset controller.\n", qpair);

@@ -1119,8 +1128,18 @@ bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_

		_bdev_nvme_clear_io_path_cache(nvme_qpair);

		if (nvme_qpair->ctrlr_ch != NULL) {
		ctrlr_ch = nvme_qpair->ctrlr_ch;

		if (ctrlr_ch != NULL) {
			if (ctrlr_ch->reset_iter != NULL) {
				/* If we are already in a full reset sequence, we do not have
				 * to restart it. Just move to the next ctrlr_channel.
				 */
				spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
				ctrlr_ch->reset_iter = NULL;
			} else {
				bdev_nvme_reset(nvme_qpair->ctrlr);
			}
		} else {
			/* In this case, ctrlr_channel is already deleted. */
			nvme_qpair_delete(nvme_qpair);
@@ -1523,12 +1542,17 @@ bdev_nvme_reset_destroy_qpair(struct spdk_io_channel_iter *i)
	_bdev_nvme_clear_io_path_cache(nvme_qpair);

	if (nvme_qpair->qpair != NULL) {
		spdk_nvme_ctrlr_free_io_qpair(nvme_qpair->qpair);
		nvme_qpair->qpair = NULL;
	}
		spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);

		/* The current full reset sequence will move to the next
		 * ctrlr_channel after the qpair is actually disconnected.
		 */
		assert(ctrlr_ch->reset_iter == NULL);
		ctrlr_ch->reset_iter = i;
	} else {
		spdk_for_each_channel_continue(i, 0);
	}
}

static void
bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
@@ -2161,14 +2185,25 @@ bdev_nvme_destroy_ctrlr_channel_cb(void *io_device, void *ctx_buf)
	_bdev_nvme_clear_io_path_cache(nvme_qpair);

	if (nvme_qpair->qpair != NULL) {
		if (ctrlr_ch->reset_iter == NULL) {
			spdk_nvme_ctrlr_disconnect_io_qpair(nvme_qpair->qpair);
		} else {
			/* Skip current ctrlr_channel in a full reset sequence because
			 * it is being deleted now. The qpair is already being disconnected.
			 * We do not have to restart disconnecting it.
			 */
			spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
		}

		/* We cannot release a reference to the poll group now.
		 * The qpair may be disconnected asynchronously later.
		 * We need to poll it until it is actually disconnected.
		 * Just detach the qpair from the deleting ctrlr_channel.
		 */
		nvme_qpair->ctrlr_ch = NULL;
	} else {
		assert(ctrlr_ch->reset_iter == NULL);

		nvme_qpair_delete(nvme_qpair);
	}
}
+2 −0
Original line number Diff line number Diff line
@@ -186,6 +186,8 @@ struct nvme_ctrlr_channel {
	struct nvme_ctrlr		*ctrlr;
	struct nvme_qpair		*qpair;
	TAILQ_HEAD(, spdk_bdev_io)	pending_resets;

	struct spdk_io_channel_iter	*reset_iter;
};

struct nvme_io_path {
+14 −10
Original line number Diff line number Diff line
@@ -1341,11 +1341,13 @@ test_reset_ctrlr(void)
	CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair->qpair != NULL);

	poll_thread_times(0, 1);
	poll_thread_times(1, 1);
	CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair->qpair == NULL);
	CU_ASSERT(ctrlr.is_failed == true);

	poll_thread_times(1, 1);
	poll_thread_times(0, 1);
	CU_ASSERT(ctrlr.is_failed == false);

@@ -2972,17 +2974,17 @@ test_reconnect_qpair(void)
	nvme_qpair2->qpair->is_failed = true;
	ctrlr->is_failed = true;

	poll_thread_times(1, 2);
	poll_thread_times(1, 3);
	CU_ASSERT(nvme_qpair1->qpair != NULL);
	CU_ASSERT(nvme_qpair2->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->resetting == true);

	poll_thread_times(0, 2);
	poll_thread_times(1, 1);
	poll_thread_times(0, 3);
	CU_ASSERT(nvme_qpair1->qpair == NULL);
	CU_ASSERT(nvme_qpair2->qpair == NULL);
	CU_ASSERT(ctrlr->is_failed == true);

	poll_thread_times(1, 2);
	poll_thread_times(0, 1);
	CU_ASSERT(ctrlr->is_failed == false);

@@ -3006,12 +3008,12 @@ test_reconnect_qpair(void)
	ctrlr->is_failed = true;
	ctrlr->fail_reset = true;

	poll_thread_times(1, 2);
	poll_thread_times(1, 3);
	CU_ASSERT(nvme_qpair1->qpair != NULL);
	CU_ASSERT(nvme_qpair2->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->resetting == true);

	poll_thread_times(0, 2);
	poll_thread_times(0, 3);
	poll_thread_times(1, 1);
	CU_ASSERT(nvme_qpair1->qpair == NULL);
	CU_ASSERT(nvme_qpair2->qpair == NULL);
@@ -3806,11 +3808,11 @@ test_reset_bdev_ctrlr(void)
	CU_ASSERT(nvme_ctrlr1->resetting == true);
	CU_ASSERT(nvme_ctrlr1->reset_cb_arg == first_bio);

	poll_thread_times(0, 2);
	poll_thread_times(0, 3);
	CU_ASSERT(io_path11->qpair->qpair == NULL);
	CU_ASSERT(io_path21->qpair->qpair != NULL);

	poll_thread_times(1, 1);
	poll_thread_times(1, 2);
	CU_ASSERT(io_path11->qpair->qpair == NULL);
	CU_ASSERT(io_path21->qpair->qpair == NULL);
	CU_ASSERT(ctrlr1->is_failed == true);
@@ -3838,11 +3840,11 @@ test_reset_bdev_ctrlr(void)
	CU_ASSERT(first_bio->io_path == io_path12);
	CU_ASSERT(nvme_ctrlr2->resetting == true);

	poll_thread_times(0, 2);
	poll_thread_times(0, 3);
	CU_ASSERT(io_path12->qpair->qpair == NULL);
	CU_ASSERT(io_path22->qpair->qpair != NULL);

	poll_thread_times(1, 1);
	poll_thread_times(1, 2);
	CU_ASSERT(io_path12->qpair->qpair == NULL);
	CU_ASSERT(io_path22->qpair->qpair == NULL);
	CU_ASSERT(ctrlr2->is_failed == true);
@@ -3939,7 +3941,9 @@ test_find_io_path(void)
	struct nvme_bdev_channel nbdev_ch = {
		.io_path_list = STAILQ_HEAD_INITIALIZER(nbdev_ch.io_path_list),
	};
	struct nvme_qpair nvme_qpair1 = {}, nvme_qpair2 = {};
	struct nvme_ctrlr_channel ctrlr_ch1 = {}, ctrlr_ch2 = {};
	struct nvme_qpair nvme_qpair1 = { .ctrlr_ch = &ctrlr_ch1, };
	struct nvme_qpair nvme_qpair2 = { .ctrlr_ch = &ctrlr_ch2, };
	struct nvme_ns nvme_ns1 = {}, nvme_ns2 = {};
	struct nvme_io_path io_path1 = { .qpair = &nvme_qpair1, .nvme_ns = &nvme_ns1, };
	struct nvme_io_path io_path2 = { .qpair = &nvme_qpair2, .nvme_ns = &nvme_ns2, };