Commit 0ba9ba5c authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Jim Harris
Browse files

bdev/nvme: Reset I/O cancels reconnect timer and starts reconnection



Previously, if a reconnect timer was registered when a reset request
came, the reset request failed with -EBUSY. However, this meant the
reset request stayed queued for a long time, until the reconnect timer
expired.

When a reconnect timer is registered, reset is not actually in progress.
Hence, a new reset request can cancel the reconnect timer and can start
reconnection safely.

Add a unit test case to verify this change.

Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: Ied8dd0ad822d2fd6829d88cd56cb36bd4fad13f9
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16823


Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
parent 6b79f767
Loading
Loading
Loading
Loading
+31 −8
Original line number Diff line number Diff line
@@ -1863,7 +1863,11 @@ bdev_nvme_reconnect_delay_timer_expired(void *ctx)

	spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);

	assert(nvme_ctrlr->reconnect_is_delayed == true);
	if (!nvme_ctrlr->reconnect_is_delayed) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return SPDK_POLLER_BUSY;
	}

	nvme_ctrlr->reconnect_is_delayed = false;

	if (nvme_ctrlr->destruct) {
@@ -2081,6 +2085,21 @@ bdev_nvme_reset_destroy_qpairs(struct nvme_ctrlr *nvme_ctrlr)
			      bdev_nvme_reset_ctrlr);
}

/*
 * Thread message handler that cancels a pending reconnect delay timer and
 * starts reconnection right away.
 *
 * ctx is the struct nvme_ctrlr to reconnect. The handler expects to run on
 * that controller's own thread with the controller already marked as
 * resetting; both expectations are asserted.
 */
static void
_bdev_nvme_reconnect(void *ctx)
{
	struct nvme_ctrlr *target = ctx;

	assert(target->resetting == true);
	assert(spdk_get_thread() == target->thread);

	/* The delayed reconnect is superseded, so its timer must not fire later. */
	spdk_poller_unregister(&target->reconnect_delay_timer);

	/* NOTE(review): presumably this poller was paused while the reconnect
	 * was being delayed - confirm against the code that pauses it.
	 */
	spdk_poller_resume(target->adminq_timer_poller);

	bdev_nvme_reconnect_ctrlr(target);
}

static void
_bdev_nvme_reset(void *ctx)
{
@@ -2099,6 +2118,8 @@ _bdev_nvme_reset(void *ctx)
static int
bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr)
{
	spdk_msg_fn msg_fn;

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (nvme_ctrlr->destruct) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
@@ -2111,20 +2132,22 @@ bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr)
		return -EBUSY;
	}

	if (nvme_ctrlr->reconnect_is_delayed) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		SPDK_NOTICELOG("Reconnect is already scheduled.\n");
		return -EBUSY;
	}

	nvme_ctrlr->resetting = true;

	if (nvme_ctrlr->reconnect_is_delayed) {
		SPDK_DEBUGLOG(bdev_nvme, "Reconnect is already scheduled.\n");
		msg_fn = _bdev_nvme_reconnect;
		nvme_ctrlr->reconnect_is_delayed = false;
	} else {
		msg_fn = _bdev_nvme_reset;
		assert(nvme_ctrlr->reset_start_tsc == 0);
	}

	nvme_ctrlr->reset_start_tsc = spdk_get_ticks();

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset, nvme_ctrlr);
	spdk_thread_send_msg(nvme_ctrlr->thread, msg_fn, nvme_ctrlr);
	return 0;
}

+24 −0
Original line number Diff line number Diff line
@@ -5251,6 +5251,30 @@ test_reconnect_ctrlr(void)
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);

	/* A new reset starts from thread 1. */
	set_thread(1);

	/* The reset should cancel the reconnect timer and should start from reconnection.
	 * Then, the reset should fail and a reconnect timer should be registered again.
	 */
	ctrlr.fail_reset = true;
	ctrlr.is_failed = true;

	rc = bdev_nvme_reset(nvme_ctrlr);
	CU_ASSERT(rc == 0);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);
	CU_ASSERT(ctrlr.is_failed == true);

	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr.is_failed == false);
	CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);

	/* Then a reconnect retry should succeed. */
	ctrlr.fail_reset = false;