Commit 3edf9f12 authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Konrad Sztyber
Browse files

bdev/nvme: Fix race bug between clear_pending_resets and reset_ctrlr_complete()



This was a very long term potential bug.

bdev_nvme_reset/failover_ctrlr() cleared pending resets and then completed
the reset. However, if an external reset request was submitted from the bdev
layer between clearing the pending resets and completing the reset, that
external reset request missed its wakeup and was never completed.

Fix this bug by moving the pending reset list from per-nvme_ctrlr_channel to
per-nvme_ctrlr, and by moving the operation that clears pending resets under
the same lock as reset completion.

Adjust existing unit test cases.

Include unit test for reproduction into this patch.

Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: I09030b0fb118f65a16d6e23bed53e1755f59bffb
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/25291


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Community CI Samsung <spdk.community.ci.samsung@gmail.com>
Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
parent a90f7d98
Loading
Loading
Loading
Loading
+27 −57
Original line number Diff line number Diff line
@@ -1988,26 +1988,21 @@ err:
static void bdev_nvme_reset_io_continue(void *cb_arg, int rc);

static void
bdev_nvme_complete_pending_resets(struct nvme_ctrlr_channel_iter *i,
				  struct nvme_ctrlr *nvme_ctrlr,
				  struct nvme_ctrlr_channel *ctrlr_ch,
				  void *ctx)
bdev_nvme_complete_pending_resets(struct nvme_ctrlr *nvme_ctrlr, bool success)
{
	int rc = 0;
	struct nvme_bdev_io *bio;

	if (ctx != NULL) {
	if (!success) {
		rc = -1;
	}

	while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
		bio = TAILQ_FIRST(&ctrlr_ch->pending_resets);
		TAILQ_REMOVE(&ctrlr_ch->pending_resets, bio, retry_link);
	while (!TAILQ_EMPTY(&nvme_ctrlr->pending_resets)) {
		bio = TAILQ_FIRST(&nvme_ctrlr->pending_resets);
		TAILQ_REMOVE(&nvme_ctrlr->pending_resets, bio, retry_link);

		bdev_nvme_reset_io_continue(bio, rc);
	}

	nvme_ctrlr_for_each_channel_continue(i, 0);
}

/* This function marks the current trid as failed by storing the current ticks
@@ -2231,11 +2226,15 @@ bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)

static void remove_discovery_entry(struct nvme_ctrlr *nvme_ctrlr);

static void _bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status);

static void
bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
{
	bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
	void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
	enum bdev_nvme_op_after_reset op_after_reset;

	assert(nvme_ctrlr->thread == spdk_get_thread());

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (!success) {
		/* Connecting the active trid failed. Set the next alternate trid to the
@@ -2263,29 +2262,11 @@ bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, bool success)
		 */
		nvme_ctrlr->active_path_id->last_failed_tsc = 0;
	}
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	NVME_CTRLR_INFOLOG(nvme_ctrlr, "Clear pending resets.\n");

	/* Make sure we clear any pending resets before returning. */
	nvme_ctrlr_for_each_channel(nvme_ctrlr,
				    bdev_nvme_complete_pending_resets,
				    success ? NULL : (void *)0x1,
				    _bdev_nvme_reset_ctrlr_complete);
}

static void
_bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
{
	bool success = (ctx == NULL);
	bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
	void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
	enum bdev_nvme_op_after_reset op_after_reset;

	assert(nvme_ctrlr->thread == spdk_get_thread());

	nvme_ctrlr->ctrlr_op_cb_fn = NULL;
	nvme_ctrlr->ctrlr_op_cb_arg = NULL;
	bdev_nvme_complete_pending_resets(nvme_ctrlr, success);

	if (!success) {
		NVME_CTRLR_ERRLOG(nvme_ctrlr, "Resetting controller failed.\n");
@@ -2293,11 +2274,13 @@ _bdev_nvme_reset_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int st
		NVME_CTRLR_NOTICELOG(nvme_ctrlr, "Resetting controller successful.\n");
	}

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	nvme_ctrlr->resetting = false;
	nvme_ctrlr->dont_retry = false;
	nvme_ctrlr->in_failover = false;

	nvme_ctrlr->ctrlr_op_cb_fn = NULL;
	nvme_ctrlr->ctrlr_op_cb_arg = NULL;

	op_after_reset = bdev_nvme_check_op_after_reset(nvme_ctrlr, success);
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

@@ -2668,20 +2651,8 @@ bdev_nvme_enable_ctrlr(struct nvme_ctrlr *nvme_ctrlr)
	return 0;
}

static void _bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status);

static void
bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr)
{
	/* Make sure we clear any pending resets before returning. */
	nvme_ctrlr_for_each_channel(nvme_ctrlr,
				    bdev_nvme_complete_pending_resets,
				    NULL,
				    _bdev_nvme_disable_ctrlr_complete);
}

static void
_bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int status)
{
	bdev_nvme_ctrlr_op_cb ctrlr_op_cb_fn = nvme_ctrlr->ctrlr_op_cb_fn;
	void *ctrlr_op_cb_arg = nvme_ctrlr->ctrlr_op_cb_arg;
@@ -2702,6 +2673,9 @@ _bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int
	nvme_ctrlr->disabled = true;
	spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);

	/* Make sure we clear any pending resets before returning. */
	bdev_nvme_complete_pending_resets(nvme_ctrlr, true);

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	if (ctrlr_op_cb_fn) {
@@ -2715,7 +2689,6 @@ _bdev_nvme_disable_ctrlr_complete(struct nvme_ctrlr *nvme_ctrlr, void *ctx, int
	default:
		break;
	}

}

static void
@@ -3075,7 +3048,6 @@ _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
	struct nvme_bdev *nbdev = (struct nvme_bdev *)bdev_io->bdev->ctxt;
	struct nvme_ctrlr *nvme_ctrlr = io_path->qpair->ctrlr;
	spdk_msg_fn msg_fn;
	struct nvme_ctrlr_channel *ctrlr_ch;
	int rc;

	assert(bio->io_path == NULL);
@@ -3083,6 +3055,14 @@ _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)

	pthread_mutex_lock(&nvme_ctrlr->mutex);
	rc = bdev_nvme_reset_ctrlr_unsafe(nvme_ctrlr, &msg_fn);
	if (rc == -EBUSY) {
		/*
		 * Reset call is queued only if it is from the app framework. This is on purpose so that
		 * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
		 * upper level. If they are in the middle of a reset, we won't try to schedule another one.
		 */
		TAILQ_INSERT_TAIL(&nvme_ctrlr->pending_resets, bio, retry_link);
	}
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	if (rc == 0) {
@@ -3096,15 +3076,6 @@ _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
		NVME_BDEV_INFOLOG(nbdev, "reset_io %p started resetting ctrlr [%s, %u].\n",
				  bio, CTRLR_STRING(nvme_ctrlr), CTRLR_ID(nvme_ctrlr));
	} else if (rc == -EBUSY) {
		ctrlr_ch = io_path->qpair->ctrlr_ch;
		assert(ctrlr_ch != NULL);
		/*
		 * Reset call is queued only if it is from the app framework. This is on purpose so that
		 * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
		 * upper level. If they are in the middle of a reset, we won't try to schedule another one.
		 */
		TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bio, retry_link);

		rc = 0;

		NVME_BDEV_INFOLOG(nbdev, "reset_io %p was queued to ctrlr [%s, %u].\n",
@@ -3627,8 +3598,6 @@ bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
	struct nvme_ctrlr *nvme_ctrlr = io_device;
	struct nvme_ctrlr_channel *ctrlr_ch = ctx_buf;

	TAILQ_INIT(&ctrlr_ch->pending_resets);

	return nvme_qpair_create(nvme_ctrlr, ctrlr_ch);
}

@@ -5859,6 +5828,7 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
	}

	TAILQ_INIT(&nvme_ctrlr->trids);
	TAILQ_INIT(&nvme_ctrlr->pending_resets);
	RB_INIT(&nvme_ctrlr->namespaces);

	/* Get another reference to the key, so the first one can be released from probe_ctx */
+2 −1
Original line number Diff line number Diff line
@@ -126,6 +126,8 @@ struct nvme_ctrlr {

	nvme_ctrlr_disconnected_cb		disconnected_cb;

	TAILQ_HEAD(, nvme_bdev_io)		pending_resets;

	/** linked list pointer for device list */
	TAILQ_ENTRY(nvme_ctrlr)			tailq;
	struct nvme_bdev_ctrlr			*nbdev_ctrlr;
@@ -185,7 +187,6 @@ struct nvme_qpair {

struct nvme_ctrlr_channel {
	struct nvme_qpair		*qpair;
	TAILQ_HEAD(, nvme_bdev_io)	pending_resets;

	struct nvme_ctrlr_channel_iter	*reset_iter;
	struct spdk_poller		*connect_poller;
+182 −39
Original line number Diff line number Diff line
@@ -1554,13 +1554,9 @@ test_reset_ctrlr(void)
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(curr_trid->last_failed_tsc != 0);

	poll_thread_times(0, 2);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(curr_trid->last_failed_tsc == 0);
	poll_thread_times(1, 1);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	poll_thread_times(0, 1);
	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(curr_trid->last_failed_tsc == 0);

	/* Case 4: ctrlr is already removed. */
	ctrlr.is_removed = true;
@@ -2000,7 +1996,7 @@ test_pending_reset(void)
	poll_thread_times(1, 2);

	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(TAILQ_EMPTY(&ctrlr_ch2->pending_resets));
	CU_ASSERT(TAILQ_EMPTY(&nvme_ctrlr->pending_resets));

	set_thread(0);

@@ -2012,7 +2008,7 @@ test_pending_reset(void)
	poll_thread_times(1, 1);
	poll_thread_times(0, 1);

	CU_ASSERT(spdk_bdev_io_from_ctx(TAILQ_FIRST(&ctrlr_ch1->pending_resets)) == second_bdev_io);
	CU_ASSERT(spdk_bdev_io_from_ctx(TAILQ_FIRST(&nvme_ctrlr->pending_resets)) == second_bdev_io);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
@@ -2036,7 +2032,7 @@ test_pending_reset(void)
	poll_thread_times(1, 2);

	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(TAILQ_EMPTY(&ctrlr_ch2->pending_resets));
	CU_ASSERT(TAILQ_EMPTY(&nvme_ctrlr->pending_resets));

	set_thread(0);

@@ -2048,7 +2044,7 @@ test_pending_reset(void)
	poll_thread_times(1, 1);
	poll_thread_times(0, 1);

	CU_ASSERT(spdk_bdev_io_from_ctx(TAILQ_FIRST(&ctrlr_ch1->pending_resets)) == second_bdev_io);
	CU_ASSERT(spdk_bdev_io_from_ctx(TAILQ_FIRST(&nvme_ctrlr->pending_resets)) == second_bdev_io);

	ctrlr->fail_reset = true;

@@ -4274,17 +4270,14 @@ test_reset_bdev_ctrlr(void)
	CU_ASSERT(io_path11->qpair->qpair != NULL);
	CU_ASSERT(io_path21->qpair->qpair != NULL);

	poll_thread_times(0, 2);
	CU_ASSERT(nvme_ctrlr1->resetting == true);
	poll_thread_times(1, 1);
	CU_ASSERT(nvme_ctrlr1->resetting == true);
	poll_thread_times(0, 2);
	poll_thread_times(0, 1);
	CU_ASSERT(nvme_ctrlr1->resetting == false);
	CU_ASSERT(curr_path1->last_failed_tsc == 0);
	poll_thread_times(0, 1);
	CU_ASSERT(first_bio->io_path == io_path12);
	CU_ASSERT(nvme_ctrlr2->resetting == true);

	poll_thread_times(0, 3);
	poll_thread_times(0, 2);
	CU_ASSERT(io_path12->qpair->qpair == NULL);
	CU_ASSERT(io_path22->qpair->qpair != NULL);

@@ -4311,15 +4304,13 @@ test_reset_bdev_ctrlr(void)
	CU_ASSERT(io_path12->qpair->qpair != NULL);
	CU_ASSERT(io_path22->qpair->qpair != NULL);

	poll_thread_times(0, 2);
	CU_ASSERT(nvme_ctrlr2->resetting == true);
	poll_thread_times(1, 1);
	CU_ASSERT(nvme_ctrlr2->resetting == true);
	poll_thread_times(0, 2);
	CU_ASSERT(first_bio->io_path == NULL);
	poll_thread_times(0, 1);
	CU_ASSERT(nvme_ctrlr2->resetting == false);
	CU_ASSERT(curr_path2->last_failed_tsc == 0);

	poll_thread_times(0, 1);
	CU_ASSERT(first_bio->io_path == NULL);

	poll_threads();

	/* There is a race between two reset requests from bdev_io.
@@ -4355,7 +4346,7 @@ test_reset_bdev_ctrlr(void)

	CU_ASSERT(nvme_ctrlr1->resetting == true);
	CU_ASSERT(nvme_ctrlr1->ctrlr_op_cb_arg == first_bio);
	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) ==
	CU_ASSERT(TAILQ_FIRST(&nvme_ctrlr1->pending_resets) ==
		  (struct nvme_bdev_io *)second_bdev_io->driver_ctx);

	poll_threads();
@@ -4396,7 +4387,7 @@ test_reset_bdev_ctrlr(void)

	CU_ASSERT(nvme_ctrlr1->resetting == true);
	CU_ASSERT(nvme_ctrlr1->ctrlr_op_cb_arg == first_bio);
	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) ==
	CU_ASSERT(TAILQ_FIRST(&nvme_ctrlr1->pending_resets) ==
		  (struct nvme_bdev_io *)second_bdev_io->driver_ctx);

	ctrlr2->fail_reset = true;
@@ -4448,7 +4439,7 @@ test_reset_bdev_ctrlr(void)

	CU_ASSERT(nvme_ctrlr1->resetting == true);
	CU_ASSERT(nvme_ctrlr1->ctrlr_op_cb_arg == first_bio);
	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) ==
	CU_ASSERT(TAILQ_FIRST(&nvme_ctrlr1->pending_resets) ==
		  (struct nvme_bdev_io *)second_bdev_io->driver_ctx);

	ctrlr1->fail_reset = true;
@@ -4500,7 +4491,7 @@ test_reset_bdev_ctrlr(void)

	CU_ASSERT(nvme_ctrlr1->resetting == true);
	CU_ASSERT(nvme_ctrlr1->ctrlr_op_cb_arg == first_bio);
	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) ==
	CU_ASSERT(TAILQ_FIRST(&nvme_ctrlr1->pending_resets) ==
		  (struct nvme_bdev_io *)second_bdev_io->driver_ctx);

	ctrlr1->fail_reset = true;
@@ -7101,13 +7092,6 @@ test_race_between_reset_and_disconnected(void)
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(curr_trid->last_failed_tsc != 0);

	poll_thread_times(0, 2);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(curr_trid->last_failed_tsc == 0);
	poll_thread_times(1, 1);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(nvme_ctrlr->pending_failover == false);

	/* Here is just one poll before _bdev_nvme_reset_complete() is executed.
	 *
	 * spdk_nvme_ctrlr_reconnect_poll_async() returns success before fabric
@@ -7121,13 +7105,10 @@ test_race_between_reset_and_disconnected(void)
	CU_ASSERT(rc == -EINPROGRESS);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(nvme_ctrlr->pending_failover == true);
	CU_ASSERT(curr_trid->last_failed_tsc == 0);

	poll_thread_times(0, 1);

	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(nvme_ctrlr->pending_failover == false);
	CU_ASSERT(curr_trid->last_failed_tsc != 0);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
@@ -7551,10 +7532,6 @@ test_disable_enable_ctrlr(void)
	CU_ASSERT(ctrlr_ch2->qpair->qpair != NULL);
	CU_ASSERT(nvme_ctrlr->resetting == true);

	poll_thread_times(0, 2);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	poll_thread_times(1, 1);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	poll_thread_times(0, 1);
	CU_ASSERT(nvme_ctrlr->resetting == false);

@@ -8015,6 +7992,171 @@ test_bdev_reset_abort_io(void)
	g_opts.bdev_retry_count = 0;
}

static void
test_race_between_clear_pending_resets_and_reset_ctrlr_complete(void)
{
	struct nvme_path_id path = {};
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr_opts opts = {.hostnqn = UT_HOSTNQN};
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *nvme_ctrlr;
	const int STRING_SIZE = 32;
	const char *attached_names[STRING_SIZE];
	struct nvme_bdev *bdev;
	struct spdk_bdev_io *bdev_io;
	struct nvme_bdev_io *bio;
	struct spdk_io_channel *ch1, *ch2;
	struct nvme_bdev_channel *nbdev_ch1, *nbdev_ch2;
	struct nvme_io_path *io_path1, *io_path2;
	struct nvme_ctrlr_channel *ctrlr_ch1, *ctrlr_ch2;
	int rc;
	struct spdk_bdev_nvme_ctrlr_opts bdev_opts = {0};

	spdk_bdev_nvme_get_default_ctrlr_opts(&bdev_opts);
	bdev_opts.multipath = true;

	memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
	ut_init_trid(&path.trid);
	g_ut_attach_ctrlr_status = 0;
	g_ut_attach_bdev_count = 1;

	set_thread(0);

	ctrlr = ut_attach_ctrlr(&path.trid, 1, true, true);
	SPDK_CU_ASSERT_FATAL(ctrlr != NULL);

	rc = spdk_bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE,
				   attach_ctrlr_done, NULL, &opts, &bdev_opts);
	CU_ASSERT(rc == 0);

	spdk_delay_us(1000);
	poll_threads();

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0");
	SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);

	nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid, opts.hostnqn);
	SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL);

	bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
	SPDK_CU_ASSERT_FATAL(bdev != NULL);

	set_thread(0);

	ch1 = spdk_get_io_channel(bdev);
	SPDK_CU_ASSERT_FATAL(ch1 != NULL);

	nbdev_ch1 = spdk_io_channel_get_ctx(ch1);
	io_path1 = ut_get_io_path_by_ctrlr(nbdev_ch1, nvme_ctrlr);
	SPDK_CU_ASSERT_FATAL(io_path1 != NULL);
	ctrlr_ch1 = io_path1->qpair->ctrlr_ch;
	SPDK_CU_ASSERT_FATAL(ctrlr_ch1 != NULL);

	set_thread(1);

	ch2 = spdk_get_io_channel(bdev);
	SPDK_CU_ASSERT_FATAL(ch2 != NULL);

	nbdev_ch2 = spdk_io_channel_get_ctx(ch2);
	io_path2 = ut_get_io_path_by_ctrlr(nbdev_ch2, nvme_ctrlr);
	SPDK_CU_ASSERT_FATAL(io_path2 != NULL);
	ctrlr_ch2 = io_path2->qpair->ctrlr_ch;
	SPDK_CU_ASSERT_FATAL(ctrlr_ch2 != NULL);

	/* Internal reset request started. */
	rc = bdev_nvme_failover_ctrlr(nvme_ctrlr);
	CU_ASSERT(rc == 0);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(ctrlr_ch1->qpair != NULL);
	CU_ASSERT(ctrlr_ch2->qpair != NULL);

	poll_thread_times(0, 3);
	CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair->qpair != NULL);

	poll_thread_times(0, 1);
	poll_thread_times(1, 1);
	CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair->qpair == NULL);

	poll_thread_times(1, 1);
	poll_thread_times(0, 1);
	CU_ASSERT(ctrlr->adminq.is_connected == false);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);
	CU_ASSERT(ctrlr->adminq.is_connected == true);

	poll_thread_times(0, 1);
	CU_ASSERT(ctrlr_ch1->qpair->qpair != NULL);
	CU_ASSERT(ctrlr_ch2->qpair->qpair == NULL);

	poll_thread_times(1, 1);
	CU_ASSERT(ctrlr_ch1->qpair->qpair != NULL);
	CU_ASSERT(ctrlr_ch2->qpair->qpair != NULL);
	CU_ASSERT(nvme_ctrlr->resetting == true);

	set_thread(0);

	/* Submit an external reset request from bdev_io just one poll before the
	 * internal reset request completes.
	 *
	 * Previously, there was a race window between clearing pending resets and
	 * completing the reset request. If an external reset request was submitted in that window, it never woke up.
	 *
	 * The lost wake up bug was fixed and there is no such race window.
	 *
	 * Hence, submit external reset request as late as possible to avoid future degradation.
	 */
	bdev_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_RESET, bdev, ch1);
	bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
	bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;

	/* For simplicity, skip freezing bdev channels. */
	bdev_nvme_freeze_bdev_channel_done(bdev, bio, 0);

	CU_ASSERT(spdk_bdev_io_from_ctx(TAILQ_FIRST(&nvme_ctrlr->pending_resets)) == bdev_io);

	poll_thread_times(0, 1);

	/* External reset request should be cleared. */
	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(TAILQ_EMPTY(&nvme_ctrlr->pending_resets));

	poll_threads();

	CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	set_thread(0);

	spdk_put_io_channel(ch1);

	set_thread(1);

	spdk_put_io_channel(ch2);

	poll_threads();

	set_thread(0);

	rc = bdev_nvme_delete("nvme0", &g_any_path, NULL, NULL);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);

	free(bdev_io);
}

int
main(int argc, char **argv)
{
@@ -8075,6 +8217,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, test_ns_remove_during_reset);
	CU_ADD_TEST(suite, test_io_path_is_current);
	CU_ADD_TEST(suite, test_bdev_reset_abort_io);
	CU_ADD_TEST(suite, test_race_between_clear_pending_resets_and_reset_ctrlr_complete);

	allocate_threads(3);
	set_thread(0);