Commit aca0d56e authored by Shuhei Matsumoto, committed by Tomasz Zawadzki
Browse files

bdev/nvme: Reconnect ctrlr after it is disconnected at completion poller



spdk_nvme_ctrlr_disconnect() will be made asynchronous in the
following patches, so some preparatory changes are needed.

spdk_nvme_ctrlr_disconnect() disconnects adminq and ctrlr synchronously
now.

If spdk_nvme_ctrlr_disconnect() is made asynchronous,
spdk_nvme_ctrlr_process_admin_completions() will complete to disconnect
adminq and ctrlr, and will return -ENXIO only if adminq is disconnected.

However, even now, spdk_nvme_ctrlr_process_admin_completions() returns
-ENXIO if adminq is disconnected.

So as a preparation, set a callback before calling spdk_nvme_ctrlr_disconnect()
and call the callback if it is set and spdk_nvme_ctrlr_process_admin_completions()
returns -ENXIO.

Besides, fix the return value of bdev_nvme_poll_adminq() in this patch.

Change-Id: I2559f86bb8cf9a92b5b386ed816c00b08c9832df
Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10950


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent a76bbe35
Loading
Loading
Loading
Loading
+21 −4
Original line number Diff line number Diff line
@@ -1179,13 +1179,21 @@ bdev_nvme_poll_adminq(void *arg)
{
	int32_t rc;
	struct nvme_ctrlr *nvme_ctrlr = arg;
	nvme_ctrlr_disconnected_cb disconnected_cb;

	assert(nvme_ctrlr != NULL);

	rc = spdk_nvme_ctrlr_process_admin_completions(nvme_ctrlr->ctrlr);
	if (rc < 0) {
		disconnected_cb = nvme_ctrlr->disconnected_cb;
		nvme_ctrlr->disconnected_cb = NULL;

		if (rc == -ENXIO && disconnected_cb != NULL) {
			disconnected_cb(nvme_ctrlr);
		} else {
			bdev_nvme_failover(nvme_ctrlr, false);
		}
	}

	return rc == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY;
}
@@ -1503,8 +1511,13 @@ _bdev_nvme_reset_complete(struct spdk_io_channel_iter *i, int status)
		_bdev_nvme_delete(nvme_ctrlr, false);
		break;
	case OP_DELAYED_RECONNECT:
		/* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
		 * Set callback here to start reconnect delay timer after ctrlr is really disconnected.
		 */
		assert(nvme_ctrlr->disconnected_cb == NULL);
		nvme_ctrlr->disconnected_cb = bdev_nvme_start_reconnect_delay_timer;

		spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
		bdev_nvme_start_reconnect_delay_timer(nvme_ctrlr);
		break;
	default:
		break;
@@ -1626,14 +1639,18 @@ bdev_nvme_reset_ctrlr(struct spdk_io_channel_iter *i, int status)

	assert(status == 0);

	/* spdk_nvme_ctrlr_disconnect() may complete asynchronously later by polling adminq.
	 * Set callback here to reconnect after ctrlr is really disconnected.
	 */
	assert(nvme_ctrlr->disconnected_cb == NULL);
	nvme_ctrlr->disconnected_cb = bdev_nvme_reconnect_ctrlr;

	/* Disconnect fails if ctrlr is already resetting or removed. Both cases are
	 * not possible. Reset is controlled and the callback to hot remove is called
	 * when ctrlr is hot removed.
	 */
	rc = spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
	assert(rc == 0);

	bdev_nvme_reconnect_ctrlr(nvme_ctrlr);
}

static void
+3 −0
Original line number Diff line number Diff line
@@ -102,6 +102,7 @@ struct nvme_path_id {
};

typedef void (*bdev_nvme_reset_cb)(void *cb_arg, bool success);
typedef void (*nvme_ctrlr_disconnected_cb)(struct nvme_ctrlr *nvme_ctrlr);

struct nvme_ctrlr {
	/**
@@ -137,6 +138,8 @@ struct nvme_ctrlr {
	uint64_t				reset_start_tsc;
	struct spdk_poller			*reconnect_delay_timer;

	nvme_ctrlr_disconnected_cb		disconnected_cb;

	/** linked list pointer for device list */
	TAILQ_ENTRY(nvme_ctrlr)			tailq;
	struct nvme_bdev_ctrlr			*nbdev_ctrlr;
+114 −9
Original line number Diff line number Diff line
@@ -1350,6 +1350,11 @@ test_reset_ctrlr(void)
	poll_thread_times(1, 1);
	poll_thread_times(0, 1);
	CU_ASSERT(ctrlr.is_failed == false);
	CU_ASSERT(ctrlr.adminq.is_connected == false);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);
	CU_ASSERT(ctrlr.adminq.is_connected == true);

	poll_thread_times(0, 1);
	CU_ASSERT(ctrlr_ch1->qpair->qpair != NULL);
@@ -1431,6 +1436,8 @@ test_race_between_reset_and_destruct_ctrlr(void)
	CU_ASSERT(nvme_ctrlr->destruct == true);
	CU_ASSERT(nvme_ctrlr->resetting == true);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	/* Reset completed but ctrlr is not still destructed yet. */
@@ -1524,6 +1531,8 @@ test_failover_ctrlr(void)
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(curr_trid->is_failed == true);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	curr_trid = TAILQ_FIRST(&nvme_ctrlr->trids);
@@ -1566,6 +1575,8 @@ test_failover_ctrlr(void)
	CU_ASSERT(next_trid == nvme_ctrlr->active_path_id);
	CU_ASSERT(spdk_nvme_transport_id_compare(&next_trid->trid, &trid2) == 0);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -1652,6 +1663,8 @@ test_race_between_failover_and_add_secondary_trid(void)
	rc = bdev_nvme_reset(nvme_ctrlr);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(path_id1->is_failed == true);
@@ -1672,6 +1685,8 @@ test_race_between_failover_and_add_secondary_trid(void)
	SPDK_CU_ASSERT_FATAL(path_id3 != NULL);
	CU_ASSERT(spdk_nvme_transport_id_compare(&path_id3->trid, &trid3) == 0);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	spdk_put_io_channel(ch1);
@@ -1779,6 +1794,8 @@ test_pending_reset(void)
	bdev_nvme_submit_request(ch1, second_bdev_io);
	CU_ASSERT(TAILQ_FIRST(&ctrlr_ch1->pending_resets) == second_bdev_io);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -1804,6 +1821,8 @@ test_pending_reset(void)

	ctrlr->fail_reset = true;

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -2362,6 +2381,8 @@ test_add_remove_trid(void)
	}
	CU_ASSERT(spdk_nvme_transport_id_compare(&nvme_ctrlr->active_path_id->trid, &path3.trid) == 0);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -2616,6 +2637,10 @@ test_abort(void)
	CU_ASSERT(write_io->internal.in_submit_request == false);
	CU_ASSERT(write_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	spdk_put_io_channel(ch1);

	set_thread(1);
@@ -2987,6 +3012,11 @@ test_reconnect_qpair(void)
	poll_thread_times(1, 2);
	poll_thread_times(0, 1);
	CU_ASSERT(ctrlr->is_failed == false);
	CU_ASSERT(ctrlr->adminq.is_connected == false);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);
	CU_ASSERT(ctrlr->adminq.is_connected == true);

	poll_thread_times(0, 1);
	poll_thread_times(1, 1);
@@ -3019,7 +3049,8 @@ test_reconnect_qpair(void)
	CU_ASSERT(nvme_qpair2->qpair == NULL);
	CU_ASSERT(ctrlr->is_failed == true);

	poll_thread_times(0, 2);
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 3);
	poll_thread_times(1, 1);
	poll_thread_times(0, 1);
	CU_ASSERT(ctrlr->is_failed == true);
@@ -3820,8 +3851,13 @@ test_reset_bdev_ctrlr(void)
	poll_thread_times(0, 1);
	CU_ASSERT(nvme_ctrlr1->resetting == true);
	CU_ASSERT(ctrlr1->is_failed == false);
	CU_ASSERT(ctrlr1->adminq.is_connected == false);
	CU_ASSERT(curr_path1->is_failed == true);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);
	CU_ASSERT(ctrlr1->adminq.is_connected == true);

	poll_thread_times(0, 1);
	CU_ASSERT(io_path11->qpair->qpair != NULL);
	CU_ASSERT(io_path21->qpair->qpair == NULL);
@@ -3852,8 +3888,13 @@ test_reset_bdev_ctrlr(void)
	poll_thread_times(0, 2);
	CU_ASSERT(nvme_ctrlr2->resetting == true);
	CU_ASSERT(ctrlr2->is_failed == false);
	CU_ASSERT(ctrlr2->adminq.is_connected == false);
	CU_ASSERT(curr_path2->is_failed == true);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);
	CU_ASSERT(ctrlr2->adminq.is_connected == true);

	poll_thread_times(0, 1);
	CU_ASSERT(io_path12->qpair->qpair != NULL);
	CU_ASSERT(io_path22->qpair->qpair == NULL);
@@ -3901,6 +3942,10 @@ test_reset_bdev_ctrlr(void)
	CU_ASSERT(nvme_ctrlr1->reset_cb_arg == first_bio);
	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) == second_bdev_io);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr1->is_failed == false);
@@ -4580,7 +4625,8 @@ test_concurrent_read_ana_log_page(void)
	CU_ASSERT(rc == 0);

	poll_threads();

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

@@ -4595,6 +4641,10 @@ test_concurrent_read_ana_log_page(void)

	CU_ASSERT(nvme_ctrlr->ana_log_page_updating == false);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	set_thread(0);

	rc = bdev_nvme_delete("nvme0", &g_any_path);
@@ -5193,12 +5243,14 @@ test_retry_io_if_ctrlr_is_resetting(void)
	CU_ASSERT(bdev_io1 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
	CU_ASSERT(bdev_io2 == TAILQ_NEXT(bdev_io1, module_link));

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_qpair->qpair != NULL);
	CU_ASSERT(nvme_ctrlr->resetting == false);

	spdk_delay_us(999999);
	spdk_delay_us(999999 - g_opts.nvme_adminq_poll_period_us);

	poll_thread_times(0, 1);

@@ -5332,11 +5384,13 @@ test_retry_admin_passthru_if_ctrlr_is_resetting(void)
	CU_ASSERT(admin_io->internal.in_submit_request == true);
	CU_ASSERT(admin_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);

	spdk_delay_us(1000000);
	spdk_delay_us(1000000 - g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 1);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
@@ -5417,12 +5471,20 @@ test_reconnect_ctrlr(void)
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(ctrlr.is_failed == true);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr.is_failed == false);
	CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);

@@ -5451,12 +5513,20 @@ test_reconnect_ctrlr(void)
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(ctrlr.is_failed == true);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr.is_failed == false);
	CU_ASSERT(ctrlr_ch1->qpair->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);

@@ -5467,6 +5537,8 @@ test_reconnect_ctrlr(void)
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
@@ -5561,16 +5633,24 @@ test_retry_failover_ctrlr(void)
	rc = bdev_nvme_reset(nvme_ctrlr);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr.is_failed == false);
	CU_ASSERT(ctrlr_ch->qpair->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);

	CU_ASSERT(path_id1->is_failed == true);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);

	path_id2 = ut_get_path_id_by_trid(nvme_ctrlr, &trid2);
	SPDK_CU_ASSERT_FATAL(path_id2 != NULL);
	CU_ASSERT(path_id2->is_failed == false);
@@ -5600,6 +5680,8 @@ test_retry_failover_ctrlr(void)
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(path_id3->is_failed == false);
@@ -5710,15 +5792,22 @@ test_fail_path(void)
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(ctrlr->is_failed == true);

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr->is_failed == false);
	CU_ASSERT(ctrlr_ch->qpair->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
	CU_ASSERT(nvme_ctrlr->reset_start_tsc != 0);
	CU_ASSERT(nvme_ctrlr->fast_io_fail_timedout == false);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);

	/* I/O should be queued. */
	bdev_io->internal.in_submit_request = true;

@@ -5739,10 +5828,15 @@ test_fail_path(void)
	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr->is_failed == false);
	CU_ASSERT(ctrlr_ch->qpair->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
	CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == false);
	CU_ASSERT(nvme_ctrlr->fast_io_fail_timedout == false);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);

	/* After two seconds, ctrlr_fail_timeout_sec should expire. */
	spdk_delay_us(SPDK_SEC_TO_USEC);
	poll_threads();
@@ -5750,14 +5844,22 @@ test_fail_path(void)
	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr->is_failed == false);
	CU_ASSERT(ctrlr_ch->qpair->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);
	CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == false);
	CU_ASSERT(nvme_ctrlr->fast_io_fail_timedout == true);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);

	/* Then within a second, pending I/O should be failed. */
	spdk_delay_us(SPDK_SEC_TO_USEC);
	poll_threads();

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(bdev_io->internal.in_submit_request == false);
	CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_FAILED);
	CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list));
@@ -5776,6 +5878,9 @@ test_fail_path(void)
	spdk_delay_us(SPDK_SEC_TO_USEC);
	poll_threads();

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr == nvme_ctrlr_get_by_name("nvme0"));
	CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == true);
	CU_ASSERT(nvme_ctrlr->destruct == true);