Commit b6883829 authored by Jacek Kalwas, committed by Tomasz Zawadzki
Browse files

ut/bdev/nvme: cover ctrlr register vs adminq process race



This test improves coverage for the situation where processing admin
queue completions returns an error before the controller is registered
as an I/O device.

A potential error can occur when the admin queue failure getter returns
an error but the initial ANA log page is not processed (see
nvme_ctrlr_init_ana_log_page_done), so this test simulates a similar
scenario.

Without the fix
(bdev/nvme: register ctrlr early to avoid spdk_for_each_channel error),
this test fails with the following error
 > thread.c:2707:spdk_for_each_channel: *ERROR*: could not find io_device 0x513000002880
during bdev_nvme_clear_io_path_caches.

For the reason why a single admin qpair failure is not fatal, see:
> bdev/nvme: Check not only I/O qpair but also adminq when finding optimal I/O path

Change-Id: I6c8b884fb09e65ae7d0ebf9da9a862f9df41d721
Signed-off-by: Jacek Kalwas <jacek.kalwas@nutanix.com>
Reviewed-on: https://review.spdk.io/c/spdk/spdk/+/26939


Tested-by: SPDK Automated Test System <spdkbot@gmail.com>
Reviewed-by: Tomasz Zawadzki <tomasz@tzawadzki.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
parent f05ee5e8
Loading
Loading
Loading
Loading
+62 −0
Original line number Diff line number Diff line
@@ -395,6 +395,7 @@ static int g_ut_register_bdev_status;
static struct spdk_bdev *g_ut_registered_bdev;
static uint16_t g_ut_cntlid;
static struct spdk_nvme_path_id g_any_path = {};
/* While non-zero, the spdk_nvme_ctrlr_process_admin_completions() stub returns 0
 * without processing the admin qpair and decrements this counter once per call. */
static int g_ut_pause_process_adminq;

static void
ut_init_trid(struct spdk_nvme_transport_id *trid)
@@ -1019,6 +1020,10 @@ spdk_nvme_ctrlr_cmd_abort_ext(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qp
/*
 * Unit-test stub for admin completion processing.
 *
 * While g_ut_pause_process_adminq is non-zero, each call consumes one unit of
 * that counter and returns 0 without touching the admin qpair. Once the counter
 * is zero, the call is forwarded to spdk_nvme_qpair_process_completions() on
 * ctrlr->adminq and its result is returned.
 */
int32_t
spdk_nvme_ctrlr_process_admin_completions(struct spdk_nvme_ctrlr *ctrlr)
{
	if (g_ut_pause_process_adminq == 0) {
		/* Not paused: process the admin qpair as usual. */
		return spdk_nvme_qpair_process_completions(&ctrlr->adminq, 0);
	}

	/* Paused: skip this round and use up one pause credit. */
	g_ut_pause_process_adminq--;
	return 0;
}

@@ -2235,6 +2240,62 @@ test_attach_ctrlr(void)
	g_ut_register_bdev_status = 0;
}

/*
 * Cover the race between controller registration and admin queue processing.
 *
 * The admin qpair of a freshly created controller is marked as failed while the
 * stubbed admin completion processing is paused for one call, so the failure is
 * seen before outstanding admin requests are completed. The test then checks
 * that the "nvme0" controller can still be looked up and deleted.
 */
static void
test_attach_ctrlr_race_process_adminq_failure(void)
{
	struct spdk_nvme_transport_id trid = {};
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr_opts opts = {.hostnqn = UT_HOSTNQN};
	const int STRING_SIZE = 32;
	const char *attached_names[STRING_SIZE];
	int rc;
	struct spdk_bdev_nvme_ctrlr_opts bdev_opts = {0};

	/* Start from the default bdev ctrlr options, with multipath turned off. */
	spdk_bdev_nvme_get_default_ctrlr_opts(&bdev_opts);
	bdev_opts.multipath = false;

	set_thread(0);

	memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
	ut_init_trid(&trid);

	/* NOTE(review): the meaning of the trailing ut_attach_ctrlr() arguments is not
	 * visible in this chunk - confirm against the helper's definition. */
	ctrlr = ut_attach_ctrlr(&trid, 0, true, false);
	SPDK_CU_ASSERT_FATAL(ctrlr != NULL);

	g_ut_attach_ctrlr_status = 0;
	g_ut_attach_bdev_count = 0;

	rc = spdk_bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE,
				   attach_ctrlr_done, NULL, &opts, &bdev_opts);
	CU_ASSERT(rc == 0);

	/* Presumably advances mocked time so the attach can progress on the next poll. */
	spdk_delay_us(1000);
	poll_threads();

	/* Such a failure is not considered fatal; hence, the nvme0 controller should still be found
	 * afterwards. Need to pause adminq process so err handling can happen before outstanding
	 * requests (init ANA log page) are completed. */
	ctrlr->adminq.failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
	/* With the counter at 1, the spdk_nvme_ctrlr_process_admin_completions() stub
	 * skips exactly one call and returns 0. */
	g_ut_pause_process_adminq = 1;
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	/* Make sure the pause is lifted (the stub already decrements the counter when it
	 * skips a call), then poll again with admin completion processing enabled. */
	g_ut_pause_process_adminq = 0;
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") != NULL);

	rc = spdk_bdev_nvme_delete("nvme0", &g_any_path, NULL, NULL);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	/* Leave the shared attach status at 0, presumably for the tests that follow. */
	g_ut_attach_ctrlr_status = 0;
}

static void
test_aer_cb(void)
{
@@ -8324,6 +8385,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, test_race_between_failover_and_add_secondary_trid);
	CU_ADD_TEST(suite, test_pending_reset);
	CU_ADD_TEST(suite, test_attach_ctrlr);
	CU_ADD_TEST(suite, test_attach_ctrlr_race_process_adminq_failure);
	CU_ADD_TEST(suite, test_aer_cb);
	CU_ADD_TEST(suite, test_submit_nvme_cmd);
	CU_ADD_TEST(suite, test_add_remove_trid);