Commit db75f4b6 authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Tomasz Zawadzki
Browse files

bdev/nvme: Remove admin passthrough retry and failover



Admin passthrough supported retry and failover in the same way as I/O,
via the bdev_retry_count option. However, retrying or failing over
admin passthrough commands may have unexpected side effects, and the
benefit is unclear. The safest approach is to limit retry and failover
to I/O. If retry and failover for admin passthrough are needed in the
future, restore the removed code and add a new option,
bdev_admin_retry_count.

Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: I680513a40a80041f6ea6f546c74c672f2a81812d
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14227


Community-CI: Mellanox Build Bot
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
parent fc912a02
Loading
Loading
Loading
Loading
+0 −62
Original line number Diff line number Diff line
@@ -945,20 +945,6 @@ any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
	return false;
}

static bool
any_ctrlr_may_become_available(struct nvme_bdev_channel *nbdev_ch)
{
	struct nvme_io_path *io_path;

	STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
		if (!nvme_io_path_is_failed(io_path)) {
			return true;
		}
	}

	return false;
}

static int
bdev_nvme_retry_ios(void *arg)
{
@@ -1122,7 +1108,6 @@ static inline void
bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc)
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	struct nvme_bdev_channel *nbdev_ch;
	enum spdk_bdev_io_status io_status;

	switch (rc) {
@@ -1133,20 +1118,12 @@ bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc)
		io_status = SPDK_BDEV_IO_STATUS_NOMEM;
		break;
	case -ENXIO:
		nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));

		if (any_ctrlr_may_become_available(nbdev_ch)) {
			bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
			return;
		}

	/* fallthrough */
	default:
		io_status = SPDK_BDEV_IO_STATUS_FAILED;
		break;
	}

	bio->retry_count = 0;
	__bdev_nvme_io_complete(bdev_io, io_status, NULL);
}

@@ -5842,48 +5819,9 @@ bdev_nvme_admin_passthru_complete_nvme_status(void *ctx)
	struct nvme_bdev_io *bio = ctx;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	const struct spdk_nvme_cpl *cpl = &bio->cpl;
	struct nvme_bdev_channel *nbdev_ch;
	struct nvme_ctrlr *nvme_ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;
	uint64_t delay_ms;

	assert(bdev_nvme_io_type_is_admin(bdev_io->type));

	if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
		goto complete;
	}

	if (cpl->status.dnr != 0 || spdk_nvme_cpl_is_aborted_by_request(cpl) ||
	    (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
		goto complete;
	}

	nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
	nvme_ctrlr = bio->io_path->qpair->ctrlr;

	if (spdk_nvme_cpl_is_path_error(cpl) ||
	    spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
	    !nvme_ctrlr_is_available(nvme_ctrlr)) {
		delay_ms = 0;
	} else {
		bio->retry_count++;

		cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);

		if (cpl->status.crd != 0) {
			delay_ms = cdata->crdt[cpl->status.crd] * 100;
		} else {
			delay_ms = 0;
		}
	}

	if (any_ctrlr_may_become_available(nbdev_ch)) {
		bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
		return;
	}

complete:
	bio->retry_count = 0;
	__bdev_nvme_io_complete(bdev_io, 0, cpl);
}

+0 −426
Original line number Diff line number Diff line
@@ -4866,305 +4866,6 @@ test_retry_io_for_ana_error(void)
	g_opts.bdev_retry_count = 0;
}

/* Unit test: an admin passthrough command that completes with a path
 * error is retried according to g_opts.bdev_retry_count, and fails
 * over to a second controller in a multipath configuration. Covers
 * three scenarios: DNR set (no retry), retry on the same controller,
 * and failover retry to another controller.
 */
static void
test_retry_admin_passthru_for_path_error(void)
{
	struct nvme_path_id path1 = {}, path2 = {};
	struct spdk_nvme_ctrlr *ctrlr1, *ctrlr2;
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *nvme_ctrlr1, *nvme_ctrlr2;
	const int STRING_SIZE = 32;
	const char *attached_names[STRING_SIZE];
	struct nvme_bdev *bdev;
	struct spdk_bdev_io *admin_io;
	struct spdk_io_channel *ch;
	struct ut_nvme_req *req;
	struct spdk_uuid uuid1 = { .u.raw = { 0x1 } };
	int rc;

	memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
	ut_init_trid(&path1.trid);
	ut_init_trid2(&path2.trid);

	/* Allow exactly one retry for the whole test. */
	g_opts.bdev_retry_count = 1;

	set_thread(0);

	g_ut_attach_ctrlr_status = 0;
	g_ut_attach_bdev_count = 1;

	ctrlr1 = ut_attach_ctrlr(&path1.trid, 1, true, true);
	SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL);

	ctrlr1->ns[0].uuid = &uuid1;

	rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE,
			      attach_ctrlr_done, NULL, NULL, NULL, true);
	CU_ASSERT(rc == 0);

	spdk_delay_us(1000);
	poll_threads();

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0");
	SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);

	nvme_ctrlr1 = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path1.trid);
	CU_ASSERT(nvme_ctrlr1 != NULL);

	bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
	CU_ASSERT(bdev != NULL);

	admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL);
	admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES;

	ch = spdk_get_io_channel(bdev);
	SPDK_CU_ASSERT_FATAL(ch != NULL);

	admin_io->internal.ch = (struct spdk_bdev_channel *)ch;

	/* Admin passthrough got a path error, but it should not retry if DNR is set. */
	admin_io->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, admin_io);

	CU_ASSERT(ctrlr1->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);

	req = ut_get_outstanding_nvme_request(&ctrlr1->adminq, admin_io->driver_ctx);
	SPDK_CU_ASSERT_FATAL(req != NULL);

	/* Inject a path error with Do Not Retry set. */
	req->cpl.status.sc = SPDK_NVME_SC_INTERNAL_PATH_ERROR;
	req->cpl.status.sct = SPDK_NVME_SCT_PATH;
	req->cpl.status.dnr = 1;

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);

	/* DNR set: command completes immediately with an NVMe error. */
	CU_ASSERT(ctrlr1->adminq.num_outstanding_reqs == 0);
	CU_ASSERT(admin_io->internal.in_submit_request == false);
	CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR);

	/* Admin passthrough got a path error, but it should succeed after retry. */
	admin_io->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, admin_io);

	CU_ASSERT(ctrlr1->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);

	req = ut_get_outstanding_nvme_request(&ctrlr1->adminq, admin_io->driver_ctx);
	SPDK_CU_ASSERT_FATAL(req != NULL);

	/* Path error without DNR: eligible for retry. */
	req->cpl.status.sc = SPDK_NVME_SC_INTERNAL_PATH_ERROR;
	req->cpl.status.sct = SPDK_NVME_SCT_PATH;

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);

	/* The retry has been resubmitted and is outstanding again. */
	CU_ASSERT(ctrlr1->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr1->adminq.num_outstanding_reqs == 0);
	CU_ASSERT(admin_io->internal.in_submit_request == false);
	CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	/* Add ctrlr2 dynamically, and create a multipath configuration. */
	ctrlr2 = ut_attach_ctrlr(&path2.trid, 1, true, true);
	SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL);

	ctrlr2->ns[0].uuid = &uuid1;

	rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE,
			      attach_ctrlr_done, NULL, NULL, NULL, true);
	CU_ASSERT(rc == 0);

	spdk_delay_us(1000);
	poll_threads();

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	nvme_ctrlr2 = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path2.trid);
	CU_ASSERT(nvme_ctrlr2 != NULL);

	/* Admin passthrough was submitted to ctrlr1, but ctrlr1 was failed.
	 * Hence the admin passthrough was aborted. But ctrlr2 is available.
	 * So after a retry, the admin passthrough is submitted to ctrlr2 and
	 * should succeed.
	 */
	admin_io->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, admin_io);

	CU_ASSERT(ctrlr1->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(ctrlr2->adminq.num_outstanding_reqs == 0);
	CU_ASSERT(admin_io->internal.in_submit_request == true);

	req = ut_get_outstanding_nvme_request(&ctrlr1->adminq, admin_io->driver_ctx);
	SPDK_CU_ASSERT_FATAL(req != NULL);

	/* Abort on ctrlr1 while marking ctrlr1 failed to force failover. */
	req->cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
	req->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	ctrlr1->is_failed = true;

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);

	/* The retry must land on ctrlr2's admin queue. */
	CU_ASSERT(ctrlr1->adminq.num_outstanding_reqs == 0);
	CU_ASSERT(ctrlr2->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr2->adminq.num_outstanding_reqs == 0);
	CU_ASSERT(admin_io->internal.in_submit_request == false);
	CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	free(admin_io);

	spdk_put_io_channel(ch);

	poll_threads();

	rc = bdev_nvme_delete("nvme0", &g_any_path);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);

	/* Restore the global default for subsequent tests. */
	g_opts.bdev_retry_count = 0;
}

/* Unit test: the retry budget for admin passthrough commands.
 * Verifies that a command aborted by request is never retried, and
 * that a command whose retry_count has already reached
 * g_opts.bdev_retry_count fails instead of being retried again.
 */
static void
test_retry_admin_passthru_by_count(void)
{
	struct nvme_path_id path = {};
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *nvme_ctrlr;
	const int STRING_SIZE = 32;
	const char *attached_names[STRING_SIZE];
	struct nvme_bdev *bdev;
	struct spdk_bdev_io *admin_io;
	struct nvme_bdev_io *admin_bio;
	struct spdk_io_channel *ch;
	struct ut_nvme_req *req;
	int rc;

	memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
	ut_init_trid(&path.trid);

	set_thread(0);

	ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false);
	SPDK_CU_ASSERT_FATAL(ctrlr != NULL);

	g_ut_attach_ctrlr_status = 0;
	g_ut_attach_bdev_count = 1;

	rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE,
			      attach_ctrlr_done, NULL, NULL, NULL, false);
	CU_ASSERT(rc == 0);

	spdk_delay_us(1000);
	poll_threads();

	nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0");
	SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);

	nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid);
	CU_ASSERT(nvme_ctrlr != NULL);

	bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
	CU_ASSERT(bdev != NULL);

	admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL);
	admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES;

	/* Keep a handle to the driver context to inspect/seed retry_count. */
	admin_bio = (struct nvme_bdev_io *)admin_io->driver_ctx;

	ch = spdk_get_io_channel(bdev);
	SPDK_CU_ASSERT_FATAL(ch != NULL);

	admin_io->internal.ch = (struct spdk_bdev_channel *)ch;

	/* If admin passthrough is aborted by request, it should not be retried. */
	g_opts.bdev_retry_count = 1;

	admin_io->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, admin_io);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);

	req = ut_get_outstanding_nvme_request(&ctrlr->adminq, admin_bio);
	SPDK_CU_ASSERT_FATAL(req != NULL);

	req->cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
	req->cpl.status.sct = SPDK_NVME_SCT_GENERIC;

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);

	/* Aborted-by-request completes as ABORTED without a retry. */
	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0);
	CU_ASSERT(admin_io->internal.in_submit_request == false);
	CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED);

	/* If bio->retry_count is not less than g_opts.bdev_retry_count,
	 * the failed admin passthrough should not be retried.
	 */
	g_opts.bdev_retry_count = 4;

	admin_io->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, admin_io);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);

	req = ut_get_outstanding_nvme_request(&ctrlr->adminq, admin_bio);
	SPDK_CU_ASSERT_FATAL(req != NULL);

	req->cpl.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
	req->cpl.status.sct = SPDK_NVME_SCT_GENERIC;
	/* Pre-load the retry counter at the budget so the next failure
	 * exhausts it immediately.
	 */
	admin_bio->retry_count = 4;

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 2);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0);
	CU_ASSERT(admin_io->internal.in_submit_request == false);
	CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR);

	free(admin_io);

	spdk_put_io_channel(ch);

	poll_threads();

	rc = bdev_nvme_delete("nvme0", &g_any_path);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);

	/* Restore the global default for subsequent tests. */
	g_opts.bdev_retry_count = 0;
}

static void
test_check_multipath_params(void)
{
@@ -5358,130 +5059,6 @@ test_retry_io_if_ctrlr_is_resetting(void)
	CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);
}

/* Unit test: an admin passthrough command submitted while the
 * controller is resetting is queued on the channel's retry_io_list
 * and resubmitted once the reset completes.
 *
 * Fix: the first success check used assignment (=) instead of
 * comparison (==) inside CU_ASSERT, which made the assertion a no-op
 * that also overwrote admin_io->internal.status.
 */
static void
test_retry_admin_passthru_if_ctrlr_is_resetting(void)
{
	struct nvme_path_id path = {};
	struct nvme_ctrlr_opts opts = {};
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *nvme_ctrlr;
	const int STRING_SIZE = 32;
	const char *attached_names[STRING_SIZE];
	struct nvme_bdev *bdev;
	struct spdk_bdev_io *admin_io;
	struct spdk_io_channel *ch;
	struct nvme_bdev_channel *nbdev_ch;
	int rc;

	memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
	ut_init_trid(&path.trid);

	g_opts.bdev_retry_count = 1;

	set_thread(0);

	ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false);
	SPDK_CU_ASSERT_FATAL(ctrlr != NULL);

	g_ut_attach_ctrlr_status = 0;
	g_ut_attach_bdev_count = 1;

	/* Never give up on the ctrlr; reconnect attempts every second. */
	opts.ctrlr_loss_timeout_sec = -1;
	opts.reconnect_delay_sec = 1;

	rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE,
			      attach_ctrlr_done, NULL, NULL, &opts, false);
	CU_ASSERT(rc == 0);

	spdk_delay_us(1000);
	poll_threads();

	nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0");
	SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);

	nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid);
	CU_ASSERT(nvme_ctrlr != NULL);

	bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
	CU_ASSERT(bdev != NULL);

	admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL);
	admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES;

	ch = spdk_get_io_channel(bdev);
	SPDK_CU_ASSERT_FATAL(ch != NULL);

	nbdev_ch = spdk_io_channel_get_ctx(ch);

	admin_io->internal.ch = (struct spdk_bdev_channel *)ch;

	/* If ctrlr is available, admin passthrough should succeed. */
	admin_io->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, admin_io);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(admin_io->internal.in_submit_request == false);
	/* Was "=", which always passed and clobbered the status field. */
	CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	/* If ctrlr is resetting, admin passthrough request should be queued
	 * if it is submitted while resetting ctrlr.
	 */
	bdev_nvme_reset(nvme_ctrlr);

	poll_thread_times(0, 1);

	admin_io->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, admin_io);

	/* The request must be parked at the head of the retry list. */
	CU_ASSERT(admin_io->internal.in_submit_request == true);
	CU_ASSERT(admin_io == TAILQ_FIRST(&nbdev_ch->retry_io_list))

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);

	/* Advance past the retry delay so the queued request is resubmitted. */
	spdk_delay_us(1000000 - g_opts.nvme_adminq_poll_period_us);
	poll_thread_times(0, 1);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);
	CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list));

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0);
	CU_ASSERT(admin_io->internal.in_submit_request == false);
	CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	free(admin_io);

	spdk_put_io_channel(ch);

	poll_threads();

	rc = bdev_nvme_delete("nvme0", &g_any_path);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);

	/* Restore the global default for subsequent tests. */
	g_opts.bdev_retry_count = 0;
}

static void
test_reconnect_ctrlr(void)
{
@@ -6377,11 +5954,8 @@ main(int argc, const char **argv)
	CU_ADD_TEST(suite, test_retry_io_count);
	CU_ADD_TEST(suite, test_concurrent_read_ana_log_page);
	CU_ADD_TEST(suite, test_retry_io_for_ana_error);
	CU_ADD_TEST(suite, test_retry_admin_passthru_for_path_error);
	CU_ADD_TEST(suite, test_retry_admin_passthru_by_count);
	CU_ADD_TEST(suite, test_check_multipath_params);
	CU_ADD_TEST(suite, test_retry_io_if_ctrlr_is_resetting);
	CU_ADD_TEST(suite, test_retry_admin_passthru_if_ctrlr_is_resetting);
	CU_ADD_TEST(suite, test_reconnect_ctrlr);
	CU_ADD_TEST(suite, test_retry_failover_ctrlr);
	CU_ADD_TEST(suite, test_fail_path);