Commit 13ca6e52 authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Tomasz Zawadzki
Browse files

bdev/nvme: Handle ANA transition (change or inaccessible state) correctly



Previously, if a namespace is in ANA inaccessible state, I/O had been
queued infinitely. Fix this issue according to the NVMe spec.

Add a temporary poller anatt_timer and a flag ana_transition_timedout for
each nvme_ns.

Start anatt_timer if the nvme_ns enters ANA transition. If anatt_timer
is expired, set ana_transition_timedout to true. Cancel anatt_timer or
clear ana_transition_timedout if the nvme_ns exits ANA transition.

nvme_io_path_become_available() returns false if ana_transition_timedout
is true.

Add unit test case to verify these addition.

Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: Ic76933242046b3e8e553de88221b943ad097c91c
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12194


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarMonica Kenguva <monica.kenguva@intel.com>
parent da2fc15f
Loading
Loading
Loading
Loading
+41 −0
Original line number Diff line number Diff line
@@ -875,6 +875,10 @@ any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
	struct nvme_io_path *io_path;

	STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
		if (io_path->nvme_ns->ana_transition_timedout) {
			continue;
		}

		if (nvme_io_path_is_connected(io_path) ||
		    !nvme_io_path_is_failed(io_path)) {
			return true;
@@ -2624,13 +2628,48 @@ bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
	return rc;
}

static int
nvme_ns_ana_transition_timedout(void *ctx)
{
	struct nvme_ns *nvme_ns = ctx;

	spdk_poller_unregister(&nvme_ns->anatt_timer);
	nvme_ns->ana_transition_timedout = true;

	return SPDK_POLLER_BUSY;
}

static void
_nvme_ns_set_ana_state(struct nvme_ns *nvme_ns,
		       const struct spdk_nvme_ana_group_descriptor *desc)
{
	const struct spdk_nvme_ctrlr_data *cdata;

	nvme_ns->ana_group_id = desc->ana_group_id;
	nvme_ns->ana_state = desc->ana_state;
	nvme_ns->ana_state_updating = false;

	switch (nvme_ns->ana_state) {
	case SPDK_NVME_ANA_OPTIMIZED_STATE:
	case SPDK_NVME_ANA_NON_OPTIMIZED_STATE:
		nvme_ns->ana_transition_timedout = false;
		spdk_poller_unregister(&nvme_ns->anatt_timer);
		break;

	case SPDK_NVME_ANA_INACCESSIBLE_STATE:
	case SPDK_NVME_ANA_CHANGE_STATE:
		if (nvme_ns->anatt_timer != NULL) {
			break;
		}

		cdata = spdk_nvme_ctrlr_get_data(nvme_ns->ctrlr->ctrlr);
		nvme_ns->anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
				       nvme_ns,
				       cdata->anatt * SPDK_SEC_TO_USEC);
		break;
	default:
		break;
	}
}

static int
@@ -3114,6 +3153,8 @@ nvme_ctrlr_depopulate_namespace(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *n
{
	struct nvme_bdev *bdev;

	spdk_poller_unregister(&nvme_ns->anatt_timer);

	bdev = nvme_ns->bdev;
	if (bdev != NULL) {
		pthread_mutex_lock(&bdev->mutex);
+2 −0
Original line number Diff line number Diff line
@@ -85,6 +85,8 @@ struct nvme_ns {
	uint32_t			ana_group_id;
	enum spdk_nvme_ana_state	ana_state;
	bool				ana_state_updating;
	bool				ana_transition_timedout;
	struct spdk_poller		*anatt_timer;
	struct nvme_async_probe_ctx	*probe_ctx;
	TAILQ_ENTRY(nvme_ns)		tailq;
	RB_ENTRY(nvme_ns)		node;
+58 −0
Original line number Diff line number Diff line
@@ -5930,6 +5930,63 @@ test_nvme_ns_cmp(void)
	CU_ASSERT(nvme_ns_cmp(&nvme_ns2, &nvme_ns1) > 0);
}

static void
test_ana_transition(void)
{
	struct spdk_nvme_ctrlr ctrlr = { .cdata.anatt = 10, };
	struct nvme_ctrlr nvme_ctrlr = { .ctrlr = &ctrlr, };
	struct nvme_ns nvme_ns = { .ctrlr = &nvme_ctrlr, };
	struct spdk_nvme_ana_group_descriptor desc = { .ana_group_id = 1, };

	/* case 1: ANA transition timedout is canceled. */
	nvme_ns.ana_state = SPDK_NVME_ANA_CHANGE_STATE;
	nvme_ns.ana_transition_timedout = true;

	desc.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;

	_nvme_ns_set_ana_state(&nvme_ns, &desc);

	CU_ASSERT(nvme_ns.ana_transition_timedout == false);
	CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE);

	/* case 2: ANATT timer is kept. */
	nvme_ns.ana_state = SPDK_NVME_ANA_CHANGE_STATE;
	nvme_ns.anatt_timer = SPDK_POLLER_REGISTER(nvme_ns_ana_transition_timedout,
			      &nvme_ns,
			      ctrlr.cdata.anatt * SPDK_SEC_TO_USEC);

	desc.ana_state = SPDK_NVME_ANA_INACCESSIBLE_STATE;

	_nvme_ns_set_ana_state(&nvme_ns, &desc);

	CU_ASSERT(nvme_ns.anatt_timer != NULL);
	CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE);

	/* case 3: ANATT timer is stopped. */
	desc.ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;

	_nvme_ns_set_ana_state(&nvme_ns, &desc);

	CU_ASSERT(nvme_ns.anatt_timer == NULL);
	CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE);

	/* ANATT timer is started. */
	desc.ana_state = SPDK_NVME_ANA_CHANGE_STATE;

	_nvme_ns_set_ana_state(&nvme_ns, &desc);

	CU_ASSERT(nvme_ns.anatt_timer != NULL);
	CU_ASSERT(nvme_ns.ana_state == SPDK_NVME_ANA_CHANGE_STATE);

	/* ANATT timer is expired. */
	spdk_delay_us(ctrlr.cdata.anatt * SPDK_SEC_TO_USEC);

	poll_threads();

	CU_ASSERT(nvme_ns.anatt_timer == NULL);
	CU_ASSERT(nvme_ns.ana_transition_timedout == true);
}

int
main(int argc, const char **argv)
{
@@ -5978,6 +6035,7 @@ main(int argc, const char **argv)
	CU_ADD_TEST(suite, test_retry_failover_ctrlr);
	CU_ADD_TEST(suite, test_fail_path);
	CU_ADD_TEST(suite, test_nvme_ns_cmp);
	CU_ADD_TEST(suite, test_ana_transition);

	CU_basic_set_mode(CU_BRM_VERBOSE);