Commit f3fec96c authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Tomasz Zawadzki
Browse files

bdev/nvme: Protect ANA log page from concurrent reads by using an new flag



If an I/O failed by ANA error, the corresponding ANA state might be
out of date. In the following patches, for this case, read the latest
ANA log page and update the ANA state. Such reading ANA log page may be
done on multiple threads concurrently including AER ANA change.
Hence protect ANA log page by adding an new flag ana_log_page_updating
to struct nvme_ctrlr and using it.

Signed-off-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: I8bb84091d50a5fdc0d9893b585be972dfd31c0f1
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9526


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
parent 1e79d219
Loading
Loading
Loading
Loading
+26 −19
Original line number Diff line number Diff line
@@ -532,7 +532,7 @@ nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
	nvme_ctrlr->ref--;

	if (nvme_ctrlr->ref > 0 || !nvme_ctrlr->destruct ||
	    nvme_ctrlr->resetting) {
	    nvme_ctrlr->resetting || nvme_ctrlr->ana_log_page_updating) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return;
	}
@@ -1149,7 +1149,8 @@ _bdev_nvme_reset_complete(struct spdk_io_channel_iter *i, int status)

	path_id->is_failed = !success;

	if (nvme_ctrlr->ref == 0 && nvme_ctrlr->destruct) {
	if (nvme_ctrlr->ref == 0 && nvme_ctrlr->destruct &&
	    !nvme_ctrlr->ana_log_page_updating) {
		/* Complete pending destruct after reset completes. */
		complete_pending_destruct = true;
	}
@@ -2716,19 +2717,6 @@ nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
	}
}

static bool
nvme_ctrlr_acquire(struct nvme_ctrlr *nvme_ctrlr)
{
	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (nvme_ctrlr->destruct || nvme_ctrlr->resetting) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return false;
	}
	nvme_ctrlr->ref++;
	pthread_mutex_unlock(&nvme_ctrlr->mutex);
	return true;
}

static int
nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
			  void *cb_arg)
@@ -2763,12 +2751,25 @@ nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_ctrlr *nvme_ctrlr = ctx;

	if (spdk_nvme_cpl_is_success(cpl)) {
	if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
		bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
					     nvme_ctrlr);
	}

	nvme_ctrlr_release(nvme_ctrlr);
	pthread_mutex_lock(&nvme_ctrlr->mutex);

	assert(nvme_ctrlr->ana_log_page_updating == true);
	nvme_ctrlr->ana_log_page_updating = false;

	if (nvme_ctrlr->ref > 0 || !nvme_ctrlr->destruct ||
	    nvme_ctrlr->resetting) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return;
	}

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	nvme_ctrlr_unregister(nvme_ctrlr);
}

static void
@@ -2780,10 +2781,16 @@ nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
		return;
	}

	if (!nvme_ctrlr_acquire(nvme_ctrlr)) {
	pthread_mutex_lock(&nvme_ctrlr->mutex);
	if (nvme_ctrlr->destruct || nvme_ctrlr->resetting ||
	    nvme_ctrlr->ana_log_page_updating) {
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return;
	}

	nvme_ctrlr->ana_log_page_updating = true;
	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
					      SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
					      SPDK_NVME_GLOBAL_NS_TAG,
@@ -2792,7 +2799,7 @@ nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
					      nvme_ctrlr_read_ana_log_page_done,
					      nvme_ctrlr);
	if (rc != 0) {
		nvme_ctrlr_release(nvme_ctrlr);
		nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
	}
}

+1 −0
Original line number Diff line number Diff line
@@ -104,6 +104,7 @@ struct nvme_ctrlr {
	uint32_t				resetting : 1;
	uint32_t				failover_in_progress : 1;
	uint32_t				destruct : 1;
	uint32_t				ana_log_page_updating : 1;
	/**
	 * PI check flags. This flags is set to NVMe controllers created only
	 * through bdev_nvme_attach_controller RPC or .INI config file. Hot added
+85 −0
Original line number Diff line number Diff line
@@ -4392,6 +4392,90 @@ test_retry_io_count(void)
	g_opts.bdev_retry_count = 0;
}

static void
test_concurrent_read_ana_log_page(void)
{
	struct spdk_nvme_transport_id trid = {};
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_ctrlr *nvme_ctrlr;
	const int STRING_SIZE = 32;
	const char *attached_names[STRING_SIZE];
	int rc;

	memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
	ut_init_trid(&trid);

	set_thread(0);

	ctrlr = ut_attach_ctrlr(&trid, 1, true, false);
	SPDK_CU_ASSERT_FATAL(ctrlr != NULL);

	ctrlr->ns[0].ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;

	g_ut_attach_ctrlr_status = 0;
	g_ut_attach_bdev_count = 1;

	rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
			      attach_ctrlr_done, NULL, NULL, false);
	CU_ASSERT(rc == 0);

	spdk_delay_us(1000);
	poll_threads();

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0");
	SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL);

	nvme_ctrlr_read_ana_log_page(nvme_ctrlr);

	CU_ASSERT(nvme_ctrlr->ana_log_page_updating == true);
	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);

	/* Following read request should be rejected. */
	nvme_ctrlr_read_ana_log_page(nvme_ctrlr);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);

	set_thread(1);

	nvme_ctrlr_read_ana_log_page(nvme_ctrlr);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);

	/* Reset request while reading ANA log page should not be rejected. */
	rc = bdev_nvme_reset(nvme_ctrlr);
	CU_ASSERT(rc == 0);

	poll_threads();

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(nvme_ctrlr->ana_log_page_updating == false);
	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0);

	/* Read ANA log page while resetting ctrlr should be rejected. */
	rc = bdev_nvme_reset(nvme_ctrlr);
	CU_ASSERT(rc == 0);

	nvme_ctrlr_read_ana_log_page(nvme_ctrlr);

	CU_ASSERT(nvme_ctrlr->ana_log_page_updating == false);

	set_thread(0);

	rc = bdev_nvme_delete("nvme0", &g_any_path);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL);
}

int
main(int argc, const char **argv)
{
@@ -4428,6 +4512,7 @@ main(int argc, const char **argv)
	CU_ADD_TEST(suite, test_retry_io_if_ctrlr_is_resetting);
	CU_ADD_TEST(suite, test_retry_io_for_io_path_error);
	CU_ADD_TEST(suite, test_retry_io_count);
	CU_ADD_TEST(suite, test_concurrent_read_ana_log_page);

	CU_basic_set_mode(CU_BRM_VERBOSE);