Commit 6c35d974 authored by Nathan Claudel's avatar Nathan Claudel Committed by Jim Harris
Browse files

lib/nvme: destruct controllers that failed init asynchronously



The controller destroy sequence is as follows:
- Set `CC.SHN` to request shutdown
- Wait for `CSTS.SHST` to be set to `0b10` (Shutdown complete)
- Destroy the associated structs when it's done or after a timeout
To do this, two steps are required:
- First, call `nvme_ctrlr_destruct_async`
- Then, poll `nvme_ctrlr_destruct_poll_async`

However, when a controller fails to initialize on probe, this polling is
done synchronously using `nvme_ctrlr_destruct`, which introduces a 1 ms
sleep between each poll.

This is particularly harmful if a controller misbehaves and does not
set its `CSTS.SHST` in a timely manner, because it stalls the probe
thread with repeated sleeps. If hot-plug is enabled, the problem is
compounded because this operation is retried over and over.

Fix this by doing an asynchronous destruct when the controller fails to
initialize. Add contexts for this operation on the probe context and
poll for controllers destruction in the probe poller function.

Signed-off-by: default avatarNathan Claudel <nclaudel@kalrayinc.com>
Change-Id: Ic072a2b7c3351a229d3b6e5c667b71dca2a84b93
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/25414


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: default avatarVasuki Manikarnike <vasuki.manikarnike@hpe.com>
Community-CI: Community CI Samsung <spdk.community.ci.samsung@gmail.com>
Reviewed-by: default avatarJim Harris <jim.harris@nvidia.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarAnkit Kumar <ankit.kumar@samsung.com>
Reviewed-by: default avatarChangpeng Liu <changpeliu@tencent.com>
Community-CI: Mellanox Build Bot
parent 414f91a0
Loading
Loading
Loading
Loading
+31 −2
Original line number Diff line number Diff line
@@ -699,6 +699,7 @@ nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
			 struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;
	struct nvme_ctrlr_detach_ctx *detach_ctx;

	rc = nvme_ctrlr_process_init(ctrlr);

@@ -710,9 +711,19 @@ nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
		nvme_ctrlr_lock(ctrlr);
		nvme_ctrlr_fail(ctrlr, false);
		nvme_ctrlr_unlock(ctrlr);

		/* allocate a context to detach this controller asynchronously */
		detach_ctx = calloc(1, sizeof(*detach_ctx));
		if (detach_ctx == NULL) {
			SPDK_WARNLOG("Failed to allocate asynchronous detach context. Performing synchronous destruct.\n");
			nvme_ctrlr_destruct(ctrlr);
			return;
		}
		detach_ctx->ctrlr = ctrlr;
		TAILQ_INSERT_TAIL(&probe_ctx->failed_ctxs.head, detach_ctx, link);
		nvme_ctrlr_destruct_async(ctrlr, detach_ctx);
		return;
	}

	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return;
@@ -909,6 +920,7 @@ nvme_probe_ctx_init(struct spdk_nvme_probe_ctx *probe_ctx,
	}
	probe_ctx->remove_cb = remove_cb;
	TAILQ_INIT(&probe_ctx->init_ctrlrs);
	TAILQ_INIT(&probe_ctx->failed_ctxs.head);
}

int
@@ -1602,6 +1614,8 @@ int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;
	struct nvme_ctrlr_detach_ctx *detach_ctx, *detach_ctx_tmp;
	int rc;

	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		free(probe_ctx);
@@ -1612,7 +1626,22 @@ spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
		nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
	}

	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
	/* poll failed controllers destruction */
	TAILQ_FOREACH_SAFE(detach_ctx, &probe_ctx->failed_ctxs.head, link, detach_ctx_tmp) {
		rc = nvme_ctrlr_destruct_poll_async(detach_ctx->ctrlr, detach_ctx);
		if (rc == -EAGAIN) {
			continue;
		}

		if (rc != 0) {
			SPDK_ERRLOG("Failure while polling the controller destruction (rc = %d)\n", rc);
		}

		TAILQ_REMOVE(&probe_ctx->failed_ctxs.head, detach_ctx, link);
		free(detach_ctx);
	}

	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs) && TAILQ_EMPTY(&probe_ctx->failed_ctxs.head)) {
		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		g_spdk_nvme_driver->initialized = true;
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
+6 −4
Original line number Diff line number Diff line
@@ -1125,6 +1125,10 @@ struct spdk_nvme_ctrlr {
	uint32_t				auth_seqnum;
};

/*
 * Tracks a set of in-flight per-controller detach operations as a list of
 * nvme_ctrlr_detach_ctx entries. Also embedded in spdk_nvme_probe_ctx
 * (as `failed_ctxs`) to poll destruction of controllers that failed to
 * initialize during probe.
 */
struct spdk_nvme_detach_ctx {
	TAILQ_HEAD(, nvme_ctrlr_detach_ctx)	head;
};

struct spdk_nvme_probe_ctx {
	struct spdk_nvme_transport_id		trid;
	const struct spdk_nvme_ctrlr_opts	*opts;
@@ -1134,6 +1138,8 @@ struct spdk_nvme_probe_ctx {
	spdk_nvme_attach_fail_cb		attach_fail_cb;
	spdk_nvme_remove_cb			remove_cb;
	TAILQ_HEAD(, spdk_nvme_ctrlr)		init_ctrlrs;
	/* detach contexts allocated for controllers that failed to initialize */
	struct spdk_nvme_detach_ctx		failed_ctxs;
};

typedef void (*nvme_ctrlr_detach_cb)(struct spdk_nvme_ctrlr *ctrlr);
@@ -1156,10 +1162,6 @@ struct nvme_ctrlr_detach_ctx {
	TAILQ_ENTRY(nvme_ctrlr_detach_ctx)	link;
};

/*
 * List head of per-controller detach contexts (nvme_ctrlr_detach_ctx).
 * NOTE(review): this is the pre-move copy shown in the diff; the commit
 * relocates this definition above spdk_nvme_probe_ctx so the probe context
 * can embed it.
 */
struct spdk_nvme_detach_ctx {
	TAILQ_HEAD(, nvme_ctrlr_detach_ctx)	head;
};

struct nvme_driver {
	pthread_mutex_t			lock;

+3 −0
Original line number Diff line number Diff line
@@ -344,6 +344,7 @@ test_nvme_init_get_probe_ctx(void)
	probe_ctx = calloc(1, sizeof(*probe_ctx));
	SPDK_CU_ASSERT_FATAL(probe_ctx != NULL);
	TAILQ_INIT(&probe_ctx->init_ctrlrs);
	TAILQ_INIT(&probe_ctx->failed_ctxs.head);

	return probe_ctx;
}
@@ -850,6 +851,7 @@ test_nvme_ctrlr_probe(void)
	nvme_get_default_hostnqn(ctrlr.opts.hostnqn, sizeof(ctrlr.opts.hostnqn));

	TAILQ_INIT(&probe_ctx.init_ctrlrs);
	TAILQ_INIT(&probe_ctx.failed_ctxs.head);

	/* test when probe_cb returns false */

@@ -1463,6 +1465,7 @@ test_nvme_ctrlr_probe_internal(void)
	rc = nvme_probe_internal(probe_ctx, false);
	CU_ASSERT(rc < 0);
	CU_ASSERT(TAILQ_EMPTY(&probe_ctx->init_ctrlrs));
	CU_ASSERT(TAILQ_EMPTY(&probe_ctx->failed_ctxs.head));
	CU_ASSERT(ut_attach_fail_cb_rc == -EFAULT);

	free(probe_ctx);