Commit ddf86600 authored by Jim Harris's avatar Jim Harris Committed by Tomasz Zawadzki
Browse files

nvme: continue probing ctrlrs even if one fails



It is possible that a single probe_ctx could be used
to probe multiple newly attached nvme controllers.  If
one of those controllers is removed during this process,
the rest of the controllers do not get probed and can
even get stuck in a zombie state.

It is better to just continue with probing the rest of
the controllers.

Fixes issue #1611.

Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Change-Id: I4156ee8b50e8d52cfeee7224f210a58bb773e939

Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4945


Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarMichael Haeuptle <michaelhaeuptle@gmail.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarVasuki Manikarnike <vasuki.manikarnike@hpe.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
parent 44090079
Loading
Loading
Loading
Loading
+9 −3
Original line number Diff line number Diff line
@@ -773,9 +773,16 @@ struct spdk_nvme_probe_ctx *spdk_nvme_probe_async(const struct spdk_nvme_transpo
		spdk_nvme_remove_cb remove_cb);

/**
 * Start controllers in the context list.
 * Proceed with attaching contollers associated with the probe context.
 *
 * Users may call the function util it returns True.
 * The probe context is one returned from a previous call to
 * spdk_nvme_probe_async().  Users must call this function on the
 * probe context until it returns 0.
 *
 * If any controllers fail to attach, there is no explicit notification.
 * Users can detect attachment failure by comparing attach_cb invocations
 * with the number of times where the user returned true for the
 * probe_cb.
 *
 * \param probe_ctx Context used to track probe actions.
 *
@@ -783,7 +790,6 @@ struct spdk_nvme_probe_ctx *spdk_nvme_probe_async(const struct spdk_nvme_transpo
 * is also freed and no longer valid.
 * \return -EAGAIN if there are still pending probe operations; user must call
 * spdk_nvme_probe_poll_async again to continue progress.
 * \return value other than 0 and -EAGAIN probe error with one controller.
 */
int spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx);

+7 −15
Original line number Diff line number Diff line
@@ -688,7 +688,7 @@ nvme_ctrlr_probe(const struct spdk_nvme_transport_id *trid,
	return 1;
}

static int
static void
nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
			 struct spdk_nvme_probe_ctx *probe_ctx)
{
@@ -704,11 +704,11 @@ nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,
		nvme_ctrlr_fail(ctrlr, false);
		nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
		nvme_ctrlr_destruct(ctrlr);
		return rc;
		return;
	}

	if (ctrlr->state != NVME_CTRLR_STATE_READY) {
		return 0;
		return;
	}

	STAILQ_INIT(&ctrlr->io_producers);
@@ -735,10 +735,7 @@ nvme_ctrlr_poll_internal(struct spdk_nvme_ctrlr *ctrlr,

	if (probe_ctx->attach_cb) {
		probe_ctx->attach_cb(probe_ctx->cb_ctx, &ctrlr->trid, ctrlr, &ctrlr->opts);
		return 0;
	}

	return 0;
}

static int
@@ -1545,7 +1542,6 @@ spdk_nvme_probe_async(const struct spdk_nvme_transport_id *trid,
int
spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
{
	int rc = 0;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	if (!spdk_process_is_primary() && probe_ctx->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
@@ -1554,19 +1550,15 @@ spdk_nvme_probe_poll_async(struct spdk_nvme_probe_ctx *probe_ctx)
	}

	TAILQ_FOREACH_SAFE(ctrlr, &probe_ctx->init_ctrlrs, tailq, ctrlr_tmp) {
		rc = nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
		if (rc != 0) {
			rc = -EIO;
			break;
		}
		nvme_ctrlr_poll_internal(ctrlr, probe_ctx);
	}

	if (rc != 0 || TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
	if (TAILQ_EMPTY(&probe_ctx->init_ctrlrs)) {
		nvme_robust_mutex_lock(&g_spdk_nvme_driver->lock);
		g_spdk_nvme_driver->initialized = true;
		nvme_robust_mutex_unlock(&g_spdk_nvme_driver->lock);
		free(probe_ctx);
		return rc;
		return 0;
	}

	return -EAGAIN;
+1 −1
Original line number Diff line number Diff line
@@ -382,7 +382,7 @@ test_nvme_init_controllers(void)
	probe_ctx->attach_cb = attach_cb;
	probe_ctx->trid.trtype = SPDK_NVME_TRANSPORT_PCIE;
	rc = nvme_init_controllers(probe_ctx);
	CU_ASSERT(rc != 0);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_spdk_nvme_driver->initialized == true);
	CU_ASSERT(ut_destruct_called == true);