Commit 20abbe8a authored by Daniel Verkamp's avatar Daniel Verkamp
Browse files

nvme: perform resets in parallel during attach



When multiple NVMe controllers are being initialized during
spdk_nvme_probe(), we can overlap the hardware resets of all controllers
to improve startup time.

Rewrite the initialization sequence as a polling function,
nvme_ctrlr_process_init(), that maintains a per-controller state machine
to determine which initialization step is underway.  Each step also has
a timeout to ensure the process will terminate if the hardware is hung.

Currently, only the hardware reset (toggling of CC.EN and waiting for
CSTS.RDY) is done in parallel; the rest of initialization is done
sequentially in nvme_ctrlr_start() as before.  These steps could also be
parallelized in a similar framework if measurements indicate that they
take a significant amount of time.

Change-Id: I02ce5863f1b5c13ad65ccd8be571085528d98bd5
Signed-off-by: default avatarDaniel Verkamp <daniel.verkamp@intel.com>
parent a62b194f
Loading
Loading
Loading
Loading
+39 −29
Original line number Diff line number Diff line
@@ -292,7 +292,7 @@ spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb a
{
	int rc, start_rc;
	struct nvme_enum_ctx enum_ctx;
	struct spdk_nvme_ctrlr *ctrlr;
	struct spdk_nvme_ctrlr *ctrlr, *ctrlr_tmp;

	nvme_mutex_lock(&g_nvme_driver.lock);

@@ -305,25 +305,36 @@ spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb a
	 *  but maintain the value of rc to signal errors when we return.
	 */

	/* TODO: This could be reworked to start all the controllers in parallel. */
	/* Initialize all new controllers in the init_ctrlrs list in parallel. */
	while (!TAILQ_EMPTY(&g_nvme_driver.init_ctrlrs)) {
		/* Remove ctrlr from init_ctrlrs and attempt to start it */
		ctrlr = TAILQ_FIRST(&g_nvme_driver.init_ctrlrs);
		TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);

		/*
		 * Drop the driver lock while calling nvme_ctrlr_start() since it needs to acquire
		 *  the driver lock internally.
		TAILQ_FOREACH_SAFE(ctrlr, &g_nvme_driver.init_ctrlrs, tailq, ctrlr_tmp) {
			/* Drop the driver lock while calling nvme_ctrlr_process_init()
			 *  since it needs to acquire the driver lock internally when calling
			 *  nvme_ctrlr_start().
			 *
			 * TODO: Rethink the locking - maybe reset should take the lock so that start() and
			 *  the functions it calls (in particular nvme_ctrlr_set_num_qpairs())
			 *  can assume it is held.
			 */
			nvme_mutex_unlock(&g_nvme_driver.lock);
		start_rc = nvme_ctrlr_start(ctrlr);
			start_rc = nvme_ctrlr_process_init(ctrlr);
			nvme_mutex_lock(&g_nvme_driver.lock);

		if (start_rc == 0) {
			if (start_rc) {
				/* Controller failed to initialize. */
				TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
				nvme_ctrlr_destruct(ctrlr);
				nvme_free(ctrlr);
				rc = -1;
				break;
			}

			if (ctrlr->state == NVME_CTRLR_STATE_READY) {
				/*
				 * Controller has been initialized.
				 *  Move it to the attached_ctrlrs list.
				 */
				TAILQ_REMOVE(&g_nvme_driver.init_ctrlrs, ctrlr, tailq);
				TAILQ_INSERT_TAIL(&g_nvme_driver.attached_ctrlrs, ctrlr, tailq);

				/*
@@ -333,10 +344,9 @@ spdk_nvme_probe(void *cb_ctx, spdk_nvme_probe_cb probe_cb, spdk_nvme_attach_cb a
				nvme_mutex_unlock(&g_nvme_driver.lock);
				attach_cb(cb_ctx, ctrlr->devhandle, ctrlr);
				nvme_mutex_lock(&g_nvme_driver.lock);
		} else {
			nvme_ctrlr_destruct(ctrlr);
			nvme_free(ctrlr);
			rc = -1;

				break;
			}
		}
	}

+127 −39
Original line number Diff line number Diff line
@@ -326,18 +326,13 @@ static int
nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
{
	union spdk_nvme_cc_register	cc;
	union spdk_nvme_csts_register	csts;
	union spdk_nvme_aqa_register	aqa;

	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);

	if (cc.bits.en == 1) {
		if (csts.bits.rdy == 1) {
			return 0;
		} else {
			return nvme_ctrlr_wait_for_ready(ctrlr, 1);
		}
	if (cc.bits.en != 0) {
		nvme_printf(ctrlr, "%s called with CC.EN = 1\n", __func__);
		return EINVAL;
	}

	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
@@ -361,41 +356,26 @@ nvme_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)

	nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);

	return nvme_ctrlr_wait_for_ready(ctrlr, 1);
	return 0;
}

static int
nvme_ctrlr_hw_reset(struct spdk_nvme_ctrlr *ctrlr)
static void
nvme_ctrlr_set_state(struct spdk_nvme_ctrlr *ctrlr, enum nvme_ctrlr_state state,
		     uint64_t timeout_in_ms)
{
	uint32_t i;
	int rc;
	union spdk_nvme_cc_register cc;

	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
	if (cc.bits.en) {
		nvme_qpair_disable(&ctrlr->adminq);
		for (i = 0; i < ctrlr->num_io_queues; i++) {
			nvme_qpair_disable(&ctrlr->ioq[i]);
		}
	ctrlr->state = state;
	if (timeout_in_ms == NVME_TIMEOUT_INFINITE) {
		ctrlr->state_timeout_tsc = NVME_TIMEOUT_INFINITE;
	} else {
		/*
		 * The controller was already disabled. We will assume that nothing
		 *  has been changed since cc.en was set to 0,
		 *  meaning that we don't need to do an extra reset, and we can just
		 *  re-enable the controller.
		 */
		ctrlr->state_timeout_tsc = nvme_get_tsc() + (timeout_in_ms * nvme_get_tsc_hz()) / 1000;
	}

	nvme_ctrlr_disable(ctrlr);
	rc = nvme_ctrlr_enable(ctrlr);

	return rc;
}

int
spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;
	int rc = 0;
	uint32_t i;

	nvme_mutex_lock(&ctrlr->ctrlr_lock);

@@ -412,10 +392,23 @@ spdk_nvme_ctrlr_reset(struct spdk_nvme_ctrlr *ctrlr)
	ctrlr->is_resetting = true;

	nvme_printf(ctrlr, "resetting controller\n");
	/* nvme_ctrlr_start() issues a reset as its first step */
	rc = nvme_ctrlr_start(ctrlr);
	if (rc) {

	/* Disable all queues before disabling the controller hardware. */
	nvme_qpair_disable(&ctrlr->adminq);
	for (i = 0; i < ctrlr->num_io_queues; i++) {
		nvme_qpair_disable(&ctrlr->ioq[i]);
	}

	/* Set the state back to INIT to cause a full hardware reset. */
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);

	while (ctrlr->state != NVME_CTRLR_STATE_READY) {
		if (nvme_ctrlr_process_init(ctrlr) != 0) {
			nvme_printf(ctrlr, "%s: controller reinitialization failed\n", __func__);
			nvme_ctrlr_fail(ctrlr);
			rc = -1;
			break;
		}
	}

	ctrlr->is_resetting = false;
@@ -699,13 +692,107 @@ nvme_ctrlr_configure_aer(struct spdk_nvme_ctrlr *ctrlr)
	return 0;
}

/**
 * This function will be called repeatedly during initialization until the controller is ready.
 */
int
nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
{
	if (nvme_ctrlr_hw_reset(ctrlr) != 0) {
	union spdk_nvme_cc_register cc;
	union spdk_nvme_csts_register csts;
	union spdk_nvme_cap_lo_register cap_lo;
	uint32_t ready_timeout_in_ms;
	int rc;

	cc.raw = nvme_mmio_read_4(ctrlr, cc.raw);
	csts.raw = nvme_mmio_read_4(ctrlr, csts);
	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo.raw);

	ready_timeout_in_ms = 500 * cap_lo.bits.to;

	/*
	 * Check if the current initialization step is done or has timed out.
	 */
	switch (ctrlr->state) {
	case NVME_CTRLR_STATE_INIT:
		/* Begin the hardware initialization by making sure the controller is disabled. */
		if (cc.bits.en) {
			/*
			 * Controller is currently enabled. We need to disable it to cause a reset.
			 *
			 * If CC.EN = 1 && CSTS.RDY = 0, the controller is in the process of becoming ready.
			 *  Wait for the ready bit to be 1 before disabling the controller.
			 */
			if (csts.bits.rdy == 0) {
				nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
				return 0;
			}

			/* CC.EN = 1 && CSTS.RDY == 1, so we can immediately disable the controller. */
			cc.bits.en = 0;
			nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
			return 0;
		} else {
			/*
			 * Controller is currently disabled. We can jump straight to enabling it.
			 */
			nvme_ctrlr_enable(ctrlr);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
			return 0;
		}
		break;

	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1:
		if (csts.bits.rdy == 1) {
			/* CC.EN = 1 && CSTS.RDY = 1, so we can set CC.EN = 0 now. */
			cc.bits.en = 0;
			nvme_mmio_write_4(ctrlr, cc.raw, cc.raw);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0, ready_timeout_in_ms);
			return 0;
		}
		break;

	case NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0:
		if (csts.bits.rdy == 0) {
			/* CC.EN = 0 && CSTS.RDY = 0, so we can enable the controller now. */
			nvme_ctrlr_enable(ctrlr);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1, ready_timeout_in_ms);
			return 0;
		}
		break;

	case NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1:
		if (csts.bits.rdy == 1) {
			/*
			 * The controller has been enabled.
			 *  Perform the rest of initialization in nvme_ctrlr_start() serially.
			 */
			rc = nvme_ctrlr_start(ctrlr);
			nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_READY, NVME_TIMEOUT_INFINITE);
			return rc;
		}
		break;

	default:
		nvme_assert(0, ("unhandled ctrlr state %d\n", ctrlr->state));
		nvme_ctrlr_fail(ctrlr);
		return -1;
	}

	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
	    nvme_get_tsc() > ctrlr->state_timeout_tsc) {
		nvme_printf(ctrlr, "Initialization timed out in state %d\n", ctrlr->state);
		nvme_ctrlr_fail(ctrlr);
		return -1;
	}

	return 0;
}

int
nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_qpair_reset(&ctrlr->adminq);

	nvme_qpair_enable(&ctrlr->adminq);
@@ -771,6 +858,7 @@ nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr, void *devhandle)
	int				status;
	int				rc;

	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
	ctrlr->devhandle = devhandle;

	status = nvme_ctrlr_allocate_bars(ctrlr);
+36 −0
Original line number Diff line number Diff line
@@ -287,6 +287,38 @@ struct spdk_nvme_ns {
	uint16_t			flags;
};

/**
 * State of struct spdk_nvme_ctrlr (in particular, during initialization).
 */
enum nvme_ctrlr_state {
	/**
	 * Controller has not been initialized yet.
	 */
	NVME_CTRLR_STATE_INIT,

	/**
	 * Waiting for CSTS.RDY to transition from 0 to 1 so that CC.EN may be set to 0.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_1,

	/**
	 * Waiting for CSTS.RDY to transition from 1 to 0 so that CC.EN may be set to 1.
	 */
	NVME_CTRLR_STATE_DISABLE_WAIT_FOR_READY_0,

	/**
	 * Waiting for CSTS.RDY to transition from 0 to 1 after enabling the controller.
	 */
	NVME_CTRLR_STATE_ENABLE_WAIT_FOR_READY_1,

	/**
	 * Controller initialization has completed and the controller is ready.
	 */
	NVME_CTRLR_STATE_READY
};

#define NVME_TIMEOUT_INFINITE	UINT64_MAX

/*
 * One of these per allocated PCI device.
 */
@@ -310,6 +342,9 @@ struct spdk_nvme_ctrlr {

	/* Cold data (not accessed in normal I/O path) is after this point. */

	enum nvme_ctrlr_state		state;
	uint64_t			state_timeout_tsc;

	TAILQ_ENTRY(spdk_nvme_ctrlr)	tailq;

	/** All the log pages supported */
@@ -433,6 +468,7 @@ void nvme_completion_poll_cb(void *arg, const struct spdk_nvme_cpl *cpl);

int	nvme_ctrlr_construct(struct spdk_nvme_ctrlr *ctrlr, void *devhandle);
void	nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr);
int	nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr);
int	nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr);

void	nvme_ctrlr_submit_admin_request(struct spdk_nvme_ctrlr *ctrlr,
+6 −0
Original line number Diff line number Diff line
@@ -57,6 +57,12 @@ nvme_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
{
}

int
nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
{
	return 0;
}

int
nvme_ctrlr_start(struct spdk_nvme_ctrlr *ctrlr)
{
+2 −0
Original line number Diff line number Diff line
@@ -47,6 +47,8 @@ static uint16_t g_pci_subdevice_id;

char outbuf[OUTBUF_SIZE];

uint64_t g_ut_tsc = 0;

__thread int    nvme_thread_ioq_index = -1;

uint16_t
Loading