Commit 3c2190c2 authored by Jim Harris, committed by Tomasz Zawadzki
Browse files

nvme: capture ticks for timeout before checking state



In some extreme use cases, an SPDK process could get
swapped out for a long period of time just after
we checked the state but before we called spdk_get_ticks().
So now we will only time out if the timer expired before
we checked the state *and* the state did not advance.

It's possible we could just move the timeout check
to before the ctrlr->state switch, but I was
hesitant to change the flow for this case.

Fixes issue #1720.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I95b1db3365b5d2d8a65e528f53c302a724d44460

Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5596


Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
parent 0dc567eb
Loading
Loading
Loading
Loading
+13 −4
Original line number Diff line number Diff line
@@ -2936,15 +2936,18 @@ nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
	union spdk_nvme_cc_register cc;
	union spdk_nvme_csts_register csts;
	uint32_t ready_timeout_in_ms;
	uint64_t ticks;
	int rc = 0;

	ticks = spdk_get_ticks();

	/*
	 * May need to avoid accessing any register on the target controller
	 * for a while. Return early without touching the FSM.
	 * Check sleep_timeout_tsc > 0 for unit test.
	 */
	if ((ctrlr->sleep_timeout_tsc > 0) &&
	    (spdk_get_ticks() <= ctrlr->sleep_timeout_tsc)) {
	    (ticks <= ctrlr->sleep_timeout_tsc)) {
		return 0;
	}
	ctrlr->sleep_timeout_tsc = 0;
@@ -2980,7 +2983,7 @@ nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
			 * - directly after a VFIO reset.
			 */
			SPDK_DEBUGLOG(nvme, "Adding 2 second delay before initializing the controller\n");
			ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2000 * spdk_get_ticks_hz() / 1000);
			ctrlr->sleep_timeout_tsc = ticks + (2000 * spdk_get_ticks_hz() / 1000);
		}
		break;

@@ -3015,7 +3018,7 @@ nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
			 */
			if (ctrlr->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) {
				SPDK_DEBUGLOG(nvme, "Applying quirk: delay 2.5 seconds before reading registers\n");
				ctrlr->sleep_timeout_tsc = spdk_get_ticks() + (2500 * spdk_get_ticks_hz() / 1000);
				ctrlr->sleep_timeout_tsc = ticks + (2500 * spdk_get_ticks_hz() / 1000);
			}
			return 0;
		} else {
@@ -3177,8 +3180,14 @@ nvme_ctrlr_process_init(struct spdk_nvme_ctrlr *ctrlr)
	}

init_timeout:
	/* Note: we use the ticks captured when we entered this function.
	 * This covers environments where the SPDK process gets swapped out after
	 * we tried to advance the state but before we check the timeout here.
	 * It is not normal for this to happen, but harmless to handle it in this
	 * way.
	 */
	if (ctrlr->state_timeout_tsc != NVME_TIMEOUT_INFINITE &&
	    spdk_get_ticks() > ctrlr->state_timeout_tsc) {
	    ticks > ctrlr->state_timeout_tsc) {
		SPDK_ERRLOG("Initialization timed out in state %d\n", ctrlr->state);
		return -1;
	}