Commit 455a5d78 authored by Monica Kenguva, committed by Tomasz Zawadzki

nvme/pcie: Create queue pairs asynchronously



The generic transport layer still does a busy wait, but at least
the logic in the PCIe transport now creates the queue pair
asynchronously.
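
In outline: connect now submits CREATE IO CQ and returns immediately, and
admin-queue completion callbacks drive a small per-qpair state machine
(WAIT_FOR_CQ -> WAIT_FOR_SQ -> READY, or FAILED on error) while the generic
layer polls. A condensed sketch of that flow, pieced together from the diff
below (error paths, locking, and shadow doorbell setup omitted):

	/* _nvme_pcie_ctrlr_create_io_qpair(): submit CREATE IO CQ, don't wait. */
	rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_create_cq_cb, qpair);
	pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_CQ;

	/* nvme_completion_create_cq_cb(): the CQ exists, submit CREATE IO SQ. */
	rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_create_sq_cb, qpair);
	pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_SQ;

	/* nvme_completion_create_sq_cb(): both queues exist, qpair is usable. */
	pqpair->pcie_state = NVME_PCIE_QPAIR_READY;
	nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);

	/* nvme_transport_ctrlr_connect_qpair(): the generic layer still busy
	 * waits, but now advances the state machine by polling. Polling a
	 * not-yet-ready I/O qpair forwards to the admin queue. */
	while (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING) {
		rc = spdk_nvme_qpair_process_completions(qpair, 0);
		if (rc < 0) {
			goto err;
		}
	}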

Signed-off-by: Monica Kenguva <monica.kenguva@intel.com>
Change-Id: I9669ccb81a90ee0a36d3f5512bc49c503923b293
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8910


Community-CI: Mellanox Build Bot
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
parent f93f3ee3
+104 −74
@@ -260,6 +260,7 @@ nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t nu
 
 	pqpair->num_entries = num_entries;
 	pqpair->flags.delay_cmd_submit = 0;
+	pqpair->pcie_state = NVME_PCIE_QPAIR_READY;
 
 	ctrlr->adminq = &pqpair->qpair;
 
@@ -437,21 +438,96 @@ nvme_pcie_ctrlr_cmd_delete_io_sq(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme
 	return nvme_ctrlr_submit_admin_request(ctrlr, req);
 }
 
-static int
-_nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
-				 uint16_t qid)
+static void
+nvme_completion_sq_error_delete_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
 {
+	struct spdk_nvme_qpair *qpair = arg;
+	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
+
+	if (spdk_nvme_cpl_is_error(cpl)) {
+		SPDK_ERRLOG("delete_io_cq failed!\n");
+	}
+
+	pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
+	nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
+}
+
+static void
+nvme_completion_create_sq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
+{
+	struct spdk_nvme_qpair *qpair = arg;
+	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);
+	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
 	struct nvme_pcie_ctrlr	*pctrlr = nvme_pcie_ctrlr(ctrlr);
+	int rc;
+
+	if (spdk_nvme_cpl_is_error(cpl)) {
+		SPDK_ERRLOG("nvme_create_io_sq failed, deleting cq!\n");
+		rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_sq_error_delete_cq_cb,
+						      qpair);
+		if (rc != 0) {
+			SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
+			pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
+			nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
+		}
+		return;
+	}
+	pqpair->pcie_state = NVME_PCIE_QPAIR_READY;
+	nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
+	if (ctrlr->shadow_doorbell) {
+		pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
+						  pctrlr->doorbell_stride_u32;
+		pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) *
+						  pctrlr->doorbell_stride_u32;
+		pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) *
+						      pctrlr->doorbell_stride_u32;
+		pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) *
+						      pctrlr->doorbell_stride_u32;
+		pqpair->flags.has_shadow_doorbell = 1;
+	} else {
+		pqpair->flags.has_shadow_doorbell = 0;
+	}
+	nvme_pcie_qpair_reset(qpair);
+
+}
+
+static void
+nvme_completion_create_cq_cb(void *arg, const struct spdk_nvme_cpl *cpl)
+{
+	struct spdk_nvme_qpair *qpair = arg;
 	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
-	struct nvme_completion_poll_status	*status;
 	int rc;
 
-	status = calloc(1, sizeof(*status));
-	if (!status) {
-		SPDK_ERRLOG("Failed to allocate status tracker\n");
-		return -ENOMEM;
+	if (spdk_nvme_cpl_is_error(cpl)) {
+		pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
+		nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
+		SPDK_ERRLOG("nvme_create_io_cq failed!\n");
+		return;
 	}
 
+	rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_create_sq_cb, qpair);
+
+	if (rc != 0) {
+		SPDK_ERRLOG("Failed to send request to create_io_sq, deleting cq!\n");
+		rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_sq_error_delete_cq_cb,
+						      qpair);
+		if (rc != 0) {
+			SPDK_ERRLOG("Failed to send request to delete_io_cq with rc=%d\n", rc);
+			pqpair->pcie_state = NVME_PCIE_QPAIR_FAILED;
+			nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
+		}
+		return;
+	}
+	pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_SQ;
+}
+
+static int
+_nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair,
+				 uint16_t qid)
+{
+	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
+	int	rc;
+
 	/* Statistics may already be allocated in the case of controller reset */
 	if (!pqpair->stat) {
 		if (qpair->poll_group) {
@@ -464,78 +540,21 @@ _nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme
 			pqpair->stat = calloc(1, sizeof(*pqpair->stat));
 			if (!pqpair->stat) {
 				SPDK_ERRLOG("Failed to allocate qpair statistics\n");
-				free(status);
+				nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
 				return -ENOMEM;
 			}
 		}
 	}
 
-	rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_poll_cb, status);
-	if (rc != 0) {
-		free(status);
-		return rc;
-	}
-
-	if (nvme_wait_for_completion(ctrlr->adminq, status)) {
-		SPDK_ERRLOG("nvme_create_io_cq failed!\n");
-		if (!status->timed_out) {
-			free(status);
-		}
-		return -1;
-	}
+	rc = nvme_pcie_ctrlr_cmd_create_io_cq(ctrlr, qpair, nvme_completion_create_cq_cb, qpair);
 
-	memset(status, 0, sizeof(*status));
-	rc = nvme_pcie_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status);
 	if (rc != 0) {
-		free(status);
+		SPDK_ERRLOG("Failed to send request to create_io_cq\n");
+		nvme_qpair_set_state(qpair, NVME_QPAIR_DISCONNECTED);
 		return rc;
 	}
-
-	if (nvme_wait_for_completion(ctrlr->adminq, status)) {
-		SPDK_ERRLOG("nvme_create_io_sq failed!\n");
-		if (status->timed_out) {
-			/* Request is still queued, the memory will be freed in a completion callback.
-			   allocate a new request */
-			status = calloc(1, sizeof(*status));
-			if (!status) {
-				SPDK_ERRLOG("Failed to allocate status tracker\n");
-				return -ENOMEM;
-			}
-		}
-
-		memset(status, 0, sizeof(*status));
-		/* Attempt to delete the completion queue */
-		rc = nvme_pcie_ctrlr_cmd_delete_io_cq(qpair->ctrlr, qpair, nvme_completion_poll_cb, status);
-		if (rc != 0) {
-			/* The originall or newly allocated status structure can be freed since
-			 * the corresponding request has been completed of failed to submit */
-			free(status);
-			return -1;
-		}
-		nvme_wait_for_completion(ctrlr->adminq, status);
-		if (!status->timed_out) {
-			/* status can be freed regardless of nvme_wait_for_completion return value */
-			free(status);
-		}
-		return -1;
-	}
-
-	if (ctrlr->shadow_doorbell) {
-		pqpair->shadow_doorbell.sq_tdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 0) *
-						  pctrlr->doorbell_stride_u32;
-		pqpair->shadow_doorbell.cq_hdbl = ctrlr->shadow_doorbell + (2 * qpair->id + 1) *
-						  pctrlr->doorbell_stride_u32;
-		pqpair->shadow_doorbell.sq_eventidx = ctrlr->eventidx + (2 * qpair->id + 0) *
-						      pctrlr->doorbell_stride_u32;
-		pqpair->shadow_doorbell.cq_eventidx = ctrlr->eventidx + (2 * qpair->id + 1) *
-						      pctrlr->doorbell_stride_u32;
-		pqpair->flags.has_shadow_doorbell = 1;
-	} else {
-		pqpair->flags.has_shadow_doorbell = 0;
-	}
-	nvme_pcie_qpair_reset(qpair);
-	free(status);
 
+	pqpair->pcie_state = NVME_PCIE_QPAIR_WAIT_FOR_CQ;
 	return 0;
 }
 
@@ -546,9 +565,7 @@ nvme_pcie_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qp
 
 	if (!nvme_qpair_is_admin_queue(qpair)) {
 		rc = _nvme_pcie_ctrlr_create_io_qpair(ctrlr, qpair, qpair->id);
-	}
-
-	if (rc == 0) {
+	} else {
 		nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
 	}
 
@@ -804,6 +821,19 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
 	uint16_t		 next_cq_head;
 	uint8_t			 next_phase;
 	bool			 next_is_valid = false;
+	int			 rc;
+
+	if (spdk_unlikely(pqpair->pcie_state == NVME_PCIE_QPAIR_FAILED)) {
+		return -ENXIO;
+	}
+
+	if (spdk_unlikely(pqpair->pcie_state != NVME_PCIE_QPAIR_READY)) {
+		rc = spdk_nvme_qpair_process_completions(ctrlr->adminq, 0);
+		if (rc < 0) {
+			return rc;
+		}
+		return 0;
+	}
 
 	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
 		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
+9 −0
@@ -138,6 +138,13 @@ struct nvme_pcie_poll_group {
 	struct spdk_nvme_pcie_stat stats;
 };
 
+enum nvme_pcie_qpair_state {
+	NVME_PCIE_QPAIR_WAIT_FOR_CQ = 1,
+	NVME_PCIE_QPAIR_WAIT_FOR_SQ,
+	NVME_PCIE_QPAIR_READY,
+	NVME_PCIE_QPAIR_FAILED,
+};
+
 /* PCIe transport extensions for spdk_nvme_qpair */
 struct nvme_pcie_qpair {
 	/* Submission queue tail doorbell */
@@ -162,6 +169,8 @@ struct nvme_pcie_qpair {
 
 	uint16_t num_entries;
 
+	uint8_t pcie_state;
+
 	uint8_t retry_count;
 
 	uint16_t max_completions_cap;
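
The enum above encodes the connect-time state machine: a qpair enters
WAIT_FOR_CQ when CREATE IO CQ is submitted, moves to WAIT_FOR_SQ when that
command completes, then to READY once CREATE IO SQ completes, or to FAILED
on any error. A hypothetical helper, not part of this commit, that names the
states when tracing a connect:

	static const char *
	nvme_pcie_qpair_state_str(enum nvme_pcie_qpair_state state)
	{
		switch (state) {
		case NVME_PCIE_QPAIR_WAIT_FOR_CQ:
			return "waiting for CREATE IO CQ completion";
		case NVME_PCIE_QPAIR_WAIT_FOR_SQ:
			return "waiting for CREATE IO SQ completion";
		case NVME_PCIE_QPAIR_READY:
			return "ready";
		case NVME_PCIE_QPAIR_FAILED:
			return "failed";
		default:
			return "unknown";
		}
	}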
+7 −2
@@ -370,7 +370,12 @@ nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nv
 	}
 
 	/* Busy wait until the qpair exits the connecting state */
-	while (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING) { }
+	while (nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING) {
+		rc = spdk_nvme_qpair_process_completions(qpair, 0);
+		if (rc < 0) {
+			goto err;
+		}
+	}
 
 	if (qpair->poll_group) {
 		rc = nvme_poll_group_connect_qpair(qpair);
@@ -379,7 +384,7 @@ nvme_transport_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nv
 		}
 	}
 
-	return rc;
+	return 0;
 
 err:
 	/* If the qpair was unable to reconnect, restore the original failure reason. */
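
The behavior visible to applications is unchanged: connect still returns
only once the qpair is ready, because the loop above polls until the state
machine finishes. A minimal usage sketch against the existing public API,
assuming an already-attached ctrlr:

	struct spdk_nvme_qpair *qpair;

	/* Still blocks internally until CREATE IO CQ/SQ both complete. */
	qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, NULL, 0);
	if (qpair == NULL) {
		fprintf(stderr, "failed to allocate and connect I/O qpair\n");
	}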
+12 −24
@@ -354,6 +354,9 @@ test_nvme_pcie_ctrlr_connect_qpair(void)
 	CU_ASSERT(req[0].cmd.cdw10_bits.create_io_q.qsize == 0);
 	CU_ASSERT(req[0].cmd.cdw11_bits.create_io_cq.pc == 1);
 	CU_ASSERT(req[0].cmd.dptr.prp.prp1 == 0xDEADBEEF);
+
+	/* Complete the first request, which triggers the second. */
+	req[0].cb_fn(req[0].cb_arg, &cpl);
 	CU_ASSERT(req[1].cmd.opc == SPDK_NVME_OPC_CREATE_IO_SQ);
 	CU_ASSERT(req[1].cmd.cdw10_bits.create_io_q.qid == 1);
 	CU_ASSERT(req[1].cmd.cdw10_bits.create_io_q.qsize == 0);
@@ -362,6 +365,9 @@ test_nvme_pcie_ctrlr_connect_qpair(void)
 	CU_ASSERT(req[1].cmd.cdw11_bits.create_io_sq.cqid = 1);
 	CU_ASSERT(req[1].cmd.dptr.prp.prp1 == 0xDDADBEEF);
 
+	/* Complete the second request */
+	req[1].cb_fn(req[1].cb_arg, &cpl);
+
 	/* doorbell stride and qid are 1 */
 	CU_ASSERT(pqpair.shadow_doorbell.sq_tdbl == pctrlr.ctrlr.shadow_doorbell + 2);
 	CU_ASSERT(pqpair.shadow_doorbell.cq_hdbl == pctrlr.ctrlr.shadow_doorbell + 3);
@@ -398,6 +404,9 @@ test_nvme_pcie_ctrlr_connect_qpair(void)
 	CU_ASSERT(req[0].cmd.cdw10_bits.create_io_q.qsize == 0);
 	CU_ASSERT(req[0].cmd.cdw11_bits.create_io_cq.pc == 1);
 	CU_ASSERT(req[0].cmd.dptr.prp.prp1 == 0xDEADBEEF);
+
+	/* Complete the first request, which triggers the second. */
+	req[0].cb_fn(req[0].cb_arg, &cpl);
 	CU_ASSERT(req[1].cmd.opc == SPDK_NVME_OPC_CREATE_IO_SQ);
 	CU_ASSERT(req[1].cmd.cdw10_bits.create_io_q.qid == 1);
 	CU_ASSERT(req[1].cmd.cdw10_bits.create_io_q.qsize == 0);
@@ -406,35 +415,14 @@ test_nvme_pcie_ctrlr_connect_qpair(void)
 	CU_ASSERT(req[1].cmd.cdw11_bits.create_io_sq.cqid = 1);
 	CU_ASSERT(req[1].cmd.dptr.prp.prp1 == 0xDDADBEEF);
 
+	/* Complete the second request */
+	req[1].cb_fn(req[1].cb_arg, &cpl);
+
 	CU_ASSERT(pqpair.shadow_doorbell.sq_tdbl == NULL);
 	CU_ASSERT(pqpair.shadow_doorbell.sq_eventidx == NULL);
 	CU_ASSERT(pqpair.flags.has_shadow_doorbell == 0);
 	CU_ASSERT(STAILQ_EMPTY(&pctrlr.ctrlr.adminq->free_req));
 
-	/* Completion error */
-	memset(req, 0, sizeof(struct nvme_request) * 2);
-	memset(&pqpair, 0, sizeof(pqpair));
-	pqpair.cpl = &cpl;
-	pqpair.qpair.ctrlr = &pctrlr.ctrlr;
-	pqpair.qpair.id = 1;
-	pqpair.num_entries = 1;
-	pqpair.cpl_bus_addr = 0xDEADBEEF;
-	pqpair.cmd_bus_addr = 0xDDADBEEF;
-	pqpair.qpair.qprio = SPDK_NVME_QPRIO_HIGH;
-	pqpair.stat = NULL;
-	pqpair.qpair.poll_group = &poll_group;
-	for (int i = 0; i < 2; i++) {
-		STAILQ_INSERT_TAIL(&pctrlr.ctrlr.adminq->free_req, &req[i], stailq);
-	}
-	MOCK_SET(nvme_wait_for_completion, -EIO);
-
-	rc = nvme_pcie_ctrlr_connect_qpair(&pctrlr.ctrlr, &pqpair.qpair);
-	CU_ASSERT(rc == -1);
-	/* Remove unused request */
-	STAILQ_REMOVE_HEAD(&pctrlr.ctrlr.adminq->free_req, stailq);
-	CU_ASSERT(STAILQ_EMPTY(&pctrlr.ctrlr.adminq->free_req));
-	MOCK_CLEAR(nvme_wait_for_completion);
-
 	/* No available request used */
 	memset(req, 0, sizeof(struct nvme_request) * 2);
 	memset(&pqpair, 0, sizeof(pqpair));
+2 −0
@@ -44,6 +44,8 @@ DEFINE_STUB(nvme_poll_group_disconnect_qpair, int, (struct spdk_nvme_qpair *qpai
 DEFINE_STUB(spdk_nvme_ctrlr_free_io_qpair, int, (struct spdk_nvme_qpair *qpair), 0);
 DEFINE_STUB(spdk_nvme_transport_id_trtype_str, const char *,
 	    (enum spdk_nvme_transport_type trtype), NULL);
+DEFINE_STUB(spdk_nvme_qpair_process_completions, int32_t, (struct spdk_nvme_qpair *qpair,
+		uint32_t max_completions), 0);
 
 static void
 ut_construct_transport(struct spdk_nvme_transport *transport, const char name[])