Commit 7f82fb65, authored by Seth Howell, committed by Tomasz Zawadzki
Browse files

nvme/rdma: Move stale connection retries to connect call.



This gives us a more standard flow in create_io_qpair. Eventually
this will allow us to move the connection commands out to the generic layer
in alloc_io_qpair. We can then split the create and connect calls at the
generic level, making it possible to add rdma qpairs to a poll group in a meaningful
way.

Signed-off-by: Seth Howell <seth.howell@intel.com>
Change-Id: Ib1b125f834c3c39a2b5050ff4a9bc4a053b95c99
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1119


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
parent 18508424
Loading
Loading
Loading
Loading
+28 −17
Original line number Diff line number Diff line
@@ -1100,7 +1100,7 @@ nvme_rdma_unregister_mem(struct nvme_rdma_qpair *rqpair)
}

static int
nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
_nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
@@ -1207,6 +1207,32 @@ nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qp
	return 0;
}

/*
 * Connect a qpair, retrying when the connect attempt reports -EAGAIN.
 *
 * The first attempt goes through _nvme_rdma_ctrlr_connect_qpair(). If it
 * returns anything other than -EAGAIN, that value is returned unchanged.
 * Otherwise the qpair is repeatedly disconnected and reconnected, calling
 * nvme_delay(NVME_RDMA_STALE_CONN_RETRY_DELAY_US) before each retry, until an
 * attempt returns something other than -EAGAIN or the retry counter reaches
 * NVME_RDMA_STALE_CONN_RETRY_MAX. At least one retry is always made once
 * -EAGAIN has been seen.
 *
 * Returns the result of the last connect attempt, except that a final
 * -EAGAIN is reported to the caller as -1.
 */
static int
nvme_rdma_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	int attempts = 0;
	int status;

	status = _nvme_rdma_ctrlr_connect_qpair(ctrlr, qpair);
	if (status != -EAGAIN) {
		/* Success or a non-retryable failure: nothing more to do here. */
		return status;
	}

	/*
	 * -EAGAIN represents the special case where the target side still thought it was connected.
	 * Most NICs will fail the first connection attempt, and the NICs will clean up whatever
	 * state they need to. After that, subsequent connection attempts will succeed.
	 */
	SPDK_NOTICELOG("Detected stale connection on Target side for qpid: %d\n", qpair->id);

	for (;;) {
		nvme_delay(NVME_RDMA_STALE_CONN_RETRY_DELAY_US);
		/* Tear down the failed attempt before trying to connect again. */
		nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);
		status = _nvme_rdma_ctrlr_connect_qpair(ctrlr, qpair);
		attempts++;

		if (status != -EAGAIN || attempts >= NVME_RDMA_STALE_CONN_RETRY_MAX) {
			break;
		}
	}

	/* Retries exhausted while still seeing -EAGAIN: report -1 instead. */
	if (status == -EAGAIN) {
		return -1;
	}

	return status;
}

/*
 * Build SGL describing empty payload.
 */
@@ -1584,7 +1610,7 @@ nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
{
	struct nvme_rdma_qpair *rqpair;
	struct spdk_nvme_qpair *qpair;
	int rc, retry_count = 0;
	int rc;

	rqpair = calloc(1, sizeof(struct nvme_rdma_qpair));
	if (!rqpair) {
@@ -1618,21 +1644,6 @@ nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,

	rc = nvme_transport_ctrlr_connect_qpair(ctrlr, qpair);

	/*
	 * -EAGAIN represents the special case where the target side still thought it was connected.
	 * Most NICs will fail the first connection attempt, and the NICs will clean up whatever
	 * state they need to. After that, subsequent connection attempts will succeed.
	 */
	if (rc == -EAGAIN) {
		SPDK_NOTICELOG("Detected stale connection on Target side for qpid: %d\n", rqpair->qpair.id);
		do {
			nvme_delay(NVME_RDMA_STALE_CONN_RETRY_DELAY_US);
			nvme_transport_ctrlr_disconnect_qpair(ctrlr, &rqpair->qpair);
			rc = nvme_transport_ctrlr_connect_qpair(ctrlr, &rqpair->qpair);
			retry_count++;
		} while (rc == -EAGAIN && retry_count < NVME_RDMA_STALE_CONN_RETRY_MAX);
	}

	if (rc < 0) {
		nvme_rdma_ctrlr_delete_io_qpair(ctrlr, qpair);
		return NULL;