Commit c8f986c7 authored by Shuhei Matsumoto, committed by Tomasz Zawadzki
Browse files

Revert "nvme/rdma: Correct qpair disconnect process"



This reverts commit eb09178a.

Reason for revert:

This caused a degradation for adminq.
For the adminq, ctrlr_delete_io_qpair() is not called until the ctrlr is destructed,
so the delete operations required for the adminq are not performed.

Reverting the patch is practical for now.

Change-Id: Ib55ff81dfe97ee1e2c83876912e851c61f20e354
Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10878


Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
parent 194dc9e2
Loading
Loading
Loading
Loading
+46 −76
Original line number Diff line number Diff line
@@ -308,7 +308,6 @@ static const char *rdma_cm_event_str[] = {

struct nvme_rdma_qpair *nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group,
		uint32_t qp_num);
static void nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);

static TAILQ_HEAD(, nvme_rdma_memory_domain) g_memory_domains = TAILQ_HEAD_INITIALIZER(
			g_memory_domains);
@@ -1794,33 +1793,12 @@ nvme_rdma_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme
{
	struct nvme_rdma_qpair *rqpair = nvme_rdma_qpair(qpair);
	struct nvme_rdma_ctrlr *rctrlr = NULL;
	int rc;

	if (rqpair->rdma_qp) {
		rc = spdk_rdma_qp_disconnect(rqpair->rdma_qp);
		rctrlr = nvme_rdma_ctrlr(qpair->ctrlr);
		if (rctrlr != NULL && rc == 0) {
			if (nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_DISCONNECTED)) {
				SPDK_DEBUGLOG(nvme, "Target did not respond to qpair disconnect.\n");
			}
		}
	}
}

static int
nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_rdma_qpair *rqpair;
	struct nvme_rdma_ctrlr *rctrlr = NULL;
	struct nvme_rdma_cm_event_entry *entry, *tmp;
	int rc;

	assert(qpair != NULL);
	rqpair = nvme_rdma_qpair(qpair);

	if (rqpair->defer_deletion_to_pg) {
		nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
		return 0;
	}
	spdk_rdma_free_mem_map(&rqpair->mr_map);
	nvme_rdma_unregister_reqs(rqpair);
	nvme_rdma_unregister_rsps(rqpair);

	if (rqpair->evt) {
		rdma_ack_cm_event(rqpair->evt);
@@ -1842,24 +1820,18 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
		}
	}

	spdk_rdma_free_mem_map(&rqpair->mr_map);
	nvme_rdma_unregister_reqs(rqpair);
	nvme_rdma_unregister_rsps(rqpair);

	nvme_rdma_qpair_abort_reqs(qpair, 0);
	nvme_qpair_deinit(qpair);

	nvme_rdma_put_memory_domain(rqpair->memory_domain);

	nvme_rdma_free_reqs(rqpair);
	nvme_rdma_free_rsps(rqpair);

	if (rqpair->cm_id) {
		if (rqpair->rdma_qp) {
			/* qpair is already disconnected at this stage */
			rc = spdk_rdma_qp_disconnect(rqpair->rdma_qp);
			if ((rctrlr != NULL) && (rc == 0)) {
				if (nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_DISCONNECTED)) {
					SPDK_DEBUGLOG(nvme, "Target did not respond to qpair disconnect.\n");
				}
			}
			spdk_rdma_qp_destroy(rqpair->rdma_qp);
			rqpair->rdma_qp = NULL;
		}

		rdma_destroy_id(rqpair->cm_id);
		rqpair->cm_id = NULL;
	}
@@ -1868,7 +1840,30 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
		ibv_destroy_cq(rqpair->cq);
		rqpair->cq = NULL;
	}
}

static void nvme_rdma_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);

static int
nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_rdma_qpair *rqpair;

	assert(qpair != NULL);
	rqpair = nvme_rdma_qpair(qpair);

	if (rqpair->defer_deletion_to_pg) {
		nvme_qpair_set_state(qpair, NVME_QPAIR_DESTROYING);
		return 0;
	}

	nvme_rdma_qpair_abort_reqs(qpair, 1);
	nvme_qpair_deinit(qpair);

	nvme_rdma_put_memory_domain(rqpair->memory_domain);

	nvme_rdma_free_reqs(rqpair);
	nvme_rdma_free_rsps(rqpair);
	nvme_rdma_free(rqpair);

	return 0;
@@ -2180,24 +2175,16 @@ nvme_rdma_log_wc_status(struct nvme_rdma_qpair *rqpair, struct ibv_wc *wc)
	if (wc->status == IBV_WC_WR_FLUSH_ERR) {
		/* If qpair is in ERR state, we will receive completions for all posted and not completed
		 * Work Requests with IBV_WC_WR_FLUSH_ERR status. Don't log an error in that case */
		SPDK_DEBUGLOG(nvme,
			      "WC error, qid %u, qp state %d, request 0x%"PRIx64" type %d, status: (%d): %s\n",
		SPDK_DEBUGLOG(nvme, "WC error, qid %u, qp state %d, request 0x%lu type %d, status: (%d): %s\n",
			      rqpair->qpair.id, rqpair->qpair.state, wc->wr_id, rdma_wr->type, wc->status,
			      ibv_wc_status_str(wc->status));
	} else {
		SPDK_ERRLOG("WC error, qid %u, qp state %d, request 0x%"PRIx64" type %d, status: (%d): %s\n",
		SPDK_ERRLOG("WC error, qid %u, qp state %d, request 0x%lu type %d, status: (%d): %s\n",
			    rqpair->qpair.id, rqpair->qpair.state, wc->wr_id, rdma_wr->type, wc->status,
			    ibv_wc_status_str(wc->status));
	}
}

static inline bool
nvme_rdma_is_rxe_device(struct ibv_device_attr *dev_attr)
{
	return dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_OLD ||
	       dev_attr->vendor_id == SPDK_RDMA_RXE_VENDOR_ID_NEW;
}

static int
nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
				 struct nvme_rdma_poll_group *group,
@@ -2273,7 +2260,12 @@ nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
							wc[i].qp_num);
				}
				if (!rqpair) {
					assert(0);
					/* When poll_group is used, several qpairs share the same CQ and it is possible to
					 * receive a completion with error (e.g. IBV_WC_WR_FLUSH_ERR) for already disconnected qpair
					 * That happens due to qpair is destroyed while there are submitted but not completed send/receive
					 * Work Requests
					 * TODO: ibv qpair must be destroyed only when all submitted Work Requests are completed */
					assert(group);
					continue;
				}
				assert(rqpair->current_num_sends > 0);
@@ -2284,26 +2276,6 @@ nvme_rdma_cq_process_completions(struct ibv_cq *cq, uint32_t batch_size,
				continue;
			}

			if (spdk_unlikely(rdma_req->req == NULL)) {
				struct ibv_device_attr dev_attr;
				int query_status;

				/* Bug in Soft Roce - we may receive a completion without error status when qpair is disconnected/destroyed.
				 * As sanity check - log an error if we use a real HW (it should never happen) */
				query_status = ibv_query_device(cq->context, &dev_attr);
				if (query_status == 0) {
					if (!nvme_rdma_is_rxe_device(&dev_attr)) {
						SPDK_ERRLOG("Received malformed completion: request 0x%"PRIx64" type %d\n", wc->wr_id,
							    rdma_wr->type);
						assert(0);
					}
				} else {
					SPDK_ERRLOG("Failed to query ib device\n");
					assert(0);
				}
				continue;
			}

			rqpair = nvme_rdma_qpair(rdma_req->req->qpair);
			rdma_req->completion_flags |= NVME_RDMA_SEND_COMPLETED;
			rqpair->current_num_sends--;
@@ -2647,14 +2619,12 @@ nvme_rdma_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)

	rqpair->cq = NULL;

	/* qpair has requests submitted to HW, need to wait for their completion.
	 * Allocate a tracker and attach it to poll group */
	destroyed_qpair = calloc(1, sizeof(*destroyed_qpair));
	if (destroyed_qpair == NULL) {
	/*
	 * If this fails, the system is in serious trouble,
	 * just let the qpair get cleaned up immediately.
	 */
	destroyed_qpair = calloc(1, sizeof(*destroyed_qpair));
	if (destroyed_qpair == NULL) {
		return 0;
	}

+0 −1
Original line number Diff line number Diff line
@@ -58,7 +58,6 @@ DEFINE_STUB(rdma_ack_cm_event, int, (struct rdma_cm_event *event), 0);
DEFINE_STUB_V(rdma_free_devices, (struct ibv_context **list));
DEFINE_STUB(fcntl, int, (int fd, int cmd, ...), 0);
DEFINE_STUB_V(rdma_destroy_event_channel, (struct rdma_event_channel *channel));
DEFINE_STUB(rdma_destroy_id, int, (struct rdma_cm_id *id), 0);

DEFINE_STUB(ibv_dereg_mr, int, (struct ibv_mr *mr), 0);
DEFINE_STUB(ibv_resize_cq, int, (struct ibv_cq *cq, int cqe), 0);