Commit 01201d3e authored by Seth Howell, committed by Jim Harris
Browse files

rdma: remove compile time config for SRQ



Change-Id: I44af3ee4dc6ec76045e1d0614910402487098a3d
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/447120


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent cf467578
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -38,6 +38,8 @@ block except for metadata.
### NVMe-oF Target

Support for per-device shared receive queues in the RDMA transport has been added.
It is enabled by default for any device that supports it.

The size of a shared receive queue is defined by transport configuration file parameter
`MaxSRQDepth` and `nvmf_create_transport` RPC method parameter `max_srq_depth`.
Default size is 4096.
+97 −108
Original line number Diff line number Diff line
@@ -581,8 +581,6 @@ spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
	return 0;
}

#ifndef SPDK_CONFIG_RDMA_SRQ

static void
nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req)
{
@@ -608,8 +606,6 @@ nvmf_rdma_dump_qpair_contents(struct spdk_nvmf_rdma_qpair *rqpair)
	}
}

#endif

static void
nvmf_rdma_resources_destroy(struct spdk_nvmf_rdma_resources *resources)
{
@@ -793,29 +789,25 @@ cleanup:
static void
spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
{
#ifdef SPDK_CONFIG_RDMA_SRQ
	struct spdk_nvmf_rdma_recv	*rdma_recv, *recv_tmp;
	struct ibv_recv_wr		*bad_recv_wr = NULL;
	int				rc;
#endif

	spdk_trace_record(TRACE_RDMA_QP_DESTROY, 0, 0, (uintptr_t)rqpair->cm_id, 0);

	spdk_poller_unregister(&rqpair->destruct_poller);

	if (rqpair->qd != 0) {
#ifndef SPDK_CONFIG_RDMA_SRQ
		if (rqpair->poller->srq == NULL) {
			nvmf_rdma_dump_qpair_contents(rqpair);
#endif
		}
		SPDK_WARNLOG("Destroying qpair when queue depth is %d\n", rqpair->qd);
	}

	if (rqpair->poller) {
		TAILQ_REMOVE(&rqpair->poller->qpairs, rqpair, link);
	}


#ifdef SPDK_CONFIG_RDMA_SRQ
		if (rqpair->poller->srq != NULL) {
			/* Drop all received but unprocessed commands for this queue and return them to SRQ */
			STAILQ_FOREACH_SAFE(rdma_recv, &rqpair->resources->incoming_queue, link, recv_tmp) {
				if (rqpair == rdma_recv->qpair) {
@@ -826,7 +818,8 @@ spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
					}
				}
			}
#endif
		}
	}

	if (rqpair->cm_id) {
		rdma_destroy_qp(rqpair->cm_id);
@@ -837,9 +830,9 @@ spdk_nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
		}
	}

#ifndef SPDK_CONFIG_RDMA_SRQ
	if (rqpair->poller != NULL && rqpair->poller->srq == NULL) {
		nvmf_rdma_resources_destroy(rqpair->resources);
#endif
	}

	free(rqpair);
}
@@ -850,11 +843,9 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
	struct spdk_nvmf_rdma_qpair		*rqpair;
	struct spdk_nvmf_rdma_poller		*rpoller;
	int					rc, num_cqe, required_num_wr;
#ifndef SPDK_CONFIG_RDMA_SRQ
	struct spdk_nvmf_rdma_transport		*rtransport;
	struct spdk_nvmf_transport		*transport;
	struct spdk_nvmf_rdma_resource_opts	opts;
#endif
	struct spdk_nvmf_rdma_device		*device;
	struct ibv_qp_init_attr			ibv_init_attr;

@@ -866,15 +857,16 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
	ibv_init_attr.qp_type		= IBV_QPT_RC;
	ibv_init_attr.send_cq		= rqpair->poller->cq;
	ibv_init_attr.recv_cq		= rqpair->poller->cq;
#ifdef SPDK_CONFIG_RDMA_SRQ

	if (rqpair->poller->srq) {
		ibv_init_attr.srq		= rqpair->poller->srq;
#endif
	ibv_init_attr.cap.max_send_wr	= rqpair->max_queue_depth *
					  2 + 1; /* SEND, READ, and WRITE operations + dummy drain WR */
#ifndef SPDK_CONFIG_RDMA_SRQ
	} else {
		ibv_init_attr.cap.max_recv_wr	= rqpair->max_queue_depth +
						  1; /* RECV operations + dummy drain WR */
#endif
	}

	ibv_init_attr.cap.max_send_wr	= rqpair->max_queue_depth *
					  2 + 1; /* SEND, READ, and WRITE operations + dummy drain WR */
	ibv_init_attr.cap.max_send_sge	= spdk_min(device->attr.max_sge, NVMF_DEFAULT_TX_SGE);
	ibv_init_attr.cap.max_recv_sge	= spdk_min(device->attr.max_sge, NVMF_DEFAULT_RX_SGE);

@@ -910,6 +902,8 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
		rpoller->num_cqe = num_cqe;
	}

	rpoller->required_num_wr = required_num_wr;

	rc = rdma_create_qp(rqpair->cm_id, rqpair->port->device->pd, &ibv_init_attr);
	if (rc) {
		SPDK_ERRLOG("rdma_create_qp failed: errno %d: %s\n", errno, spdk_strerror(errno));
@@ -919,8 +913,6 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
		return -1;
	}

	rpoller->required_num_wr = required_num_wr;

	rqpair->max_send_depth = spdk_min((uint32_t)(rqpair->max_queue_depth * 2 + 1),
					  ibv_init_attr.cap.max_send_wr);
	rqpair->max_send_sge = spdk_min(NVMF_DEFAULT_TX_SGE, ibv_init_attr.cap.max_send_sge);
@@ -928,7 +920,7 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
	spdk_trace_record(TRACE_RDMA_QP_CREATE, 0, 0, (uintptr_t)rqpair->cm_id, 0);
	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "New RDMA Connection: %p\n", qpair);

#ifndef SPDK_CONFIG_RDMA_SRQ
	if (rqpair->poller->srq == NULL) {
		rtransport = SPDK_CONTAINEROF(qpair->transport, struct spdk_nvmf_rdma_transport, transport);
		transport = &rtransport->transport;

@@ -948,9 +940,9 @@ spdk_nvmf_rdma_qpair_initialize(struct spdk_nvmf_qpair *qpair)
			spdk_nvmf_rdma_qpair_destroy(rqpair);
			return -1;
		}
#else
	} else {
		rqpair->resources = rqpair->poller->resources;
#endif
	}

	rqpair->current_recv_depth = 0;
	STAILQ_INIT(&rqpair->pending_rdma_read_queue);
@@ -1016,11 +1008,13 @@ request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
	assert(rdma_req->recv != NULL);
	SPDK_DEBUGLOG(SPDK_LOG_RDMA, "RDMA RECV POSTED. Recv: %p Connection: %p\n", rdma_req->recv,
		      rqpair);
#ifndef SPDK_CONFIG_RDMA_SRQ
	if (rqpair->poller->srq == NULL) {
		rc = ibv_post_recv(rqpair->cm_id->qp, &rdma_req->recv->wr, &bad_recv_wr);
#else
	} else {
		rdma_req->recv->qpair = NULL;
		rc = ibv_post_srq_recv(rqpair->poller->srq, &rdma_req->recv->wr, &bad_recv_wr);
#endif
	}

	if (rc) {
		SPDK_ERRLOG("Unable to re-post rx descriptor\n");
		return rc;
@@ -2267,12 +2261,14 @@ spdk_nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport
		STAILQ_REMOVE_HEAD(&resources->free_queue, state_link);
		rdma_req->recv = STAILQ_FIRST(&resources->incoming_queue);
		STAILQ_REMOVE_HEAD(&resources->incoming_queue, link);
#ifdef SPDK_CONFIG_RDMA_SRQ

		if (rqpair->poller->srq != NULL) {
			rdma_req->req.qpair = &rdma_req->recv->qpair->qpair;
			rdma_req->recv->qpair->qd++;
#else
		} else {
			rqpair->qd++;
#endif
		}

		rdma_req->state = RDMA_REQUEST_STATE_NEW;
		if (spdk_nvmf_rdma_request_process(rtransport, rdma_req) == false) {
			break;
@@ -2593,12 +2589,9 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
	struct spdk_nvmf_rdma_poll_group	*rgroup;
	struct spdk_nvmf_rdma_poller		*poller;
	struct spdk_nvmf_rdma_device		*device;
#ifdef SPDK_CONFIG_RDMA_SRQ
	struct ibv_srq_init_attr		srq_init_attr;
	struct spdk_nvmf_rdma_resource_opts	opts;

#endif

	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);

	rgroup = calloc(1, sizeof(*rgroup));
@@ -2634,8 +2627,7 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
		poller->num_cqe = DEFAULT_NVMF_RDMA_CQ_SIZE;

		TAILQ_INSERT_TAIL(&rgroup->pollers, poller, link);

#ifdef SPDK_CONFIG_RDMA_SRQ
		if (device->attr.max_srq != 0) {
			poller->max_srq_depth = transport->opts.max_srq_depth;

			memset(&srq_init_attr, 0, sizeof(struct ibv_srq_init_attr));
@@ -2662,7 +2654,7 @@ spdk_nvmf_rdma_poll_group_create(struct spdk_nvmf_transport *transport)
				spdk_nvmf_rdma_poll_group_destroy(&rgroup->group);
				pthread_mutex_unlock(&rtransport->lock);
			}
#endif
		}
	}

	pthread_mutex_unlock(&rtransport->lock);
@@ -2685,15 +2677,12 @@ spdk_nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
	TAILQ_FOREACH_SAFE(poller, &rgroup->pollers, link, tmp) {
		TAILQ_REMOVE(&rgroup->pollers, poller, link);

#ifdef SPDK_CONFIG_RDMA_SRQ
		if (poller->srq) {
			nvmf_rdma_resources_destroy(poller->resources);
			ibv_destroy_srq(poller->srq);
			SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Destroyed RDMA shared queue %p\n", poller->srq);
		}

		nvmf_rdma_resources_destroy(poller->resources);
#endif

		if (poller->cq) {
			ibv_destroy_cq(poller->cq);
		}
@@ -2841,7 +2830,6 @@ spdk_nvmf_rdma_close_qpair(struct spdk_nvmf_qpair *qpair)
				  NVMF_RDMA_QPAIR_DESTROY_TIMEOUT_US);
}

#ifdef SPDK_CONFIG_RDMA_SRQ
static struct spdk_nvmf_rdma_qpair *
get_rdma_qpair_from_wc(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_wc *wc)
{
@@ -2855,7 +2843,6 @@ get_rdma_qpair_from_wc(struct spdk_nvmf_rdma_poller *rpoller, struct ibv_wc *wc)
	SPDK_ERRLOG("Didn't find QP with qp_num %u\n", wc->qp_num);
	return NULL;
}
#endif

#ifdef DEBUG
static int
@@ -2913,13 +2900,15 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
				spdk_nvmf_rdma_request_process(rtransport, rdma_req);
				break;
			case RDMA_WR_TYPE_RECV:
				/* rdma_recv->qpair will be NULL if using an SRQ.  In that case we have to get the qpair from the wc. */
				rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
#ifdef SPDK_CONFIG_RDMA_SRQ
				if (rdma_recv->qpair == NULL) {
					rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
				assert(rdma_recv->qpair != NULL);
#endif
				}
				rqpair = rdma_recv->qpair;

				assert(rqpair != NULL);

				/* Dump this into the incoming queue. This gets cleaned up when
				 * the queue pair disconnects or recovers. */
				STAILQ_INSERT_TAIL(&rqpair->resources->incoming_queue, rdma_recv, link);
@@ -3010,11 +2999,11 @@ spdk_nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,

		case IBV_WC_RECV:
			assert(rdma_wr->type == RDMA_WR_TYPE_RECV);
			/* rdma_recv->qpair will be NULL if using an SRQ.  In that case we have to get the qpair from the wc. */
			rdma_recv = SPDK_CONTAINEROF(rdma_wr, struct spdk_nvmf_rdma_recv, rdma_wr);
#ifdef SPDK_CONFIG_RDMA_SRQ
			if (rdma_recv->qpair == NULL) {
				rdma_recv->qpair = get_rdma_qpair_from_wc(rpoller, &wc[i]);
			assert(rdma_recv->qpair != NULL);
#endif
			}
			rqpair = rdma_recv->qpair;
			/* The qpair should not send more requests than are allowed per qpair. */
			if (rqpair->current_recv_depth >= rqpair->max_queue_depth) {