Commit 5ac814e3 authored by Seth Howell, committed by Jim Harris

nvme_rdma: share the cm_event channel between qpairs.

This enables us to create a single file descriptor and a single event
channel on which to poll for RDMA CM events. With that accomplished, we
can easily poll for CM events on the admin qpair each time we check it
for completions.

Change-Id: I8b901252510744a956bef12594d1e045715e002e
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/467549
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent f12e6bc0
+146 −42
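
The mechanism behind this change is one RDMA CM event channel per controller whose file descriptor is switched to non-blocking, so the admin-queue poller can drain CM events opportunistically without ever stalling. Before the diff, a minimal sketch of that pattern, assuming only the standard librdmacm and fcntl(2) APIs; the function names create_nonblocking_channel and drain_cm_events are hypothetical, not from the patch:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <rdma/rdma_cma.h>

/* Create one controller-wide CM event channel and make its fd
 * non-blocking, so polling it never blocks the completion path. */
static struct rdma_event_channel *
create_nonblocking_channel(void)
{
	struct rdma_event_channel *channel;
	int flag;

	channel = rdma_create_event_channel();
	if (channel == NULL) {
		return NULL;
	}

	flag = fcntl(channel->fd, F_GETFL);
	if (flag < 0 || fcntl(channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
		rdma_destroy_event_channel(channel);
		return NULL;
	}
	return channel;
}

/* Drain every event currently queued on the channel. On a non-blocking
 * channel, rdma_get_cm_event() fails with EAGAIN once the queue is empty. */
static int
drain_cm_events(struct rdma_event_channel *channel)
{
	struct rdma_cm_event *event;

	while (rdma_get_cm_event(channel, &event) == 0) {
		printf("cm event: %s\n", rdma_event_str(event->event));
		rdma_ack_cm_event(event);
	}
	return (errno == EAGAIN || errno == EWOULDBLOCK) ? 0 : -errno;
}

Compile with -lrdmacm. The same F_GETFL/F_SETFL sequence appears in the nvme_rdma_ctrlr_construct hunk below, applied to the new controller-wide channel.
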
@@ -63,9 +63,12 @@
 #define NVME_RDMA_DEFAULT_TX_SGE		2
 #define NVME_RDMA_DEFAULT_RX_SGE		1
 
 /* Max number of NVMe-oF SGL descriptors supported by the host */
 #define NVME_RDMA_MAX_SGL_DESCRIPTORS		16
 
+/* number of STAILQ entries for holding pending RDMA CM events. */
+#define NVME_RDMA_NUM_CM_EVENTS			256
+
 struct spdk_nvmf_cmd {
 	struct spdk_nvme_cmd cmd;
 	struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
@@ -81,6 +84,12 @@ struct spdk_nvme_rdma_mr_map {
 	LIST_ENTRY(spdk_nvme_rdma_mr_map)	link;
 };
 
+/* STAILQ wrapper for cm events. */
+struct nvme_rdma_cm_event_entry {
+	struct rdma_cm_event			*evt;
+	STAILQ_ENTRY(nvme_rdma_cm_event_entry)	link;
+};
+
 /* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
 struct nvme_rdma_ctrlr {
 	struct spdk_nvme_ctrlr			ctrlr;
@@ -88,6 +97,14 @@ struct nvme_rdma_ctrlr {
 	struct ibv_pd				*pd;
 
 	uint16_t				max_sge;
+
+	struct rdma_event_channel		*cm_channel;
+
+	STAILQ_HEAD(, nvme_rdma_cm_event_entry)	pending_cm_events;
+
+	STAILQ_HEAD(, nvme_rdma_cm_event_entry)	free_cm_events;
+
+	struct nvme_rdma_cm_event_entry		*cm_events;
 };
 
 /* NVMe RDMA qpair extensions for spdk_nvme_qpair */
@@ -130,8 +147,6 @@ struct nvme_rdma_qpair {
 	TAILQ_HEAD(, spdk_nvme_rdma_req)	outstanding_reqs;
 
 	/* Placed at the end of the struct since it is not used frequently */
-	struct rdma_event_channel		*cm_channel;
-
 	struct rdma_cm_event			*evt;
 };
 
@@ -282,39 +297,92 @@ nvme_rdma_qpair_process_cm_event(struct nvme_rdma_qpair *rqpair)
 	return rc;
 }
 
+/*
+ * This function must be called under the nvme controller's lock
+ * because it touches global controller variables. The lock is taken
+ * by the generic transport code before invoking a few of the functions
+ * in this file: nvme_rdma_ctrlr_connect_qpair, nvme_rdma_ctrlr_delete_io_qpair,
+ * and conditionally nvme_rdma_qpair_process_completions when it is calling
+ * completions on the admin qpair. When adding a new call to this function, please
+ * verify that it is in a situation where it falls under the lock.
+ */
+static int
+nvme_rdma_poll_events(struct nvme_rdma_ctrlr *rctrlr)
+{
+	struct nvme_rdma_cm_event_entry	*entry, *tmp;
+	struct nvme_rdma_qpair		*event_qpair;
+	struct rdma_cm_event		*event;
+	struct rdma_event_channel	*channel = rctrlr->cm_channel;
+
+	STAILQ_FOREACH_SAFE(entry, &rctrlr->pending_cm_events, link, tmp) {
+		event_qpair = nvme_rdma_qpair(entry->evt->id->context);
+		if (event_qpair->evt == NULL) {
+			event_qpair->evt = entry->evt;
+			STAILQ_REMOVE(&rctrlr->pending_cm_events, entry, nvme_rdma_cm_event_entry, link);
+			STAILQ_INSERT_HEAD(&rctrlr->free_cm_events, entry, link);
+		}
+	}
+
+	while (rdma_get_cm_event(channel, &event) == 0) {
+		event_qpair = nvme_rdma_qpair(event->id->context);
+		if (event_qpair->evt == NULL) {
+			event_qpair->evt = event;
+		} else {
+			assert(rctrlr == nvme_rdma_ctrlr(event_qpair->qpair.ctrlr));
+			entry = STAILQ_FIRST(&rctrlr->free_cm_events);
+			if (entry == NULL) {
+				rdma_ack_cm_event(event);
+				return -ENOMEM;
+			}
+			STAILQ_REMOVE(&rctrlr->free_cm_events, entry, nvme_rdma_cm_event_entry, link);
+			entry->evt = event;
+			STAILQ_INSERT_TAIL(&rctrlr->pending_cm_events, entry, link);
+		}
+	}
+
+	if (errno == EAGAIN || errno == EWOULDBLOCK) {
+		return 0;
+	} else {
+		return errno;
+	}
+}
+
 static int
 nvme_rdma_process_event(struct nvme_rdma_qpair *rqpair,
 			struct rdma_event_channel *channel,
 			enum rdma_cm_event_type evt)
 {
-	struct rdma_cm_event	*event;
-	int			rc = 0;
+	struct nvme_rdma_ctrlr	*rctrlr;
+	int	rc = 0, rc2;
 
 	if (rqpair->evt != NULL) {
-		return -EINVAL;
+		rc = nvme_rdma_qpair_process_cm_event(rqpair);
+		if (rc) {
+			return rc;
+		}
 	}
 
-	while (rdma_get_cm_event(channel, &event) != 0) {
-		if (errno == EAGAIN || errno == EWOULDBLOCK) {
-			continue;
-		}
+	rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
+	assert(rctrlr != NULL);
+	while (!rqpair->evt && !rc) {
+		rc = nvme_rdma_poll_events(rctrlr);
+	}
 
-		SPDK_ERRLOG("Failed to get event from CM event channel. Error %d (%s)\n",
-			    errno, spdk_strerror(errno));
-		return -errno;
+	if (rc) {
+		return rc;
 	}
 
-	if (event->event != evt) {
+	if (rqpair->evt->event != evt) {
 		SPDK_ERRLOG("Expected %s but received %s (%d) from CM event channel (status = %d)\n",
 			    nvme_rdma_cm_event_str_get(evt),
-			    nvme_rdma_cm_event_str_get(event->event), event->event,
-			    event->status);
+			    nvme_rdma_cm_event_str_get(rqpair->evt->event), rqpair->evt->event,
+			    rqpair->evt->status);
 		rc = -EBADMSG;
 	}
 
-	rqpair->evt = event;
-	rc |= nvme_rdma_qpair_process_cm_event(rqpair);
-	return rc;
+	rc2 = nvme_rdma_qpair_process_cm_event(rqpair);
+	/* bad message takes precedence over the other error codes from processing the event. */
+	return rc == 0 ? rc2 : rc;
 }
 
 static int
@@ -652,6 +720,7 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
 	struct ibv_device_attr				attr;
 	int						ret;
 	struct spdk_nvme_ctrlr				*ctrlr;
+	struct nvme_rdma_ctrlr				*rctrlr;
 
 	ret = ibv_query_device(rqpair->cm_id->verbs, &attr);
 	if (ret != 0) {
@@ -665,6 +734,8 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
 	if (!ctrlr) {
 		return -1;
 	}
+	rctrlr = nvme_rdma_ctrlr(ctrlr);
+	assert(rctrlr != NULL);
 
 	request_data.qid = rqpair->qpair.id;
 	request_data.hrqsize = rqpair->num_entries;
@@ -682,7 +753,7 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
 		return ret;
 	}
 
-	ret = nvme_rdma_process_event(rqpair, rqpair->cm_channel, RDMA_CM_EVENT_ESTABLISHED);
+	ret = nvme_rdma_process_event(rqpair, rctrlr->cm_channel, RDMA_CM_EVENT_ESTABLISHED);
 	if (ret) {
 		SPDK_ERRLOG("RDMA connect error\n");
 		return -1;
@@ -850,22 +921,12 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
 	bool src_addr_specified;
 	int rc;
 	struct spdk_nvme_ctrlr *ctrlr;
+	struct nvme_rdma_ctrlr *rctrlr;
 	int family;
-	int flag;
 
-	rqpair->cm_channel = rdma_create_event_channel();
-	if (rqpair->cm_channel == NULL) {
-		SPDK_ERRLOG("rdma_create_event_channel() failed\n");
-		return -1;
-	}
-
-	flag = fcntl(rqpair->cm_channel->fd, F_GETFL);
-	if (fcntl(rqpair->cm_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
-		SPDK_ERRLOG("Cannot set event channel to non blocking\n");
-		return -1;
-	}
-
 	ctrlr = rqpair->qpair.ctrlr;
+	rctrlr = nvme_rdma_ctrlr(ctrlr);
+	assert(rctrlr != NULL);
 
 	switch (ctrlr->trid.adrfam) {
 	case SPDK_NVMF_ADRFAM_IPV4:
@@ -902,7 +963,7 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
 		src_addr_specified = false;
 	}
 
-	rc = rdma_create_id(rqpair->cm_channel, &rqpair->cm_id, rqpair, RDMA_PS_TCP);
+	rc = rdma_create_id(rctrlr->cm_channel, &rqpair->cm_id, rqpair, RDMA_PS_TCP);
 	if (rc < 0) {
 		SPDK_ERRLOG("rdma_create_id() failed\n");
 		return -1;
@@ -910,7 +971,7 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
 
 	rc = nvme_rdma_resolve_addr(rqpair,
 				    src_addr_specified ? (struct sockaddr *)&src_addr : NULL,
-				    (struct sockaddr *)&dst_addr, rqpair->cm_channel);
+				    (struct sockaddr *)&dst_addr, rctrlr->cm_channel);
 	if (rc < 0) {
 		SPDK_ERRLOG("nvme_rdma_resolve_addr() failed\n");
 		return -1;
@@ -1409,11 +1470,6 @@ nvme_rdma_qpair_disconnect(struct spdk_nvme_qpair *qpair)
 		ibv_destroy_cq(rqpair->cq);
 		rqpair->cq = NULL;
 	}
-
-	if (rqpair->cm_channel) {
-		rdma_destroy_event_channel(rqpair->cm_channel);
-		rqpair->cm_channel = NULL;
-	}
 }
 
 static int
@@ -1528,7 +1584,7 @@ struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transpo
 	union spdk_nvme_vs_register vs;
 	struct ibv_context **contexts;
 	struct ibv_device_attr dev_attr;
-	int i, rc;
+	int i, flag, rc;
 
 	rctrlr = calloc(1, sizeof(struct nvme_rdma_ctrlr));
 	if (rctrlr == NULL) {
@@ -1570,6 +1626,33 @@ struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transpo
 		return NULL;
 	}
 
+	STAILQ_INIT(&rctrlr->pending_cm_events);
+	STAILQ_INIT(&rctrlr->free_cm_events);
+	rctrlr->cm_events = calloc(NVME_RDMA_NUM_CM_EVENTS, sizeof(*rctrlr->cm_events));
+	if (rctrlr->cm_events == NULL) {
+		SPDK_ERRLOG("unable to allocate buffers to hold CM events.\n");
+		nvme_rdma_ctrlr_destruct(&rctrlr->ctrlr);
+		return NULL;
+	}
+
+	for (i = 0; i < NVME_RDMA_NUM_CM_EVENTS; i++) {
+		STAILQ_INSERT_TAIL(&rctrlr->free_cm_events, &rctrlr->cm_events[i], link);
+	}
+
+	rctrlr->cm_channel = rdma_create_event_channel();
+	if (rctrlr->cm_channel == NULL) {
+		SPDK_ERRLOG("rdma_create_event_channel() failed\n");
+		nvme_rdma_ctrlr_destruct(&rctrlr->ctrlr);
+		return NULL;
+	}
+
+	flag = fcntl(rctrlr->cm_channel->fd, F_GETFL);
+	if (fcntl(rctrlr->cm_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
+		SPDK_ERRLOG("Cannot set event channel to non blocking\n");
+		nvme_rdma_ctrlr_destruct(&rctrlr->ctrlr);
+		return NULL;
+	}
+
 	rctrlr->ctrlr.adminq = nvme_rdma_ctrlr_create_qpair(&rctrlr->ctrlr, 0,
 			       SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES, 0, SPDK_NVMF_MIN_ADMIN_QUEUE_ENTRIES);
 	if (!rctrlr->ctrlr.adminq) {
@@ -1606,11 +1689,25 @@ int
 nvme_rdma_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
 {
 	struct nvme_rdma_ctrlr *rctrlr = nvme_rdma_ctrlr(ctrlr);
+	struct nvme_rdma_cm_event_entry *entry;
 
 	if (ctrlr->adminq) {
 		nvme_rdma_qpair_destroy(ctrlr->adminq);
 	}
 
+	STAILQ_FOREACH(entry, &rctrlr->pending_cm_events, link) {
+		rdma_ack_cm_event(entry->evt);
+	}
+
+	STAILQ_INIT(&rctrlr->free_cm_events);
+	STAILQ_INIT(&rctrlr->pending_cm_events);
+	free(rctrlr->cm_events);
+
+	if (rctrlr->cm_channel) {
+		rdma_destroy_event_channel(rctrlr->cm_channel);
+		rctrlr->cm_channel = NULL;
+	}
+
 	nvme_ctrlr_destruct_finish(ctrlr);
 
 	free(rctrlr);
@@ -1780,6 +1877,7 @@ nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair,
 	uint32_t			reaped;
 	struct ibv_cq			*cq;
 	struct spdk_nvme_rdma_req	*rdma_req;
+	struct nvme_rdma_ctrlr		*rctrlr;
 
 	if (max_completions == 0) {
 		max_completions = rqpair->num_entries;
@@ -1787,6 +1885,12 @@ nvme_rdma_qpair_process_completions(struct spdk_nvme_qpair *qpair,
 		max_completions = spdk_min(max_completions, rqpair->num_entries);
 	}
 
+	if (nvme_qpair_is_admin_queue(&rqpair->qpair)) {
+		rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
+		nvme_rdma_poll_events(rctrlr);
+	}
+	nvme_rdma_qpair_process_cm_event(rqpair);
+
 	cq = rqpair->cq;
 
 	reaped = 0;
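
The free/pending list handling introduced in nvme_rdma_poll_events above can be exercised in isolation. Below is a self-contained sketch of that pool pattern, assuming hypothetical names (event_pool, pool_init, pool_push) and an int payload standing in for struct rdma_cm_event *; only the <sys/queue.h> STAILQ macros are real:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

#define POOL_SIZE 4	/* stands in for NVME_RDMA_NUM_CM_EVENTS */

struct event_entry {
	int				payload;
	STAILQ_ENTRY(event_entry)	link;
};

struct event_pool {
	STAILQ_HEAD(, event_entry)	free_events;
	STAILQ_HEAD(, event_entry)	pending_events;
	struct event_entry		*entries;
};

/* Preallocate every entry up front and park it on the free list. */
static int
pool_init(struct event_pool *pool)
{
	int i;

	STAILQ_INIT(&pool->free_events);
	STAILQ_INIT(&pool->pending_events);
	pool->entries = calloc(POOL_SIZE, sizeof(*pool->entries));
	if (pool->entries == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < POOL_SIZE; i++) {
		STAILQ_INSERT_TAIL(&pool->free_events, &pool->entries[i], link);
	}
	return 0;
}

/* Park an event whose target qpair is busy: free list -> pending list. */
static int
pool_push(struct event_pool *pool, int payload)
{
	struct event_entry *entry = STAILQ_FIRST(&pool->free_events);

	if (entry == NULL) {
		return -ENOMEM;	/* pool exhausted; caller must drop the event */
	}
	STAILQ_REMOVE_HEAD(&pool->free_events, link);
	entry->payload = payload;
	STAILQ_INSERT_TAIL(&pool->pending_events, entry, link);
	return 0;
}

int
main(void)
{
	struct event_pool pool;
	struct event_entry *entry;

	if (pool_init(&pool) != 0) {
		return 1;
	}
	pool_push(&pool, 42);
	pool_push(&pool, 43);

	/* Replay pending events in arrival order, recycling each entry. */
	while ((entry = STAILQ_FIRST(&pool.pending_events)) != NULL) {
		printf("replaying event %d\n", entry->payload);
		STAILQ_REMOVE_HEAD(&pool.pending_events, link);
		STAILQ_INSERT_HEAD(&pool.free_events, entry, link);
	}
	free(pool.entries);
	return 0;
}

Preallocating all NVME_RDMA_NUM_CM_EVENTS entries keeps the event path allocation-free; when the free list is empty the patch acks the incoming event and returns -ENOMEM, the same exhaustion signal pool_push gives here.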