Commit 7e23841d authored by Ben Walker

nvmf: Separate the send and recv completion queues.



This allows the target to poll for internal completions
at higher priority.

Change-Id: I895c33a594a7d7c0545aa3a8405a296be3c106fb
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
parent 04beb566
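
The per-connection ibv_create_comp_channel()/ibv_create_cq() calls removed below are no longer needed because librdmacm allocates a separate CQ and completion channel for each direction when rdma_create_qp() is given NULL send_cq/recv_cq, exposing them as id->send_cq, id->recv_cq, id->send_cq_channel and id->recv_cq_channel. A minimal sketch of that pattern outside the SPDK tree (the function name and queue sizing here are illustrative):

#include <stdint.h>
#include <string.h>
#include <rdma/rdma_cma.h>
#include <infiniband/verbs.h>

/* Sketch: let librdmacm allocate separate send and recv CQs for a QP.
 * The function name and sizing are illustrative; error handling is abbreviated. */
static int create_qp_with_split_cqs(struct rdma_cm_id *id, uint16_t queue_depth)
{
	struct ibv_qp_init_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.qp_type		= IBV_QPT_RC;
	attr.send_cq		= NULL;	/* librdmacm creates id->send_cq + id->send_cq_channel */
	attr.recv_cq		= NULL;	/* librdmacm creates id->recv_cq + id->recv_cq_channel */
	attr.cap.max_send_wr	= queue_depth * 2;	/* SEND plus READ/WRITE operations */
	attr.cap.max_recv_wr	= queue_depth;		/* RECV operations */
	attr.cap.max_send_sge	= 1;
	attr.cap.max_recv_sge	= 1;

	if (rdma_create_qp(id, NULL, &attr) != 0) {	/* NULL pd: use the default PD */
		return -1;
	}

	/* id->send_cq and id->recv_cq can now be polled independently, e.g. the
	 * send CQ more aggressively than the recv CQ. */
	return 0;
}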
+78 −57
@@ -77,10 +77,6 @@ struct spdk_nvmf_rdma_conn {
	struct spdk_nvmf_conn			conn;

	struct rdma_cm_id			*cm_id;
	struct ibv_context			*ctx;
	struct ibv_comp_channel			*comp_channel;
	struct ibv_cq				*cq;
	struct ibv_qp				*qp;

	/* The maximum number of I/O outstanding on this connection at one time */
	uint16_t				queue_depth;
@@ -169,14 +165,6 @@ spdk_nvmf_rdma_conn_destroy(struct spdk_nvmf_rdma_conn *rdma_conn)
		rdma_destroy_qp(rdma_conn->cm_id);
	}

	if (rdma_conn->cq) {
		ibv_destroy_cq(rdma_conn->cq);
	}

	if (rdma_conn->comp_channel) {
		ibv_destroy_comp_channel(rdma_conn->comp_channel);
	}

	if (rdma_conn->cm_id) {
		rdma_destroy_id(rdma_conn->cm_id);
	}
@@ -206,38 +194,10 @@ spdk_nvmf_rdma_conn_create(struct rdma_cm_id *id, uint16_t queue_depth, uint16_t

	rdma_conn->queue_depth = queue_depth;
	rdma_conn->rw_depth = rw_depth;
	rdma_conn->ctx = id->verbs;
	rdma_conn->cm_id = id;

	rdma_conn->comp_channel = ibv_create_comp_channel(id->verbs);
	if (!rdma_conn->comp_channel) {
		SPDK_ERRLOG("create completion channel error!\n");
		spdk_nvmf_rdma_conn_destroy(rdma_conn);
		return NULL;
	}

	rc = fcntl(rdma_conn->comp_channel->fd, F_SETFL, O_NONBLOCK);
	if (rc < 0) {
		SPDK_ERRLOG("fcntl to set comp channel to non-blocking failed\n");
		spdk_nvmf_rdma_conn_destroy(rdma_conn);
		return NULL;
	}

	/*
	 * Size the CQ to handle completions for RECV, SEND, and either READ or WRITE.
	 */
	rdma_conn->cq = ibv_create_cq(id->verbs, (queue_depth * 3), rdma_conn, rdma_conn->comp_channel,
				      0);
	if (!rdma_conn->cq) {
		SPDK_ERRLOG("create cq error!\n");
		spdk_nvmf_rdma_conn_destroy(rdma_conn);
		return NULL;
	}

	memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
	attr.qp_type		= IBV_QPT_RC;
	attr.send_cq		= rdma_conn->cq;
	attr.recv_cq		= rdma_conn->cq;
	attr.cap.max_send_wr	= rdma_conn->queue_depth * 2; /* SEND, READ, and WRITE operations */
	attr.cap.max_recv_wr	= rdma_conn->queue_depth; /* RECV operations */
	attr.cap.max_send_sge	= NVMF_DEFAULT_TX_SGE;
@@ -249,7 +209,24 @@ spdk_nvmf_rdma_conn_create(struct rdma_cm_id *id, uint16_t queue_depth, uint16_t
		spdk_nvmf_rdma_conn_destroy(rdma_conn);
		return NULL;
	}
	rdma_conn->qp = rdma_conn->cm_id->qp;

	SPDK_TRACELOG(SPDK_TRACE_RDMA,
		      "New RDMA QP created. Send CQ: %p Recv CQ: %p Send Queue Depth: %d Recv Queue Depth: %d\n",
		      rdma_conn->cm_id->send_cq, rdma_conn->cm_id->recv_cq, attr.cap.max_send_wr, attr.cap.max_recv_wr);

	rc = fcntl(rdma_conn->cm_id->send_cq_channel->fd, F_SETFL, O_NONBLOCK);
	if (rc < 0) {
		SPDK_ERRLOG("fcntl to set comp channel to non-blocking failed\n");
		spdk_nvmf_rdma_conn_destroy(rdma_conn);
		return NULL;
	}

	rc = fcntl(rdma_conn->cm_id->recv_cq_channel->fd, F_SETFL, O_NONBLOCK);
	if (rc < 0) {
		SPDK_ERRLOG("fcntl to set comp channel to non-blocking failed\n");
		spdk_nvmf_rdma_conn_destroy(rdma_conn);
		return NULL;
	}

	conn = &rdma_conn->conn;
	conn->transport = &spdk_nvmf_transport_rdma;
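
The same non-blocking setup is now applied to two completion-channel fds, so the duplicated fcntl() blocks above could be folded into one small helper. A sketch, assuming a hypothetical helper name (it also preserves any flags already set on the fd, which the inline F_SETFL calls do not):

#include <fcntl.h>
#include <infiniband/verbs.h>

/* Hypothetical helper, not part of the tree: put a completion channel fd into
 * non-blocking mode without clobbering flags that are already set. */
static int set_comp_channel_nonblock(struct ibv_comp_channel *ch)
{
	int flags = fcntl(ch->fd, F_GETFL);

	if (flags < 0) {
		return -1;
	}
	return fcntl(ch->fd, F_SETFL, flags | O_NONBLOCK);
}

Both rdma_conn->cm_id->send_cq_channel and rdma_conn->cm_id->recv_cq_channel would then go through the same call.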
@@ -363,7 +340,7 @@ nvmf_post_rdma_read(struct spdk_nvmf_request *req)
	nvmf_ibv_send_wr_set_rkey(&wr, req);

	spdk_trace_record(TRACE_RDMA_READ_START, 0, 0, (uintptr_t)req, 0);
	rc = ibv_post_send(rdma_conn->qp, &wr, &bad_wr);
	rc = ibv_post_send(rdma_conn->cm_id->qp, &wr, &bad_wr);
	if (rc) {
		SPDK_ERRLOG("Failure posting rdma read send, rc = 0x%x\n", rc);
	}
@@ -392,7 +369,7 @@ nvmf_post_rdma_write(struct spdk_nvmf_request *req)
	nvmf_ibv_send_wr_set_rkey(&wr, req);

	spdk_trace_record(TRACE_RDMA_WRITE_START, 0, 0, (uintptr_t)req, 0);
	rc = ibv_post_send(rdma_conn->qp, &wr, &bad_wr);
	rc = ibv_post_send(rdma_conn->cm_id->qp, &wr, &bad_wr);
	if (rc) {
		SPDK_ERRLOG("Failure posting rdma write send, rc = 0x%x\n", rc);
	}
@@ -428,7 +405,7 @@ nvmf_post_rdma_recv(struct spdk_nvmf_request *req)
	wr.sg_list = sg_list;
	wr.num_sge = 2;

	rc = ibv_post_recv(rdma_conn->qp, &wr, &bad_wr);
	rc = ibv_post_recv(rdma_conn->cm_id->qp, &wr, &bad_wr);
	if (rc) {
		SPDK_ERRLOG("Failure posting rdma recv, rc = 0x%x\n", rc);
	}
@@ -455,7 +432,7 @@ nvmf_post_rdma_send(struct spdk_nvmf_request *req)
	nvmf_ibv_send_wr_init(&wr, req, &sge, IBV_WR_SEND, IBV_SEND_SIGNALED);

	spdk_trace_record(TRACE_NVMF_IO_COMPLETE, 0, 0, (uintptr_t)req, 0);
	rc = ibv_post_send(rdma_conn->qp, &wr, &bad_wr);
	rc = ibv_post_send(rdma_conn->cm_id->qp, &wr, &bad_wr);
	if (rc) {
		SPDK_ERRLOG("Failure posting rdma send for NVMf completion, rc = 0x%x\n", rc);
	}
@@ -1072,30 +1049,30 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
	struct spdk_nvmf_rdma_conn *rdma_conn = get_rdma_conn(conn);
	struct spdk_nvmf_rdma_request *rdma_req;
	struct spdk_nvmf_request *req;
	int rc, count;
	int rc;
	int count = 0;

	count = 0;
	/* Poll the send completion queue to check for completing
	 * operations that the target initiated. */
	while (true) {
		rc = ibv_poll_cq(rdma_conn->cq, 1, &wc);
		if (rc == 0) // No completions at this time
		rc = ibv_poll_cq(rdma_conn->cm_id->send_cq, 1, &wc);
		if (rc == 0) {
			break;

		if (rc < 0) {
			SPDK_ERRLOG("Poll CQ error!(%d): %s\n",
		} else if (rc < 0) {
			SPDK_ERRLOG("Error polling Send CQ! (%d): %s\n",
				    errno, strerror(errno));
			return -1;
		}

		/* OK, process the single successful cq event */
		if (wc.status) {
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "CQ completion error status %d (%s), exiting handler\n",
			SPDK_ERRLOG("Send CQ error (%d): %s\n",
				    wc.status, ibv_wc_status_str(wc.status));
			return -1;
		}

		rdma_req = (struct spdk_nvmf_rdma_request *)wc.wr_id;
		if (rdma_req == NULL) {
			SPDK_ERRLOG("Got CQ completion for NULL rdma_req\n");
			SPDK_ERRLOG("NULL wr_id in RDMA work completion\n");
			return -1;
		}

@@ -1136,6 +1113,43 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
			count++;
			break;

		case IBV_WC_RECV:
			SPDK_ERRLOG("Unexpectedly received a RECV completion on the Send CQ\n");
			return -1;

		default:
			SPDK_ERRLOG("Received an unknown opcode on the Send CQ: %d\n", wc.opcode);
			return -1;
		}

	}

	/* Poll the recv completion queue for incoming requests */
	while (rdma_conn->num_outstanding_reqs < rdma_conn->queue_depth) {
		rc = ibv_poll_cq(rdma_conn->cm_id->recv_cq, 1, &wc);
		if (rc == 0) {
			break;
		} else if (rc < 0) {
			SPDK_ERRLOG("Error polling Send CQ! (%d): %s\n",
				    errno, strerror(errno));
			return -1;
		}

		if (wc.status) {
			SPDK_ERRLOG("Send CQ error (%d): %s\n",
				    wc.status, ibv_wc_status_str(wc.status));
			return -1;
		}

		rdma_req = (struct spdk_nvmf_rdma_request *)wc.wr_id;
		if (rdma_req == NULL) {
			SPDK_ERRLOG("NULL wr_id in RDMA work completion\n");
			return -1;
		}

		req = &rdma_req->req;

		switch (wc.opcode) {
		case IBV_WC_RECV:
			if (wc.byte_len < sizeof(struct spdk_nvmf_capsule_cmd)) {
				SPDK_ERRLOG("recv length %u less than capsule header\n", wc.byte_len);
@@ -1171,8 +1185,15 @@ spdk_nvmf_rdma_poll(struct spdk_nvmf_conn *conn)
			}
			break;

		case IBV_WC_SEND:
		case IBV_WC_RDMA_WRITE:
		case IBV_WC_RDMA_READ:
			SPDK_ERRLOG("Unexpectedly received a Send/Write/Read completion on the Recv CQ\n");
			return -1;
			break;

		default:
			SPDK_ERRLOG("Poll cq opcode type unknown!!!!! completion\n");
			SPDK_ERRLOG("Received an unknown opcode on the Recv CQ: %d\n", wc.opcode);
			return -1;
		}
	}
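
Taken together, the two loops above give the send CQ fixed priority: completions for operations the target itself issued are always drained first, and new host commands are pulled from the recv CQ only while the connection has queue-depth headroom. A condensed, self-contained sketch of that ordering (the struct and function are illustrative stand-ins for the SPDK types, with per-opcode handling elided):

#include <stdint.h>
#include <rdma/rdma_cma.h>
#include <infiniband/verbs.h>

/* Illustrative stand-in for the spdk_nvmf_rdma_conn fields used here. */
struct split_cq_conn {
	struct rdma_cm_id	*cm_id;			/* owns send_cq and recv_cq */
	uint16_t		queue_depth;
	uint16_t		num_outstanding_reqs;
};

static int poll_split_cqs(struct split_cq_conn *conn)
{
	struct ibv_wc wc;
	int rc, count = 0;

	/* 1. Drain target-initiated completions (SEND, RDMA_READ, RDMA_WRITE). */
	while ((rc = ibv_poll_cq(conn->cm_id->send_cq, 1, &wc)) != 0) {
		if (rc < 0 || wc.status != IBV_WC_SUCCESS) {
			return -1;
		}
		/* ...per-opcode handling as in the diff above... */
		count++;
	}

	/* 2. Accept new host commands only while queue depth allows. */
	while (conn->num_outstanding_reqs < conn->queue_depth &&
	       (rc = ibv_poll_cq(conn->cm_id->recv_cq, 1, &wc)) != 0) {
		if (rc < 0 || wc.status != IBV_WC_SUCCESS) {
			return -1;
		}
		/* ...hand the new capsule to the NVMf layer, as in the diff above... */
		count++;
	}

	return count;
}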