Commit 6e5f700b authored by Daniel Verkamp
Browse files

nvmf: move RDMA connection handling to rdma.c



Also split the generic nvmf_trace_command() function out of
the RDMA-specific handler and move it to request.c

Change-Id: If29b89db33c5e080c9816977ae5b18b90884e775
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
parent 2a3e39a1
Loading
Loading
Loading
Loading
+0 −201
Original line number Diff line number Diff line
@@ -338,56 +338,6 @@ void spdk_shutdown_nvmf_conns(void)
			rte_get_master_lcore(), spdk_nvmf_conn_check_shutdown, NULL);
}

/* Check the nvmf message received */
static void nvmf_trace_command(struct spdk_nvmf_capsule_cmd *cap_hdr, enum conn_type conn_type)
{
	struct spdk_nvme_cmd *cmd = (struct spdk_nvme_cmd *)cap_hdr;
	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
	uint8_t opc;

	SPDK_TRACELOG(SPDK_TRACE_NVMF, "NVMf %s%s Command:\n",
		      conn_type == CONN_TYPE_AQ ? "Admin" : "I/O",
		      cmd->opc == SPDK_NVME_OPC_FABRIC ? " Fabrics" : "");

	if (cmd->opc == SPDK_NVME_OPC_FABRIC) {
		opc = cap_hdr->fctype;
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  fctype 0x%02x\n", cap_hdr->fctype);
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  cid 0x%x\n", cap_hdr->cid);
	} else {
		opc = cmd->opc;
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  opc 0x%02x\n", cmd->opc);
		if (cmd->fuse) {
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  fuse %x\n", cmd->fuse);
		}
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  psdt %u\n", cmd->psdt);
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  cid 0x%x\n", cmd->cid);
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  nsid %u\n", cmd->nsid);
		if (cmd->mptr) {
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  mptr 0x%" PRIx64 "\n", cmd->mptr);
		}
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  cdw10 0x%08x\n", cmd->cdw10);
	}

	if (spdk_nvme_opc_get_data_transfer(opc) != SPDK_NVME_DATA_NONE) {
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL type 0x%x\n", sgl->generic.type);
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL subtype 0x%x\n", sgl->generic.subtype);
		if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) {

			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL address 0x%lx\n",
				      sgl->address);
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL key 0x%x\n",
				      sgl->keyed.key);
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL length 0x%x\n",
				      sgl->keyed.length);
		} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL %s 0x%" PRIx64 "\n",
				      sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET ? "offset" : "address",
				      sgl->address);
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL length 0x%x\n", sgl->unkeyed.length);
		}
	}
}

void
nvmf_init_conn_properites(struct spdk_nvmf_conn *conn,
			  struct nvmf_session *session,
@@ -425,157 +375,6 @@ nvmf_init_conn_properites(struct spdk_nvmf_conn *conn,

}

/*
 * Handle one RDMA receive completion that carries a command capsule.
 *
 * The receive descriptor is recovered from wc->wr_id. The first tx descriptor
 * on the connection's list is marked active and its embedded request is bound
 * to the received capsule before the command is traced, prepared and, when no
 * host data is pending, executed.
 *
 * Returns -1 when the receive is shorter than a capsule header or no tx
 * descriptor is available. Otherwise returns 0 when the request is waiting
 * for a host-to-controller transfer, the negative result of
 * spdk_nvmf_request_prep_data(), or the result of spdk_nvmf_request_exec().
 * On any negative result the tx descriptor is handed back through
 * nvmf_deactive_tx_desc().
 */
static int nvmf_recv(struct spdk_nvmf_conn *conn, struct ibv_wc *wc)
{
	struct nvme_qp_rx_desc *rx = (struct nvme_qp_rx_desc *)wc->wr_id;
	struct spdk_nvmf_capsule_cmd *capsule = &rx->cmd.nvmf_cmd;
	struct nvme_qp_tx_desc *tx;
	struct spdk_nvmf_request *request;
	int status;

	if (wc->byte_len < sizeof(*capsule)) {
		SPDK_ERRLOG("recv length less than capsule header\n");
		return -1;
	}
	SPDK_TRACELOG(SPDK_TRACE_NVMF, "recv byte count 0x%x\n", wc->byte_len);

	/* A response descriptor is required before the command can be handled. */
	tx = STAILQ_FIRST(&conn->rdma.qp_tx_desc);
	if (tx == NULL) {
		SPDK_ERRLOG("tx desc pool empty!\n");
		return -1;
	}
	nvmf_active_tx_desc(tx);

	/* The request lives inside the tx descriptor; tie it to this capsule. */
	request = &tx->req;
	request->conn = conn;
	request->tx_desc = tx;
	request->rx_desc = rx;
	request->cid = capsule->cid;
	request->cmd = &rx->cmd;

	nvmf_trace_command(capsule, conn->type);

	status = spdk_nvmf_request_prep_data(request,
					     rx->bb, wc->byte_len - sizeof(*capsule),
					     rx->bb, rx->bb_sgl.length);
	if (status > 0) {
		/*
		 * Host-to-controller transfer is outstanding; the command
		 * resumes when that transfer completes.
		 */
		return 0;
	}

	if (status < 0) {
		SPDK_ERRLOG("prep_data failed\n");
	} else {
		/* Nothing to wait for, so run the command now. */
		status = spdk_nvmf_request_exec(request);
		if (status < 0) {
			SPDK_ERRLOG("Command execution failed\n");
		}
	}

	if (status < 0) {
		/* Give the tx descriptor back on failure. */
		nvmf_deactive_tx_desc(tx);
	}

	return status;
}

static int nvmf_check_rdma_completions(struct spdk_nvmf_conn *conn)
{
	struct ibv_wc wc;
	struct nvme_qp_tx_desc *tx_desc;
	struct spdk_nvmf_request *req;
	int rc;
	int cq_count = 0;
	int i;

	for (i = 0; i < conn->rdma.sq_depth; i++) {
		tx_desc = NULL;

		rc = ibv_poll_cq(conn->rdma.cq, 1, &wc);
		if (rc == 0) // No completions at this time
			break;

		if (rc < 0) {
			SPDK_ERRLOG("Poll CQ error!(%d): %s\n",
				    errno, strerror(errno));
			goto handler_error;
		}

		/* OK, process the single successful cq event */
		cq_count += rc;

		if (wc.status) {
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "CQ completion error status %d, exiting handler\n",
				      wc.status);
			break;
		}

		switch (wc.opcode) {
		case IBV_WC_SEND:
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ send completion\n");
			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
			nvmf_deactive_tx_desc(tx_desc);
			break;

		case IBV_WC_RDMA_WRITE:
			/*
			 * Will get this event only if we set IBV_SEND_SIGNALED
			 * flag in rdma_write, to trace rdma write latency
			 */
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma write completion\n");
			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
			req = &tx_desc->req;
			spdk_trace_record(TRACE_RDMA_WRITE_COMPLETE, 0, 0, (uint64_t)req, 0);
			break;

		case IBV_WC_RDMA_READ:
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma read completion\n");
			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
			req = &tx_desc->req;
			spdk_trace_record(TRACE_RDMA_READ_COMPLETE, 0, 0, (uint64_t)req, 0);
			rc = spdk_nvmf_request_exec(req);
			if (rc) {
				SPDK_ERRLOG("request_exec error %d after RDMA Read completion\n", rc);
				goto handler_error;
			}

			rc = nvmf_process_pending_rdma(conn);
			if (rc) {
				goto handler_error;
			}
			break;

		case IBV_WC_RECV:
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ recv completion\n");
			spdk_trace_record(TRACE_NVMF_IO_START, 0, 0, wc.wr_id, 0);
			rc = nvmf_recv(conn, &wc);
			if (rc) {
				SPDK_ERRLOG("nvmf_recv processing failure\n");
				goto handler_error;
			}
			break;

		default:
			SPDK_ERRLOG("Poll cq opcode type unknown!!!!! completion\n");
			goto handler_error;
		}
	}
	return cq_count;

handler_error:
	if (tx_desc != NULL)
		nvmf_deactive_tx_desc(tx_desc);
	SPDK_ERRLOG("handler error, exiting!\n");
	return -1;
}

static void
spdk_nvmf_conn_do_work(void *arg)
{
+152 −0
Original line number Diff line number Diff line
@@ -1132,4 +1132,156 @@ nvmf_process_pending_rdma(struct spdk_nvmf_conn *conn)
	return 0;
}


/*
 * Handle one RDMA receive completion that carries a command capsule.
 *
 * The receive descriptor is recovered from wc->wr_id. The first tx descriptor
 * on the connection's list is marked active and its embedded request is bound
 * to the received capsule before the command is prepared and, when no host
 * data is pending, executed.
 *
 * Returns -1 when the receive is shorter than a capsule header or no tx
 * descriptor is available. Otherwise returns 0 when the request is waiting
 * for a host-to-controller transfer, the negative result of
 * spdk_nvmf_request_prep_data(), or the result of spdk_nvmf_request_exec().
 * On any negative result the tx descriptor is handed back through
 * nvmf_deactive_tx_desc().
 */
static int
nvmf_recv(struct spdk_nvmf_conn *conn, struct ibv_wc *wc)
{
	struct nvme_qp_rx_desc *rx = (struct nvme_qp_rx_desc *)wc->wr_id;
	struct spdk_nvmf_capsule_cmd *capsule = &rx->cmd.nvmf_cmd;
	struct nvme_qp_tx_desc *tx;
	struct spdk_nvmf_request *request;
	int status;

	if (wc->byte_len < sizeof(*capsule)) {
		SPDK_ERRLOG("recv length less than capsule header\n");
		return -1;
	}
	SPDK_TRACELOG(SPDK_TRACE_NVMF, "recv byte count 0x%x\n", wc->byte_len);

	/* A response descriptor is required before the command can be handled. */
	tx = STAILQ_FIRST(&conn->rdma.qp_tx_desc);
	if (tx == NULL) {
		SPDK_ERRLOG("tx desc pool empty!\n");
		return -1;
	}
	nvmf_active_tx_desc(tx);

	/* The request lives inside the tx descriptor; tie it to this capsule. */
	request = &tx->req;
	request->conn = conn;
	request->tx_desc = tx;
	request->rx_desc = rx;
	request->cid = capsule->cid;
	request->cmd = &rx->cmd;

	status = spdk_nvmf_request_prep_data(request,
					     rx->bb, wc->byte_len - sizeof(*capsule),
					     rx->bb, rx->bb_sgl.length);
	if (status > 0) {
		/*
		 * Host-to-controller transfer is outstanding; the command
		 * resumes when that transfer completes.
		 */
		return 0;
	}

	if (status < 0) {
		SPDK_ERRLOG("prep_data failed\n");
	} else {
		/* Nothing to wait for, so run the command now. */
		status = spdk_nvmf_request_exec(request);
		if (status < 0) {
			SPDK_ERRLOG("Command execution failed\n");
		}
	}

	if (status < 0) {
		/* Give the tx descriptor back on failure. */
		nvmf_deactive_tx_desc(tx);
	}

	return status;
}

int
nvmf_check_rdma_completions(struct spdk_nvmf_conn *conn)
{
	struct ibv_wc wc;
	struct nvme_qp_tx_desc *tx_desc;
	struct spdk_nvmf_request *req;
	int rc;
	int cq_count = 0;
	int i;

	for (i = 0; i < conn->rdma.sq_depth; i++) {
		tx_desc = NULL;

		rc = ibv_poll_cq(conn->rdma.cq, 1, &wc);
		if (rc == 0) // No completions at this time
			break;

		if (rc < 0) {
			SPDK_ERRLOG("Poll CQ error!(%d): %s\n",
				    errno, strerror(errno));
			goto handler_error;
		}

		/* OK, process the single successful cq event */
		cq_count += rc;

		if (wc.status) {
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "CQ completion error status %d, exiting handler\n",
				      wc.status);
			break;
		}

		switch (wc.opcode) {
		case IBV_WC_SEND:
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ send completion\n");
			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
			nvmf_deactive_tx_desc(tx_desc);
			break;

		case IBV_WC_RDMA_WRITE:
			/*
			 * Will get this event only if we set IBV_SEND_SIGNALED
			 * flag in rdma_write, to trace rdma write latency
			 */
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma write completion\n");
			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
			req = &tx_desc->req;
			spdk_trace_record(TRACE_RDMA_WRITE_COMPLETE, 0, 0, (uint64_t)req, 0);
			break;

		case IBV_WC_RDMA_READ:
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ rdma read completion\n");
			tx_desc = (struct nvme_qp_tx_desc *)wc.wr_id;
			req = &tx_desc->req;
			spdk_trace_record(TRACE_RDMA_READ_COMPLETE, 0, 0, (uint64_t)req, 0);
			rc = spdk_nvmf_request_exec(req);
			if (rc) {
				SPDK_ERRLOG("request_exec error %d after RDMA Read completion\n", rc);
				goto handler_error;
			}

			rc = nvmf_process_pending_rdma(conn);
			if (rc) {
				goto handler_error;
			}
			break;

		case IBV_WC_RECV:
			SPDK_TRACELOG(SPDK_TRACE_RDMA, "\nCQ recv completion\n");
			spdk_trace_record(TRACE_NVMF_IO_START, 0, 0, wc.wr_id, 0);
			rc = nvmf_recv(conn, &wc);
			if (rc) {
				SPDK_ERRLOG("nvmf_recv processing failure\n");
				goto handler_error;
			}
			break;

		default:
			SPDK_ERRLOG("Poll cq opcode type unknown!!!!! completion\n");
			goto handler_error;
		}
	}
	return cq_count;

handler_error:
	if (tx_desc != NULL)
		nvmf_deactive_tx_desc(tx_desc);
	SPDK_ERRLOG("handler error, exiting!\n");
	return -1;
}

SPDK_LOG_REGISTER_TRACE_FLAG("rdma", SPDK_TRACE_RDMA)
+2 −0
Original line number Diff line number Diff line
@@ -100,4 +100,6 @@ void nvmf_acceptor_stop(void);
void nvmf_active_tx_desc(struct nvme_qp_tx_desc *tx_desc);
void nvmf_deactive_tx_desc(struct nvme_qp_tx_desc *tx_desc);

/*
 * Poll the connection's RDMA completion queue and process the completions
 * found. Returns the number of completions polled, or -1 on error.
 */
int nvmf_check_rdma_completions(struct spdk_nvmf_conn *conn);

#endif /* _NVMF_RDMA_H_ */
+53 −0
Original line number Diff line number Diff line
@@ -543,6 +543,57 @@ nvmf_process_fabrics_command(struct spdk_nvmf_request *req)
	}
}

/*
 * Dump a host-to-controller command to the NVMF trace log.
 *
 * h2c_msg   - received message; it is read both as a Fabrics capsule header
 *             (nvmf_cmd) and as a plain NVMe command (nvme_cmd).
 * conn_type - CONN_TYPE_AQ labels the command "Admin"; any other value
 *             labels it "I/O".
 *
 * The command is only read and logged; nothing is validated or modified.
 */
static void
nvmf_trace_command(union nvmf_h2c_msg *h2c_msg, enum conn_type conn_type)
{
	struct spdk_nvmf_capsule_cmd *cap_hdr = &h2c_msg->nvmf_cmd;
	struct spdk_nvme_cmd *cmd = &h2c_msg->nvme_cmd;
	struct spdk_nvme_sgl_descriptor *sgl = &cmd->dptr.sgl1;
	uint8_t opc;

	SPDK_TRACELOG(SPDK_TRACE_NVMF, "NVMf %s%s Command:\n",
		      conn_type == CONN_TYPE_AQ ? "Admin" : "I/O",
		      cmd->opc == SPDK_NVME_OPC_FABRIC ? " Fabrics" : "");

	if (cmd->opc == SPDK_NVME_OPC_FABRIC) {
		/* For Fabrics commands the data direction is looked up by fctype. */
		opc = cap_hdr->fctype;
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  fctype 0x%02x\n", cap_hdr->fctype);
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  cid 0x%x\n", cap_hdr->cid);
	} else {
		opc = cmd->opc;
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  opc 0x%02x\n", cmd->opc);
		if (cmd->fuse) {
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  fuse %x\n", cmd->fuse);
		}
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  psdt %u\n", cmd->psdt);
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  cid 0x%x\n", cmd->cid);
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  nsid %u\n", cmd->nsid);
		if (cmd->mptr) {
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  mptr 0x%" PRIx64 "\n", cmd->mptr);
		}
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  cdw10 0x%08x\n", cmd->cdw10);
	}

	/* The SGL descriptor is only logged for commands that transfer data. */
	if (spdk_nvme_opc_get_data_transfer(opc) != SPDK_NVME_DATA_NONE) {
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL type 0x%x\n", sgl->generic.type);
		SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL subtype 0x%x\n", sgl->generic.subtype);
		if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK) {
			/* PRIx64 matches the 64-bit address on every ABI; "%lx" does not. */
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL address 0x%" PRIx64 "\n",
				      sgl->address);
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL key 0x%x\n",
				      sgl->keyed.key);
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL length 0x%x\n",
				      sgl->keyed.length);
		} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK) {
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL %s 0x%" PRIx64 "\n",
				      sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET ? "offset" : "address",
				      sgl->address);
			SPDK_TRACELOG(SPDK_TRACE_NVMF, "	SQE:  SGL length 0x%x\n", sgl->unkeyed.length);
		}
	}
}

int
spdk_nvmf_request_prep_data(struct spdk_nvmf_request *req,
			    void *in_cap_data, uint32_t in_cap_len,
@@ -553,6 +604,8 @@ spdk_nvmf_request_prep_data(struct spdk_nvmf_request *req,
	enum spdk_nvme_data_transfer xfer;
	int ret;

	nvmf_trace_command(req->cmd, conn->type);

	req->length = 0;
	req->xfer = SPDK_NVME_DATA_NONE;
	req->data = NULL;