Commit daee62a0 authored by Alexey Marchuk, committed by Tomasz Zawadzki
Browse files

rdma: Add mlx5_dv RDMA provider



The new RDMA provider can be enabled by passing
--with-rdma=mlx5_dv parameter to configure script
This provider uses the "externally created qpair"
functionality of rdma cm — the provider must move a qpair
to the RTS state manually

Change-Id: I72484f6edd1f4dad15430e2c8d36b65d1975e8a2
Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1658


Community-CI: Mellanox Build Bot
Community-CI: Broadcom CI
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
parent 63c8cea7
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -15,6 +15,13 @@ The software accel engine implementation has added support for CRC-32C.

IDXD engine support for CRC-32C has been added.

### rdma

A new `rdma` library has been added. It is an abstraction layer over different RDMA providers.
Two providers are available - verbs (used by default when RDMA is enabled or enabled explicitly
using --with-rdma=verbs) and mlx5 Direct Verbs aka DV (enabled by --with-rdma=mlx5_dv).
Using mlx5_dv requires libmlx5 to be installed on the system.

## v20.04:

### configuration
+1 −0
Original line number Diff line number Diff line
@@ -100,6 +100,7 @@ CONFIG_FIO_SOURCE_DIR=/usr/src/fio
CONFIG_RDMA=n
CONFIG_RDMA_SEND_WITH_INVAL=n
CONFIG_RDMA_SET_ACK_TIMEOUT=n
CONFIG_RDMA_PROV=verbs

# Enable NVMe Character Devices.
CONFIG_NVME_CUSE=n
+25 −1
Original line number Diff line number Diff line
@@ -74,7 +74,8 @@ function usage()
	echo " rbd                       Build Ceph RBD bdev module."
	echo "                           No path required."
	echo " rdma                      Build RDMA transport for NVMf target and initiator."
	echo "                           No path required."
	echo "                           Accepts optional RDMA provider name. Can be \"verbs\" or \"mlx5_dv\"."
	echo "                           If no provider specified, \"verbs\" provider is used by default."
	echo " fc                        Build FC transport for NVMf target."
	echo "                           If an argument is provided, it is considered a directory containing"
	echo "                           libufc.a and fc_lld.h. Otherwise the regular system paths will"
@@ -292,8 +293,13 @@ for i in "$@"; do
		--without-rbd)
			CONFIG[RBD]=n
			;;
		--with-rdma=*)
			CONFIG[RDMA]=y
			CONFIG[RDMA_PROV]=${i#*=}
			;;
		--with-rdma)
			CONFIG[RDMA]=y
			CONFIG[RDMA_PROV]="verbs"
			;;
		--without-rdma)
			CONFIG[RDMA]=n
@@ -593,6 +599,11 @@ if [[ $sys_name == "FreeBSD" ]]; then
fi

if [ "${CONFIG[RDMA]}" = "y" ]; then
  if [[ ! "${CONFIG[RDMA_PROV]}" == "verbs" ]] && [[ ! "${CONFIG[RDMA_PROV]}" == "mlx5_dv" ]]; then
    echo "Invalid RDMA provider specified, must be \"verbs\" or \"mlx5_dv\""
    exit 1
  fi

	if ! echo -e '#include <infiniband/verbs.h>\n#include <rdma/rdma_verbs.h>\n' \
		'int main(void) { return 0; }\n' \
		| ${BUILD_CMD[@]} -libverbs -lrdmacm - 2>/dev/null; then
@@ -632,6 +643,19 @@ than or equal to 4.14 will see significantly reduced performance.
		echo "RDMA_OPTION_ID_ACK_TIMEOUT is not supported"
	fi

	if [ "${CONFIG[RDMA_PROV]}" == "mlx5_dv" ]; then
	  if ! echo -e '#include <spdk/stdinc.h>\n' \
	    '#include <infiniband/mlx5dv.h>\n' \
	    '#include <rdma/rdma_cma.h>\n' \
		  'int main(void) { return rdma_establish(NULL) || ' \
		  '!!IBV_QP_INIT_ATTR_SEND_OPS_FLAGS || !!MLX5_OPCODE_RDMA_WRITE; }\n' \
		  | ${BUILD_CMD[@]} -lmlx5 -I${rootdir}/include -c - 2>/dev/null; then
      echo "mlx5_dv provider is not supported"
		  exit 1
	  fi
	fi

	echo "Using "${CONFIG[RDMA_PROV]}" RDMA provider"
fi

if [[ "${CONFIG[FC]}" = "y" ]]; then
+9 −0
Original line number Diff line number Diff line
@@ -45,6 +45,7 @@ struct spdk_rdma_qp_init_attr {
	struct ibv_srq	       *srq;
	struct ibv_qp_cap	cap;
	struct ibv_pd	       *pd;
	bool			initiator_side;
};

struct spdk_rdma_qp {
@@ -61,6 +62,14 @@ struct spdk_rdma_qp {
struct spdk_rdma_qp *spdk_rdma_qp_create(struct rdma_cm_id *cm_id,
		struct spdk_rdma_qp_init_attr *qp_attr);

/**
 * Complete the connection process; must be called by the active
 * side (NVMe-oF initiator) upon receipt of RDMA_CM_EVENT_CONNECT_RESPONSE.
 * @param spdk_rdma_qp pointer to a qpair
 * @return 0 on success, errno on failure
 */
int spdk_rdma_qp_complete_connect(struct spdk_rdma_qp *spdk_rdma_qp);

/**
 * Destroy RDMA provider specific qpair
 * \param spdk_rdma_qp Pointer to qpair to be destroyed
+21 −7
Original line number Diff line number Diff line
@@ -332,13 +332,14 @@ nvme_rdma_qpair_process_cm_event(struct nvme_rdma_qpair *rqpair)
			break;
		case RDMA_CM_EVENT_CONNECT_REQUEST:
			break;
		case RDMA_CM_EVENT_CONNECT_RESPONSE:
			break;
		case RDMA_CM_EVENT_CONNECT_ERROR:
			break;
		case RDMA_CM_EVENT_UNREACHABLE:
		case RDMA_CM_EVENT_REJECTED:
			break;
		case RDMA_CM_EVENT_CONNECT_RESPONSE:
			rc = spdk_rdma_qp_complete_connect(rqpair->rdma_qp);
		/* fall through */
		case RDMA_CM_EVENT_ESTABLISHED:
			accept_data = (struct spdk_nvmf_rdma_accept_private_data *)event->param.conn.private_data;
			if (accept_data == NULL) {
@@ -443,6 +444,13 @@ nvme_rdma_validate_cm_event(enum rdma_cm_event_type expected_evt_type,
		 */
		if (reaped_evt->event == RDMA_CM_EVENT_REJECTED && reaped_evt->status == 10) {
			rc = -ESTALE;
		} else if (reaped_evt->event == RDMA_CM_EVENT_CONNECT_RESPONSE) {
			/*
			 *  If we are using a qpair which is not created using rdma cm API
			 *  then we will receive RDMA_CM_EVENT_CONNECT_RESPONSE instead of
			 *  RDMA_CM_EVENT_ESTABLISHED.
			 */
			return 0;
		}
		break;
	default:
@@ -530,6 +538,7 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
	attr.cap.max_recv_wr	= rqpair->num_entries; /* RECV operations */
	attr.cap.max_send_sge	= spdk_min(NVME_RDMA_DEFAULT_TX_SGE, dev_attr.max_sge);
	attr.cap.max_recv_sge	= spdk_min(NVME_RDMA_DEFAULT_RX_SGE, dev_attr.max_sge);
	attr.initiator_side	= true;

	rqpair->rdma_qp = spdk_rdma_qp_create(rqpair->cm_id, &attr);

@@ -541,7 +550,7 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
	rqpair->max_send_sge = spdk_min(NVME_RDMA_DEFAULT_TX_SGE, attr.cap.max_send_sge);
	rqpair->max_recv_sge = spdk_min(NVME_RDMA_DEFAULT_RX_SGE, attr.cap.max_recv_sge);

	rctrlr->pd = rqpair->cm_id->qp->pd;
	rctrlr->pd = rqpair->rdma_qp->qp->pd;

	rqpair->cm_id->context = &rqpair->qpair;

@@ -555,7 +564,7 @@ nvme_rdma_qpair_submit_sends(struct nvme_rdma_qpair *rqpair)
	int rc;

	if (rqpair->sends_to_post.first) {
		rc = ibv_post_send(rqpair->cm_id->qp, rqpair->sends_to_post.first, &bad_send_wr);
		rc = ibv_post_send(rqpair->rdma_qp->qp, rqpair->sends_to_post.first, &bad_send_wr);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("Failed to post WRs on send queue, errno %d (%s), bad_wr %p\n",
				    rc, spdk_strerror(rc), bad_send_wr);
@@ -579,7 +588,7 @@ nvme_rdma_qpair_submit_recvs(struct nvme_rdma_qpair *rqpair)
	int rc;

	if (rqpair->recvs_to_post.first) {
		rc = ibv_post_recv(rqpair->cm_id->qp, rqpair->recvs_to_post.first, &bad_recv_wr);
		rc = ibv_post_recv(rqpair->rdma_qp->qp, rqpair->recvs_to_post.first, &bad_recv_wr);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("Failed to post WRs on receive queue, errno %d (%s), bad_wr %p\n",
				    rc, spdk_strerror(rc), bad_recv_wr);
@@ -933,6 +942,11 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
	param.retry_count = ctrlr->opts.transport_retry_count;
	param.rnr_retry_count = 7;

	/* Fields below are ignored by rdma cm if qpair has been
	 * created using rdma cm API. */
	param.srq = 0;
	param.qp_num = rqpair->rdma_qp->qp->qp_num;

	ret = rdma_connect(rqpair->cm_id, &param);
	if (ret) {
		SPDK_ERRLOG("nvme rdma connect error\n");
@@ -944,7 +958,7 @@ nvme_rdma_connect(struct nvme_rdma_qpair *rqpair)
		SPDK_NOTICELOG("Received a stale connection notice during connection.\n");
		return -EAGAIN;
	} else if (ret) {
		SPDK_ERRLOG("RDMA connect error\n");
		SPDK_ERRLOG("RDMA connect error %d\n", ret);
		return -1;
	} else {
		return 0;
@@ -1033,7 +1047,7 @@ nvme_rdma_check_contiguous_entries(uint64_t addr_1, uint64_t addr_2)
static int
nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair)
{
	struct ibv_pd *pd = rqpair->cm_id->qp->pd;
	struct ibv_pd *pd = rqpair->rdma_qp->qp->pd;
	struct spdk_nvme_rdma_mr_map *mr_map;
	const struct spdk_mem_map_ops nvme_rdma_map_ops = {
		.notify_cb = nvme_rdma_mr_map_notify,
Loading