Commit b557c9c8 authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Konrad Sztyber
Browse files

lib/mlx5: Add native implementation of CQ and QP



Add functions to create and destroy CQ and QP.
Next patches add implementation of other API
used in IO path to replace mlx5_dv API. Under the
hood CQ and QP are created using verbs API and then
low level objects are extracted to be used directly.

mlx5_ifc.h is a binary interface to the HW according
to the PRM.
mlx5_ifc.h is a copy of rdma-core header version v51.0
with some modifications. Original copyright is kept.
This file is available under a choice of one of two
licenses: GPL v2 or BSD-2-Clause (the clause is not
specified explicitly but the text matches the 2-clause)
LICENSE file was updated with mentioning that mlx5_ifc.h
in SPDK tree is BSD-2-Clause

Signed-off-by: default avatarAlexey Marchuk <alexeymar@nvidia.com>
Change-Id: If5415729019c85eda8e6a1494164d3d0bab9ee94
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/23102


Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <ben@nvidia.com>
Community-CI: Mellanox Build Bot
parent 18384adc
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -28,3 +28,5 @@ Exceptions:
  FreeBSD source tree.

* lib/util/base64_neon.c is BSD-2-Clause.

* lib/mlx5/mlx5_ifc.h is BSD-2-Clause.
+105 −0
Original line number Diff line number Diff line
@@ -10,9 +10,38 @@

#define SPDK_MLX5_DEV_MAX_NAME_LEN 64

/* API for low level PRM based mlx5 driver implementation. Some terminology:
 * PRM - Programming Reference Manual
 * QP - Queue Pair
 * SQ - Submission Queue
 * CQ - Completion Queue
 * WQE - Work Queue Element
 * WQEBB - Work Queue Element Basic Block (64 bytes)
 * CQE - Completion Queue Entry
 */

struct spdk_mlx5_crypto_dek;
struct spdk_mlx5_crypto_keytag;

/* Flag values for the control (CTRL) segment of a WQE. Several entries are plain aliases of
 * MLX5_WQE_CTRL_* constants which are defined outside of this header.
 * NOTE(review): presumably these values are OR-ed together into the flags field of the ctrl
 * segment - confirm against the code that builds WQEs. */
enum {
	/** Error Completion Event - generate CQE on error for every CTRL segment, even one without CQ_UPDATE bit.
	 * Don't generate CQE in other cases. Default behaviour.
	 * Note: has the same numeric value as SPDK_MLX5_WQE_CTRL_CE_MASK (both bits 2 and 3 set) */
	SPDK_MLX5_WQE_CTRL_CE_CQ_ECE			= 3 << 2,
	/** Do not generate IBV_WC_WR_FLUSH_ERR for non-signaled CTRL segments. Completions are generated only for
	 * signaled (CQ_UPDATE) CTRL segments and the first error */
	SPDK_MLX5_WQE_CTRL_CE_CQ_NO_FLUSH_ERROR		= 1 << 2,
	/** Always generate CQE for CTRL segment WQE */
	SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE			= MLX5_WQE_CTRL_CQ_UPDATE,
	/** Mask which covers bits 2 and 3, i.e. all bits used by the SPDK_MLX5_WQE_CTRL_CE_CQ_* values
	 * defined with explicit shifts above */
	SPDK_MLX5_WQE_CTRL_CE_MASK			= 3 << 2,
	/** Alias of MLX5_WQE_CTRL_SOLICITED.
	 * NOTE(review): presumably marks the WQE as solicited - confirm against the PRM */
	SPDK_MLX5_WQE_CTRL_SOLICITED			= MLX5_WQE_CTRL_SOLICITED,
	/** WQE starts execution only after all previous Read/Atomic WQEs complete */
	SPDK_MLX5_WQE_CTRL_FENCE			= MLX5_WQE_CTRL_FENCE,
	/** WQE starts execution after all local WQEs (memory operation, gather) complete */
	SPDK_MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE	= MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE,
	/** WQE starts execution only after all previous WQEs complete. Bits 5 and 6 are set */
	SPDK_MLX5_WQE_CTRL_STRONG_ORDERING		= 3 << 5,
};

struct spdk_mlx5_crypto_dek_create_attr {
	/* Data Encryption Key in binary form */
	char *dek;
@@ -20,6 +49,82 @@ struct spdk_mlx5_crypto_dek_create_attr {
	size_t dek_len;
};

struct spdk_mlx5_cq;
struct spdk_mlx5_qp;

/* Attributes passed to spdk_mlx5_cq_create() to describe the CQ to be created.
 * NOTE(review): cq_context, comp_channel and comp_vector look like the arguments of the verbs
 * CQ creation call and are presumably forwarded to it unchanged - confirm in the implementation. */
struct spdk_mlx5_cq_attr {
	/* Number of CQEs; presumably the requested CQ depth - confirm */
	uint32_t cqe_cnt;
	/* Size of a single CQE; presumably in bytes - confirm */
	uint32_t cqe_size;
	/* Opaque user context associated with the CQ */
	void *cq_context;
	/* Verbs completion channel */
	struct ibv_comp_channel *comp_channel;
	/* Completion vector */
	int comp_vector;
};

/* Attributes passed to spdk_mlx5_qp_create() to describe the QP to be created */
struct spdk_mlx5_qp_attr {
	/* Verbs QP capabilities (queue depths, SGE limits, inline data size) */
	struct ibv_qp_cap cap;
	/* NOTE(review): presumably requests a CQE for every ctrl WQE regardless of the flags passed
	 * by the user (counterpart of siglast below) - confirm in the implementation */
	bool sigall;
	/* If set then CQ_UPDATE will be cleared for every ctrl WQE and only last ctrl WQE before ringing the doorbell
	 * will be updated with CQ_UPDATE flag */
	bool siglast;
};

/* Description of a single completion.
 * NOTE(review): presumably filled in by a CQ polling function which is not part of this chunk. */
struct spdk_mlx5_cq_completion {
	/* wr_id and mkey share storage, which one is valid depends on status */
	union {
		uint64_t wr_id;
		uint32_t mkey; /* applicable if status == MLX5_CQE_SYNDROME_SIGERR */
	};
	/* Completion status, compared against MLX5_CQE_SYNDROME_* values (see mkey above) */
	int status;
};

/**
 * Create Completion Queue
 *
 * \note: CQ and all associated qpairs must be accessed in scope of a single thread
 * \note: CQ size must be enough to hold completions of all connected qpairs
 *
 * \param pd Protection Domain
 * \param cq_attr Attributes to be used to create CQ
 * \param cq_out Pointer to the location where the created CQ is stored
 * \return 0 on success, negated errno on failure. \b cq_out is set only on success result
 */
int spdk_mlx5_cq_create(struct ibv_pd *pd, struct spdk_mlx5_cq_attr *cq_attr,
			struct spdk_mlx5_cq **cq_out);

/**
 * Destroy Completion Queue
 *
 * \param cq CQ created with \ref spdk_mlx5_cq_create
 * \return NOTE(review): presumably 0 on success, negated errno on failure, like the other
 * functions of this API - confirm against the implementation
 */
int spdk_mlx5_cq_destroy(struct spdk_mlx5_cq *cq);

/**
 * Create loopback qpair suitable for RDMA operations
 *
 * \param pd Protection Domain
 * \param cq Completion Queue to bind QP to
 * \param qp_attr Attributes to be used to create QP
 * \param qp_out Pointer to the location where the created QP is stored
 * \return 0 on success, negated errno on failure. \b qp_out is set only on success result
 */
int spdk_mlx5_qp_create(struct ibv_pd *pd, struct spdk_mlx5_cq *cq,
			struct spdk_mlx5_qp_attr *qp_attr, struct spdk_mlx5_qp **qp_out);

/**
 * Change internal qpair state to error, causing all unprocessed Work Requests to be completed
 * with IBV_WC_WR_FLUSH_ERR status code.
 *
 * \param qp qpair pointer
 * \return 0 on success, negated errno on failure
 */
int spdk_mlx5_qp_set_error_state(struct spdk_mlx5_qp *qp);

/**
 * Destroy qpair. Unlike \ref spdk_mlx5_cq_destroy this function does not return a status.
 *
 * \param qp QP created with \ref spdk_mlx5_qp_create
 */
void spdk_mlx5_qp_destroy(struct spdk_mlx5_qp *qp);

/**
 * Return a NULL terminated array of devices which support crypto operation on Nvidia NICs
 *
+1 −1
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 3
SO_MINOR := 0

C_SRCS = mlx5_crypto.c
C_SRCS = mlx5_crypto.c mlx5_qp.c
LIBNAME = mlx5

LOCAL_SYS_LIBS += -lmlx5 -libverbs -lrdmacm

lib/mlx5/mlx5_ifc.h

0 → 100644
+5688 −0

File added.

Preview size limit exceeded, changes collapsed.

lib/mlx5/mlx5_priv.h

0 → 100644
+82 −0
Original line number Diff line number Diff line
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"
#include "spdk/queue.h"
#include "spdk/barrier.h"
#include "spdk/likely.h"

#include <infiniband/mlx5dv.h>
#include "spdk_internal/mlx5.h"

/**
 * Low level CQ representation, suitable for the direct polling
 */
struct mlx5_hw_cq {
	/* Address of the CQ buffer, stored as an integer */
	uint64_t cq_addr;
	/* Number of CQEs in the CQ */
	uint32_t cqe_cnt;
	/* Size of a single CQE */
	uint32_t cqe_size;
	/* NOTE(review): presumably the consumer index - confirm against the polling code */
	uint32_t ci;
	/* CQ number */
	uint32_t cq_num;
};

/**
 * Low level QP representation, suitable for the WQEs submission.
 * Only submission queue is supported, receive queue is omitted since not used right now
 */
struct mlx5_hw_qp {
	/* NOTE(review): presumably address of the doorbell record - confirm */
	uint64_t dbr_addr;
	/* Address of the submission queue buffer, stored as an integer */
	uint64_t sq_addr;
	/* NOTE(review): presumably address of the BlueFlame register used to ring the doorbell - confirm */
	uint64_t sq_bf_addr;
	/* Number of WQEs in the submission queue */
	uint32_t sq_wqe_cnt;
	/* NOTE(review): presumably the producer index of the submission queue - confirm */
	uint16_t sq_pi;
	/* NOTE(review): meaning is not visible in this file, looks like a doorbell related flag - confirm */
	uint32_t sq_tx_db_nc;
	/* QP number */
	uint32_t qp_num;
};

/*
 * A qp_num is a 24 bit value. To find a qp by its qp_num a 2D lookup table is used,
 * the upper 12 bits and the lower 12 bits of qp_num serve as the two indexes.
 */
#define SPDK_MLX5_QP_NUM_LUT_SIZE (1 << 12)
#define SPDK_MLX5_QP_NUM_UPPER_SHIFT (12)
#define SPDK_MLX5_QP_NUM_LOWER_MASK ((1 << SPDK_MLX5_QP_NUM_UPPER_SHIFT) - 1)

/* Completion Queue object: low level CQ description plus bookkeeping of the qpairs attached to it */
struct spdk_mlx5_cq {
	/* Low level CQ representation */
	struct mlx5_hw_cq hw;
	/* Lookup table of qpairs with SPDK_MLX5_QP_NUM_LUT_SIZE slots.
	 * NOTE(review): presumably a slot is selected by the upper bits of qp_num and an entry of
	 * `table` by the lower bits, `count` being the number of used entries of `table` - confirm */
	struct {
		struct spdk_mlx5_qp **table;
		uint32_t count;
	} qps [SPDK_MLX5_QP_NUM_LUT_SIZE];
	/* Verbs CQ object */
	struct ibv_cq *verbs_cq;
	/* NOTE(review): presumably total number of qpairs attached to this CQ - confirm */
	uint32_t qps_count;
};

/* Per-WQE bookkeeping of a submission queue, see `completions` in struct spdk_mlx5_qp */
struct mlx5_qp_sq_completion {
	/* Work request ID */
	uint64_t wr_id;
	/* Number of unsignaled completions before this one. Used to track qp overflow */
	uint32_t completions;
};

/* Queue Pair object: low level QP description plus submission queue accounting */
struct spdk_mlx5_qp {
	/* Low level QP representation */
	struct mlx5_hw_qp hw;
	/* Array of per-WQE completion bookkeeping entries.
	 * NOTE(review): presumably one entry per SQ slot - confirm against the allocation code */
	struct mlx5_qp_sq_completion *completions;
	/* Pointer to the last WQE control segment written to SQ */
	struct mlx5_wqe_ctrl_seg *ctrl;
	/* CQ this QP is bound to */
	struct spdk_mlx5_cq *cq;
	/* Verbs QP object */
	struct ibv_qp *verbs_qp;
	/* Number of WQEs submitted to HW which won't produce a CQE */
	uint16_t nonsignaled_outstanding;
	/* NOTE(review): presumably the maximum number of SGEs per send WQE - confirm */
	uint16_t max_send_sge;
	/* Number of WQEs available for submission */
	uint16_t tx_available;
	/* NOTE(review): presumably the producer index value at the time of the last doorbell - confirm */
	uint16_t last_pi;
	/* NOTE(review): presumably holds one of the SPDK_MLX5_QP_SIG_* values defined in this file - confirm */
	uint8_t sigmode;
};

/* Completion signaling modes of a qpair.
 * NOTE(review): presumably stored in the `sigmode` field of struct spdk_mlx5_qp - confirm */
enum {
	/* Default mode, use flags passed by the user */
	SPDK_MLX5_QP_SIG_NONE = 0,
	/* Enable completion for every control WQE segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_ALL = 1,
	/* Enable completion only for the last control WQE segment, regardless of the flags passed by the user */
	SPDK_MLX5_QP_SIG_LAST = 2,
};
Loading