Commit 0edc184e authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Jim Harris
Browse files

accel/mlx5: Support mkey registration



Register a scatter/gather mkey when the accel_mlx5
driver is enabled and a copy task has an RDMA memory
domain as its destination. The UMR can be sent in an
NVMF capsule to represent scattered memory as
virtually contiguous.

Signed-off-by: default avatarAlexey Marchuk <alexeymar@nvidia.com>
Change-Id: I92e8c417bbf8c4018ac77e7fc074ac7ae9a68b55
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/24708


Community-CI: Mellanox Build Bot
Reviewed-by: default avatarBen Walker <ben@nvidia.com>
Community-CI: Community CI Samsung <spdk.community.ci.samsung@gmail.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent 06358c25
Loading
Loading
Loading
Loading
+160 −3
Original line number Diff line number Diff line
@@ -73,6 +73,7 @@ enum accel_mlx5_opcode {
	ACCEL_MLX5_OPC_CRYPTO,
	ACCEL_MLX5_OPC_CRC32C,
	ACCEL_MLX5_OPC_CRYPTO_MKEY,
	ACCEL_MLX5_OPC_MKEY,
	ACCEL_MLX5_OPC_LAST
};

@@ -82,6 +83,7 @@ SPDK_STATIC_ASSERT(ACCEL_MLX5_OPC_LAST <= 0xf,
struct accel_mlx5_stats {
	uint64_t crypto_umrs;
	uint64_t sig_umrs;
	uint64_t umrs;
	uint64_t rdma_reads;
	uint64_t rdma_writes;
	uint64_t polls;
@@ -343,6 +345,9 @@ accel_mlx5_task_fail(struct accel_mlx5_task *task, int rc)
			spdk_mlx5_mkey_pool_put_bulk(dev->sig_mkeys, task->mkeys, task->num_ops);
			spdk_mempool_put(dev->dev_ctx->psv_pool, task->psv);
		}
		if (task->mlx5_opcode == ACCEL_MLX5_OPC_MKEY) {
			spdk_mlx5_mkey_pool_put_bulk(dev->mkeys, task->mkeys, task->num_ops);
		}
	}
	next = spdk_accel_sequence_next_task(&task->base);
	seq = task->base.seq;
@@ -1842,6 +1847,134 @@ accel_mlx5_crypto_mkey_task_complete(struct accel_mlx5_task *mlx5_task)
	spdk_accel_task_complete(&mlx5_task->base, 0);
}

/*
 * Initialize a plain mkey registration task: validate that the source can be
 * described by a single UMR, then reserve one QP slot and one mkey from the
 * device pool.
 *
 * Returns 0 on success, -EINVAL/-ENOTSUP on unsupported layouts, -ENOMEM when
 * QP slots or mkeys are temporarily exhausted (task is retried via `cont`).
 */
static inline int
accel_mlx5_mkey_task_init(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	int rc;
	uint16_t qp_slot = accel_mlx5_dev_get_available_slots(dev, qp);

	if (spdk_unlikely(task->s.iovcnt > ACCEL_MLX5_MAX_SGE)) {
		/* With `external mkey` we can't split task or register several UMRs */
		SPDK_ERRLOG("src buffer is too fragmented\n");
		return -EINVAL;
	}
	if (spdk_unlikely(task->src_domain == spdk_accel_get_memory_domain())) {
		SPDK_ERRLOG("accel domain is not supported\n");
		return -EINVAL;
	}
	if (spdk_unlikely(spdk_accel_sequence_next_task(task) != NULL)) {
		SPDK_ERRLOG("Mkey registration is only supported for single task\n");
		return -ENOTSUP;
	}

	accel_mlx5_iov_sgl_init(&mlx5_task->src, task->s.iovs, task->s.iovcnt);
	/* The whole source fits in one UMR, so exactly one request is needed. */
	mlx5_task->num_reqs = 1;

	if (spdk_unlikely(qp_slot == 0)) {
		mlx5_task->num_ops = 0;
		dev->stats.nomem_qdepth++;
		return -ENOMEM;
	}
	rc = spdk_mlx5_mkey_pool_get_bulk(dev->mkeys, mlx5_task->mkeys, 1);
	if (spdk_unlikely(rc)) {
		mlx5_task->num_ops = 0;
		dev->stats.nomem_mkey++;
		return -ENOMEM;
	}
	mlx5_task->num_ops = 1;

	/* Fixed copy-paste log: this is the plain mkey task and the value is num_reqs,
	 * not a block count (the original said "crypto_mkey task num_blocks"). */
	SPDK_DEBUGLOG(accel_mlx5, "mkey task num_reqs %u, src_len %zu\n", mlx5_task->num_reqs,
		      task->nbytes);

	return 0;
}

/*
 * Submit the mkey registration to hardware: build an SGE list covering the
 * whole source, configure a UMR over it with the reserved mkey, and track the
 * task as in-flight on the QP.
 *
 * Returns 0 on successful submission, negative errno on failure; a non-zero
 * return leaves the task not submitted (caller handles failure/retry).
 */
static inline int
accel_mlx5_mkey_task_process(struct accel_mlx5_task *mlx5_task)
{
	struct spdk_mlx5_umr_attr umr_attr;
	struct ibv_sge src_sge[ACCEL_MLX5_MAX_SGE];
	struct spdk_accel_task *task = &mlx5_task->base;
	struct accel_mlx5_qp *qp = mlx5_task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint32_t remaining = 0;
	int rc;

	/* num_ops == 0 means init/continue failed to reserve an mkey; nothing to submit. */
	if (spdk_unlikely(!mlx5_task->num_ops)) {
		return -EINVAL;
	}
	SPDK_DEBUGLOG(accel_mlx5, "begin, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);

	mlx5_task->num_wrs = 0;

	rc = accel_mlx5_fill_block_sge(dev, src_sge, &mlx5_task->src,  task->nbytes, &remaining,
				       task->src_domain, task->src_domain_ctx);
	/* rc is the SGE count on success; any leftover bytes mean the source did
	 * not fit in a single UMR, which is unsupported for this task type. */
	if (spdk_unlikely(rc <= 0 || remaining)) {
		rc = rc ? rc : -EINVAL;
		SPDK_ERRLOG("Failed to set src sge, rc %d, remaining %u\n", rc, remaining);
		return rc;
	}
	umr_attr.mkey = mlx5_task->mkeys[0]->mkey;
	umr_attr.sge = src_sge;
	umr_attr.sge_count = rc;
	umr_attr.umr_len = task->nbytes;

	/* Request a CQE for this WR so completion can be reported to the accel layer. */
	rc = spdk_mlx5_umr_configure(qp->qp, &umr_attr, (uint64_t)mlx5_task,
				     SPDK_MLX5_WQE_CTRL_CE_CQ_UPDATE);
	if (spdk_unlikely(rc)) {
		SPDK_ERRLOG("UMR configure failed with %d\n", rc);
		return rc;
	}
	dev->stats.umrs++;
	mlx5_task->num_submitted_reqs++;
	ACCEL_MLX5_UPDATE_ON_WR_SUBMITTED_SIGNALED(dev, qp, mlx5_task);
	STAILQ_INSERT_TAIL(&qp->in_hw, mlx5_task, link);

	SPDK_DEBUGLOG(accel_mlx5, "end, task %p, dst_domain_ctx %p\n", mlx5_task, task->dst_domain_ctx);

	return 0;
}

/*
 * Retry a deferred mkey task: re-acquire the mkey if it was not reserved yet,
 * verify a QP slot is free, then hand off to the submission path. On any
 * shortage the task is queued on the device nomem list for a later retry.
 */
static inline int
accel_mlx5_mkey_task_continue(struct accel_mlx5_task *task)
{
	struct accel_mlx5_qp *qp = task->qp;
	struct accel_mlx5_dev *dev = qp->dev;
	uint16_t free_slots = accel_mlx5_dev_get_available_slots(dev, qp);

	if (!task->num_ops) {
		int rc = spdk_mlx5_mkey_pool_get_bulk(dev->mkeys, task->mkeys, 1);

		if (spdk_unlikely(rc != 0)) {
			dev->stats.nomem_mkey++;
			STAILQ_INSERT_TAIL(&dev->nomem, task, link);
			return -ENOMEM;
		}
		task->num_ops = 1;
	}
	if (spdk_unlikely(free_slots == 0)) {
		dev->stats.nomem_qdepth++;
		STAILQ_INSERT_TAIL(&dev->nomem, task, link);
		return -ENOMEM;
	}
	return accel_mlx5_mkey_task_process(task);
}

static inline void
accel_mlx5_mkey_task_complete(struct accel_mlx5_task *mlx5_task)
{
	struct accel_mlx5_dev *dev = mlx5_task->qp->dev;

	assert(mlx5_task->num_ops);
	assert(mlx5_task->base.seq);

	spdk_mlx5_mkey_pool_put_bulk(dev->mkeys, mlx5_task->mkeys, 1);
	spdk_accel_task_complete(&mlx5_task->base, 0);
}

static int
accel_mlx5_task_op_not_implemented(struct accel_mlx5_task *mlx5_task)
{
@@ -1889,6 +2022,12 @@ static struct accel_mlx5_task_operations g_accel_mlx5_tasks_ops[] = {
		.cont = accel_mlx5_crypto_mkey_task_continue,
		.complete = accel_mlx5_crypto_mkey_task_complete,
	},
	[ACCEL_MLX5_OPC_MKEY] = {
		.init = accel_mlx5_mkey_task_init,
		.process = accel_mlx5_mkey_task_process,
		.cont = accel_mlx5_mkey_task_continue,
		.complete = accel_mlx5_mkey_task_complete,
	},
	[ACCEL_MLX5_OPC_LAST] = {
		.init = accel_mlx5_task_op_not_supported,
		.process = accel_mlx5_task_op_not_implemented,
@@ -1922,7 +2061,7 @@ accel_mlx5_memory_domain_transfer(struct accel_mlx5_task *task)
	struct accel_mlx5_dev *dev = task->qp->dev;
	int rc;

	assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY);
	assert(task->mlx5_opcode == ACCEL_MLX5_OPC_CRYPTO_MKEY || task->mlx5_opcode == ACCEL_MLX5_OPC_MKEY);
	/* UMR is an offset in the address space, so the start address is 0 */
	translation.iov.iov_base = NULL;
	translation.iov.iov_len = base->nbytes;
@@ -2363,6 +2502,7 @@ accel_mlx5_add_stats(struct accel_mlx5_stats *stats, const struct accel_mlx5_sta

	stats->crypto_umrs += to_add->crypto_umrs;
	stats->sig_umrs += to_add->sig_umrs;
	stats->umrs += to_add->umrs;
	stats->rdma_reads += to_add->rdma_reads;
	stats->rdma_writes += to_add->rdma_writes;
	stats->polls += to_add->polls;
@@ -3135,7 +3275,8 @@ accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header,
	spdk_json_write_named_object_begin(w, "umrs");
	spdk_json_write_named_uint64(w, "crypto_umrs", stats->crypto_umrs);
	spdk_json_write_named_uint64(w, "sig_umrs", stats->sig_umrs);
	spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs);
	spdk_json_write_named_uint64(w, "umrs", stats->umrs);
	spdk_json_write_named_uint64(w, "total", stats->crypto_umrs + stats->sig_umrs + stats->umrs);
	spdk_json_write_object_end(w);

	spdk_json_write_named_object_begin(w, "rdma");
@@ -3159,6 +3300,7 @@ accel_mlx5_dump_stats_json(struct spdk_json_write_ctx *w, const char *header,
	spdk_json_write_named_uint64(w, "crypto", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO]);
	spdk_json_write_named_uint64(w, "crypto_mkey", stats->opcodes[ACCEL_MLX5_OPC_CRYPTO_MKEY]);
	spdk_json_write_named_uint64(w, "crc32c", stats->opcodes[ACCEL_MLX5_OPC_CRC32C]);
	spdk_json_write_named_uint64(w, "mkey", stats->opcodes[ACCEL_MLX5_OPC_MKEY]);
	spdk_json_write_named_uint64(w, "total", total_tasks);
	spdk_json_write_object_end(w);

@@ -3360,7 +3502,22 @@ accel_mlx5_driver_examine_sequence(struct spdk_accel_sequence *seq,
	accel_mlx5_task_reset(first);
	SPDK_DEBUGLOG(accel_mlx5, "first %p, opc %d; next %p, opc %d\n", first_base, first_base->op_code,
		      next_base,  next_base ? next_base->op_code : -1);
	if (next_base) {
	if (!next_base) {
		if (first_base->op_code == SPDK_ACCEL_OPC_COPY && first_base->dst_domain &&
		    spdk_memory_domain_get_dma_device_type(first_base->dst_domain) ==
		    SPDK_DMA_DEVICE_TYPE_RDMA &&
		    accel_mlx5_compare_iovs(first_base->d.iovs, first_base->s.iovs, first_base->s.iovcnt)) {
			SPDK_DEBUGLOG(accel_mlx5, "MKEY task %p\n", first);
			rc = accel_mlx5_task_assign_qp_by_domain_pd(first, accel_ch, first_base->dst_domain);
			if (spdk_unlikely(rc)) {
				return rc;
			}
			first->mlx5_opcode = ACCEL_MLX5_OPC_MKEY;
			first->needs_data_transfer = 1;
			first->inplace = 1;
			return 0;
		}
	} else {
		switch (first_base->op_code) {
		case SPDK_ACCEL_OPC_COPY:
			if (next_base->op_code == SPDK_ACCEL_OPC_DECRYPT &&