Commit 9fb69476 authored by zkhatami88 and committed by Ben Walker

nvme: Add mechanism to override RDMA pd/mr behavior



Add a mechanism to modify the RDMA transport's behavior
when creating protection domains and registering memory.
This is entirely optional.

Change-Id: I7cd850e76a673bf5521ca4815b779c53ab9567e8
Signed-off-by: zkhatami88 <z.khatami88@gmail.com>
Reviewed-on: https://review.gerrithub.io/421415


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 161af0b5
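For orientation, here is the call order the new mechanism implies, pieced together from the diff below as a hedged sketch (app_hooks and app_install_rdma_hooks are hypothetical application-side names, not code from this patch):

	static struct spdk_nvme_rdma_hooks app_hooks;	/* filled in by the application */

	static void
	app_install_rdma_hooks(void)
	{
		/* 1. Install the hooks once, before probing any devices. */
		spdk_nvme_rdma_init_hooks(&app_hooks);
		/* Everything after this happens inside the transport:
		 * 2. nvme_rdma_ctrlr_construct() copies g_nvme_hooks and calls
		 *    get_ctx(&trid) to obtain a per-controller context.
		 * 3. nvme_rdma_qpair_init() calls get_ibv_pd(ctx, verbs) and
		 *    passes the result to rdma_create_qp().
		 * 4. Request building calls get_rkey(ctx, buf, size) for keyed
		 *    SGLs instead of looking up an ibv_mr registration. */
	}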
include/spdk/nvme.h  +55 −0
@@ -44,6 +44,7 @@
 extern "C" {
 #endif
 
+#include "spdk/config.h"
 #include "spdk/env.h"
 #include "spdk/nvme_spec.h"
 #include "spdk/nvmf_spec.h"
@@ -2038,6 +2039,60 @@ void spdk_nvme_qpair_remove_cmd_error_injection(struct spdk_nvme_ctrlr *ctrlr,
 		struct spdk_nvme_qpair *qpair,
 		uint8_t opc);
 
+#ifdef SPDK_CONFIG_RDMA
+struct ibv_context;
+struct ibv_pd;
+struct ibv_mr;
+
+/**
+ * RDMA Transport Hooks
+ */
+struct spdk_nvme_rdma_hooks {
+	/**
+	 * \brief Get a transport-id-specific context to be passed to
+	 *  the other hooks.
+	 *
+	 * \param trid The transport id.
+	 *
+	 * \return ctx to be passed to the other hooks.
+	 */
+	void *(*get_ctx)(const struct spdk_nvme_transport_id *trid);
+
+	/**
+	 * \brief Get an InfiniBand Verbs protection domain.
+	 *
+	 * \param ctx Context returned from get_ctx.
+	 * \param verbs InfiniBand Verbs context.
+	 *
+	 * \return pd of the nvme ctrlr.
+	 */
+	struct ibv_pd *(*get_ibv_pd)(void *ctx, struct ibv_context *verbs);
+
+	/**
+	 * \brief Get an InfiniBand Verbs memory region for a buffer.
+	 *
+	 * \param ctx Context returned from get_ctx.
+	 * \param buf Memory buffer for which an rkey should be returned.
+	 * \param size Size of buf.
+	 *
+	 * \return InfiniBand remote key (rkey) for this buf.
+	 */
+	uint64_t (*get_rkey)(void *ctx, void *buf, size_t size);
+};
+
+/**
+ * \brief Set the global hooks for the RDMA transport, if necessary.
+ *
+ * This call is optional and must be performed prior to probing for
+ * any devices. By default, the RDMA transport will use the ibverbs
+ * library to create protection domains and register memory. This
+ * is a mechanism to bypass that and use an existing registration.
+ *
+ * \param hooks The hooks to install for the RDMA transport.
+ */
+void spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks);
+
+#endif
 
 #ifdef __cplusplus
 }
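Before the implementation diff, a minimal consumer-side sketch of the API above. The struct and spdk_nvme_rdma_init_hooks() come from this patch; the app_-prefixed helpers, the pre-created protection domain g_app_pd, and the single whole-pool registration g_app_mr are hypothetical:

	#include <infiniband/verbs.h>
	#include "spdk/nvme.h"

	static struct ibv_pd *g_app_pd;	/* pd the application created earlier */
	static struct ibv_mr *g_app_mr;	/* one registration covering all I/O buffers */

	static void *
	app_get_ctx(const struct spdk_nvme_transport_id *trid)
	{
		return g_app_pd;	/* any per-target context works; the pd suffices here */
	}

	static struct ibv_pd *
	app_get_ibv_pd(void *ctx, struct ibv_context *verbs)
	{
		return ctx;	/* reuse the existing pd instead of allocating a new one */
	}

	static uint64_t
	app_get_rkey(void *ctx, void *buf, size_t size)
	{
		/* Assumes buf always falls inside the one pre-registered region. */
		return g_app_mr->rkey;
	}

	static struct spdk_nvme_rdma_hooks g_app_hooks = {
		.get_ctx	= app_get_ctx,
		.get_ibv_pd	= app_get_ibv_pd,
		.get_rkey	= app_get_rkey,
	};

	void
	app_install_hooks(void)
	{
		/* Must run before spdk_nvme_probe(). */
		spdk_nvme_rdma_init_hooks(&g_app_hooks);
	}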
lib/nvme/nvme_rdma.c  +85 −28
@@ -71,6 +71,8 @@ struct spdk_nvmf_cmd {
 	struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];
 };
 
+struct spdk_nvme_rdma_hooks g_nvme_hooks = {};
+
 /* Mapping from virtual address to ibv_mr pointer for a protection domain */
 struct spdk_nvme_rdma_mr_map {
 	struct ibv_pd				*pd;
@@ -82,6 +84,10 @@ struct spdk_nvme_rdma_mr_map {
 /* NVMe RDMA transport extensions for spdk_nvme_ctrlr */
 struct nvme_rdma_ctrlr {
 	struct spdk_nvme_ctrlr			ctrlr;
+
+	struct spdk_nvme_rdma_hooks		hooks;
+	void					*hook_ctx;
+	struct ibv_pd				*pd;
 };
 
 /* NVMe RDMA qpair extensions for spdk_nvme_qpair */
@@ -241,6 +247,7 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
 {
 	int			rc;
 	struct ibv_qp_init_attr	attr;
+	struct nvme_rdma_ctrlr *rctrlr;
 
 	rqpair->cq = ibv_create_cq(rqpair->cm_id->verbs, rqpair->num_entries * 2, rqpair, NULL, 0);
 	if (!rqpair->cq) {
@@ -248,6 +255,13 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
 		return -1;
 	}
 
+	rctrlr = nvme_rdma_ctrlr(rqpair->qpair.ctrlr);
+	if (rctrlr->hooks.get_ibv_pd) {
+		rctrlr->pd = rctrlr->hooks.get_ibv_pd(rctrlr->hook_ctx, rqpair->cm_id->verbs);
+	} else {
+		rctrlr->pd = NULL;
+	}
+
 	memset(&attr, 0, sizeof(struct ibv_qp_init_attr));
 	attr.qp_type		= IBV_QPT_RC;
 	attr.send_cq		= rqpair->cq;
@@ -257,11 +271,12 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
 	attr.cap.max_send_sge	= NVME_RDMA_DEFAULT_TX_SGE;
 	attr.cap.max_recv_sge	= NVME_RDMA_DEFAULT_RX_SGE;
 
-	rc = rdma_create_qp(rqpair->cm_id, NULL, &attr);
+	rc = rdma_create_qp(rqpair->cm_id, rctrlr->pd, &attr);
 	if (rc) {
 		SPDK_ERRLOG("rdma_create_qp failed\n");
 		return -1;
 	}
+	rctrlr->pd = rqpair->cm_id->qp->pd;
 
 	rqpair->cm_id->context = &rqpair->qpair;
 
@@ -611,12 +626,15 @@ nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
 			enum spdk_mem_map_notify_action action,
 			void *vaddr, size_t size)
 {
-	struct ibv_pd *pd = cb_ctx;
+	struct nvme_rdma_ctrlr *rctrlr = cb_ctx;
+	struct ibv_pd *pd;
 	struct ibv_mr *mr;
 	int rc;
 
 	switch (action) {
 	case SPDK_MEM_MAP_NOTIFY_REGISTER:
+		if (!rctrlr->hooks.get_rkey) {
+			pd = rctrlr->pd;
 		mr = ibv_reg_mr(pd, vaddr, size,
 				IBV_ACCESS_LOCAL_WRITE |
 				IBV_ACCESS_REMOTE_READ |
@@ -627,13 +645,19 @@ nvme_rdma_mr_map_notify(void *cb_ctx, struct spdk_mem_map *map,
 		} else {
 			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size, (uint64_t)mr);
 		}
+		} else {
+			rc = spdk_mem_map_set_translation(map, (uint64_t)vaddr, size,
+							  rctrlr->hooks.get_rkey(rctrlr->hook_ctx, vaddr, size));
+		}
 		break;
 	case SPDK_MEM_MAP_NOTIFY_UNREGISTER:
+		if (!rctrlr->hooks.get_rkey) {
 			mr = (struct ibv_mr *)spdk_mem_map_translate(map, (uint64_t)vaddr, NULL);
-		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
 		if (mr) {
 			ibv_dereg_mr(mr);
 		}
+		}
+		rc = spdk_mem_map_clear_translation(map, (uint64_t)vaddr, size);
 		break;
 	default:
 		SPDK_UNREACHABLE();
@@ -673,7 +697,8 @@ nvme_rdma_register_mem(struct nvme_rdma_qpair *rqpair)
 
 	mr_map->ref = 1;
 	mr_map->pd = pd;
-	mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops, pd);
+	mr_map->map = spdk_mem_map_alloc((uint64_t)NULL, &nvme_rdma_map_ops,
+					 nvme_rdma_ctrlr(rqpair->qpair.ctrlr));
 	if (mr_map->map == NULL) {
 		SPDK_ERRLOG("spdk_mem_map_alloc() failed\n");
 		free(mr_map);
@@ -918,9 +943,21 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
 	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
 
 	requested_size = req->payload_size;
-	mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
-			&requested_size);
-	if (mr == NULL || requested_size < req->payload_size) {
-		return -1;
+	if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
+
+		mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)payload,
+				&requested_size);
+		if (mr == NULL) {
+			return -1;
+		}
+		req->cmd.dptr.sgl1.keyed.key = mr->rkey;
+	} else {
+		req->cmd.dptr.sgl1.keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
+					       (uint64_t)payload,
+					       &requested_size);
+	}
+
+	if (requested_size < req->payload_size) {
+		return -1;
 	}
 
@@ -937,7 +974,6 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
 	req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
 	req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
 	req->cmd.dptr.sgl1.keyed.length = req->payload_size;
-	req->cmd.dptr.sgl1.keyed.key = mr->rkey;
 	req->cmd.dptr.sgl1.address = (uint64_t)payload;
 
 	return 0;
@@ -977,17 +1013,27 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
 		sge_length = spdk_min(remaining_size, sge_length);
 		mr_length = sge_length;
 
-		mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr,
-				&mr_length);
+		if (!nvme_rdma_ctrlr(rqpair->qpair.ctrlr)->hooks.get_rkey) {
+			mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map,
+					(uint64_t)virt_addr,
+					&mr_length);
+			if (mr == NULL) {
+				return -1;
+			}
+			cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
+		} else {
+			cmd->sgl[num_sgl_desc].keyed.key = spdk_mem_map_translate(rqpair->mr_map->map,
+							   (uint64_t)virt_addr,
+							   &mr_length);
+		}
 
-		if (mr == NULL || mr_length < sge_length) {
+		if (mr_length < sge_length) {
 			return -1;
 		}
 
 		cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
 		cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
 		cmd->sgl[num_sgl_desc].keyed.length = sge_length;
-		cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
 		cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr;
 
 		remaining_size -= sge_length;
@@ -1017,11 +1063,11 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
 		 * the NVMe command. */
 		rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
 
-		req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
-		req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
-		req->cmd.dptr.sgl1.keyed.length = req->payload_size;
-		req->cmd.dptr.sgl1.keyed.key = mr->rkey;
-		req->cmd.dptr.sgl1.address = rqpair->cmds[rdma_req->id].sgl[0].address;
+		req->cmd.dptr.sgl1.keyed.type = cmd->sgl[0].keyed.type;
+		req->cmd.dptr.sgl1.keyed.subtype = cmd->sgl[0].keyed.subtype;
+		req->cmd.dptr.sgl1.keyed.length = cmd->sgl[0].keyed.length;
+		req->cmd.dptr.sgl1.keyed.key = cmd->sgl[0].keyed.key;
+		req->cmd.dptr.sgl1.address = cmd->sgl[0].address;
 	} else {
 		/*
 		 * Otherwise, The SGL descriptor embedded in the command must point to the list of
@@ -1363,6 +1409,11 @@ struct spdk_nvme_ctrlr *nvme_rdma_ctrlr_construct(const struct spdk_nvme_transpo
 
 	nvme_ctrlr_init_cap(&rctrlr->ctrlr, &cap, &vs);
 
+	if (g_nvme_hooks.get_ctx) {
+		rctrlr->hooks = g_nvme_hooks;
+		rctrlr->hook_ctx = rctrlr->hooks.get_ctx(&rctrlr->ctrlr.trid);
+	}
+
 	SPDK_DEBUGLOG(SPDK_LOG_NVME, "successfully initialized the nvmf ctrlr\n");
 	return &rctrlr->ctrlr;
 }
@@ -1632,3 +1683,9 @@ nvme_rdma_ctrlr_free_cmb_io_buffer(struct spdk_nvme_ctrlr *ctrlr, void *buf, siz
 {
 	return 0;
 }
+
+void
+spdk_nvme_rdma_init_hooks(struct spdk_nvme_rdma_hooks *hooks)
+{
+	g_nvme_hooks = *hooks;
+}
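
A closing note on the data-structure change: the value stored in the controller's spdk_mem_map is now overloaded. On the default path it is an ibv_mr pointer and the transport reads mr->rkey; with get_rkey installed it is the remote key itself, stored at registration time by nvme_rdma_mr_map_notify(). A hedged sketch of the two read conventions (lookup_rkey is a hypothetical helper; the branch mirrors nvme_rdma_build_contig_request above, with error handling elided):

	static uint64_t
	lookup_rkey(struct nvme_rdma_ctrlr *rctrlr, struct spdk_mem_map *map,
		    void *buf, uint64_t *len)
	{
		uint64_t translation = spdk_mem_map_translate(map, (uint64_t)buf, len);

		if (!rctrlr->hooks.get_rkey) {
			/* Default path: the map holds ibv_mr pointers. */
			return ((struct ibv_mr *)translation)->rkey;
		}
		/* Hooked path: the map holds the raw rkey directly. */
		return translation;
	}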