Commit d06b6097 authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Tomasz Zawadzki
Browse files

nvme/rdma: Create memory domain per Protection Domain



Add a global list of memory domains with reference counter.
Memory domains are used by NVME RDMA qpairs.

Also refactor ibv_resize_cq in nvme_rdma_ut.c into a stub

Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Change-Id: Ie58b7e99fcb2c57c967f5dee0417e74845d9e2d1
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8127


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
parent 4e527910
Loading
Loading
Loading
Loading
+90 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
 *
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
@@ -38,6 +39,7 @@
#include "spdk/stdinc.h"

#include "spdk/assert.h"
#include "spdk/dma.h"
#include "spdk/log.h"
#include "spdk/trace.h"
#include "spdk/queue.h"
@@ -102,6 +104,13 @@

#define WC_PER_QPAIR(queue_depth)	(queue_depth * 2)

/* Refcounted pairing of an RDMA Protection Domain with the SPDK memory
 * domain created for it. Instances live on the global g_memory_domains
 * list, guarded by g_memory_domains_lock. */
struct nvme_rdma_memory_domain {
	TAILQ_ENTRY(nvme_rdma_memory_domain) link;	/* entry in g_memory_domains */
	uint32_t ref;		/* reference count; one per qpair holding this domain */
	struct ibv_pd *pd;	/* protection domain this entry is keyed on */
	struct spdk_memory_domain *domain;	/* SPDK memory domain created for pd */
};

enum nvme_rdma_wr_type {
	RDMA_WR_TYPE_RECV,
	RDMA_WR_TYPE_SEND,
@@ -223,6 +232,8 @@ struct nvme_rdma_qpair {
	TAILQ_HEAD(, spdk_nvme_rdma_req)	free_reqs;
	TAILQ_HEAD(, spdk_nvme_rdma_req)	outstanding_reqs;

	struct nvme_rdma_memory_domain		*memory_domain;

	/* Counts of outstanding send and recv objects */
	uint16_t				current_num_recvs;
	uint16_t				current_num_sends;
@@ -289,6 +300,77 @@ static const char *rdma_cm_event_str[] = {
struct nvme_rdma_qpair *nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group,
		uint32_t qp_num);

static TAILQ_HEAD(, nvme_rdma_memory_domain) g_memory_domains = TAILQ_HEAD_INITIALIZER(
			g_memory_domains);
static pthread_mutex_t g_memory_domains_lock = PTHREAD_MUTEX_INITIALIZER;

/* Return the memory domain associated with @pd, creating it on first use.
 *
 * Looks up @pd on the global g_memory_domains list under
 * g_memory_domains_lock; on a hit the reference count is bumped, on a miss
 * a new entry is allocated, an SPDK memory domain is created for the PD,
 * and the entry is appended to the list with ref == 1.
 *
 * Returns NULL on allocation failure or if spdk_memory_domain_create fails.
 * Callers release the reference with nvme_rdma_put_memory_domain(). */
static struct nvme_rdma_memory_domain *
nvme_rdma_get_memory_domain(struct ibv_pd *pd)
{
	struct nvme_rdma_memory_domain *dev = NULL;
	struct spdk_memory_domain_ctx dev_ctx;
	int rc;

	pthread_mutex_lock(&g_memory_domains_lock);

	/* Fast path: a domain for this PD already exists, share it. */
	TAILQ_FOREACH(dev, &g_memory_domains, link) {
		if (dev->pd == pd) {
			dev->ref++;
			goto out;
		}
	}

	dev = calloc(1, sizeof(*dev));
	if (dev == NULL) {
		SPDK_ERRLOG("Memory allocation failed\n");
		goto out;
	}

	dev_ctx.size = sizeof(dev_ctx);
	dev_ctx.rdma.ibv_pd = pd;

	rc = spdk_memory_domain_create(&dev->domain, SPDK_DMA_DEVICE_TYPE_RDMA, &dev_ctx,
				       SPDK_RDMA_DMA_DEVICE);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to create memory domain\n");
		free(dev);
		dev = NULL;
		goto out;
	}

	dev->pd = pd;
	dev->ref = 1;
	TAILQ_INSERT_TAIL(&g_memory_domains, dev, link);

out:
	pthread_mutex_unlock(&g_memory_domains_lock);

	return dev;
}

/* Drop one reference on @device (counterpart of nvme_rdma_get_memory_domain).
 *
 * NULL is tolerated as a no-op. When the last reference is released the
 * entry is unlinked from g_memory_domains, its SPDK memory domain is
 * destroyed, and the wrapper is freed. Protected by g_memory_domains_lock. */
static void
nvme_rdma_put_memory_domain(struct nvme_rdma_memory_domain *device)
{
	if (device == NULL) {
		return;
	}

	pthread_mutex_lock(&g_memory_domains_lock);

	assert(device->ref > 0);

	if (--device->ref == 0) {
		/* Last user gone: unlink, then tear down the SPDK domain. */
		TAILQ_REMOVE(&g_memory_domains, device, link);
		spdk_memory_domain_destroy(device->domain);
		free(device);
	}

	pthread_mutex_unlock(&g_memory_domains_lock);
}

static inline void *
nvme_rdma_calloc(size_t nmemb, size_t size)
{
@@ -625,6 +707,12 @@ nvme_rdma_qpair_init(struct nvme_rdma_qpair *rqpair)
		return -1;
	}

	rqpair->memory_domain = nvme_rdma_get_memory_domain(rqpair->rdma_qp->qp->pd);
	if (!rqpair->memory_domain) {
		SPDK_ERRLOG("Failed to get memory domain\n");
		return -1;
	}

	/* ibv_create_qp will change the values in attr.cap. Make sure we store the proper value. */
	rqpair->max_send_sge = spdk_min(NVME_RDMA_DEFAULT_TX_SGE, attr.cap.max_send_sge);
	rqpair->max_recv_sge = spdk_min(NVME_RDMA_DEFAULT_RX_SGE, attr.cap.max_recv_sge);
@@ -1712,6 +1800,8 @@ nvme_rdma_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
	nvme_rdma_qpair_abort_reqs(qpair, 1);
	nvme_qpair_deinit(qpair);

	nvme_rdma_put_memory_domain(rqpair->memory_domain);

	nvme_rdma_free_reqs(rqpair);
	nvme_rdma_free_rsps(rqpair);
	nvme_rdma_free(rqpair);
+1 −1
Original line number Diff line number Diff line
@@ -64,7 +64,7 @@ DEPDIRS-thread := log util trace

DEPDIRS-nvme := log sock util
ifeq ($(CONFIG_RDMA),y)
DEPDIRS-nvme += rdma
DEPDIRS-nvme += rdma dma
endif
ifeq ($(CONFIG_VFIO_USER),y)
DEPDIRS-nvme += vfio_user
+86 −4
Original line number Diff line number Diff line
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *   Copyright (c) Intel Corporation. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
@@ -60,12 +60,35 @@ DEFINE_STUB(fcntl, int, (int fd, int cmd, ...), 0);
DEFINE_STUB_V(rdma_destroy_event_channel, (struct rdma_event_channel *channel));

DEFINE_STUB(ibv_dereg_mr, int, (struct ibv_mr *mr), 0);
DEFINE_STUB(ibv_resize_cq, int, (struct ibv_cq *cq, int cqe), 0);

int ibv_resize_cq(struct ibv_cq *cq, int cqe)
DEFINE_RETURN_MOCK(spdk_memory_domain_create, int);
/* Mock for spdk_memory_domain_create: hands back a fixed sentinel pointer
 * unless a test overrides the return code via MOCK_SET. The sentinel is
 * never dereferenced by the tests. */
int
spdk_memory_domain_create(struct spdk_memory_domain **domain, enum spdk_dma_device_type type,
			  struct spdk_memory_domain_ctx *ctx, const char *id)
{
	/* Renamed from __dma_dev: identifiers with a leading double underscore
	 * are reserved for the implementation (C11 7.1.3). */
	static struct spdk_memory_domain *g_dma_dev = (struct spdk_memory_domain *)0xdeaddead;

	HANDLE_RETURN_MOCK(spdk_memory_domain_create);

	*domain = g_dma_dev;

	return 0;
}

DEFINE_STUB(spdk_memory_domain_get_context, struct spdk_memory_domain_ctx *,
	    (struct spdk_memory_domain *device), NULL);
DEFINE_STUB(spdk_memory_domain_get_dma_device_type, enum spdk_dma_device_type,
	    (struct spdk_memory_domain *device), SPDK_DMA_DEVICE_TYPE_RDMA);
DEFINE_STUB_V(spdk_memory_domain_destroy, (struct spdk_memory_domain *device));
DEFINE_STUB(spdk_memory_domain_fetch_data, int, (struct spdk_memory_domain *src_domain,
		void *src_domain_ctx, struct iovec *src_iov, uint32_t src_iov_cnt, struct iovec *dst_iov,
		uint32_t dst_iov_cnt, spdk_memory_domain_fetch_data_cpl_cb cpl_cb, void *cpl_cb_arg), 0);
DEFINE_STUB(spdk_memory_domain_translate_data, int, (struct spdk_memory_domain *src_domain,
		void *src_domain_ctx, struct spdk_memory_domain *dst_domain,
		struct spdk_memory_domain_translation_ctx *dst_domain_ctx, void *addr, size_t len,
		struct spdk_memory_domain_translation_result *result), 0);

/* ibv_reg_mr can be a macro, need to undefine it */
#ifdef ibv_reg_mr
#undef ibv_reg_mr
@@ -1055,7 +1078,8 @@ test_nvme_rdma_qpair_init(void)
{
	struct nvme_rdma_qpair		rqpair = {};
	struct rdma_cm_id		 cm_id = {};
	struct ibv_qp			    qp = {};
	struct ibv_pd				*pd = (struct ibv_pd *)0xfeedbeef;
	struct ibv_qp				qp = { .pd = pd };
	struct nvme_rdma_ctrlr	rctrlr = {};
	int rc = 0;

@@ -1075,6 +1099,7 @@ test_nvme_rdma_qpair_init(void)
	CU_ASSERT(rqpair.current_num_sends == 0);
	CU_ASSERT(rqpair.current_num_recvs == 0);
	CU_ASSERT(rqpair.cq == (struct ibv_cq *)0xFEEDBEEF);
	CU_ASSERT(rqpair.memory_domain != NULL);
}

static void
@@ -1119,6 +1144,62 @@ test_nvme_rdma_qpair_submit_request(void)
	nvme_rdma_free_reqs(&rqpair);
}

/* Unit test for the refcounted per-PD memory domain cache:
 * covers creation failure, creation, sharing (refcount increment),
 * a second distinct PD, and release back down to an empty delta. */
static void
test_nvme_rdma_memory_domain(void)
{
	struct nvme_rdma_memory_domain *domain_1 = NULL, *domain_2 = NULL, *domain_tmp;
	struct ibv_pd *pd_1 = (struct ibv_pd *)0x1, *pd_2 = (struct ibv_pd *)0x2;
	/* Counters below are used to check the number of created/destroyed rdma_dma_device objects.
	 * Since other unit tests may create dma_devices, we can't just check that the queue is empty or not */
	uint32_t dma_dev_count_start = 0, dma_dev_count = 0, dma_dev_count_end = 0;

	TAILQ_FOREACH(domain_tmp, &g_memory_domains, link) {
		dma_dev_count_start++;
	}

	/* spdk_memory_domain_create failed, expect fail */
	MOCK_SET(spdk_memory_domain_create, -1);
	domain_1 = nvme_rdma_get_memory_domain(pd_1);
	CU_ASSERT(domain_1 == NULL);
	MOCK_CLEAR(spdk_memory_domain_create);

	/* Normal scenario */
	domain_1 = nvme_rdma_get_memory_domain(pd_1);
	SPDK_CU_ASSERT_FATAL(domain_1 != NULL);
	CU_ASSERT(domain_1->domain != NULL);
	CU_ASSERT(domain_1->pd == pd_1);
	CU_ASSERT(domain_1->ref == 1);

	/* Request the same pd, ref counter increased */
	CU_ASSERT(nvme_rdma_get_memory_domain(pd_1) == domain_1);
	CU_ASSERT(domain_1->ref == 2);

	/* Request another pd */
	domain_2 = nvme_rdma_get_memory_domain(pd_2);
	SPDK_CU_ASSERT_FATAL(domain_2 != NULL);
	CU_ASSERT(domain_2->domain != NULL);
	CU_ASSERT(domain_2->pd == pd_2);
	CU_ASSERT(domain_2->ref == 1);

	TAILQ_FOREACH(domain_tmp, &g_memory_domains, link) {
		dma_dev_count++;
	}
	CU_ASSERT(dma_dev_count == dma_dev_count_start + 2);

	/* put domain_1, decrement refcount */
	nvme_rdma_put_memory_domain(domain_1);
	/* Verify the reference count actually dropped 2 -> 1 (was previously
	 * unchecked, so a broken decrement would have gone unnoticed). */
	CU_ASSERT(domain_1->ref == 1);

	/* Release both devices */
	CU_ASSERT(domain_2->ref == 1);
	nvme_rdma_put_memory_domain(domain_1);
	nvme_rdma_put_memory_domain(domain_2);

	TAILQ_FOREACH(domain_tmp, &g_memory_domains, link) {
		dma_dev_count_end++;
	}
	CU_ASSERT(dma_dev_count_start == dma_dev_count_end);
}

int main(int argc, char **argv)
{
	CU_pSuite	suite = NULL;
@@ -1147,6 +1228,7 @@ int main(int argc, char **argv)
	CU_ADD_TEST(suite, test_nvme_rdma_parse_addr);
	CU_ADD_TEST(suite, test_nvme_rdma_qpair_init);
	CU_ADD_TEST(suite, test_nvme_rdma_qpair_submit_request);
	CU_ADD_TEST(suite, test_nvme_rdma_memory_domain);

	CU_basic_set_mode(CU_BRM_VERBOSE);
	CU_basic_run_tests();