Commit 00277bc5 authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Tomasz Zawadzki
Browse files

nvme/rdma: Fix searching for qpair by qp_num



Poll group holds lists of qpairs in different states and
when we got rdma completion with error, we iterate these
lists to find a qpair which qp_num matches. qp_num
is stored inside of ibv_qp which belongs to spdk_rdma_qp
structure. When nvme_rdma_qpair is disconnected, pointer
to spdk_rdma_qp is cleaned but qpair may still exist in
poll group list and when we start searhing for qpair by
qp_num we may dereference NULL pointer.

This patch adds a check that pointer to spdk_rdma_qp
is valid before dereferencing it. To minimize boilerplate code,
wrap all check in macro. Add unit test to verify this fix.

Signed-off-by: default avatarAlexey Marchuk <alexeymar@mellanox.com>
Change-Id: I1925f93efb633fd5c176323d3bbd3641a1a632a9
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9050


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
parent 97385af1
Loading
Loading
Loading
Loading
+7 −5
Original line number Diff line number Diff line
@@ -104,6 +104,9 @@

#define WC_PER_QPAIR(queue_depth)	(queue_depth * 2)

#define NVME_RDMA_POLL_GROUP_CHECK_QPN(_rqpair, qpn)				\
	((_rqpair)->rdma_qp && (_rqpair)->rdma_qp->qp->qp_num == (qpn))	\

struct nvme_rdma_memory_domain {
	TAILQ_ENTRY(nvme_rdma_memory_domain) link;
	uint32_t ref;
@@ -2504,22 +2507,21 @@ nvme_rdma_poll_group_get_qpair_by_id(struct nvme_rdma_poll_group *group, uint32_

	STAILQ_FOREACH(qpair, &group->group.disconnected_qpairs, poll_group_stailq) {
		rqpair = nvme_rdma_qpair(qpair);
		if (rqpair->rdma_qp->qp->qp_num == qp_num) {
		if (NVME_RDMA_POLL_GROUP_CHECK_QPN(rqpair, qp_num)) {
			return rqpair;
		}
	}

	STAILQ_FOREACH(qpair, &group->group.connected_qpairs, poll_group_stailq) {
		rqpair = nvme_rdma_qpair(qpair);
		if (rqpair->rdma_qp->qp->qp_num == qp_num) {
		if (NVME_RDMA_POLL_GROUP_CHECK_QPN(rqpair, qp_num)) {
			return rqpair;
		}
	}

	STAILQ_FOREACH(rqpair_tracker, &group->destroyed_qpairs, link) {
		rqpair = rqpair_tracker->destroyed_qpair_tracker;
		if (rqpair->rdma_qp->qp->qp_num == qp_num) {
			return rqpair;
		if (NVME_RDMA_POLL_GROUP_CHECK_QPN(rqpair_tracker->destroyed_qpair_tracker, qp_num)) {
			return rqpair_tracker->destroyed_qpair_tracker;
		}
	}

+54 −6
Original line number Diff line number Diff line
@@ -1286,6 +1286,53 @@ test_rdma_get_memory_translation(void)
	nvme_rdma_put_memory_domain(rqpair.memory_domain);
}

static void
test_nvme_rdma_poll_group_get_qpair_by_id(void)
{
	const uint32_t test_qp_num = 123;
	struct nvme_rdma_poll_group	group = {};
	struct nvme_rdma_destroyed_qpair tracker = {};
	struct nvme_rdma_qpair rqpair = {};
	struct spdk_rdma_qp rdma_qp = {};
	struct ibv_qp qp = { .qp_num = test_qp_num };

	STAILQ_INIT(&group.group.disconnected_qpairs);
	STAILQ_INIT(&group.group.connected_qpairs);
	STAILQ_INIT(&group.destroyed_qpairs);
	rqpair.qpair.trtype = SPDK_NVME_TRANSPORT_RDMA;
	tracker.destroyed_qpair_tracker = &rqpair;

	/* Test 1 - Simulate case when nvme_rdma_qpair is disconnected but still in one of lists.
	 * nvme_rdma_poll_group_get_qpair_by_id must return NULL */
	STAILQ_INSERT_HEAD(&group.group.disconnected_qpairs, &rqpair.qpair, poll_group_stailq);
	CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == NULL);
	STAILQ_REMOVE_HEAD(&group.group.disconnected_qpairs, poll_group_stailq);

	STAILQ_INSERT_HEAD(&group.group.connected_qpairs, &rqpair.qpair, poll_group_stailq);
	CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == NULL);
	STAILQ_REMOVE_HEAD(&group.group.connected_qpairs, poll_group_stailq);

	STAILQ_INSERT_HEAD(&group.destroyed_qpairs, &tracker, link);
	CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == NULL);
	STAILQ_REMOVE_HEAD(&group.destroyed_qpairs, link);

	/* Test 2 - nvme_rdma_qpair with valid rdma_qp/ibv_qp and qp_num */
	rdma_qp.qp = &qp;
	rqpair.rdma_qp = &rdma_qp;

	STAILQ_INSERT_HEAD(&group.group.disconnected_qpairs, &rqpair.qpair, poll_group_stailq);
	CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == &rqpair);
	STAILQ_REMOVE_HEAD(&group.group.disconnected_qpairs, poll_group_stailq);

	STAILQ_INSERT_HEAD(&group.group.connected_qpairs, &rqpair.qpair, poll_group_stailq);
	CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == &rqpair);
	STAILQ_REMOVE_HEAD(&group.group.connected_qpairs, poll_group_stailq);

	STAILQ_INSERT_HEAD(&group.destroyed_qpairs, &tracker, link);
	CU_ASSERT(nvme_rdma_poll_group_get_qpair_by_id(&group, test_qp_num) == &rqpair);
	STAILQ_REMOVE_HEAD(&group.destroyed_qpairs, link);
}

int main(int argc, char **argv)
{
	CU_pSuite	suite = NULL;
@@ -1317,6 +1364,7 @@ int main(int argc, char **argv)
	CU_ADD_TEST(suite, test_nvme_rdma_memory_domain);
	CU_ADD_TEST(suite, test_rdma_ctrlr_get_memory_domain);
	CU_ADD_TEST(suite, test_rdma_get_memory_translation);
	CU_ADD_TEST(suite, test_nvme_rdma_poll_group_get_qpair_by_id);

	CU_basic_set_mode(CU_BRM_VERBOSE);
	CU_basic_run_tests();