Commit 113075be authored by Alexey Marchuk, committed by Tomasz Zawadzki
Browse files

nvme/rdma: Fix cq/qp destroy order



A CQ can only be destroyed after all qpairs attached
to it have been destroyed.
The problem was in nvme_rdma_poll_group_remove:
it could destroy the CQ before the last qpair was destroyed.
The destruction flow is quite complicated, and we can't
simply remove everything from nvme_rdma_poll_group_remove,
since other functions check that all of a qpair's requests
have been flushed - e.g. nvme_rdma_ctrlr_delete_io_qpair
expects the qpair to be empty and disconnected.
If the user calls spdk_nvme_ctrlr_free_io_qpair while the qpair
is not yet disconnected, we have to start the disconnect
process and wait for it to finish. To handle this case,
add a destroy_in_progress flag to struct spdk_nvme_qpair and check it
in nvme_rdma_ctrlr_disconnect_qpair so that the qpair is polled until
it is disconnected, even if it is in async mode.

Fixes #3016

Signed-off-by: Alexey Marchuk <alexeymar@nvidia.com>
Change-Id: I796833f70f81d8b8552263b61b2f002890d6dea9
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/18287


Reviewed-by: Karol Latecki <karol.latecki@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Community-CI: Mellanox Build Bot
parent bead1d5a
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2015 Intel Corporation. All rights reserved.
 *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"
@@ -596,6 +596,8 @@ spdk_nvme_ctrlr_free_io_qpair(struct spdk_nvme_qpair *qpair)
		return 0;
	}

	qpair->destroy_in_progress = 1;

	nvme_transport_ctrlr_disconnect_qpair(ctrlr, qpair);

	if (qpair->poll_group && (qpair->active_proc == nvme_ctrlr_get_current_process(ctrlr))) {
+3 −1
Original line number Diff line number Diff line
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2015 Intel Corporation. All rights reserved.
 *   Copyright (c) 2020, 2021 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#ifndef __NVME_INTERNAL_H__
@@ -440,6 +440,8 @@ struct spdk_nvme_qpair {
	uint8_t					last_transport_failure_reason: 2;

	uint8_t					abort_dnr: 1;
	/* The user is destroying qpair */
	uint8_t					destroy_in_progress: 1;

	enum spdk_nvme_transport_type		trtype;

+6 −15
Original line number Diff line number Diff line
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2016 Intel Corporation. All rights reserved.
 *   Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

/*
@@ -1936,7 +1936,7 @@ nvme_rdma_qpair_disconnected(struct nvme_rdma_qpair *rqpair, int ret)
	}

	if (rqpair->poller == NULL) {
		/* If poller is not used, cq is not shared or already destroyed.
		/* If poller is not used, cq is not shared.
		 * So complete disconnecting qpair immediately.
		 */
		goto quiet;
@@ -2055,8 +2055,11 @@ nvme_rdma_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme
	 * It is ensured that poll_group_process_completions() calls disconnected_qpair_cb
	 * for any disconnected qpair. Hence, we do not have to check if the qpair is in
	 * a poll group or not.
	 * At the same time, if the qpair is being destroyed, i.e. this function is called by
	 * spdk_nvme_ctrlr_free_io_qpair then we need to wait until qpair is disconnected, otherwise
	 * we may leak some resources.
	 */
	if (qpair->async) {
	if (qpair->async && !qpair->destroy_in_progress) {
		return;
	}

@@ -3072,18 +3075,6 @@ static int
nvme_rdma_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
			    struct spdk_nvme_qpair *qpair)
{
	struct nvme_rdma_qpair		*rqpair = nvme_rdma_qpair(qpair);
	struct nvme_rdma_poll_group	*group = nvme_rdma_poll_group(tgroup);

	assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs);

	if (rqpair->poller) {
		nvme_rdma_poll_group_put_poller(group, rqpair->poller);

		rqpair->poller = NULL;
		rqpair->cq = NULL;
	}

	return 0;
}

+4 −7
Original line number Diff line number Diff line
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2018 Intel Corporation. All rights reserved.
 *   Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"
@@ -1409,10 +1409,7 @@ test_nvme_rdma_qpair_set_poller(void)

	rqpair.qpair.poll_group_tailq_head = &tgroup->disconnected_qpairs;

	rc = nvme_rdma_poll_group_remove(tgroup, &rqpair.qpair);
	CU_ASSERT(rc == 0);
	CU_ASSERT(rqpair.cq == NULL);
	CU_ASSERT(rqpair.poller == NULL);
	nvme_rdma_poll_group_put_poller(group, rqpair.poller);
	CU_ASSERT(STAILQ_EMPTY(&group->pollers));

	rqpair.qpair.poll_group_tailq_head = &tgroup->connected_qpairs;
@@ -1441,8 +1438,8 @@ test_nvme_rdma_qpair_set_poller(void)

	rqpair.qpair.poll_group_tailq_head = &tgroup->disconnected_qpairs;

	rc = nvme_rdma_poll_group_remove(tgroup, &rqpair.qpair);
	CU_ASSERT(rc == 0);
	nvme_rdma_poll_group_put_poller(group, rqpair.poller);
	CU_ASSERT(STAILQ_EMPTY(&group->pollers));

	rc = nvme_rdma_poll_group_destroy(tgroup);
	CU_ASSERT(rc == 0);