Commit 8564f005 authored by Konrad Sztyber, committed by Tomasz Zawadzki
Browse files

nvme/tcp: delay qpair disconnect until accel ops are done



If requests with outstanding accel operations are aborted, accel
completion callbacks might get executed with a freed qpair as context
(if user frees qpairs when they get disconnected).  So, to avoid this,
qpairs are marked as DISCONNECTED only after all accel operations are
completed.

Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Change-Id: I9aa4b456135523d44cf96a3dd21e1876819834ca
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/19057


Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <jim.harris@gmail.com>
parent 17f3ea2d
Loading
Loading
Loading
Loading
+46 −4
Original line number Diff line number Diff line
@@ -394,18 +394,30 @@ nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
	}

	nvme_tcp_qpair_abort_reqs(qpair, 0);

	/* If the qpair is marked as asynchronous, let it go through the process_completions() to
	 * let any outstanding requests (e.g. those with outstanding accel operations) complete.
	 * Otherwise, there's no way of waiting for them, so tqpair->outstanding_reqs has to be
	 * empty.
	 */
	if (qpair->async) {
		nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
	} else {
		assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));
		nvme_transport_ctrlr_disconnect_qpair_done(qpair);
	}
}

static int
nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
{
	struct nvme_tcp_qpair *tqpair;
	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);

	assert(qpair != NULL);
	nvme_tcp_qpair_abort_reqs(qpair, 0);
	assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));

	nvme_qpair_deinit(qpair);
	tqpair = nvme_tcp_qpair(qpair);
	nvme_tcp_free_reqs(tqpair);
	if (!tqpair->shared_stats) {
		free(tqpair->stats);
@@ -1046,6 +1058,11 @@ nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
	cpl.status.dnr = dnr;

	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
		/* We cannot abort requests with accel operations in progress */
		if (tcp_req->ordering.bits.in_progress_accel) {
			continue;
		}

		nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true);
	}
}
@@ -2035,6 +2052,10 @@ nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped, uint32_t max_
			break;
		case NVME_TCP_PDU_RECV_STATE_QUIESCING:
			if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
				if (nvme_qpair_get_state(&tqpair->qpair) == NVME_QPAIR_DISCONNECTING) {
					nvme_transport_ctrlr_disconnect_qpair_done(&tqpair->qpair);
				}

				nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
			}
			break;
@@ -2107,6 +2128,16 @@ nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_c
			if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
				nvme_tcp_qpair_check_timeout(qpair);
			}

			if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
				if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
					nvme_transport_ctrlr_disconnect_qpair_done(qpair);
				}

				/* Don't return errors until the qpair gets disconnected */
				return 0;
			}

			goto fail;
		}
	}
@@ -2791,8 +2822,19 @@ nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *t
	num_events = spdk_sock_group_poll(group->sock_group);

	STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
		tqpair = nvme_tcp_qpair(qpair);
		if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
			if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
				nvme_transport_ctrlr_disconnect_qpair_done(qpair);
			}
		}
		/* Wait until the qpair transitions to the DISCONNECTED state, otherwise user might
		 * want to free it from disconnect_qpair_cb, while it's not fully disconnected (and
		 * might still have outstanding requests) */
		if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTED) {
			disconnected_qpair_cb(qpair, tgroup->group->ctx);
		}
	}

	/* If any qpairs were marked as needing to be polled due to an asynchronous write completion
	 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. */
+152 −4
Original line number Diff line number Diff line
@@ -5,13 +5,20 @@
 */

#include "spdk/stdinc.h"
#include "spdk/nvme.h"

#include "spdk_internal/cunit.h"

#include "common/lib/test_sock.c"
#include "nvme/nvme_internal.h"
#include "common/lib/nvme/common_stubs.h"

/* nvme_transport_ctrlr_disconnect_qpair_done() stub is defined in common_stubs.h, but we need to
 * override it here */
static void nvme_transport_ctrlr_disconnect_qpair_done_mocked(struct spdk_nvme_qpair *qpair);
#define nvme_transport_ctrlr_disconnect_qpair_done nvme_transport_ctrlr_disconnect_qpair_done_mocked

#include "nvme/nvme_tcp.c"
#include "common/lib/nvme/common_stubs.h"

SPDK_LOG_REGISTER_COMPONENT(nvme)

@@ -42,6 +49,12 @@ DEFINE_STUB_V(spdk_nvme_qpair_print_command, (struct spdk_nvme_qpair *qpair,
DEFINE_STUB_V(spdk_nvme_qpair_print_completion, (struct spdk_nvme_qpair *qpair,
		struct spdk_nvme_cpl *cpl));

static void
nvme_transport_ctrlr_disconnect_qpair_done_mocked(struct spdk_nvme_qpair *qpair)
{
	/* Test override of nvme_transport_ctrlr_disconnect_qpair_done(): instead of the
	 * real transport-layer bookkeeping, simply mark the qpair as fully DISCONNECTED
	 * so the test cases below can observe exactly when the transition happens. */
	qpair->state = NVME_QPAIR_DISCONNECTED;
}

static void
test_nvme_tcp_pdu_set_data_buf(void)
{
@@ -1442,23 +1455,50 @@ test_nvme_tcp_ctrlr_connect_qpair(void)
	nvme_tcp_ctrlr_delete_io_qpair(&ctrlr, qpair);
}

static void
ut_disconnect_qpair_req_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	/* Completion callback for requests aborted during qpair disconnect.  Any
	 * request failed this way must carry the generic ABORTED SQ DELETION status. */
	CU_ASSERT_EQUAL(cpl->status.sct, SPDK_NVME_SCT_GENERIC);
	CU_ASSERT_EQUAL(cpl->status.sc, SPDK_NVME_SC_ABORTED_SQ_DELETION);
}

static void
ut_disconnect_qpair_poll_group_cb(struct spdk_nvme_qpair *qpair, void *ctx)
{
	/* Poll-group disconnect callback: ctx points at the test's counter, which is
	 * bumped once per invocation so the test can verify when (and how many times)
	 * the callback fired.  The qpair argument is intentionally unused. */
	int *count = ctx;

	*count += 1;
}

static void
test_nvme_tcp_ctrlr_disconnect_qpair(void)
{
	struct spdk_nvme_ctrlr ctrlr = {};
	struct spdk_nvme_qpair *qpair;
	struct nvme_tcp_pdu pdu = {}, recv_pdu = {};
	struct nvme_tcp_qpair tqpair = {
		.qpair.trtype = SPDK_NVME_TRANSPORT_TCP,
		.qpair = {
			.trtype = SPDK_NVME_TRANSPORT_TCP,
			.ctrlr = &ctrlr,
			.async = true,
		},
		.recv_pdu = &recv_pdu,
	};
	struct nvme_tcp_poll_group tgroup = {};
	struct nvme_tcp_pdu pdu = {};
	struct spdk_nvme_poll_group group = {};
	struct nvme_tcp_poll_group tgroup = { .group.group = &group };
	struct nvme_request req = { .qpair = &tqpair.qpair, .cb_fn = ut_disconnect_qpair_req_cb };
	struct nvme_tcp_req treq = { .req = &req, .tqpair = &tqpair };
	int rc, disconnected;

	qpair = &tqpair.qpair;
	qpair->poll_group = &tgroup.group;
	tqpair.sock = (struct spdk_sock *)0xDEADBEEF;
	tqpair.needs_poll = true;
	TAILQ_INIT(&tgroup.needs_poll);
	STAILQ_INIT(&tgroup.group.disconnected_qpairs);
	TAILQ_INIT(&tqpair.send_queue);
	TAILQ_INIT(&tqpair.free_reqs);
	TAILQ_INIT(&tqpair.outstanding_reqs);
	TAILQ_INSERT_TAIL(&tgroup.needs_poll, &tqpair, link);
	TAILQ_INSERT_TAIL(&tqpair.send_queue, &pdu, tailq);

@@ -1467,6 +1507,114 @@ test_nvme_tcp_ctrlr_disconnect_qpair(void)
	CU_ASSERT(tqpair.needs_poll == false);
	CU_ASSERT(tqpair.sock == NULL);
	CU_ASSERT(TAILQ_EMPTY(&tqpair.send_queue) == true);

	/* Check that outstanding requests are aborted */
	treq.state = NVME_TCP_REQ_ACTIVE;
	qpair->num_outstanding_reqs = 1;
	qpair->state = NVME_QPAIR_DISCONNECTING;
	TAILQ_INSERT_TAIL(&tqpair.outstanding_reqs, &treq, link);

	nvme_tcp_ctrlr_disconnect_qpair(&ctrlr, qpair);

	CU_ASSERT(TAILQ_EMPTY(&tqpair.outstanding_reqs));
	CU_ASSERT_EQUAL(qpair->num_outstanding_reqs, 0);
	CU_ASSERT_EQUAL(&treq, TAILQ_FIRST(&tqpair.free_reqs));
	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTING);

	/* Check that a request with an accel operation in progress won't be aborted until that
	 * operation is completed */
	treq.state = NVME_TCP_REQ_ACTIVE;
	treq.ordering.bits.in_progress_accel = 1;
	qpair->poll_group = NULL;
	qpair->num_outstanding_reqs = 1;
	qpair->state = NVME_QPAIR_DISCONNECTING;
	TAILQ_REMOVE(&tqpair.free_reqs, &treq, link);
	TAILQ_INSERT_TAIL(&tqpair.outstanding_reqs, &treq, link);

	nvme_tcp_ctrlr_disconnect_qpair(&ctrlr, qpair);

	CU_ASSERT_EQUAL(&treq, TAILQ_FIRST(&tqpair.outstanding_reqs));
	CU_ASSERT_EQUAL(qpair->num_outstanding_reqs, 1);
	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTING);

	/* Check that a qpair will be transitioned to a DISCONNECTED state only once the accel
	 * operation is completed */
	rc = nvme_tcp_qpair_process_completions(qpair, 0);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT_EQUAL(&treq, TAILQ_FIRST(&tqpair.outstanding_reqs));
	CU_ASSERT_EQUAL(qpair->num_outstanding_reqs, 1);
	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTING);

	treq.ordering.bits.in_progress_accel = 0;
	qpair->num_outstanding_reqs = 0;
	TAILQ_REMOVE(&tqpair.outstanding_reqs, &treq, link);

	rc = nvme_tcp_qpair_process_completions(qpair, 0);
	CU_ASSERT_EQUAL(rc, -ENXIO);
	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTED);

	/* Check the same scenario but this time with spdk_sock_flush() returning errors */
	treq.state = NVME_TCP_REQ_ACTIVE;
	treq.ordering.bits.in_progress_accel = 1;
	qpair->num_outstanding_reqs = 1;
	qpair->state = NVME_QPAIR_DISCONNECTING;
	TAILQ_INSERT_TAIL(&tqpair.outstanding_reqs, &treq, link);

	nvme_tcp_ctrlr_disconnect_qpair(&ctrlr, qpair);

	CU_ASSERT_EQUAL(&treq, TAILQ_FIRST(&tqpair.outstanding_reqs));
	CU_ASSERT_EQUAL(qpair->num_outstanding_reqs, 1);
	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTING);

	MOCK_SET(spdk_sock_flush, -ENOTCONN);
	treq.ordering.bits.in_progress_accel = 0;
	qpair->num_outstanding_reqs = 0;
	TAILQ_REMOVE(&tqpair.outstanding_reqs, &treq, link);

	rc = nvme_tcp_qpair_process_completions(qpair, 0);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTED);
	rc = nvme_tcp_qpair_process_completions(qpair, 0);
	CU_ASSERT_EQUAL(rc, -ENXIO);
	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTED);
	MOCK_CLEAR(spdk_sock_flush);

	/* Now check the same scenario, but with a qpair that's part of a poll group */
	disconnected = 0;
	group.ctx = &disconnected;
	treq.state = NVME_TCP_REQ_ACTIVE;
	treq.ordering.bits.in_progress_accel = 1;
	qpair->poll_group = &tgroup.group;
	qpair->num_outstanding_reqs = 1;
	qpair->state = NVME_QPAIR_DISCONNECTING;
	STAILQ_INSERT_TAIL(&tgroup.group.disconnected_qpairs, qpair, poll_group_stailq);
	TAILQ_INSERT_TAIL(&tqpair.outstanding_reqs, &treq, link);

	nvme_tcp_poll_group_process_completions(&tgroup.group, 0,
						ut_disconnect_qpair_poll_group_cb);
	/* While there's an outstanding request, disconnect_cb shouldn't be executed */
	CU_ASSERT_EQUAL(disconnected, 0);
	CU_ASSERT_EQUAL(qpair->num_outstanding_reqs, 1);
	CU_ASSERT_EQUAL(&treq, TAILQ_FIRST(&tqpair.outstanding_reqs));
	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTING);

	treq.ordering.bits.in_progress_accel = 0;
	qpair->num_outstanding_reqs = 0;
	TAILQ_REMOVE(&tqpair.outstanding_reqs, &treq, link);

	nvme_tcp_poll_group_process_completions(&tgroup.group, 0,
						ut_disconnect_qpair_poll_group_cb);
	CU_ASSERT_EQUAL(disconnected, 1);
	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTED);

	/* Check that a non-async qpair is marked as disconnected immediately */
	qpair->poll_group = NULL;
	qpair->state = NVME_QPAIR_DISCONNECTING;
	qpair->async = false;

	nvme_tcp_ctrlr_disconnect_qpair(&ctrlr, qpair);

	CU_ASSERT_EQUAL(qpair->state, NVME_QPAIR_DISCONNECTED);
}

static void