Commit 243b7d79 authored by Krzysztof Goreczny's avatar Krzysztof Goreczny Committed by Jim Harris
Browse files

nvmf/tcp: abort request waiting for the buffer early



_nvmf_qpair_destroy first calls spdk_nvmf_poll_group_remove, which
sets the qpair->group pointer to NULL, and only then calls
nvmf_transport_qpair_fini. At that point we can no longer use the poll
group pointer to clean up the list of requests that await buffers in
the iobuf-maintained queue.
To resolve this nvmf_tcp_cleanup_all_states is split into two steps now:
1. Abort all requests awaiting buffer in the nvmf_tcp_poll_group_remove
   and mark all requests as completed. That way nvmf_tcp_req_process
   won't try to abort them again. Do not process the requests to not
   change the overall cleanup order.
2. Drain the "completed" requests in the nvmf_tcp_cleanup_all_states.
   There is no need to drain TCP_REQUEST_STATE_NEED_BUFFER state, at
   this point all requests in such state are aborted and marked as
   completed.

This fixes github issue 3627
Extend the existing test with a dirty flow that mimics the GitHub issue.

Change-Id: I1c5cc20fe758797ce790b596f39af0338189d868
Signed-off-by: default avatarKrzysztof Goreczny <krzysztof.goreczny@dell.com>
Reviewed-on: https://review.spdk.io/c/spdk/spdk/+/25785


Reviewed-by: default avatarJacek Kalwas <jacek.kalwas@nutanix.com>
Tested-by: default avatarSPDK Automated Test System <spdkbot@gmail.com>
Reviewed-by: default avatarJim Harris <jim.harris@nvidia.com>
Reviewed-by: default avatarBen Walker <ben@nvidia.com>
Community-CI: Mellanox Build Bot
parent aa9fb0b8
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -579,10 +579,11 @@ nvmf_tcp_abort_await_buffer_reqs(struct spdk_nvmf_tcp_qpair *tqpair)
{
	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;

	/* Wipe the requests waiting for buffer from the waiting list */
	/* Remove requests waiting for buffer from the waiting list and mark as completed */
	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->tcp_req_working_queue, state_link, req_tmp) {
		if (tcp_req->state == TCP_REQUEST_STATE_NEED_BUFFER) {
			nvmf_tcp_request_get_buffers_abort(tcp_req);
			nvmf_tcp_req_set_state(tcp_req, TCP_REQUEST_STATE_COMPLETED);
		}
	}
}
@@ -592,11 +593,10 @@ nvmf_tcp_cleanup_all_states(struct spdk_nvmf_tcp_qpair *tqpair)
{
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_CONTROLLER_TO_HOST);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEW);
	nvmf_tcp_abort_await_buffer_reqs(tqpair);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_NEED_BUFFER);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_EXECUTING);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_AWAITING_R2T_ACK);
	nvmf_tcp_drain_state_queue(tqpair, TCP_REQUEST_STATE_COMPLETED);
}

static void
@@ -3498,6 +3498,8 @@ nvmf_tcp_poll_group_remove(struct spdk_nvmf_transport_poll_group *group,
			    spdk_strerror(errno), errno);
	}

	nvmf_tcp_abort_await_buffer_reqs(tqpair);

	return rc;
}

+2 −1
Original line number Diff line number Diff line
@@ -41,7 +41,8 @@ if [ "$SPDK_TEST_NVMF_TRANSPORT" = "tcp" ]; then
	run_test "nvmf_tls" $rootdir/test/nvmf/target/tls.sh "${TEST_ARGS[@]}"
	run_test "nvmf_fips" $rootdir/test/nvmf/fips/fips.sh "${TEST_ARGS[@]}"
	run_test "nvmf_control_msg_list" $rootdir/test/nvmf/target/control_msg_list.sh "${TEST_ARGS[@]}"
	run_test "nvmf_wait_for_buf" $rootdir/test/nvmf/target/wait_for_buf.sh "${TEST_ARGS[@]}"
	run_test "nvmf_wait_for_buf_clean_flow" $rootdir/test/nvmf/target/wait_for_buf.sh clean_flow "${TEST_ARGS[@]}"
	run_test "nvmf_wait_for_buf_dirty_flow" $rootdir/test/nvmf/target/wait_for_buf.sh dirty_flow "${TEST_ARGS[@]}"
fi

if [ $RUN_NIGHTLY -eq 1 ]; then
+25 −4
Original line number Diff line number Diff line
@@ -9,6 +9,18 @@ rootdir=$(readlink -f $testdir/../../..)
source $rootdir/test/common/autotest_common.sh
source $rootdir/test/nvmf/common.sh

# Map the positional flow argument onto a numeric flag:
# 1 = clean flow, 0 = dirty flow; anything else is a usage error.
FLOW=$1
CLEAN_FLOW=-1

case "$FLOW" in
	clean_flow)
		CLEAN_FLOW=1
		;;
	dirty_flow)
		CLEAN_FLOW=0
		;;
	*)
		echo "Usage: $0 clean_flow|dirty_flow"
		exit 1
		;;
esac

nvmftestinit
nvmfappstart --wait-for-rpc

@@ -29,12 +41,21 @@ $rpc_py nvmf_subsystem_add_listener "$subnqn" -t "$TEST_TRANSPORT" -a "$NVMF_FIR

# 131072 (io size) = 16*8192 (io_unit_size). We have 24 buffers available, so only the very first request can allocate
# all required buffers at once, following requests must wait and go through the iobuf queuing scenario.
$perf "${perf_opt[@]}" -t 1

# Run the selected verification flow. Clean: normal traffic must have forced
# at least one iobuf small-pool retry. Dirty: kill the initiator mid-run so
# the target has to clean up requests still waiting for buffers.
if [ "$CLEAN_FLOW" -eq 1 ]; then
	# Clean flow tests if everything works fine with regular traffic scenario.
	$perf "${perf_opt[@]}" -t 1
	retry_count=$($rpc_py iobuf_get_stats | jq -r '.[] | select(.module == "nvmf_TCP") | .small_pool.retry')
	if [[ $retry_count -eq 0 ]]; then
		# Zero retries means the buffer-wait path was never exercised.
		# NOTE: "return" outside a function is a bash error in an executed
		# (non-sourced) script and does NOT stop it - use exit so the
		# failure actually fails the test (matches the usage-error path).
		exit 1
	fi
else
	# Dirty flow tests if target can correctly clean up awaiting requests when initiator is suddenly gone.
	$perf "${perf_opt[@]}" -t 10 &
	perf_pid=$!
	sleep 4
	# Fix: the variable is perf_pid, not perfpid - "$perfpid" expanded empty,
	# so the kill never hit perf and the dirty scenario was not exercised.
	kill -9 $perf_pid || true
fi

trap - SIGINT SIGTERM EXIT
nvmftestfini