Commit 358e8720 authored by Krzysztof Goreczny's avatar Krzysztof Goreczny Committed by Konrad Sztyber
Browse files

nvmf/tcp: fix spdk_nvmf_tcp_control_msg_list queuing



Control msg buffers are used when an admin or fabrics command has an in-capsule
data size bigger than transport->opts.in_capsule_data_size.
The number of buffers is limited, and when there are no free ones the request
is queued in the pending_buf_queue.
However, PDU processing in nvmf_tcp_sock_process continues, ignoring the lack
of a buffer, and calls nvmf_tcp_capsule_cmd_hdr_handle() again, so yet another
request is queued for the same PDU, again without a control msg buffer
available, and so on.
Fix this by adding one more PDU processing state that indicates lack of
buffer for the PDU. This way PDU processing continues only after
nvmf_tcp_process() acquires the buffer.

Change-Id: I5e16d7fa3931efc45646f8fdd23794a640c75e93
Signed-off-by: default avatarKrzysztof Goreczny <krzysztof.goreczny@dell.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/24306


Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
parent 840a5d2e
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -147,6 +147,9 @@ enum nvme_tcp_pdu_recv_state {
	/* Active tqpair waiting for a tcp request, only use in target side */
	NVME_TCP_PDU_RECV_STATE_AWAIT_REQ,

	/* Active tqpair waiting for a free buffer to store PDU */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_BUF,

	/* Active tqpair waiting for payload */
	NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD,

+4 −0
Original line number Diff line number Diff line
@@ -2455,6 +2455,9 @@ nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
		case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
			nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);
			break;
		/* Wait for the request processing loop to acquire a buffer for the PDU */
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_BUF:
			break;
		case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
			/* check whether the data is valid, if not we just return */
			if (!pdu->data_len) {
@@ -2642,6 +2645,7 @@ nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
				if (!req->iov[0].iov_base) {
					/* No available buffers. Queue this request up. */
					SPDK_DEBUGLOG(nvmf_tcp, "No available ICD buffers. Queueing request %p\n", tcp_req);
					nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_BUF);
					return 0;
				}
			} else {
+1 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@ if [ "$SPDK_TEST_NVMF_TRANSPORT" = "tcp" ]; then
	run_test "nvmf_bdevio_no_huge" $rootdir/test/nvmf/target/bdevio.sh "${TEST_ARGS[@]}" --no-hugepages
	run_test "nvmf_tls" $rootdir/test/nvmf/target/tls.sh "${TEST_ARGS[@]}"
	run_test "nvmf_fips" $rootdir/test/nvmf/fips/fips.sh "${TEST_ARGS[@]}"
	run_test "nvmf_control_msg_list" $rootdir/test/nvmf/target/control_msg_list.sh "${TEST_ARGS[@]}"
fi

if [ $RUN_NIGHTLY -eq 1 ]; then
+38 −0
Original line number Diff line number Diff line
#!/usr/bin/env bash
#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2024 Dell Inc, or its subsidiaries.
#  All rights reserved.
#
# Regression test for control_msg_list buffer queuing: configure the TCP
# target with a single control msg buffer, then run several perf clients
# concurrently so that requests contend for that one buffer and must be
# queued (and later resumed) instead of being processed immediately.

testdir=$(readlink -f $(dirname $0))
rootdir=$(readlink -f $testdir/../../..)
source $rootdir/test/common/autotest_common.sh
source $rootdir/test/nvmf/common.sh

nvmftestinit
nvmfappstart

subnqn="nqn.2024-07.io.spdk:cnode0"
perf="$SPDK_BIN_DIR/spdk_nvme_perf"

# An in-capsule data size smaller than the fabrics connect command (1024 bytes)
# forces usage of control_msg_list. With just one buffer, the next request must
# be queued.
$rpc_py nvmf_create_transport "$NVMF_TRANSPORT_OPTS" --in-capsule-data-size 768 --control-msg-num 1
$rpc_py nvmf_create_subsystem "$subnqn" -a
$rpc_py bdev_malloc_create -b Malloc0 32 512
$rpc_py nvmf_subsystem_add_ns "$subnqn" Malloc0
$rpc_py nvmf_subsystem_add_listener "$subnqn" -t "$TEST_TRANSPORT" -a "$NVMF_FIRST_TARGET_IP" -s "$NVMF_PORT"

# Run multiple instances at once to trigger a shortage of the control_msg_list
# buffers. Each instance is pinned to its own core (-c) so they connect
# concurrently.
"$perf" -c 0x2 -q 1 -o 4096 -w randread -t 1 -r "trtype:${TEST_TRANSPORT} adrfam:IPv4 traddr:${NVMF_FIRST_TARGET_IP} trsvcid:${NVMF_PORT}" &
perf_pid1=$!
"$perf" -c 0x4 -q 1 -o 4096 -w randread -t 1 -r "trtype:${TEST_TRANSPORT} adrfam:IPv4 traddr:${NVMF_FIRST_TARGET_IP} trsvcid:${NVMF_PORT}" &
perf_pid2=$!
"$perf" -c 0x8 -q 1 -o 4096 -w randread -t 1 -r "trtype:${TEST_TRANSPORT} adrfam:IPv4 traddr:${NVMF_FIRST_TARGET_IP} trsvcid:${NVMF_PORT}" &
perf_pid3=$!

# Wait for each client; a non-zero exit status (e.g. a hung/failed connect
# caused by the queuing bug) fails the test via the harness's error handling.
wait $perf_pid1
wait $perf_pid2
wait $perf_pid3

# Clear the harness-installed traps and tear down the target.
trap - SIGINT SIGTERM EXIT
nvmftestfini