Commit 4e45c563 authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Konrad Sztyber
Browse files

nvmf/rdma: Correctly remove req from the pending_rdma_read_queue



This patch fixes qpair cleanup with rdma_req in
the pending_rdma_read_queue.
1. When a WC with error is reaped, we should remove
the request from the queue if there are remaining data WRs
2. When qpair is in error state and a request in the state
RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER is being
processed, we should check if it has remaining data WRs and
remove it from the queue
3. We should not process requests in the pending_rdma_read_queue
(nvmf_rdma_qpair_process_pending function) which are in the
RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER state,
e.g. a request might be transferring data over the network, and
qpair might be in error state and we could complete such request
in nvmf_rdma_qpair_process_pending and then reap completion for
this request and complete it again.

Also, when we are handling WC with error for DATA request, we
must call nvmf_rdma_request_process for the req in COMPLETED
state to release resources and decrement qpair's counters

Signed-off-by: default avatarAlexey Marchuk <alexeymar@nvidia.com>
Change-Id: Ifc26c3e9bd0e006aa8cc35bf6ae5fd913f452538
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/24477


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
Community-CI: Mellanox Build Bot
parent 8b9c92d3
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -2078,6 +2078,14 @@ nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
		case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING:
			STAILQ_REMOVE(&rqpair->pending_rdma_read_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
			break;
		case RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER:
			if (rdma_req->num_remaining_data_wr) {
				/* Partially sent request is still in the pending_rdma_read_queue,
				 * remove it before completing */
				rdma_req->num_remaining_data_wr = 0;
				STAILQ_REMOVE(&rqpair->pending_rdma_read_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
			}
			break;
		case RDMA_REQUEST_STATE_DATA_TRANSFER_TO_HOST_PENDING:
			STAILQ_REMOVE(&rqpair->pending_rdma_write_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
			break;
@@ -3347,6 +3355,12 @@ nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport,

	/* We process I/O in the data transfer pending queue at the highest priority. */
	STAILQ_FOREACH_SAFE(rdma_req, &rqpair->pending_rdma_read_queue, state_link, req_tmp) {
		if (rdma_req->state != RDMA_REQUEST_STATE_DATA_TRANSFER_TO_CONTROLLER_PENDING) {
			/* Requests in this queue might be in state RDMA_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER,
			 * they are transmitting data over network but we keep them in the list to guarantee
			 * fair processing. */
			continue;
		}
		if (nvmf_rdma_request_process(rtransport, rdma_req) == false && drain == false) {
			break;
		}
@@ -4749,7 +4763,14 @@ nvmf_rdma_poller_poll(struct spdk_nvmf_rdma_transport *rtransport,
				if (rdma_req->data.wr.opcode == IBV_WR_RDMA_READ) {
					rqpair->current_read_depth--;
					if (rdma_req->num_outstanding_data_wr == 0) {
						if (rdma_req->num_remaining_data_wr) {
							/* Partially sent request is still in the pending_rdma_read_queue,
							 * remove it now before completing */
							rdma_req->num_remaining_data_wr = 0;
							STAILQ_REMOVE(&rqpair->pending_rdma_read_queue, rdma_req, spdk_nvmf_rdma_request, state_link);
						}
						rdma_req->state = RDMA_REQUEST_STATE_COMPLETED;
						nvmf_rdma_request_process(rtransport, rdma_req);
					}
				}
			}
+23 −0
Original line number Diff line number Diff line
#!/usr/bin/env bash
#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2017 Intel Corporation
#  Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
#  All rights reserved.
#
testdir=$(readlink -f $(dirname $0))
@@ -144,8 +145,30 @@ function nvmf_shutdown_tc3() {
	stoptarget
}

# Test 4: Kill the target unexpectedly with I/O outstanding, highly fragmented payload.
# Regression test for qpair cleanup with partially-sent requests left in the
# pending_rdma_read_queue when the target dies mid-transfer.
function nvmf_shutdown_tc4() {
	starttarget

	# Run nvme_perf in the background with a highly fragmented payload:
	# 45056-byte random writes at queue depth 128; -O 4096 presumably splits
	# each I/O into many data WRs so requests can be partially sent when the
	# target is killed — TODO confirm against spdk_nvme_perf usage text.
	$rootdir/build/bin/spdk_nvme_perf -q 128 -o 45056 -O 4096 -w randwrite -t 20 -r "trtype:$TEST_TRANSPORT adrfam:IPV4 traddr:$NVMF_FIRST_TARGET_IP trsvcid:$NVMF_PORT" -P 4 &
	perfpid=$!
	# Give perf time to connect and get I/O in flight before killing the target.
	sleep 5
	# Expand the trap to also clean up the background perf process if something
	# goes wrong ($perfpid is single-quoted so it expands at signal time).
	trap 'process_shm --id $NVMF_APP_SHM_ID; kill -9 $perfpid || true; nvmftestfini; exit 1' SIGINT SIGTERM EXIT

	# Kill the target half way through, with I/O still outstanding.
	killprocess $nvmfpid
	nvmfpid=

	# Since outstanding I/Os complete with errors once the target is gone,
	# perf exits with a non-zero status — tolerate it with "|| true".
	sleep 1
	wait $perfpid || true
	stoptarget
}

# Execute each shutdown test case under the harness' run_test wrapper.
run_test "nvmf_shutdown_tc1" nvmf_shutdown_tc1
run_test "nvmf_shutdown_tc2" nvmf_shutdown_tc2
run_test "nvmf_shutdown_tc3" nvmf_shutdown_tc3
run_test "nvmf_shutdown_tc4" nvmf_shutdown_tc4

# All tests passed — clear the cleanup trap so normal exit is not penalized.
trap - SIGINT SIGTERM EXIT