Commit 89d2efe0 authored by Seth Howell's avatar Seth Howell Committed by Darek Stojaczyk
Browse files

rdma: set the srq param in the initiator.



We were setting this value in the target from our initiator, but it
turns out the rdma_conn_params struct is responsible for setting the
opposite side so we need to add it in the target side when accepting
connections.

Also, add a test to demonstrate target functionality when we overwhelm
the SRQ. It is useful to note that performance really tanks when you
start overwhelming the srq so it may be useful to use this test case to
check performance gains in edge cases over time.

Change-Id: Iac541bd9fc1d82eca9f21e7abc3f625663a6c460
Signed-off-by: default avatarSeth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/451678


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
parent 0f94938a
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -1133,6 +1133,15 @@ spdk_nvmf_rdma_event_accept(struct rdma_cm_id *id, struct spdk_nvmf_rdma_qpair *
		ctrlr_event_data.initiator_depth = rqpair->max_read_depth;
	}

	/* Configure infinite retries for the initiator side qpair.
	 * When using a shared receive queue on the target side,
	 * we need to pass this value to the initiator to prevent the
	 * initiator side NIC from completing SEND requests back to the
	 * initiator with status rnr_retry_count_exceeded. */
	if (rqpair->srq != NULL) {
		ctrlr_event_data.rnr_retry_count = 0x7;
	}

	rc = rdma_accept(id, &ctrlr_event_data);
	if (rc) {
		SPDK_ERRLOG("Error %d on rdma_accept\n", errno);
+1 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ if [ $SPDK_TEST_NVME_CLI -eq 1 ]; then
	run_test suite test/nvmf/nvme_cli/nvme_cli.sh
fi
run_test suite test/nvmf/lvol/nvmf_lvol.sh
run_test suite test/nvmf/srq_overwhelm/srq_overwhelm.sh
run_test suite test/nvmf/shutdown/shutdown.sh
run_test suite test/nvmf/bdev_io_wait/bdev_io_wait.sh
run_test suite test/nvmf/create_transport/create_transport.sh
+82 −0
Original line number Diff line number Diff line
#!/usr/bin/env bash

testdir=$(readlink -f $(dirname $0))
rootdir=$(readlink -f $testdir/../../..)
source $rootdir/test/common/autotest_common.sh
source $rootdir/test/nvmf/common.sh

MALLOC_BDEV_SIZE=64
MALLOC_BLOCK_SIZE=512

rpc_py="$rootdir/scripts/rpc.py"

set -e

# pass the parameter 'iso' to this script when running it in isolation to trigger rdma device initialization.
# e.g. sudo ./fio.sh iso
nvmftestinit $1

RDMA_IP_LIST=$(get_available_rdma_ips)
NVMF_FIRST_TARGET_IP=$(echo "$RDMA_IP_LIST" | head -n 1)
if [ -z $NVMF_FIRST_TARGET_IP ]; then
	echo "no NIC for nvmf test"
	exit 0
fi

if check_ip_is_soft_roce $NVMF_FIRST_TARGET_IP; then
	echo "Using software RDMA, Likely not enough memory to run this test. aborting."
	exit 0
fi

timing_enter srq_overwhelm
timing_enter start_nvmf_tgt

$NVMF_APP -m 0xF &
nvmfpid=$!

trap "process_shm --id $NVMF_APP_SHM_ID; killprocess $nvmfpid; nvmftestfini $1; exit 1" SIGINT SIGTERM EXIT

waitforlisten $nvmfpid
# create the rdma transport with an intentionally small SRQ depth
$rpc_py nvmf_create_transport -t RDMA -u 8192 -s 1024
timing_exit start_nvmf_tgt

modprobe -v nvme-rdma
declare -a malloc_bdevs
malloc_bdevs[0]="$($rpc_py construct_malloc_bdev $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE)"
malloc_bdevs[1]+="$($rpc_py construct_malloc_bdev $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE)"
malloc_bdevs[2]+="$($rpc_py construct_malloc_bdev $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE)"
malloc_bdevs[3]+="$($rpc_py construct_malloc_bdev $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE)"
malloc_bdevs[4]+="$($rpc_py construct_malloc_bdev $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE)"
malloc_bdevs[5]+="$($rpc_py construct_malloc_bdev $MALLOC_BDEV_SIZE $MALLOC_BLOCK_SIZE)"

for i in $(seq 0 5); do
	let j=$i+1
	$rpc_py nvmf_subsystem_create nqn.2016-06.io.spdk:cnode$j -a -s SPDK00000000000001
	echo ${malloc_bdevs[i]}
	$rpc_py nvmf_subsystem_add_ns nqn.2016-06.io.spdk:cnode$j "${malloc_bdevs[i]}"
	$rpc_py nvmf_subsystem_add_listener nqn.2016-06.io.spdk:cnode$j -t rdma -a $NVMF_FIRST_TARGET_IP -s 4420
	nvme connect -t rdma -n "nqn.2016-06.io.spdk:cnode${j}" -a "$NVMF_FIRST_TARGET_IP" -s "$NVMF_PORT" -i 16
	waitforblk "nvme${i}n1"
done

# by running 6 different FIO jobs, each with 13 subjobs, we end up with 78 fio threads trying to write to
# our target at once. This completely overwhelms the target SRQ, but allows us to verify that rnr_retry is
# working even at very high queue depths because the rdma qpair doesn't fail.
# It is normal to see the initiator timeout and reconnect waiting for completions from an overwhelmmed target,
# but the connection should come up and FIO should complete without errors.
$rootdir/scripts/fio.py nvmf 1048576 128 read 10 13

sync

for i in $(seq 1 6); do
	nvme disconnect -n "nqn.2016-06.io.spdk:cnode${i}"
	$rpc_py delete_nvmf_subsystem nqn.2016-06.io.spdk:cnode$i
done

trap - SIGINT SIGTERM EXIT

nvmfcleanup
killprocess $nvmfpid
nvmftestfini $1
timing_exit srq_overwhelm