Commit 95aa1a73 authored by Krzysztof Karas, committed by Tomasz Zawadzki
Browse files

sw_hotplug: avoid hotplug timeouts



Avoid hotplug application timeouts on machines
with multiple NVMe drives by scaling the app run time
to the number of NVMe drives.
Furthermore, change the way we wait for hotplug
app initialization by using the "perform_tests" RPC,
and handle termination by starting the app via the timeout command.

Second part of the series fixing #2201.

Fixes #2201

Change-Id: Id82c8e8f6b9e870a55c4f43a11c755982855deeb
Signed-off-by: Krzysztof Karas <krzysztof.karas@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15965


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
parent f955f93c
Loading
Loading
Loading
Loading
+18 −22
Original line number Diff line number Diff line
@@ -8,6 +8,9 @@ rootdir=$(readlink -f $testdir/../..)
source $rootdir/scripts/common.sh
source $rootdir/test/common/autotest_common.sh

export PYTHONPATH="$rootdir/examples/nvme/hotplug/"
rpc_py=$rootdir/scripts/rpc.py

# Pci bus hotplug
# Helper function to remove/attach controllers
remove_attach_helper() {
@@ -16,12 +19,6 @@ remove_attach_helper() {
	local use_bdev=$3
	local dev

	# We need to make sure we wait long enough for hotplug to initialize the devices
	# and start IO - if we start removing devices before that happens we will end up
	# stepping on hotplug's toes forcing it to fail to report proper count of given
	# events.
	sleep "$hotplug_wait"

	while ((hotplug_events--)); do
		for dev in "${nvmes[@]}"; do
			echo 1 > "/sys/bus/pci/devices/$dev/remove"
@@ -72,30 +69,29 @@ remove_attach_helper() {
run_hotplug() {
	trap 'killprocess $hotplug_pid; exit 1' SIGINT SIGTERM EXIT

	"$SPDK_EXAMPLE_DIR/hotplug" \
	test_time=$((hotplug_events * hotplug_wait * nvme_count))

	# Hotplug may sometimes hang, so start it via timeout command.
	timeout -k 2s $((test_time + hotplug_wait)) "$SPDK_EXAMPLE_DIR/hotplug" \
		-i 0 \
		-t $((hotplug_events * hotplug_wait + hotplug_wait * 3)) \
		-t $((test_time)) \
		-n $((hotplug_events * nvme_count)) \
		-r $((hotplug_events * nvme_count)) \
		-l warning &
	hotplug_pid=$!
		-l warning --wait-for-rpc &
	timeout_pid=$!
	hotplug_pid=$(ps -o pid= --ppid "$timeout_pid")

	remove_attach_helper "$hotplug_events" "$hotplug_wait" false
	# Make sure Hotplug started before removing and inserting devices.
	waitforlisten "$hotplug_pid"

	# Wait in case hotplug app is lagging behind
	# and kill it, if it hung.
	sleep $hotplug_wait

	if ! kill -0 "$hotplug_pid"; then
		# hotplug already finished, check for the error code.
		wait "$hotplug_pid"
	else
		echo "Killing hotplug application"
		killprocess $hotplug_pid
		return 1
	fi
	$rpc_py --plugin hotplug_plugin perform_tests

	remove_attach_helper "$hotplug_events" "$hotplug_wait" false

	trap - SIGINT SIGTERM EXIT

	# Check timeout return code.
	wait "$timeout_pid"
}

# SPDK target hotplug