Commit 00641033 authored by Krzysztof Goreczny, committed by Jim Harris
Browse files

test/common: fix cpu busy check for the interrupt mode tests



Checking CPU core utilization to establish whether a reactor is busy polling
or idle is error-prone, as multiple things might make a core busy or not busy
enough. Multiple false positives were triggered in CI, see #3634.

A more reliable way to do this is to check whether the reactor is waiting on
anything in the kernel.
In the busy polling case this should be rare and non-permanent.
In interrupt mode, however, the reactor should often be blocked in
epoll_wait.

Change-Id: I33660653f2374c4da0cdba3283009a9b441d7deb
Signed-off-by: Krzysztof Goreczny <krzysztof.goreczny@dell.com>
Reviewed-on: https://review.spdk.io/c/spdk/spdk/+/26187


Reviewed-by: Konrad Sztyber <ksztyber@nvidia.com>
Reviewed-by: Michal Berger <michal.berger@nutanix.com>
Reviewed-by: Jim Harris <jim.harris@nvidia.com>
Tested-by: SPDK Automated Test System <spdkbot@gmail.com>
parent d428a790
Loading
Loading
Loading
Loading
+7 −15
Original line number Diff line number Diff line
@@ -10,26 +10,18 @@ function reactor_is_busy_or_idle() {
	local pid=$1
	local idx=$2
	local state=$3
	local busy_threshold=${BUSY_THRESHOLD:-65}
	local idle_threshold=${IDLE_THRESHOLD:-30}
	local reactor_state

	if [[ $state != "busy" ]] && [[ $state != "idle" ]]; then
		return 1
	fi

	if ! hash top; then
		# Fail this test if top is missing from system.
	if [[ $state != "busy" && $state != "idle" ]]; then
		return 1
	fi

	for ((j = 10; j != 0; j--)); do
		top_reactor=$(top -bHn 1 -p $pid -w 256 | grep reactor_$idx)
		cpu_rate=$(echo $top_reactor | sed -e 's/^\s*//g' | awk '{print $9}')
		cpu_rate=${cpu_rate%.*}
		reactor_state=($(ps -L -p"$pid" -ostate=,pid=,comm= | grep "reactor_$idx" | awk '{print $1}'))

		if [[ $state = "busy" ]] && ((cpu_rate < busy_threshold)); then
		if [[ $state = "busy" && $reactor_state != "R" ]]; then
			sleep 1
		elif [[ $state = "idle" ]] && ((cpu_rate > idle_threshold)); then
		elif [[ $state = "idle" && $reactor_state != "S" ]]; then
			sleep 1
		else
			return 0
@@ -37,9 +29,9 @@ function reactor_is_busy_or_idle() {
	done

	if [[ $state = "busy" ]]; then
		echo "cpu rate ${cpu_rate} of reactor $i probably is not busy polling"
		echo "reactor $i probably is not busy polling"
	else
		echo "cpu rate ${cpu_rate} of reactor $i probably is not idle interrupt"
		echo "reactor $i probably is not idle interrupt"
	fi

	return 1
+5 −5
Original line number Diff line number Diff line
@@ -20,7 +20,7 @@ $rpc_py nvmf_create_subsystem $NQN -a -s $NVMF_SERIAL
$rpc_py nvmf_subsystem_add_ns $NQN AIO0
$rpc_py nvmf_subsystem_add_listener $NQN -t $TEST_TRANSPORT -a $NVMF_FIRST_TARGET_IP -s $NVMF_PORT

# Confirm that with no traffic all cpu cores are idle
# Confirm that with no traffic reactors are idle
for i in {0..1}; do
	reactor_is_idle $nvmfpid $i
done
@@ -34,19 +34,19 @@ subnqn:${NQN}" "${NO_HUGE[@]}" &

perf_pid=$!

# confirm that during load all cpu cores are busy
# confirm that during load all reactors are busy
for i in {0..1}; do
	BUSY_THRESHOLD=30 reactor_is_busy $nvmfpid $i
	reactor_is_busy $nvmfpid $i
done

wait $perf_pid

# with no load all cpu cores should be idle again
# with no traffic all reactors should be idle again
for i in {0..1}; do
	reactor_is_idle $nvmfpid $i
done

# connecting initiator should not cause cores to be busy
# connecting initiator should not cause reactors to be busy
$NVME_CONNECT "${NVME_HOST[@]}" -t $TEST_TRANSPORT -n "$NQN" -a "$NVMF_FIRST_TARGET_IP" -s "$NVMF_PORT"
waitforserial "$NVMF_SERIAL"
for i in {0..1}; do