Commit 22364ca8 authored by Darek Stojaczyk, committed by Jim Harris
Browse files

test/qos: set qos limits to a % of the maximum disk performance



We used to set an arbitrary qos limit which in some
cases happened to be higher than the actual disk
capabilities. Even though we had an explicit check
for that and we skipped the entire qos test suite
if the device was too slow, the disk performance could
vary and be just enough to pass that initial check,
but then slow down and fail in the middle of the test
suite. If the bdev maxes out at 21MB/s on one run, it
may just as well do 19MB/s on another. That is exactly
the case causing intermittent failures on our CI.

We fix it by removing the arbitrary qos limit and
setting it to a % of the maximum disk performance
instead. This lets us e.g. remove the code for skipping
the entire test suite when the disk is too slow. We
definitely don't want to skip any tests.

Change-Id: I6de8a183c00bab64484b4ddb12df1dedfbed23f8
Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/451887


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Karol Latecki <karol.latecki@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: yidong0635 <dongx.yi@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent 992ffd80
Loading
Loading
Loading
Loading
+47 −41
Original line number Diff line number Diff line
@@ -26,10 +26,13 @@ function check_qos_works_well() {
	end_io_count=$(jq -r '.bdevs[0].num_read_ops' <<< "$iostats")
	end_bytes_read=$(jq -r '.bdevs[0].bytes_read' <<< "$iostats")

	IOPS_RESULT=$(((end_io_count-start_io_count)/5))
	BANDWIDTH_RESULT=$(((end_bytes_read-start_bytes_read)/5))

	if [ $LIMIT_TYPE = IOPS ]; then
		read_result=$(((end_io_count-start_io_count)/5))
		read_result=$IOPS_RESULT
	else
		read_result=$(((end_bytes_read-start_bytes_read)/5))
		read_result=$BANDWIDTH_RESULT
	fi

	if [ $enable_limit = true ]; then
@@ -42,16 +45,10 @@ function check_qos_works_well() {
	else
		retval=$(echo "$read_result > $qos_limit" | bc)
		if [ $retval -eq 0 ]; then
			if [ $check_qos = true ]; then
				echo "$read_result less than $qos_limit - exit QoS testing"
				ENABLE_QOS=false
				exit 0
			else
			echo "$read_result less than $qos_limit - expected greater than"
			exit 1
		fi
	fi
	fi
}

if [ -z "$TARGET_IP" ]; then
@@ -68,12 +65,8 @@ timing_enter qos

MALLOC_BDEV_SIZE=64
MALLOC_BLOCK_SIZE=512
ENABLE_QOS=true
IOPS_LIMIT=20000
BANDWIDTH_LIMIT_MB=20
BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT_MB*1024*1024))
READ_BANDWIDTH_LIMIT_MB=10
READ_BANDWIDTH_LIMIT=$(($READ_BANDWIDTH_LIMIT_MB*1024*1024))
IOPS_RESULT=
BANDWIDTH_RESULT=
LIMIT_TYPE=IOPS
rpc_py="$rootdir/scripts/rpc.py"
fio_py="$rootdir/scripts/fio.py"
@@ -104,37 +97,50 @@ iscsiadm -m node --login -p $TARGET_IP:$ISCSI_PORT

trap "iscsicleanup; killprocess $pid; iscsitestfini $1 $2; exit 1" SIGINT SIGTERM EXIT

# Check whether to enable the QoS testing.
check_qos_works_well false $IOPS_LIMIT Malloc0 true
# Run FIO without any QOS limits to determine the raw performance
check_qos_works_well false 0 Malloc0

# Set IOPS/bandwidth limit to 50% of the actual unrestrained performance.
IOPS_LIMIT=$(($IOPS_RESULT/2))
BANDWIDTH_LIMIT=$(($BANDWIDTH_RESULT/2))
# Set READ bandwidth limit to 50% of the RW bandwidth limit to be able
# to differentiate those two.
READ_BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT/2))

# Also round them down to nearest multiple of either 1000 IOPS or 1MB BW
# which are the minimal QoS granularities
IOPS_LIMIT=$(($IOPS_LIMIT/1000*1000))
BANDWIDTH_LIMIT_MB=$(($BANDWIDTH_LIMIT/1024/1024))
BANDWIDTH_LIMIT=$(($BANDWIDTH_LIMIT_MB*1024*1024))
READ_BANDWIDTH_LIMIT_MB=$(($READ_BANDWIDTH_LIMIT/1024/1024))
READ_BANDWIDTH_LIMIT=$(($READ_BANDWIDTH_LIMIT_MB*1024*1024))

if [ $ENABLE_QOS = true ]; then
# Limit the I/O rate by RPC, then confirm the observed rate matches.
$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
	check_qos_works_well true $IOPS_LIMIT Malloc0 false
check_qos_works_well true $IOPS_LIMIT Malloc0

# Now disable the rate limiting, and confirm the observed rate is not limited anymore.
$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0
	check_qos_works_well false $IOPS_LIMIT Malloc0 false
check_qos_works_well false $IOPS_LIMIT Malloc0

# Limit the I/O rate again.
$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec $IOPS_LIMIT
	check_qos_works_well true $IOPS_LIMIT Malloc0 false
check_qos_works_well true $IOPS_LIMIT Malloc0
echo "I/O rate limiting tests successful"

# Limit the I/O bandwidth rate by RPC, then confirm the observed rate matches.
LIMIT_TYPE=BANDWIDTH
$rpc_py set_bdev_qos_limit Malloc0 --rw_ios_per_sec 0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB
	check_qos_works_well true $BANDWIDTH_LIMIT Malloc0 false
check_qos_works_well true $BANDWIDTH_LIMIT Malloc0

# Now disable the bandwidth rate limiting, and confirm the observed rate is not limited anymore.
$rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec 0
	check_qos_works_well false $BANDWIDTH_LIMIT Malloc0 false
check_qos_works_well false $BANDWIDTH_LIMIT Malloc0

# Limit the I/O bandwidth rate again with both read/write and read/only.
$rpc_py set_bdev_qos_limit Malloc0 --rw_mbytes_per_sec $BANDWIDTH_LIMIT_MB --r_mbytes_per_sec $READ_BANDWIDTH_LIMIT_MB
	check_qos_works_well true $READ_BANDWIDTH_LIMIT Malloc0 false
check_qos_works_well true $READ_BANDWIDTH_LIMIT Malloc0
echo "I/O bandwidth limiting tests successful"
fi

iscsicleanup
$rpc_py delete_target_node 'iqn.2016-06.io.spdk:Target1'