Commit 80a9ef40 authored by Karol Latecki's avatar Karol Latecki Committed by Jim Harris
Browse files

test/vhost: vhost benchmark scripts update



- Add option to throttle iops in VMs using cgroups
- Add option to measure CPU utilization in VMs using SAR
- Add option to limit kernel vhost CPU cores (not NUMA optimized)
- Add option to do lvol preconditioning using fio bdev plugin
  before running IO performance test

Change-Id: I7e0fcf977be96ecf837385c2abc9d5dabbe2f8c5
Signed-off-by: default avatarKarol Latecki <karol.latecki@intel.com>
Reviewed-on: https://review.gerrithub.io/c/434229


Reviewed-by: default avatarTomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: default avatarDarek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-by: default avatarPaweł Niedźwiecki <pawelx.niedzwiecki@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent a23b8c8c
Loading
Loading
Loading
Loading
+13 −4
Original line number Diff line number Diff line
@@ -139,7 +139,7 @@ parser.add_argument('-R', '--ramptime', default="10", type=str,
                    help="Ramp time param for FIO (in seconds). Default: 10")
parser.add_argument('-c', '--ctrl-type', default="spdk_vhost_scsi", type=str,
                    help="Type of vhost controller to use in test.\
                    Possible options: spdk_vhost_scsi, spdk_vhost_blk.\
                    Possible options: spdk_vhost_scsi, spdk_vhost_blk\
                    Default: spdk_vhost_scsi")
parser.add_argument('-s', '--split', default=False, type=bool,
                    help="Use split vbdevs instead of logical volumes. Default: false")
@@ -203,6 +203,15 @@ command = " ".join(["test/vhost/perf_bench/vhost_perf.sh",
                    "%s" % disk_arg,
                    "--fio-job=%s" % fio_cfg_path,
                    "%s" % cpu_cfg_arg])
print("INFO: Running perf test with command:")
print(command)
pr = check_output(command, shell=True)
# TODO: Disabled for now.
# Reason: initially this script was supposed to be a wrapper for .sh script and would
# - generate FIO config
# - generate SPDK/QEMU CPU mask configuration file
# - run test script
# Auto-generating CPU masks configuration needs some more work to be done
# and increasing number of params makes .py script hard to use.
# Will cleanup here soon.

# print("INFO: Running perf test with command:")
# print(command)
# pr = check_output(command, shell=True)
+203 −48
Original line number Diff line number Diff line
@@ -4,16 +4,23 @@ set -e
vm_count=1
vm_memory=2048
vm_image="/home/sys_sgsw/vhost_vm_image.qcow2"
vm_sar_enable=false
vm_sar_delay="0"
vm_sar_interval="1"
vm_sar_count="10"
vm_throttle=""
max_disks=""
ctrl_type="spdk_vhost_scsi"
use_split=false
throttle=false

kernel_cpus=""
lvol_precondition=false
lvol_stores=()
lvol_bdevs=()
used_vms=""
wwpn_prefix="naa.5001405bc6498"

fio_bin="--fio-bin=/home/sys_sgsw/fio_ubuntu"
precond_fio_bin="/usr/src/fio/fio"

function usage()
{
@@ -34,14 +41,22 @@ function usage()
	echo "                            Default: 2048 MB"
	echo "    --vm-image=PATH         OS image to use for running the VMs."
	echo "                            Default: /home/sys_sgsw/vhost_vm_image.qcow2"
	echo "    --vm-sar-enable         Measure CPU utilization on VM using sar."
	echo "    --vm-sar-delay=INT      Wait for X seconds before sarting SAR measurement on VMs. Default: 0."
	echo "    --vm-sar-interval=INT   Interval (seconds) argument for SAR. Default: 1s."
	echo "    --vm-sar-count=INT      Count argument for SAR. Default: 10."
	echo "    --vm-throttle-iops=INT  I/Os throttle rate in IOPS for each device on the VMs."
	echo "    --max-disks=INT         Maximum number of NVMe drives to use in test."
	echo "                            Default: will use all available NVMes."
	echo "    --ctrl-type=TYPE        Controller type to use for test:"
	echo "                            spdk_vhost_scsi - use spdk vhost scsi"
	echo "                            spdk_vhost_blk - use spdk vhost block"
	echo "                            kernel_vhost - use kernel vhost scsi"
	echo "                            Default: spdk_vhost_scsi"
	echo "    --use-split             Use split vbdevs instead of Logical Volumes"
	echo "    --throttle=INT          I/Os throttle rate in IOPS for each device on the VMs."
	echo "    --limit-kernel-vhost=INT  Limit kernel vhost to run only on a number of CPU cores."
	echo "    --lvol-precondition     Precondition lvols after creating. Default: true."
	echo "    --precond-fio-bin       FIO binary used for SPDK fio plugin precondition. Default: /usr/src/fio/fio."
	echo "    --custom-cpu-cfg=PATH   Custom CPU config for test."
	echo "                            Default: spdk/test/vhost/common/autotest.config"
	echo "-x                          set -x for script debug"
@@ -71,6 +86,21 @@ function cleanup_split_cfg()
	done
}

function cleanup_parted_config()
{
	local disks=$(ls /dev/nvme*n1 | sort --version-sort)
	for disk in $disks; do
		parted -s $disk rm 1
	done
}

function cleanup_kernel_vhost()
{
	notice "Cleaning kernel vhost configration"
	targetcli clearconfig confirm=True
	cleanup_parted_config
}

while getopts 'xh-:' optchar; do
	case "$optchar" in
		-)
@@ -81,13 +111,18 @@ while getopts 'xh-:' optchar; do
			vm-count=*) vm_count="${OPTARG#*=}" ;;
			vm-memory=*) vm_memory="${OPTARG#*=}" ;;
			vm-image=*) vm_image="${OPTARG#*=}" ;;
			vm-sar-enable) vm_sar_enable=true ;;
			vm-sar-delay=*) vm_sar_delay="${OPTARG#*=}" ;;
			vm-sar-interval=*) vm_sar_interval="${OPTARG#*=}" ;;
			vm-sar-count=*) vm_sar_count="${OPTARG#*=}" ;;
			vm-throttle-iops=*) vm_throttle="${OPTARG#*=}" ;;
			max-disks=*) max_disks="${OPTARG#*=}" ;;
			ctrl-type=*) ctrl_type="${OPTARG#*=}" ;;
			use-split) use_split=true ;;
			throttle) throttle=true ;;
			lvol-precondition) lvol_precondition=true ;;
			precond-fio-bin=*) precond_fio_bin="${OPTARG#*=}" ;;
			limit-kernel-vhost=*) kernel_cpus="${OPTARG#*=}" ;;
			custom-cpu-cfg=*) custom_cpu_cfg="${OPTARG#*=}" ;;
			thin-provisioning) thin=" -t " ;;
			multi-os) multi_os=true ;;
			*) usage $0 "Invalid argument '$OPTARG'" ;;
		esac
		;;
@@ -100,6 +135,7 @@ done

. $(readlink -e "$(dirname $0)/../common/common.sh") || exit 1
. $(readlink -e "$(dirname $0)/../../../scripts/common.sh") || exit 1
BASE_DIR=$(readlink -f $(dirname ${BASH_SOURCE[0]}))
COMMON_DIR="$(cd $(readlink -f $(dirname $0))/../common && pwd)"
rpc_py="$SPDK_BUILD_DIR/scripts/rpc.py -s $(get_vhost_dir)/rpc.sock"

@@ -124,36 +160,84 @@ if [[ ${#nvmes[@]} -lt max_disks ]]; then
	fail "Number of NVMe drives (${#nvmes[@]}) is lower than number of requested disks for test ($max_disks)"
fi

notice "running SPDK vhost"
spdk_vhost_run
notice "..."

# Calculate number of needed splits per NVMe
# so that each VM gets it's own bdev during test
# so that each VM gets it's own bdev during test.
splits=()

if [[ $vm_count -le $max_disks ]]; then
	for i in $(seq 0 $((max_disks - 1))); do
		splits+=("1")
	done
else
	#Calculate least minimum number of splits on each disks
	for i in `seq 0 $((max_disks - 1))`; do
		splits+=( $((vm_count / max_disks)) )
	done

	# Split up the remainder
	for i in `seq 0 $((vm_count % max_disks - 1))`; do
		(( splits[i]++ ))
	done

fi
notice "Preparing NVMe setup..."
notice "Using $max_disks physical NVMe drives"
notice "Nvme split list: ${splits[@]}"
# Prepare NVMes - Lvols or Splits

# ===== Prepare NVMe splits & run vhost process =====
if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	trap 'vm_kill_all; sleep 1; cleanup_kernel_vhost; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
	# Split disks using parted for kernel vhost
	newline=$'\n'
	for (( i=0; i<$max_disks; i++ ));do
		parted -s /dev/nvme${i}n1 mklabel msdos
		parted -s /dev/nvme${i}n1 mkpart extended 2048s 100%
		part_size=$((100/${splits[$i]})) # Split 100% of disk into roughly even parts
		echo "  Creating ${splits[$i]} partitions of relative disk size ${part_size}"

		for p in $(seq 0 $((${splits[$i]} - 1))); do
			p_start=$(($p*$part_size))
			p_end=$(($p_start+$part_size))
			parted -s /dev/nvme${i}n1 mkpart logical ${p_start}% ${p_end}%
		done
	done
	sleep 1

	# Prepare kernel vhost configuration
	# Below grep: match only NVMe partitions which are not "Extended" type.
	# For example: will match nvme0n1p15 but not nvme0n1p1
	partitions=$(ls -1 /dev/nvme* | sort --version-sort | grep -P 'p(?!1$)\d+')
	backstores=()

	# Create block backstores for vhost kernel process
	for p in $partitions; do
		backstore_name=$(basename $p)
		backstores+=("$backstore_name")
		targetcli backstores/block create $backstore_name $p
	done

	# Create kernel vhost controllers and add LUNs
	for ((i=0; i<${#backstores[*]}; i++)); do
		# WWPN prefix misses 3 characters. Need to complete it
		# using block backstore number
		x=$(printf %03d $i)
		wwpn="${wwpn_prefix}${x}"
		targetcli vhost/ create $wwpn
		targetcli vhost/$wwpn/tpg1/luns create /backstores/block/${backstores[$i]}
	done
else
	# Run vhost process and prepare split vbdevs or lvol bdevs
	notice "running SPDK vhost"
	spdk_vhost_run
	notice "..."

	if [[ $use_split == true ]]; then
		notice "Using split vbdevs"
		trap 'cleanup_split_cfg; error_exit "${FUNCNAME}" "${LINENO}"' INT ERR
		split_bdevs=()
		for (( i=0; i<$max_disks; i++ ));do
			out=$($rpc_py construct_split_vbdev Nvme${i}n1 ${splits[$i]})
		for s in $out; do
			split_bdevs+=("$s")
			for s in $(seq 0 $((${splits[$i]}-1))); do
				split_bdevs+=("Nvme${i}n1p${s}")
			done
		done
		bdevs=("${split_bdevs[@]}")
@@ -172,12 +256,28 @@ else
		done
		bdevs=("${lvol_bdevs[@]}")
	fi
fi

if [[ ! "$ctrl_type" == "kernel_vhost" && $lvol_precondition == true && $use_split == false ]]; then
	# Need to precondition lvols due to UNMAP done after creation
	# of lvol_stores. Kill vhost for now and run fio_plugin over all lvol bdevs
	spdk_vhost_kill
	$SPDK_BUILD_DIR/scripts/gen_nvme.sh > $SPDK_BUILD_DIR/nvme.cfg
	fio_filename=$(printf ":%s" "${bdevs[@]}")
	fio_filename=${fio_filename:1}
	$precond_fio_bin --name="lvol_precondition" \
	--ioengine="${SPDK_BUILD_DIR}/examples/bdev/fio_plugin/fio_plugin" \
	--rw="write" --spdk_conf="${SPDK_BUILD_DIR}/nvme.cfg" --thread="1" \
	--group_reporting --direct="1" --size="100%" --loops="2" --bs="256k" \
	--filename="${fio_filename}" || true
	spdk_vhost_run
fi

# Prepare VMs and controllers
for (( i=0; i<$vm_count; i++)); do
	vm="vm_$i"

	setup_cmd="vm_setup --disk-type=$ctrl_type --force=$i"
	setup_cmd="vm_setup --disk-type=$ctrl_type --force=$i --memory=$vm_memory"
	setup_cmd+=" --os=$vm_image"

	if [[ "$ctrl_type" == "spdk_vhost_scsi" ]]; then
@@ -187,6 +287,9 @@ for (( i=0; i<$vm_count; i++)); do
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		$rpc_py construct_vhost_blk_controller naa.$i.$i ${bdevs[$i]}
		setup_cmd+=" --disks=$i"
	elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
		x=$(printf %03d $i)
		setup_cmd+=" --disks=${wwpn_prefix}${x}"
	fi
	$setup_cmd
	used_vms+=" $i"
@@ -197,6 +300,22 @@ done
vm_run $used_vms
vm_wait_for_boot 300 $used_vms

if [[ -n "$kernel_cpus" ]]; then
	mkdir -p /sys/fs/cgroup/cpuset/spdk
	kernel_mask=$vhost_0_reactor_mask
	kernel_mask=${kernel_mask#"["}
	kernel_mask=${kernel_mask%"]"}

	echo "$kernel_mask" >> /sys/fs/cgroup/cpuset/spdk/cpuset.cpus
	echo "0-1" >> /sys/fs/cgroup/cpuset/spdk/cpuset.mems

	kernel_vhost_pids=$(ps aux | grep -Po "^root\s+\K(\d+)(?=.*\[vhost-\d+\])")
	for kpid in $kernel_vhost_pids; do
		echo "Limiting kernel vhost pid ${kpid}"
		echo "${kpid}" >> /sys/fs/cgroup/cpuset/spdk/tasks
	done
fi

# Run FIO
fio_disks=""
for vm_num in $used_vms; do
@@ -209,21 +328,57 @@ for vm_num in $used_vms; do
		vm_check_scsi_location $vm_num
	elif [[ "$ctrl_type" == "spdk_vhost_blk" ]]; then
		vm_check_blk_location $vm_num
	elif [[ "$ctrl_type" == "kernel_vhost" ]]; then
		vm_check_scsi_location $vm_num
	fi

	if [[ -n "$vm_throttle" ]]; then
		block=$(printf '%s' $SCSI_DISK)
		major_minor=$(vm_ssh "$vm_num" "cat /sys/block/$block/dev")
		vm_ssh "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.read_iops_device"
		vm_ssh "$vm_num" "echo \"$major_minor $vm_throttle\" > /sys/fs/cgroup/blkio/blkio.throttle.write_iops_device"
	fi

	fio_disks+=" --vm=${vm_num}$(printf ':/dev/%s' $SCSI_DISK)"
done

# Run FIO traffic
run_fio $fio_bin --job-file="$fio_job" --out="$TEST_DIR/fio_results" --json $fio_disks
run_fio $fio_bin --job-file="$fio_job" --out="$TEST_DIR/fio_results" --json $fio_disks &
fio_pid=$!

if $vm_sar_enable; then
	sleep $vm_sar_delay
	mkdir -p $TEST_DIR/fio_results/sar_stats
	pids=""
	for vm_num in $used_vms; do
		vm_ssh "$vm_num" "mkdir -p /root/sar; sar -P ALL $vm_sar_interval $vm_sar_count >> /root/sar/sar_stats_VM${vm_num}.txt" &
		pids+=" $!"
	done
	for j in $pids; do
		wait $j
	done
	for vm_num in $used_vms; do
		vm_scp "$vm_num" "root@127.0.0.1:/root/sar/sar_stats_VM${vm_num}.txt" "$TEST_DIR/fio_results/sar_stats"
	done
fi

wait $fio_pid

notice "Shutting down virtual machines..."
vm_shutdown_all

#notice "Shutting down SPDK vhost app..."
if [[ "$ctrl_type" == "kernel_vhost" ]]; then
	cleanup_kernel_vhost || true
else
	notice "Shutting down SPDK vhost app..."
	if [[ $use_split == true ]]; then
		cleanup_split_cfg
	else
		cleanup_lvol_cfg
	fi
	spdk_vhost_kill
fi

if [[ -n "$kernel_cpus" ]]; then
	rmdir /sys/fs/cgroup/cpuset/spdk
fi