Commit b1d3b47b authored by Pawel Niedzwiecki's avatar Pawel Niedzwiecki Committed by Jim Harris
Browse files

test/nvme: Test case 1 and 2 of NVME performance tests



Measure Iops/core performance

Change-Id: I5c2632b45c62cadbd46a67a7284ece9f77b70902
Signed-off-by: default avatarPawel Niedzwiecki <pawelx.niedzwiecki@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/423170


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJohn Kariuki <John.K.Kariuki@intel.com>
Reviewed-by: default avatarKarol Latecki <karol.latecki@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
parent 933270ea
Loading
Loading
Loading
Loading
+266 −0
Original line number Diff line number Diff line
#!/usr/bin/env bash

set -xe
BASE_DIR=$(readlink -f $(dirname $0))
ROOT_DIR=$(readlink -f $BASE_DIR/../../..)
PLUGIN_DIR_NVME=$ROOT_DIR/examples/nvme/fio_plugin
PLUGIN_DIR_BDEV=$ROOT_DIR/examples/bdev/fio_plugin
. $ROOT_DIR/scripts/common.sh || exit 1
. $ROOT_DIR/test/common/autotest_common.sh
NVME_FIO_RESULTS=$BASE_DIR/result.json

PRECONDITIONING=true
FIO_BIN="/usr/src/fio/fio"
RUNTIME=600
PLUGIN="nvme"
RAMP_TIME=30
BLK_SIZE=4096
RW=randrw
MIX=100
TYPE=("randread" "randrw" "randwrite")
IODEPTH=256
DISKNO=1
ONEWORKLOAD=false
CPUS_ALLOWED=1
REPEAT_NO=3

function get_cores(){
	local cpu_list="$1"
	for cpu in ${cpu_list//,/ }; do
		echo $cpu
	done
}

function get_cores_numa_node(){
	local cores=$1
	for core in $cores; do
		echo $(lscpu -p=cpu,node | grep "^$core\b" | awk -F ',' '{print $2}')
	done
}

function get_numa_node(){
	local plugin=$1
	local disks=$2
	if [ "$plugin" = "nvme" ]; then
		for bdf in $disks; do
			local driver=`grep DRIVER /sys/bus/pci/devices/$bdf/uevent |awk -F"=" '{print $2}'`
			# Use this check to ommit blacklisted devices ( not binded to driver with setup.sh script )
			if [ "$driver" = "vfio-pci" ] || [ "$driver" = "uio_pci_generic" ]; then
				echo $(cat /sys/bus/pci/devices/$bdf/numa_node)
			fi
		done
	elif [ "$plugin" = "bdev" ]; then
		local bdevs=$(discover_bdevs $ROOT_DIR $BASE_DIR/bdev.conf)
		for name in $disks; do
			local bdev_bdf=$(jq -r ".[] | select(.name==\"$name\").driver_specific.nvme.pci_address" <<< $bdevs)
			echo $(cat /sys/bus/pci/devices/$bdev_bdf/numa_node)
		done
	fi
}

function get_disks(){
	local plugin=$1
	if [ "$plugin" = "nvme" ]; then
		for bdf in $(iter_pci_class_code 01 08 02); do
			driver=`grep DRIVER /sys/bus/pci/devices/$bdf/uevent |awk -F"=" '{print $2}'`
			if [ "$driver" = "vfio-pci" ] || [ "$driver" = "uio_pci_generic" ]; then
				echo "$bdf"
			fi
		done
	elif [ "$plugin" = "bdev" ]; then
		local bdevs=$(discover_bdevs $ROOT_DIR $BASE_DIR/bdev.conf)
		echo $(jq -r '.[].name' <<< $bdevs)
	fi
}

function get_disks_on_numa(){
	local devs=($1)
	local numas=($2)
	local numa_no=$3
	local disks_on_numa=""
	local i

	for (( i=0; i<${#devs[@]}; i++ ))
	do
		if [ ${numas[$i]} = $numa_no ]; then
			disks_on_numa=$(($disks_on_numa+1))
		fi
	done
	echo $disks_on_numa
}

function create_fio_config(){
	local disk_no=$1
	local plugin=$2
	local disks=($3)
	local disks_numa=($4)
	local cores=($5)
	local total_disks=${#disks[@]}
	local no_cores=${#cores[@]}
	local filename=""

	local cores_numa=($(get_cores_numa_node "$5"))
	local disks_per_core=$(($disk_no/$no_cores))
	local disks_per_core_mod=$(($disk_no%$no_cores))
	for (( i=0; i<$no_cores; i++ ))
	do
		core_numa=${cores_numa[$i]}
		total_disks_per_core=$disks_per_core
		if [ "$disks_per_core_mod" -gt "0" ]; then
			total_disks_per_core=$(($disks_per_core+1))
			disks_per_core_mod=$(($disks_per_core_mod-1))
		fi

		if [ "$total_disks_per_core" = "0" ]; then
			break
		fi

		sed -i -e "\$a[filename${i}]" $BASE_DIR/config.fio
		#use cpus_allowed as cpumask works only for cores 1-32
		sed -i -e "\$acpus_allowed=${cores[$i]}" $BASE_DIR/config.fio
		m=0	#counter of disks per cpu core numa
		n=0 #counter of all disks
		while [ "$m" -lt  "$total_disks_per_core" ]; do
			if [ "${disks_numa[$n]}" = "$core_numa" ]; then
				m=$(($m+1))
				if [ "$plugin" = "nvme" ]; then
					filename='trtype=PCIe traddr='${disks[$n]//:/.}' ns=1'
				elif [ "$plugin" = "bdev" ]; then
					filename=${disks[$n]}
				fi
				sed -i -e "\$afilename=$filename" $BASE_DIR/config.fio
				#Mark numa of n'th disk as "x" to mark it as claimed
				disks_numa[$n]="x"
			fi
			n=$(($n+1))
			# If there is no more disks with numa node same as cpu numa node, switch to other numa node.
			if [ $n -ge $total_disks ]; then
				if [ "$core_numa" = "1" ]; then
					core_numa=0
				else
					core_numa=1
				fi
				n=0
			fi
		done
		echo "" >> $BASE_DIR/config.fio
	done
}

function preconditioning(){
	local dev_name=""
	local filename=""
	local i
	sed -i -e "\$a[preconditioning]" $BASE_DIR/config.fio
	for (( i=0; i < $DISKNO; i++ ))
	do
		dev_name='trtype=PCIe traddr='${disks[i]//:/.}' ns=1'
		filename+=$(printf %s":" "$dev_name")
	done
	echo "** Preconditioning disks, this can take a while, depending on the size of disks."
	run_spdk_nvme_fio "nvme" --filename="$filename" --size=100% --loops=2 --bs=1M\
		--rw=write --iodepth=32
}

function get_results(){
	local reads_pct=$2
	local writes_pct=$((100-$2))

	case "$1" in
		iops)
		iops=$(cat $NVME_FIO_RESULTS | jq -r '.jobs[] | (.read.iops + .write.iops)')
		iops=${iops%.*}
		echo $iops
		;;
		mean_lat_usec)
		mean_lat=$(cat $NVME_FIO_RESULTS | jq -r ".jobs[] | (.read.lat_ns.mean * $reads_pct + .write.lat_ns.mean * $writes_pct)")
		mean_lat=${mean_lat%.*}
		echo $(( $mean_lat/100000 ))
		;;
		p99_lat_usec)
		p99_lat=$(cat $NVME_FIO_RESULTS | jq -r ".jobs[] | (.read.clat_ns.percentile.\"99.000000\" * $reads_pct + .write.clat_ns.percentile.\"99.000000\" * $writes_pct)")
		p99_lat=${p99_lat%.*}
		echo $(( $p99_lat/100000 ))
		;;
		p99_99_lat_usec)
		p99_99_lat=$(cat $NVME_FIO_RESULTS | jq -r ".jobs[] | (.read.clat_ns.percentile.\"99.990000\" * $reads_pct + .write.clat_ns.percentile.\"99.990000\" * $writes_pct)")
		p99_99_lat=${p99_99_lat%.*}
		echo $(( $p99_99_lat/100000 ))
		;;
		stdev_usec)
		stdev=$(cat $NVME_FIO_RESULTS | jq -r ".jobs[] | (.read.clat_ns.stddev * $reads_pct + .write.clat_ns.stddev * $writes_pct)")
		stdev=${stdev%.*}
		echo $(( $stdev/100000 ))
		;;
	esac
}

function run_spdk_nvme_fio(){
	local plugin=$1
	echo "** Running fio test, this can take a while, depending on the run-time and ramp-time setting."
	if [ "$plugin" = "nvme" ]; then
		LD_PRELOAD=$PLUGIN_DIR_NVME/fio_plugin $FIO_BIN $BASE_DIR/config.fio --output-format=json\
		 "${@:2}" --ioengine=spdk
	elif [ "$plugin" = "bdev" ]; then
		LD_PRELOAD=$PLUGIN_DIR_BDEV/fio_plugin $FIO_BIN $BASE_DIR/config.fio --output-format=json\
		 "${@:2}" --ioengine=spdk_bdev --spdk_conf=$BASE_DIR/bdev.conf --spdk_mem=4096
	fi

	sleep 1
}

function usage()
{
	set +x
	[[ ! -z $2 ]] && ( echo "$2"; echo ""; )
	echo "Run NVMe PMD/BDEV performance test. Change options for easier debug and setup configuration"
	echo "Usage: $(basename $1) [options]"
	echo "-h, --help                Print help and exit"
	echo "    --run-time=TIME[s]    Tell fio to run the workload for the specified period of time. [default=$RUNTIME]"
	echo "    --ramp-time=TIME[s]   Fio will run the specified workload for this amount of time before logging any performance numbers. [default=$RAMP_TIME]"
	echo "    --fio-bin=PATH        Path to fio binary. [default=$FIO_BIN]"
	echo "    --fio-plugin=STR      Use bdev or nvme fio_plugin. [default=$PLUGIN]"
	echo "    --max-disk=INT,ALL    Number of disks to test on, this will run multiple workloads with increasing number of disk each run, if =ALL then test on all found disk. [default=$DISKNO]"
	echo "    --disk-no=INT,ALL     Number of disks to test on, this will run one workload on selected number od disks, it discards max-disk setting, if =ALL then test on all found disk"
	echo "    --rwmixread=INT       Percentage of a mixed workload that should be reads. [default=$MIX]"
	echo "    --iodepth=INT         Number of I/Os to keep in flight against the file. [default=$IODEPTH]"
	echo "    --cpu-allowed=INT     Comma-separated list of CPU cores used to run the workload. [default=$CPUS_ALLOWED]"
	echo "    --repeat-no=INT       How many times to repeat each workload. [default=$REPEAT_NO]"
	echo "    --no-preconditioning  Skip preconditioning"
	set -x
}

while getopts 'h-:' optchar; do
	case "$optchar" in
		-)
		case "$OPTARG" in
			help) usage $0; exit 0 ;;
			run-time=*) RUNTIME="${OPTARG#*=}" ;;
			ramp-time=*) RAMP_TIME="${OPTARG#*=}" ;;
			fio-bin=*) FIO_BIN="${OPTARG#*=}" ;;
			max-disk=*) DISKNO="${OPTARG#*=}" ;;
			disk-no=*) DISKNO="${OPTARG#*=}"; ONEWORKLOAD=true ;;
			fio-plugin=*) PLUGIN="${OPTARG#*=}" ;;
			rwmixread=*) MIX="${OPTARG#*=}" ;;
			iodepth=*) IODEPTH="${OPTARG#*=}" ;;
			no-preconditioning) PRECONDITIONING=false ;;
			cpu-allowed=*) CPUS_ALLOWED="${OPTARG#*=}" ;;
			repeat-no=*) REPEAT_NO="${OPTARG#*=}" ;;
			*) usage $0 echo "Invalid argument '$OPTARG'"; exit 1 ;;
		esac
		;;
		h) usage $0; exit 0 ;;
		*) usage $0 "Invalid argument '$optchar'"; exit 1 ;;
	esac
done

trap 'rm -f *.state $BASE_DIR/bdev.conf; print_backtrace' ERR SIGTERM SIGABRT
mkdir -p $BASE_DIR/results
date="$(date +'%m_%d_%Y_%H%M%S')"
disks=($(get_disks nvme))
if [[ $DISKNO == "ALL" ]] || [[ $DISKNO == "all" ]]; then
	DISKNO=${#disks[@]}
elif [[ $DISKNO -gt ${#disks[@]} ]] || [[ ! $DISKNO =~ ^[0-9]+$ ]]; then
	echo "error: Required devices number ($DISKNO) is not a valid number or it's larger than the number of devices found (${#disks[@]})"
	exit 1
fi
+5 −0
Original line number Diff line number Diff line
[global]
thread=1
group_reporting=1
direct=1
norandommap=1
+103 −0
Original line number Diff line number Diff line
#!/usr/bin/env bash

# Automated script that runs NVMe PMD/BDEV performance test.
# This script should be run as root. Please run the scripts/setup.sh before running this script to bind disks to VFIO/UIO driver
# This script takes the following parameters:
# "--run-time" - the run time for the workload in seconds
# "--ramp-time" - Fio will run the specified workload for this amount of time before logging any performance numbers
# "--cpu-allowed" - A comma-separated list of CPU cores used to run the workload. - When the spdk fio plugin is chosen, NVMe devices will
# be aligned to specific core according to their NUMA node. The script will try to align each core with devices matching core's
# on the same NUMA node first but if there are no devices left in the same NUMA node as the CPU Core then it will use devices on the other NUMA node.
# It is important to choose cores that will ensure best NUMA node allocation. For example, on a system with 8 devices on NUMA node
# 0 and 8 devices on NUMA node 1, cores 0-27 on numa node 0 and 28-55 on numa node 1, if test uses 16 disk and four cores
# then "--cpu-allowed=1,2,28,29" results in a NUMA-balanced configuration with 4 devices on each CPU core.
# However, if the test will use 10 CPU cores, then best option would be "--cpu-allowed=1,2,3,4,28,29,30,31,32,33" because cores 1-4 will be aligned with
# 2 devices on numa0 per core, cores 28-29 will be aligned with 2 devices on numa1 per core and cores 30-33 with 1 device on numa1 per core.
# "--iodepth" - Number of I/Os to keep in flight per devices.
# "--fio-plugin" - The SPDK fio plugin used in this test - bdev or nvme
# "--no-preconditioning" - skip preconditioning - Normally the script will precondition disks to put them in a steady state.
# However, preconditioning could be skipped, for example preconditiong has been already made and workload was 100% reads.
# "--disk-no" - use specified number of disks for test.
# "--repeat-no" Repeat each workolad specified number of times.
# An Example Performance Test Run
# "./spdk/test/perf/run_perf.sh --run-time=600 --ramp-time=60 --cpu-allowed=28 --fio-bin=/usr/src/fio/fio\
#  --rwmixread=100 --iodepth=256 --fio-plugin=bdev --no-preconditioning --disk-no=6"
# This command will run test using fio plugin for 600 seconds, 60 sec of ram time, randrw job with
# 100% reads with io depth 256 per disk, on 6 devices and skips preconditioning. Cpu core used for this test is
# core no 28.
BASE_DIR=$(readlink -f $(dirname $0))
. $BASE_DIR/common.sh
if [ $PLUGIN = "bdev" ]; then
	$ROOT_DIR/scripts/gen_nvme.sh >> $BASE_DIR/bdev.conf
fi

disk_names=$(get_disks $PLUGIN)
disks_numa=$(get_numa_node $PLUGIN "$disk_names")
cores=$(get_cores "$CPUS_ALLOWED")
no_cores=($cores)
no_cores=${#no_cores[@]}

if $PRECONDITIONING; then
	cp $BASE_DIR/config.fio.tmp $BASE_DIR/config.fio
	preconditioning
	rm -f $BASE_DIR/config.fio
fi

result_dir=perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${date}
mkdir $BASE_DIR/results/$result_dir
result_file=$BASE_DIR/results/$result_dir/perf_results_${BLK_SIZE}BS_${IODEPTH}QD_${RW}_${MIX}MIX_${PLUGIN}_${date}.csv
unset iops_disks mean_lat_disks p99_lat_disks p99_99_lat_disks stdev_disks
echo "run-time,ramp-time,fio-plugin,QD,block-size,num-cpu-cores,workload,workload-mix" > $result_file
printf "%s,%s,%s,%s,%s,%s,%s,%s\n" $RUNTIME $RAMP_TIME $PLUGIN $IODEPTH $BLK_SIZE $no_cores $RW $MIX >> $result_file
echo "num_of_disks,iops,avg_lat[usec],p99[usec],p99.99[usec],stdev[usec]" >> $result_file
#Run each workolad $REPEAT_NO times
for (( j=0; j < $REPEAT_NO; j++ ))
do
	#Start with $DISKNO disks and remove 2 disks for each run to avoid preconditioning before each run.
	for (( k=$DISKNO; k >= 1; k-=2 ))
	do
		cp $BASE_DIR/config.fio.tmp $BASE_DIR/config.fio
		echo "" >> $BASE_DIR/config.fio
		#The SPDK fio plugin supports submitting/completing I/Os to multiple SSDs from a single thread.
		#Therefore, the per thread queue depth is set to the desired IODEPTH/device X the number of devices per thread.
		qd=$(( $IODEPTH * $k ))
		filename=$(create_fio_config $k $PLUGIN "$disk_names" "$disks_numa" "$cores")
		desc="Running Test: Blocksize=${BLK_SIZE} Workload=$RW MIX=${MIX} qd=${IODEPTH} fio_plugin=$PLUGIN"
		run_spdk_nvme_fio $PLUGIN "--runtime=$RUNTIME" "--ramp_time=$RAMP_TIME" "--bs=$BLK_SIZE"\
		 "--rw=$RW" "--rwmixread=$MIX" "--iodepth=$qd" "--output=$NVME_FIO_RESULTS" "--time_based=1"\
		 "--description=$desc"

		#Store values for every number of used disks
		iops_disks[$k]=$((${iops_disks[$k]} + $(get_results iops $MIX)))
		mean_lat_disks_usec[$k]=$((${mean_lat_disks_usec[$k]} + $(get_results mean_lat_usec $MIX)))
		p99_lat_disks_usec[$k]=$((${p99_lat_disks_usec[$k]} + $(get_results p99_lat_usec $MIX)))
		p99_99_lat_disks_usec[$k]=$((${p99_99_lat_disks_usec[$k]} + $(get_results p99_99_lat_usec $MIX)))
		stdev_disks_usec[$k]=$((${stdev_disks_usec[$k]} + $(get_results stdev_usec $MIX)))
		cp $NVME_FIO_RESULTS $BASE_DIR/results/$result_dir/perf_results_${MIX}_${PLUGIN}_${no_cores}cpus_${date}_${k}_disks_${j}.json
		cp $BASE_DIR/config.fio $BASE_DIR/results/$result_dir/config_${MIX}_${PLUGIN}_${no_cores}cpus_${date}_${k}_disks_${j}.fio
		rm -f $BASE_DIR/config.fio

		#if tested on only one number of disk
		if $ONEWORKLOAD; then
			break
		fi
	done
done
#Write results to csv file
for (( k=$DISKNO; k >= 1; k-=2 ))
do
	iops_disks[$k]=$((${iops_disks[$k]} / 3))
	mean_lat_disks_usec[$k]=$((${mean_lat_disks_usec[$k]} / 3))
	p99_lat_disks_usec[$k]=$((${p99_lat_disks_usec[$k]} / 3))
	p99_99_lat_disks_usec[$k]=$((${p99_99_lat_disks_usec[$k]} / 3))
	stdev_disks_usec[$k]=$((${stdev_disks_usec[$k]} / 3))

	printf "%s,%s,%s,%s,%s,%s\n" ${k} ${iops_disks[$k]} ${mean_lat_disks_usec[$k]} ${p99_lat_disks_usec[$k]}\
	 ${p99_99_lat_disks_usec[$k]} ${stdev_disks_usec[$k]} >> $result_file

	#if tested on only one numeber of disk
	if $ONEWORKLOAD; then
		break
	fi
done
rm -f $BASE_DIR/bdev.conf $BASE_DIR/config.fio