Commit 36e573fc authored by Michal Berger, committed by Tomasz Zawadzki
Browse files

scripts/common: Introduce cache for the pci devices



Expose a cache of PCI devices in the form of an associative array that
can be looked up during the runtime of a script like setup.sh.

In case of setup.sh, caching speeds up execution quite visibly:

config run, no caching:
real    0m4.488s
user    0m1.440s
sys     0m1.260s

config run, caching in use:
real    0m2.876s
user    0m0.365s
sys     0m0.420s

Note that for initial config runs, binding controllers to proper
drivers is the actual bottleneck.

status run, no caching:
real    0m1.877s
user    0m1.252s
sys     0m0.984s

status run, caching in use:
real    0m0.371s
user    0m0.242s
sys     0m0.204s

reset run, no caching:
real    0m2.559s
user    0m1.409s
sys     0m1.322s

reset run, caching in use:
real    0m0.960s
user    0m0.432s
sys     0m0.419s

Additionally, in case common tools, e.g. lspci, are missing, fall back to
sysfs to pick all needed devices from the PCI bus. This fallback is
targeted at Linux systems only.

Change-Id: Ib69ef724b9f09eca0cbb9b88f1c363edc1efd5dc
Signed-off-by: Michal Berger <michalx.berger@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/1845


Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
parent a34329e8
Loading
Loading
Loading
Loading
+106 −0
Original line number Diff line number Diff line
@@ -28,6 +28,108 @@ function pci_can_use() {
	return 1
}

# Prepare the global associative array pci_bus_cache for (re)population.
#
# Globals:  pci_bus_cache (declared, possibly emptied), CMD (read)
# Returns:  0 when the cache has been emptied and is ready to be filled,
#           1 when it already holds entries and CMD is not "reset"; the
#           cache is left untouched in that case.
cache_pci_init () {
	local -gA pci_bus_cache

	# An already populated cache is kept as is, unless a reset is requested.
	if [[ -n ${pci_bus_cache[*]} && $CMD != reset ]]; then
		return 1
	fi

	pci_bus_cache=()
}

# Record a single PCI device in pci_bus_cache.
#
# Arguments: $1 - address of the device, stored verbatim
#            $2 - class code, with or without the 0x prefix (may be empty)
#            $3 - vendor id, with or without the 0x prefix (may be empty)
#            $4 - device id, with or without the 0x prefix (may be empty)
# Globals:   pci_bus_cache (written)
#
# The address is appended (space separated) under the 0x-prefixed class key
# and, when both vendor and device are given, under the "vendor", "device"
# and "vendor:device" keys.
cache_pci () {
	local pci=$1 class=$2 vendor=$3 device=$4
	local key
	local -a keys=()

	if [[ -n $class ]]; then
		keys+=("0x${class/0x}")
	fi
	if [[ -n $vendor && -n $device ]]; then
		vendor=0x${vendor/0x} device=0x${device/0x}
		keys+=("$vendor" "$device" "$vendor:$device")
	fi

	for key in "${keys[@]}"; do
		if [[ -n ${pci_bus_cache["$key"]} ]]; then
			pci_bus_cache["$key"]+=" $pci"
		else
			pci_bus_cache["$key"]=$pci
		fi
	done
}

# Populate pci_bus_cache by walking the per-device directories under sysfs.
#
# Arguments: $1 - directory holding one sub-directory per PCI device
#                 (optional, defaults to /sys/bus/pci/devices)
# Returns:   1 when the directory does not exist; 0 otherwise, including
#            the case where cache_pci_init reports that the cache is
#            already populated and nothing has to be done.
cache_pci_bus_sysfs () {
	local sysfs_devices=${1:-/sys/bus/pci/devices}

	[[ -e $sysfs_devices ]] || return 1

	cache_pci_init || return 0

	local pci
	local class vendor device

	for pci in "$sysfs_devices"/*; do
		# An unmatched glob stays literal and an entry may lack some of the
		# attributes. Skip these instead of failing on the reads below.
		[[ -r $pci/class && -r $pci/vendor && -r $pci/device ]] || continue
		class=$(<"$pci/class") vendor=$(<"$pci/vendor") device=$(<"$pci/device")
		cache_pci "${pci##*/}" "$class" "$vendor" "$device"
	done
}

# Populate pci_bus_cache from the machine-readable output of `lspci -Dnmm`.
#
# Returns: 1 when lspci cannot be found; 0 otherwise, including the case
#          where cache_pci_init reports that the cache is already populated.
cache_pci_bus_lspci () {
	hash lspci 2>/dev/null || return 1

	cache_pci_init || return 0

	local dev
	# The prog. interface byte is printed in hex (e.g. -p8a), hence all hex
	# digits have to be accepted here, not only the decimal ones.
	local progif_re='-p([[:xdigit:]]+)'

	while read -ra dev; do
		dev=("${dev[@]//\"/}")
		# Ignore lines which can't hold all of: pci class vendor device
		(( ${#dev[@]} >= 4 )) || continue
		# lspci splits ls byte of the class (prog. interface) into a separate
		# field if it's != 0. Look for it and normalize the value to fit with
		# what kernel exposes under sysfs.
		if [[ ${dev[*]} =~ $progif_re ]]; then
			dev[1]+=${BASH_REMATCH[1]}
		else
			dev[1]+=00
		fi
		# pci class vendor device
		cache_pci "${dev[@]::4}"
	done < <(lspci -Dnmm)
}

# Populate pci_bus_cache from the output of `pciconf -l`.
#
# Each line is expected to start with a "name@pciD:B:S:F:" selector followed
# by key=value fields. Fields are looked up by name rather than by position,
# so both layouts are handled: the one with a combined chip= field (device
# id in the upper, vendor id in the lower 16 bits) and the one with
# separate vendor= and device= fields.
#
# Returns: 1 when pciconf cannot be found; 0 otherwise, including the case
#          where cache_pci_init reports that the cache is already populated.
cache_pci_bus_pciconf () {
	hash pciconf 2>/dev/null || return 1

	cache_pci_init || return 0

	local -a fields
	local field selector
	local pci class chip vendor device
	local domain bus slot func

	while read -ra fields; do
		selector=${fields[0]}
		[[ $selector == *@pci* ]] || continue

		class="" chip="" vendor="" device=""
		for field in "${fields[@]:1}"; do
			case "$field" in
				class=*) class=${field#*=} ;;
				chip=*) chip=${field#*=} ;;
				vendor=*) vendor=${field#*=} ;;
				device=*) device=${field#*=} ;;
			esac
		done
		[[ -n $class ]] || continue

		if [[ -n $chip ]]; then
			vendor=$(( chip & 0xffff ))
			device=$(( (chip >> 16) & 0xffff ))
		fi
		if [[ -n $vendor && -n $device ]]; then
			printf -v vendor '0x%04x' "$(( vendor ))"
			printf -v device '0x%04x' "$(( device ))"
		else
			# Without both ids only the class lookup can be cached.
			vendor="" device=""
		fi
		printf -v class '0x%06x' "$(( class ))"

		# The selector carries decimal domain:bus:slot:function numbers.
		IFS=":" read -r domain bus slot func _ <<<"${selector##*pci}"
		printf -v pci '%04x:%02x:%02x:%x' \
			"$domain" "$bus" "$slot" "$func"

		cache_pci "$pci" "$class" "$vendor" "$device"
	done < <(pciconf -l)
}

# Fill the PCI cache using whatever enumeration method fits the running OS,
# as reported by `uname -s`:
#   Linux   - lspci, falling back to sysfs when the lspci variant fails
#   FreeBSD - pciconf
# Any other system is silently ignored.
cache_pci_bus () {
	local os
	os=$(uname -s) || true

	if [[ $os == Linux ]]; then
		cache_pci_bus_lspci || cache_pci_bus_sysfs
	elif [[ $os == FreeBSD ]]; then
		cache_pci_bus_pciconf
	fi
}

# Print, one per line, the cached PCI addresses stored under a given key.
#
# Arguments: $1 - key to look up in pci_bus_cache (defaults to 0x010802,
#                 the class of the nvme devices)
#            $2 - when it evaluates to non-zero, print only that many of
#                 the leading addresses
# Returns:   1 when cache_pci_bus_sysfs fails; 0 otherwise, also when
#            nothing is cached under the key (nothing is printed then).
iter_all_pci_sysfs () {
	cache_pci_bus_sysfs || return 1

	local key=${1:-0x010802} limit=$2
	local -a matches

	if [[ -z ${pci_bus_cache["$key"]} ]]; then
		return 0
	fi
	read -ra matches <<<"${pci_bus_cache["$key"]}"

	# No (or zero) limit means every address is printed.
	(( limit )) || limit=${#matches[@]}
	printf '%s\n' "${matches[@]::limit}"
}

# This function will ignore PCI_WHITELIST and PCI_BLACKLIST
function iter_all_pci_class_code() {
	local class
@@ -52,6 +154,8 @@ function iter_all_pci_class_code() {
		local addr=($(pciconf -l | grep -i "class=0x${class}${subclass}${progif}" | \
			cut -d$'\t' -f1 | sed -e 's/^[a-zA-Z0-9_]*@pci//g' | tr ':' ' '))
		printf "%04x:%02x:%02x:%x\n" ${addr[0]} ${addr[1]} ${addr[2]} ${addr[3]}
	elif iter_all_pci_sysfs "$(printf '0x%06x' $(( 0x$progif | 0x$subclass << 8 | 0x$class << 16 )))"; then
		:
	else
		echo "Missing PCI enumeration utility" >&2
		exit 1
@@ -72,6 +176,8 @@ function iter_all_pci_dev_id() {
		local addr=($(pciconf -l | grep -i "chip=0x${dev_id}${ven_id}" | \
			cut -d$'\t' -f1 | sed -e 's/^[a-zA-Z0-9_]*@pci//g' | tr ':' ' '))
		printf "%04x:%02x:%02x:%x\n" ${addr[0]} ${addr[1]} ${addr[2]} ${addr[3]}
	elif iter_all_pci_sysfs "0x$ven_id:0x$dev_id"; then
		:
	else
		echo "Missing PCI enumeration utility" >&2
		exit 1
+34 −45
Original line number Diff line number Diff line
@@ -211,7 +211,7 @@ function configure_linux_pci {
	fi

	# NVMe
	for bdf in $(iter_all_pci_class_code 01 08 02); do
	for bdf in ${pci_bus_cache["0x010802"]}; do
		blknames=()
		if ! pci_can_use $bdf; then
			pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller at $bdf"
@@ -244,7 +244,7 @@ function configure_linux_pci {

	while IFS= read -r dev_id
	do
		for bdf in $(iter_all_pci_dev_id 8086 $dev_id); do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use $bdf; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted I/OAT device"
				continue
@@ -263,7 +263,7 @@ function configure_linux_pci {

        while IFS= read -r dev_id
        do
                for bdf in $(iter_all_pci_dev_id 8086 $dev_id); do
                for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
                        if ! pci_can_use $bdf; then
                                pci_dev_echo "$bdf" "Skipping un-whitelisted IDXD device"
                                continue
@@ -282,7 +282,7 @@ function configure_linux_pci {

	while IFS= read -r dev_id
	do
		for bdf in $(iter_all_pci_dev_id 1af4 $dev_id); do
		for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
			if ! pci_can_use $bdf; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at $bdf"
				continue
@@ -309,7 +309,7 @@ function configure_linux_pci {

	while IFS= read -r dev_id
	do
		for bdf in $(iter_pci_dev_id 8086 $dev_id); do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if [[ -z "$PCI_WHITELIST" ]] || ! pci_can_use $bdf; then
				echo "Skipping un-whitelisted VMD device at $bdf"
				continue
@@ -448,7 +448,7 @@ function reset_linux_pci {
	check_for_driver nvme
	driver_loaded=$?
	set -e
	for bdf in $(iter_all_pci_class_code 01 08 02); do
	for bdf in ${pci_bus_cache["0x010802"]}; do
		if ! pci_can_use $bdf; then
			pci_dev_echo "$bdf" "Skipping un-whitelisted NVMe controller $blkname"
			continue
@@ -472,7 +472,7 @@ function reset_linux_pci {
	set -e
	while IFS= read -r dev_id
	do
		for bdf in $(iter_all_pci_dev_id 8086 $dev_id); do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use $bdf; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted I/OAT device"
				continue
@@ -497,7 +497,7 @@ function reset_linux_pci {
        set -e
        while IFS= read -r dev_id
        do
                for bdf in $(iter_all_pci_dev_id 8086 $dev_id); do
                for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
                        if ! pci_can_use $bdf; then
                                pci_dev_echo "$bdf" "Skipping un-whitelisted IDXD device"
                                continue
@@ -524,7 +524,7 @@ function reset_linux_pci {
	modprobe virtio-pci || true
	while IFS= read -r dev_id
	do
		for bdf in $(iter_all_pci_dev_id 1af4 $dev_id); do
		for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
			if ! pci_can_use $bdf; then
				pci_dev_echo "$bdf" "Skipping un-whitelisted Virtio device at"
				continue
@@ -546,7 +546,7 @@ function reset_linux_pci {
	set -e
	while IFS= read -r dev_id
	do
		for bdf in $(iter_pci_dev_id 8086 $dev_id); do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			if ! pci_can_use $bdf; then
				echo "Skipping un-whitelisted VMD device at $bdf"
				continue
@@ -605,7 +605,7 @@ function status_linux {
	echo "NVMe devices"

	echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
	for bdf in $(iter_all_pci_class_code 01 08 02); do
	for bdf in ${pci_bus_cache["0x010802"]}; do
		driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent |awk -F"=" '{print $2}')
		if [ "$numa_nodes" = "0" ]; then
			node="-"
@@ -630,7 +630,7 @@ function status_linux {
	| awk -F"x" '{print $2}')
	echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
	for dev_id in $TMP; do
		for bdf in $(iter_all_pci_dev_id 8086 $dev_id); do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent |awk -F"=" '{print $2}')
			if [ "$numa_nodes" = "0" ]; then
				node="-"
@@ -651,7 +651,7 @@ function status_linux {
        | awk -F"x" '{print $2}')
        echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver"
        for dev_id in $TMP; do
                for bdf in $(iter_all_pci_dev_id 8086 $dev_id); do
                for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
                        driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent |awk -F"=" '{print $2}')
                        if [ "$numa_nodes" = "0" ]; then
                                node="-"
@@ -672,7 +672,7 @@ function status_linux {
	| awk -F"x" '{print $2}')
	echo -e "BDF\t\tVendor\tDevice\tNUMA\tDriver\t\tDevice name"
	for dev_id in $TMP; do
		for bdf in $(iter_all_pci_dev_id 1af4 $dev_id); do
		for bdf in ${pci_bus_cache["0x1af4:0x$dev_id"]}; do
			driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent |awk -F"=" '{print $2}')
			if [ "$numa_nodes" = "0" ]; then
				node="-"
@@ -694,7 +694,7 @@ function status_linux {
	| awk -F"x" '{print $2}')
	echo -e "BDF\t\tNuma Node\tDriver Name"
	for dev_id in $TMP; do
		for bdf in $(iter_pci_dev_id 8086 $dev_id); do
		for bdf in ${pci_bus_cache["0x8086:0x$dev_id"]}; do
			driver=$(grep DRIVER /sys/bus/pci/devices/$bdf/uevent |awk -F"=" '{print $2}')
			node=$(cat /sys/bus/pci/devices/$bdf/numa_node);
			echo -e "$bdf\t$node\t\t$driver"
@@ -703,44 +703,31 @@ function status_linux {
}

function configure_freebsd_pci {
	TMP=$(mktemp)
	local devs ids id
	local BDFS

	# NVMe
	GREP_STR="class=0x010802"
	devs=PCI_DEVICE_ID_INTEL_IOAT
	devs+="|PCI_DEVICE_ID_INTEL_IDXD"
	devs+="|PCI_DEVICE_ID_INTEL_VMD"

	# IOAT
	grep "PCI_DEVICE_ID_INTEL_IOAT" $rootdir/include/spdk/pci_ids.h \
	| awk -F"x" '{print $2}' > $TMP
	while IFS= read -r dev_id
	do
		GREP_STR="${GREP_STR}\|chip=0x${dev_id}8086"
	done < $TMP
	ids=($(grep -E "$devs" "$rootdir/include/spdk/pci_ids.h" | awk '{print $3}'))

        # IDXD
        grep "PCI_DEVICE_ID_INTEL_IDXD" $rootdir/include/spdk/pci_ids.h \
        | awk -F"x" '{print $2}' > $TMP
        while IFS= read -r dev_id
        do
                GREP_STR="${GREP_STR}\|chip=0x${dev_id}8086"
        done < $TMP

	# VMD
	grep "PCI_DEVICE_ID_INTEL_VMD" $rootdir/include/spdk/pci_ids.h \
	| awk -F"x" '{print $2}' > $TMP
	while IFS= read -r dev_id
	do
		GREP_STR="${GREP_STR}\|chip=0x${dev_id}8086"
	done < $TMP
	if [[ -n ${pci_bus_cache["0x010802"]} ]]; then
		BDFS+=(${pci_bus_cache["0x010802"]})
	fi

	AWK_PROG=("{if (count > 0) printf \",\"; printf \"%s:%s:%s\",\$2,\$3,\$4; count++}")
	echo "${AWK_PROG[*]}" > $TMP
	for id in "${ids[@]}"; do
		[[ -n ${pci_bus_cache["0x8086:$id"]} ]] || continue
		BDFS+=(${pci_bus_cache["0x8086:$id"]})
	done

	BDFS=$(pciconf -l | grep "${GREP_STR}" | awk -F: -f $TMP)
	# Drop the domain part from all the addresses
	BDFS=("${BDFS[@]#*:}")

	local IFS=","
	kldunload nic_uio.ko || true
	kenv hw.nic_uio.bdfs=$BDFS
	kenv hw.nic_uio.bdfs="${BDFS[*]}"
	kldload nic_uio.ko
	rm $TMP
}

function configure_freebsd {
@@ -764,6 +751,8 @@ function reset_freebsd {
	kldunload nic_uio.ko || true
}

CMD=reset cache_pci_bus

mode=$1

if [ -z "$mode" ]; then