Commit a087f7be authored by Michal Berger's avatar Michal Berger Committed by Tomasz Zawadzki
Browse files

autotest: Change logic around core-collector

Instead of telling kernel to pipe cores through it, pick them from
a location saved as a pattern at the very end.

This is done for a couple of reasons:
  - containers: when kernel is told to pipe cores to a helper,
		it executes it from the main namespace, rather than
		the namespace of the crashing process (i.e.
		container's namespace). This means that unless our
		core-collector is set on the host's side + there's
		extra logic of rerouting cores to the target container,
		(e.g. https://review.spdk.io/c/spdk/spdk/+/24919

)
		we are not able to catch these cores at all in
		tests. Using simple core pattern fixes this since
		in this case kernel will always write core inside
		the namespace of the crashing process.
  - selinux: current spdk-ci uses fedora imaging with selinux
		running in enforcing mode. Default set of policies
		under fedora restricts binaries from running as a
		kernel helper (i.e. bash which executes our
		collector) rendering our collector useless.

The disadvantages of this change are minimal and revolve mainly
around the lack of access to the procfs entry of the crashing
process at the time the core is written - this info was not that
useful in the first place, and it can still be extracted from the
actual core.

Change-Id: Ib2de14407add1f74450d294ba2ec329d0c6fe6fb
Signed-off-by: default avatarMichal Berger <michal.berger@nutanix.com>
Reviewed-on: https://review.spdk.io/c/spdk/spdk/+/26084


Reviewed-by: default avatarJim Harris <jim.harris@nvidia.com>
Reviewed-by: default avatarTomasz Zawadzki <tomasz@tzawadzki.com>
Reviewed-by: default avatarJacek Kalwas <jacek.kalwas@nutanix.com>
Tested-by: default avatarSPDK Automated Test System <spdkbot@gmail.com>
parent 0fbf3b68
Loading
Loading
Loading
Loading
+7 −6
Original line number Diff line number Diff line
@@ -32,12 +32,13 @@ fi
if [ $(uname -s) = Linux ]; then
	old_core_pattern=$(< /proc/sys/kernel/core_pattern)
	mkdir -p "$output_dir/coredumps"
	# Set core_pattern to a known value to avoid ABRT, systemd-coredump, etc.
	# Dump the $output_dir path to a file so collector can pick it up while executing.
	# We don't set it in the core_pattern command line because of the string length limitation
	# of 128 bytes. See 'man core 5' for details.
	echo "|$rootdir/scripts/core-collector.sh %P %s %t" > /proc/sys/kernel/core_pattern
	echo "$output_dir/coredumps" > "$rootdir/.coredump_path"
	# Set core_pattern to a known value to avoid looking for cores handled by different
	# entities, like apport, systemd-coredump, etc. We also don't want to pipe core to our
	# own collector as under SELINUX it won't be able to execute due to limitation
	# kernel_generic_help_t|kernel_t domains may impose. Stick to a simple pattern
	# pointing at $output_dir/coredumps - when autotest finishes, process_core() will
	# pick any core from that location.
	echo "$output_dir/coredumps/%s-%p-%i-%t-%E.core" > /proc/sys/kernel/core_pattern

	# make sure nbd (network block device) driver is loaded if it is available
	# this ensures that when tests need to use nbd, it will be fully initialized
+59 −121
Original line number Diff line number Diff line
@@ -3,27 +3,8 @@
#  Copyright (C) 2020 Intel Corporation
#  All rights reserved.
#
# We don't want to tell kernel to include %e or %E since these
# can include whitespaces or other funny characters, and working
# with those on the cmdline would be a nightmare. Use procfs for
# the remaining pieces we want to gather:
# |$rootdir/scripts/core-collector.sh %P %s %t $output_dir

rootdir=$(readlink -f "$(dirname "$0")/../")

maps_to_json() {
	# Join the global maps[] paths with the address ranges found under
	# /proc/<pid>/map_files/ into a comma-separated list of quoted
	# "path@start-end" strings, suitable for embedding in a JSON array.
	local -a entries=("${maps[@]}")
	local -a regions=("/proc/$core_pid/map_files/"*)
	local idx

	for idx in "${!regions[@]}"; do
		entries[idx]="\"${entries[idx]}@${regions[idx]##*/}\""
	done

	(IFS=","; printf '%s\n' "${entries[*]}")
}
shopt -s nullglob

core_meta() {
	jq . <<- CORE
@@ -32,12 +13,9 @@ core_meta() {
		    "ts": "$core_time",
		    "size": "$core_size bytes",
		    "PID": $core_pid,
		    "TID": $core_thread,
		    "signal": "$core_sig ($core_sig_name)",
		    "path": "$exe_path",
		    "cwd": "$cwd_path",
		    "statm": "$statm",
		    "filter": "$(coredump_filter)",
		    "mapped": [ $(maps_to_json) ]
		    "path": "$exe_path"
		  }
		}
	CORE
@@ -45,103 +23,63 @@ core_meta() {

# Dump a full backtrace of every thread from core file $2 of binary $1.
# Returns non-zero (and prints nothing useful) when gdb is not installed.
bt() {
	hash gdb && gdb -batch -ex "thread apply all bt full" "$1" "$2" 2>&1
}

# Redirect this shell's stderr into a per-core log file and turn on
# command tracing so the collector's own failures are captured there.
stderr() {
	exec 2> "$core.stderr.txt" && set -x
}

# Decode /proc/<core_pid>/coredump_filter into a human-readable,
# comma-separated list of the mapping types included in the dump.
# Bit meanings follow core(5). Reads the global core_pid.
coredump_filter() {
	local -a names=(
		anon-priv-mappings
		anon-shared-mappings
		file-priv-mappings
		file-shared-mappings
		elf-headers
		priv-hp
		shared-hp
		priv-DAX
		shared-DAX
	)
	local mask idx out

	mask=$((0x$(< "/proc/$core_pid/coredump_filter")))

	for idx in "${!names[@]}"; do
		if ((mask & (1 << idx))); then
			out=${out:+$out,}${names[idx]}
		fi
	done

	echo "$out"
}

# Decide whether the crashed process is one we care about. Returns 0
# (keep the core) when the process ran from inside our repo, had one of
# our fio plugins mapped, or is a binary our tests depend on; 1 otherwise.
# Reads the globals cwd_path, maps[], exe_path and rootdir.
filter_process() {
	# Was the process running from inside our repo?
	if [[ $cwd_path == "$rootdir"* ]]; then
		return 0
	fi

	# Did it have one of our fio plugins loaded?
	local plugin
	for plugin in spdk_nvme spdk_bdev; do
		if [[ ${maps[*]} == *"$rootdir/build/fio/$plugin"* ]]; then
			return 0
		fi
	done

	# Is it a binary our tests depend on? Extend the list as needed.
	local pattern
	for pattern in "nvme" "qemu-system*"; do
		# Unquoted RHS is intentional - we want glob matching here
		# shellcheck disable=SC2053
		[[ ${exe_path##*/} == $pattern ]] && return 0
	done

	return 1
}

# Positional args handed to us by the kernel via the core_pattern pipe:
# %P (PID), %s (signal number), %t (dump time, seconds since the Epoch)
# - see the "|.../core-collector.sh %P %s %t" pattern this script is
# registered with.
args+=(core_pid)
args+=(core_sig)
args+=(core_ts)

read -r "${args[@]}" <<< "$*"

# Snapshot process details from procfs while the entry is still present.
# NOTE(review): presumably /proc/$core_pid remains accessible while the
# kernel is piping the core to us - confirm against core(5).
exe_path=$(readlink -f "/proc/$core_pid/exe")
cwd_path=$(readlink -f "/proc/$core_pid/cwd")
exe_comm=$(< "/proc/$core_pid/comm")
statm=$(< "/proc/$core_pid/statm")
core_time=$(date -d@"$core_ts")
parse_core() {
	local core=$1 _core
	local cores_dir=$2

	local core_pid core_thread
	local core_save
	local core_sig core_sig_name
	local core_size
	local core_time
	local exe_comm exe_pat

	local prefix=(
		core_sig
		core_pid
		core_thread
		core_time
	)

	# $output_dir/coredumps/%s-%p-%i-%t-%E.core
	#  |
	#  v
	#  11-47378-47378-1748807733-!opt!spdk!build!bin!spdk_tgt.core
	_core=${core##*/} _core=${_core%.core}
	# !opt!spdk!build!bin!spdk_tgt
	exe_path=${_core#*-*-*-*-}
	# Split 11-47378-47378-1748807733 into respective variables
	IFS="-" read -r "${prefix[@]}" <<< "${_core%"-$exe_path"}"
	# /opt/spdk/build/bin/spdk_tgt
	exe_path=${exe_path//\!/\/}
	# spdk_tgt
	exe_comm=${exe_path##*/}
	# 11 -> SEGV
	core_sig_name=$(kill -l "$core_sig")
mapfile -t maps < <(readlink -f "/proc/$core_pid/map_files/"*)

# Filter out processes that we don't care about
filter_process || exit 0

core=$(< "$rootdir/.coredump_path")/${exe_path##*/}_$core_pid.core
stderr

# RLIMIT_CORE is not enforced when core is piped to us. To make
# sure we won't attempt to overload underlying storage, copy
# only the reasonable amount of bytes (systemd defaults to 2G
# so let's follow that).
rlimit=$((1024 * 1024 * 1024 * 2))

# Clear path for lz
rm -f "$core"{,.{bin,bt,gz,json}}

# Slurp the core
head -c "$rlimit" <&0 > "$core"
	# seconds since Epoch to date
	core_time=$(date -d"@$core_time")
	# size in bytes
	core_size=$(wc -c < "$core")
	# $output_dir/coredumps/spdk_tgt-47378-47378
	core_save=$cores_dir/$exe_comm-$core_pid-$core_thread

	# Compress it
gzip -c "$core" > "$core.gz"

	gzip -c "$core" > "$core_save.gz"
	# Save the binary
cp "$exe_path" "$core.bin"

	cp "$exe_path" "$core_save.bin"
	# Save the backtrace
bt "$exe_path" "$core" > "$core.bt.txt"

	bt "$exe_path" "$core" > "$core_save.bt.txt"
	# Save the metadata of the core
core_meta > "$core.json"

	core_meta > "$core_save.json"
	# Nuke the original core
	rm "$core"
}

# Entry point: $1 is the directory the kernel writes cores into
# (the core_pattern points at $output_dir/coredumps).
cores_dir=$1

# With nullglob enabled, the glob expands to an empty array when no
# cores were dumped - nothing to do in that case.
cores=("$cores_dir/"*.core)
((${#cores[@]} > 0)) || exit 0

for core in "${cores[@]}"; do
	parse_core "$core" "$cores_dir"
done
+7 −7
Original line number Diff line number Diff line
@@ -777,17 +777,17 @@ function gdb_attach() {

function process_core() {
	# Note that this always was racy as we can't really sync with the kernel
	# to see if there's any core queued up for writing. We could check if
	# collector is running and wait for it explicitly, but it doesn't seem
	# to be worth the effort. So assume that if we are being called via
	# trap, as in, when some error has occurred, wait up to 10s for any
	# potential cores. If we are called just for cleanup at the very end,
	# don't wait since all the tests ended successfully, hence having any
	# critical cores lying around is unlikely.
	# to see if there's any core queued up for writing. Assume that if we are
	# being called via trap, as in, when some error has occurred, wait up to
	# 10s for any potential cores. If we are called just for cleanup at the
	# very end, don't wait since all the tests ended successfully, hence
	# having any critical cores lying around is unlikely.
	((autotest_es != 0)) && sleep 10

	local coredumps core

	"$rootdir/scripts/core-collector.sh" "$output_dir/coredumps"

	coredumps=("$output_dir/coredumps/"*.bt.txt)

	((${#coredumps[@]} > 0)) || return 0