Commit 45c42ac2 authored by Michal Berger, committed by Jim Harris

scripts: Use core dump collector



This is done in order to track core dumps in a more efficient
manner. Until now, some cores could be missed if the binary was
executed outside of the cwd of the autotest (i.e. outside of
the spdk repo) but was part of the critical path of the actual
test (e.g. fio in vhost-initiator tests). Also, since
core_pattern was set to plain "core", the impact on the
underlying storage wasn't controlled either - if a core was 20G
in size, that is what we would get. This could easily exhaust
storage when error-prone patchsets were submitted on the CI side.

The collector will try to mitigate all the above by doing the
following:

  - collecting all the cores, regardless of their cwd
  - limiting size of the core to 2G
  - compressing the cores (gzip)

Also, a limit of 2 collectors executing at once is set - if more
processes crash at approximately the same time, the excess
crashes will only be noted in the kernel log instead.
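
As a rough sketch of the mechanism this builds on (paths purely
illustrative, not the autotest's actual ones): a pipe-style
core_pattern makes the kernel execute the given helper for every
crash, streaming the core image to the helper's stdin, regardless
of the crashing process's cwd:

  # install a hypothetical collector and cap concurrent helpers at 2
  echo '|/usr/local/bin/core-collector.sh %P %s %t /var/crash' > /proc/sys/kernel/core_pattern
  echo 2 > /proc/sys/kernel/core_pipe_limit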

Signed-off-by: Michal Berger <michalx.berger@intel.com>
Change-Id: I5956a9030c463ae85a21bfe95f28af5568c5c285
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5369


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Karol Latecki <karol.latecki@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent d8211848
+3 −1
@@ -30,8 +30,10 @@ fi

 if [ $(uname -s) = Linux ]; then
 	old_core_pattern=$(< /proc/sys/kernel/core_pattern)
+	mkdir -p "$output_dir/coredumps"
 	# set core_pattern to a known value to avoid ABRT, systemd-coredump, etc.
-	echo "core" > /proc/sys/kernel/core_pattern
+	echo "|$rootdir/scripts/core-collector.sh %P %s %t $output_dir/coredumps" > /proc/sys/kernel/core_pattern
+	echo 2 > /proc/sys/kernel/core_pipe_limit
 
 	# Make sure that the hugepage state for our VM is fresh so we don't fail
 	# hugepage allocation. Allow time for this action to complete.
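
For reference, in the core(5) template language used above, %P expands to the
global PID of the dumping process, %s to the number of the signal that caused
the dump, and %t to the time of the dump as a UNIX epoch, while the leading
"|" tells the kernel to execute the command and stream the core image to its
stdin. A SIGSEGV crash of PID 1234 could thus result in an invocation like
this (paths hypothetical):

  /path/to/spdk/scripts/core-collector.sh 1234 11 1600000000 /path/to/output/coredumps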
+89 −0
#!/usr/bin/env bash
# We don't want to tell the kernel to include %e or %E since these
# may contain whitespace or other funny characters, and working
# with those on the cmdline would be a nightmare. Use procfs for
# the remaining pieces we want to gather:
# |$rootdir/scripts/core-collector.sh %P %s %t $output_dir

get_rlimit() {
	local limit

	while read -ra limit; do
		[[ ${limit[1]} == core ]] && echo "${limit[4]}" # soft
	done < "/proc/$core_pid/limits"
}
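
# For reference, the "core" line matched above in /proc/<pid>/limits looks
# like this (columns are whitespace-separated, so ${limit[4]} is the soft
# limit):
#   Max core file size    unlimited   unlimited   bytes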

core_meta() {
	jq . <<- CORE
		{
		  "$exe_comm": {
		    "ts": "$core_time",
		    "size": "$core_size bytes",
		    "PID": $core_pid,
		    "signal": "$core_sig ($core_sig_name)",
		    "path": "$exe_path",
		    "statm": "$statm"
		  }
		}
	CORE
}
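
# Example of what core_meta() above may emit (values hypothetical):
# {
#   "my_app": {
#     "ts": "Tue Sep 15 12:00:00 UTC 2020",
#     "size": "2147483648 bytes",
#     "PID": 1234,
#     "signal": "11 (SEGV)",
#     "path": "/usr/local/bin/my_app",
#     "statm": "544 94 66 2 0 71 0"
#   }
# }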

# Dump a full backtrace from every thread, provided gdb is available
bt() { hash gdb && gdb -batch -ex "thread apply all bt full" "$1" "$2" 2>&1; }

stderr() {
	# Redirect all further stderr (including the set -x trace) to a
	# per-core log file
	exec 2> "$core.stderr.txt"
	set -x
}

args+=(core_pid)
args+=(core_sig)
args+=(core_ts)
args+=(output_dir)

read -r "${args[@]}" <<< "$*"
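# The read above maps the positional arguments passed by the kernel
# (%P %s %t, plus the output directory appended in core_pattern) onto
# the named variables from the args array.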

exe_path=$(readlink -f "/proc/$core_pid/exe")
exe_comm=$(< "/proc/$core_pid/comm")
statm=$(< "/proc/$core_pid/statm")
core_time=$(date -d@"$core_ts")
core_sig_name=$(kill -l "$core_sig")

core=$output_dir/${exe_path##*/}_$core_pid.core
stderr

# RLIMIT_CORE is not enforced when core is piped to us. To make
# sure we won't attempt to overload underlying storage, copy
# only the reasonable amount of bytes (systemd defaults to 2G
# so let's follow that). But first, check limits of terminating
# process to see if we need to make any adjustments.
max_core=$((1024 * 1024 * 1024 * 2))

rlimit=$(get_rlimit)
if [[ $rlimit == unlimited ]] || ((rlimit > max_core)); then
	rlimit=$max_core
fi

# Nothing to do
((rlimit == 0)) && exit 0

# Clear out any leftover artifacts from a previous dump of this PID
rm -f "$core"{,.{bin,bt.txt,gz,json}}

# Slurp the core
head -c "$rlimit" <&0 > "$core"
core_size=$(wc -c < "$core")

# Compress it
gzip -c "$core" > "$core.gz"

# Save the binary
cp "$exe_path" "$core.bin"

# Save the backtrace
bt "$exe_path" "$core" > "$core.bt.txt"

# Save the metadata of the core
core_meta > "$core.json"

# Nuke the original core
rm "$core"
+30 −18
@@ -600,24 +600,36 @@ function gdb_attach() {
 }
 
 function process_core() {
-	ret=0
-	while IFS= read -r -d '' core; do
-		exe=$(eu-readelf -n "$core" | grep psargs | sed "s/.*psargs: \([^ \'\" ]*\).*/\1/")
-		if [[ ! -f "$exe" ]]; then
-			exe=$(eu-readelf -n "$core" | grep -oP -m1 "$exe.+")
-		fi
-		echo "exe for $core is $exe"
-		if [[ -n "$exe" ]]; then
-			if hash gdb &> /dev/null; then
-				gdb -batch -ex "thread apply all bt full" $exe $core
-			fi
-			cp $exe $output_dir
-		fi
-		mv $core $output_dir
-		chmod a+r $output_dir/$core
-		ret=1
-	done < <(find . -type f \( -name 'core.[0-9]*' -o -name 'core' -o -name '*.core' \) -print0)
-	return $ret
+	# Note that this has always been racy, as we can't really sync with the
+	# kernel to see if there's any core still queued up for writing. We
+	# could check if the collector is running and wait for it explicitly,
+	# but it doesn't seem to be worth the effort. So assume that if we are
+	# called via trap, i.e. when some error has occurred, we should wait up
+	# to 5s for any potential cores. If we are called just for cleanup at
+	# the very end, don't wait, since all the tests ended successfully and
+	# having any critical cores lying around is unlikely.
+	local es=$?
+	((es != 0)) && sleep 5s
+
+	local coredumps core
+
+	shopt -s nullglob
+	coredumps=("$output_dir/coredumps/"*.bt.txt)
+	shopt -u nullglob
+
+	((${#coredumps[@]} > 0)) || return 0
+	chmod -R a+r "$output_dir/coredumps"
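+
+	# Each saved backtrace is echoed into the test log framed by a banner,
+	# e.g. (file name hypothetical):
+	#   ##### CORE BT my_app_1234.core.bt.txt #####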
+
+	for core in "${coredumps[@]}"; do
+		cat <<- BT
+			##### CORE BT ${core##*/} #####
+
+			$(<"$core")
+
+			--
+		BT
+	done
+	return 1
 }
 
 function process_shm() {