Commit 8571999d authored by Michal Berger, committed by Tomasz Zawadzki
Browse files

test/scheduler: Stop moving all processes between cgroups



Initially, this was done in order to reduce jitter on the cpus the
SPDK processes would execute on - every other process was being
moved to a dedicated "all" cgroup which would limit cpus and mem
nodes to those SPDK was being moved away from.

This was done at the early stages of the dynamic scheduler, when it
was not known where potential discrepancies in tests were coming
from. Most of these issues have already been resolved, hence there
is no need to perform such an invasive action anymore.

In fact, under newer kernels (6.8.x) this cgroup dance causes
peculiar issues where some of the processes become unresponsive
when suddenly moved from their designated cgroup.

Instead, move only processes associated with a test (this would
include SPDK processes executed along the way) to a separate
cgroup with appropriate setup.

While at it, enhance remove_cgroup() to make sure all lingering
leaf cgroups are removed as well - this is actually needed since
our cleanup() was leaving unattended cgroups around - /cpuset/all
and /cpuset in particular. Since this patch now tries to rely only
on /cpuset we need to make sure we can properly remove it without
hitting -EBUSY.

Change-Id: I450535959e323980341ffbc9723d38092aa1051d
Signed-off-by: Michal Berger <michal.berger@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/22859


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Karol Latecki <karol.latecki@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Jim Harris <jim.harris@samsung.com>
parent 06472fb6
Loading
Loading
Loading
Loading
+11 −22
Original line number Diff line number Diff line
@@ -23,23 +23,6 @@ init_cpuset_cgroup() {
		set_cgroup_attr / cgroup.subtree_control "+cpuset"
		create_cgroup /cpuset
		set_cgroup_attr /cpuset cgroup.subtree_control "+cpuset"
		# On distros which use cgroup-v2 under systemd, each process is
		# maintained under separate, pre-configured subtree. With the rule of
		# "internal processes are not permitted" this means that we won't find
		# ourselves under subsystem's root, rather on the bottom of the cgroup
		# maintaining user's session. To recreate the simple /cpuset setup from
		# v1, move all the threads from all the existing cgroups to the top
		# cgroup / and then migrate it to the /cpuset we created above.
		for pid in /proc/+([0-9]); do
			cgroup=$(get_cgroup "${pid##*/}") || continue
			[[ $cgroup != / ]] || continue
			cgroups["$cgroup"]=$cgroup
		done 2> /dev/null
		for cgroup in "${!cgroups[@]}"; do
			move_cgroup_procs "$cgroup" /
		done
		# Now, move all the threads to the cpuset
		move_cgroup_procs / /cpuset
	elif ((cgroup_version == 1)); then
		set_cgroup_attr /cpuset cgroup.procs "$$"
	fi
@@ -118,12 +101,18 @@ create_cgroup() {
}

# Remove a cgroup together with any child cgroups still nested under it.
#
# Arguments:
#   $1 - cgroup to remove; either relative to $sysfs_cgroup (e.g. /cpuset)
#        or a full path under $sysfs_cgroup (the form used when recursing).
# Returns:
#   0 when the cgroup does not exist, otherwise the status of the final rmdir.
remove_cgroup() {
	# Normalize the argument to a path relative to $sysfs_cgroup so that both
	# external callers and the recursive calls below are handled the same way.
	local cgroup=${1#"$sysfs_cgroup"} root_cgroup leaf_cgroup
	root_cgroup=$(dirname "$cgroup")

	[[ -e $sysfs_cgroup/$cgroup ]] || return 0
	# Remove all lingering leaf cgroups if any - this has to happen before
	# the rmdir below, as a cgroup which still has children can't be removed.
	for leaf_cgroup in "$sysfs_cgroup/$cgroup/"*/; do
		# Without nullglob an unmatched pattern is kept as a literal string
		[[ -d $leaf_cgroup ]] || continue
		remove_cgroup "$leaf_cgroup"
	done
	# Instead of killing all the potential processes, we play it nice
	# and move them to the parent cgroup.
	move_cgroup_procs "$cgroup" "$root_cgroup"
	rmdir "$sysfs_cgroup/$cgroup"
}

exec_in_cgroup() {
+1 −1
Original line number Diff line number Diff line
@@ -398,7 +398,7 @@ exec_under_dynamic_scheduler() {
	if [[ -e /proc/$spdk_pid/status ]]; then
		killprocess "$spdk_pid"
	fi
	exec_in_cgroup "/cpuset/spdk" "$@" --wait-for-rpc &
	"$@" --wait-for-rpc &
	spdk_pid=$!
	# Give some time for the app to init itself
	waitforlisten "$spdk_pid"
+8 −13
Original line number Diff line number Diff line
@@ -6,14 +6,10 @@
xtrace_disable

source "$testdir/common.sh"
export SILENT_CGROUP_DEBUG=yes

# Tear down the cgroup setup used by the test.
#
# The test keeps itself under a single /cpuset cgroup, so there are no
# dedicated /cpuset/spdk or /cpuset/all cgroups to kill or remove one by one;
# cleanup is delegated entirely to remove_cpuset_cgroup. Its failure is not
# masked, so a cgroup that could not be removed gets noticed.
restore_cgroups() {
	# Keep the cleanup out of the xtrace output
	xtrace_disable
	remove_cpuset_cgroup
	xtrace_restore
}

@@ -52,14 +48,13 @@ all_cpus_csv=$(fold_array_onto_string "${all_cpus[@]}")
all_cpumask=$(mask_cpus "${all_cpus[@]}")
all_cpus_mems=0

# Pin spdk cores to a new cgroup
create_cgroup "/cpuset/spdk"
create_cgroup "/cpuset/all"
set_cgroup_attr "/cpuset/spdk" cpuset.cpus "$spdk_cpus_csv"
set_cgroup_attr "/cpuset/spdk" cpuset.mems "$spdk_cpus_mems"
set_cgroup_attr "/cpuset/all" cpuset.cpus "$all_cpus_csv"
set_cgroup_attr "/cpuset/all" cpuset.mems "$all_cpus_mems"
move_cgroup_procs "/cpuset" "/cpuset/all"
# For cgroupv2 it's required we jump first to the root cgroup ...
move_proc "$$" "/" "" cgroup.procs
# ... so we can now settle in a dedicated cgroup /cpuset
move_proc "$$" "/cpuset" "" cgroup.procs

set_cgroup_attr "/cpuset" cpuset.cpus "$spdk_cpus_csv"
set_cgroup_attr "/cpuset" cpuset.mems "$spdk_cpus_mems"

export \
	"spdk_cpumask=$spdk_cpumask" \