forked from luck/tmp_suning_uos_patched
7e849dbe60
commit 2685027fca387b602ae565bff17895188b803988 upstream. There are 3 places where the cpu and node masks of the top cpuset can be initialized in the order they are executed: 1) start_kernel -> cpuset_init() 2) start_kernel -> cgroup_init() -> cpuset_bind() 3) kernel_init_freeable() -> do_basic_setup() -> cpuset_init_smp() The first cpuset_init() call just sets all the bits in the masks. The second cpuset_bind() call sets cpus_allowed and mems_allowed to the default v2 values. The third cpuset_init_smp() call sets them back to v1 values. For systems with cgroup v2 setup, cpuset_bind() is called once. As a result, cpu and memory node hot add may fail to update the cpu and node masks of the top cpuset to include the newly added cpu or node in a cgroup v2 environment. For systems with cgroup v1 setup, cpuset_bind() is called again by rebind_subsystem() when the v1 cpuset filesystem is mounted as shown in the dmesg log below with an instrumented kernel. [ 2.609781] cpuset_bind() called - v2 = 1 [ 3.079473] cpuset_init_smp() called [ 7.103710] cpuset_bind() called - v2 = 0 smp_init() is called after the first two init functions. So we don't have a complete list of active cpus and memory nodes until later in cpuset_init_smp() which is the right time to set up effective_cpus and effective_mems. To fix this cgroup v2 mask setup problem, the potentially incorrect cpus_allowed & mems_allowed setting in cpuset_init_smp() are removed. For cgroup v2 systems, the initial cpuset_bind() call will set the masks correctly. For cgroup v1 systems, the second call to cpuset_bind() will do the right setup. cc: stable@vger.kernel.org Signed-off-by: Waiman Long <longman@redhat.com> Tested-by: Feng Tang <feng.tang@intel.com> Reviewed-by: Michal Koutný <mkoutny@suse.com> Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
||
---|---|---|
.. | ||
bpf | ||
cgroup | ||
configs | ||
debug | ||
dma | ||
entry | ||
events | ||
gcov | ||
irq | ||
kcsan | ||
livepatch | ||
locking | ||
power | ||
printk | ||
rcu | ||
sched | ||
time | ||
trace | ||
.gitignore | ||
acct.c | ||
async.c | ||
audit_fsnotify.c | ||
audit_tree.c | ||
audit_watch.c | ||
audit.c | ||
audit.h | ||
auditfilter.c | ||
auditsc.c | ||
backtracetest.c | ||
bounds.c | ||
capability.c | ||
compat.c | ||
configs.c | ||
context_tracking.c | ||
cpu_pm.c | ||
cpu.c | ||
crash_core.c | ||
crash_dump.c | ||
cred.c | ||
delayacct.c | ||
dma.c | ||
exec_domain.c | ||
exit.c | ||
extable.c | ||
fail_function.c | ||
fork.c | ||
freezer.c | ||
futex.c | ||
gen_kheaders.sh | ||
groups.c | ||
hung_task.c | ||
iomem.c | ||
irq_work.c | ||
jump_label.c | ||
kallsyms.c | ||
kcmp.c | ||
Kconfig.freezer | ||
Kconfig.hz | ||
Kconfig.locks | ||
Kconfig.preempt | ||
kcov.c | ||
kexec_core.c | ||
kexec_elf.c | ||
kexec_file.c | ||
kexec_internal.h | ||
kexec.c | ||
kheaders.c | ||
kmod.c | ||
kprobes.c | ||
ksysfs.c | ||
kthread.c | ||
latencytop.c | ||
Makefile | ||
module_signature.c | ||
module_signing.c | ||
module-internal.h | ||
module.c | ||
notifier.c | ||
nsproxy.c | ||
padata.c | ||
panic.c | ||
params.c | ||
pid_namespace.c | ||
pid.c | ||
profile.c | ||
ptrace.c | ||
range.c | ||
reboot.c | ||
regset.c | ||
relay.c | ||
resource.c | ||
rseq.c | ||
scftorture.c | ||
scs.c | ||
seccomp.c | ||
signal.c | ||
smp.c | ||
smpboot.c | ||
smpboot.h | ||
softirq.c | ||
stackleak.c | ||
stacktrace.c | ||
static_call.c | ||
stop_machine.c | ||
sys_ni.c | ||
sys.c | ||
sysctl-test.c | ||
sysctl.c | ||
task_work.c | ||
taskstats.c | ||
test_kprobes.c | ||
torture.c | ||
tracepoint.c | ||
tsacct.c | ||
ucount.c | ||
uid16.c | ||
uid16.h | ||
umh.c | ||
up.c | ||
user_namespace.c | ||
user-return-notifier.c | ||
user.c | ||
usermode_driver.c | ||
utsname_sysctl.c | ||
utsname.c | ||
watch_queue.c | ||
watchdog_hld.c | ||
watchdog.c | ||
workqueue_internal.h | ||
workqueue.c |