cgroup/cpuset: Fix violation of cpuset locking rule

[ Upstream commit 6ba34d3c73674e46d9e126e4f0cee79e5ef2481c ]

The cpuset fields that manage partition root state do not strictly
follow the cpuset locking rule that updates to a cpuset have to be
done with both the callback_lock and cpuset_mutex held. This is now
fixed by making sure that the locking rule is upheld.
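
For context on the rule being enforced: callback_lock is an irq-safe
spinlock that nests inside cpuset_mutex (a percpu_rwsem named
cpuset_rwsem in this tree). Readers may hold either lock, so a writer
must hold both before touching cpuset fields. A minimal sketch of the
write-side pattern this patch enforces (illustrative only, not code
from the patch; cs and new_prs are names borrowed from the diff below,
and real callers take cpuset_rwsem higher up the call chain):

        percpu_down_write(&cpuset_rwsem);       /* the "cpuset_mutex" side */

        spin_lock_irq(&callback_lock);          /* both locks now held */
        cs->partition_root_state = new_prs;     /* cpuset field updates go here */
        spin_unlock_irq(&callback_lock);

        percpu_up_write(&cpuset_rwsem);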

Fixes: 3881b86128 ("cpuset: Add an error state to cpuset.sched.partition")
Fixes: 4b842da276 ("cpuset: Make CPU hotplug work with partition")
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>

--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1148,6 +1148,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
         struct cpuset *parent = parent_cs(cpuset);
         int adding;     /* Moving cpus from effective_cpus to subparts_cpus */
         int deleting;   /* Moving cpus from subparts_cpus to effective_cpus */
+        int new_prs;
         bool part_error = false;        /* Partition error? */
 
         percpu_rwsem_assert_held(&cpuset_rwsem);
@@ -1183,6 +1184,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
          * A cpumask update cannot make parent's effective_cpus become empty.
          */
         adding = deleting = false;
+        new_prs = cpuset->partition_root_state;
         if (cmd == partcmd_enable) {
                 cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
                 adding = true;
@@ -1247,11 +1249,11 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
                 switch (cpuset->partition_root_state) {
                 case PRS_ENABLED:
                         if (part_error)
-                                cpuset->partition_root_state = PRS_ERROR;
+                                new_prs = PRS_ERROR;
                         break;
                 case PRS_ERROR:
                         if (!part_error)
-                                cpuset->partition_root_state = PRS_ENABLED;
+                                new_prs = PRS_ENABLED;
                         break;
                 }
                 /*
@@ -1260,10 +1262,10 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
                 part_error = (prev_prs == PRS_ERROR);
         }
 
-        if (!part_error && (cpuset->partition_root_state == PRS_ERROR))
+        if (!part_error && (new_prs == PRS_ERROR))
                 return 0;       /* Nothing need to be done */
 
-        if (cpuset->partition_root_state == PRS_ERROR) {
+        if (new_prs == PRS_ERROR) {
                 /*
                  * Remove all its cpus from parent's subparts_cpus.
                  */
@@ -1272,7 +1274,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
                                        parent->subparts_cpus);
         }
 
-        if (!adding && !deleting)
+        if (!adding && !deleting && (new_prs == cpuset->partition_root_state))
                 return 0;
 
         /*
@@ -1299,6 +1301,9 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
         }
 
         parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
+
+        if (cpuset->partition_root_state != new_prs)
+                cpuset->partition_root_state = new_prs;
         spin_unlock_irq(&callback_lock);
 
         return cmd == partcmd_update;
@@ -1321,6 +1326,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
         struct cpuset *cp;
         struct cgroup_subsys_state *pos_css;
         bool need_rebuild_sched_domains = false;
+        int new_prs;
 
         rcu_read_lock();
         cpuset_for_each_descendant_pre(cp, pos_css, cs) {
@@ -1360,7 +1366,8 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                  * update_tasks_cpumask() again for tasks in the parent
                  * cpuset if the parent's subparts_cpus changes.
                  */
-                if ((cp != cs) && cp->partition_root_state) {
+                new_prs = cp->partition_root_state;
+                if ((cp != cs) && new_prs) {
                         switch (parent->partition_root_state) {
                         case PRS_DISABLED:
                                 /*
@@ -1370,7 +1377,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                                  */
                                 WARN_ON_ONCE(cp->partition_root_state
                                                 != PRS_ERROR);
-                                cp->partition_root_state = PRS_DISABLED;
+                                new_prs = PRS_DISABLED;
 
                                 /*
                                  * clear_bit() is an atomic operation and
@@ -1391,11 +1398,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                                 /*
                                  * When parent is invalid, it has to be too.
                                  */
-                                cp->partition_root_state = PRS_ERROR;
-                                if (cp->nr_subparts_cpus) {
-                                        cp->nr_subparts_cpus = 0;
-                                        cpumask_clear(cp->subparts_cpus);
-                                }
+                                new_prs = PRS_ERROR;
                                 break;
                         }
                 }
@@ -1407,8 +1410,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                 spin_lock_irq(&callback_lock);
 
                 cpumask_copy(cp->effective_cpus, tmp->new_cpus);
-                if (cp->nr_subparts_cpus &&
-                   (cp->partition_root_state != PRS_ENABLED)) {
+                if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
                         cp->nr_subparts_cpus = 0;
                         cpumask_clear(cp->subparts_cpus);
                 } else if (cp->nr_subparts_cpus) {
@@ -1435,6 +1437,10 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                                         = cpumask_weight(cp->subparts_cpus);
                         }
                 }
+
+                if (new_prs != cp->partition_root_state)
+                        cp->partition_root_state = new_prs;
+
                 spin_unlock_irq(&callback_lock);
 
                 WARN_ON(!is_in_v2_mode() &&
@@ -1944,25 +1950,25 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
  */
 static int update_prstate(struct cpuset *cs, int new_prs)
 {
-        int err;
+        int err, old_prs = cs->partition_root_state;
         struct cpuset *parent = parent_cs(cs);
         struct tmpmasks tmpmask;
 
-        if (new_prs == cs->partition_root_state)
+        if (old_prs == new_prs)
                 return 0;
 
         /*
          * Cannot force a partial or invalid partition root to a full
          * partition root.
          */
-        if (new_prs && (cs->partition_root_state < 0))
+        if (new_prs && (old_prs == PRS_ERROR))
                 return -EINVAL;
 
         if (alloc_cpumasks(NULL, &tmpmask))
                 return -ENOMEM;
 
         err = -EINVAL;
-        if (!cs->partition_root_state) {
+        if (!old_prs) {
                 /*
                  * Turning on partition root requires setting the
                  * CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed
@@ -1981,14 +1987,12 @@ static int update_prstate(struct cpuset *cs, int new_prs)
                         update_flag(CS_CPU_EXCLUSIVE, cs, 0);
                         goto out;
                 }
-                cs->partition_root_state = PRS_ENABLED;
         } else {
                 /*
                  * Turning off partition root will clear the
                  * CS_CPU_EXCLUSIVE bit.
                  */
-                if (cs->partition_root_state == PRS_ERROR) {
-                        cs->partition_root_state = PRS_DISABLED;
+                if (old_prs == PRS_ERROR) {
                         update_flag(CS_CPU_EXCLUSIVE, cs, 0);
                         err = 0;
                         goto out;
@@ -1999,8 +2003,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
                 if (err)
                         goto out;
 
-                cs->partition_root_state = PRS_DISABLED;
-
                 /* Turning off CS_CPU_EXCLUSIVE will not return error */
                 update_flag(CS_CPU_EXCLUSIVE, cs, 0);
         }
@@ -2017,6 +2019,12 @@ static int update_prstate(struct cpuset *cs, int new_prs)
 
         rebuild_sched_domains_locked();
 out:
+        if (!err) {
+                spin_lock_irq(&callback_lock);
+                cs->partition_root_state = new_prs;
+                spin_unlock_irq(&callback_lock);
+        }
+
         free_cpumasks(NULL, &tmpmask);
         return err;
 }
@@ -3080,8 +3088,10 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
         if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
            (parent->partition_root_state == PRS_ERROR))) {
                 if (cs->nr_subparts_cpus) {
+                        spin_lock_irq(&callback_lock);
                         cs->nr_subparts_cpus = 0;
                         cpumask_clear(cs->subparts_cpus);
+                        spin_unlock_irq(&callback_lock);
                         compute_effective_cpumask(&new_cpus, cs, parent);
                 }
 
@@ -3095,7 +3105,9 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
                     cpumask_empty(&new_cpus)) {
                         update_parent_subparts_cpumask(cs, partcmd_disable,
                                                        NULL, tmp);
+                        spin_lock_irq(&callback_lock);
                         cs->partition_root_state = PRS_ERROR;
+                        spin_unlock_irq(&callback_lock);
                 }
                 cpuset_force_rebuild();
         }