forked from luck/tmp_suning_uos_patched
psi: Fix uaf issue when psi trigger is destroyed while being polled
commit a06247c6804f1a7c86a2e5398a4c1f1db1471848 upstream.
With write operation on psi files replacing old trigger with a new one,
the lifetime of its waitqueue is totally arbitrary. Overwriting an
existing trigger causes its waitqueue to be freed and pending poll()
will stumble on trigger->event_wait which was destroyed.
Fix this by disallowing to redefine an existing psi trigger. If a write
operation is used on a file descriptor with an already existing psi
trigger, the operation will fail with EBUSY error.
Also bypass a check for psi_disabled in the psi_trigger_destroy as the
flag can be flipped after the trigger is created, leading to a memory
leak.
Fixes: 0e94682b73
("psi: introduce psi monitor")
Reported-by: syzbot+cdb5dd11c97cc532efad@syzkaller.appspotmail.com
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Analyzed-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20220111232309.1786347-1-surenb@google.com
[surenb: backported to 5.10 kernel]
CC: stable@vger.kernel.org # 5.10
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
080dbe7e9b
commit
d4e4e61d4a
|
@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger
|
||||||
for the same psi metric can be specified. However for each trigger a separate
|
for the same psi metric can be specified. However for each trigger a separate
|
||||||
file descriptor is required to be able to poll it separately from others,
|
file descriptor is required to be able to poll it separately from others,
|
||||||
therefore for each trigger a separate open() syscall should be made even
|
therefore for each trigger a separate open() syscall should be made even
|
||||||
when opening the same psi interface file.
|
when opening the same psi interface file. Write operations to a file descriptor
|
||||||
|
with an already existing psi trigger will fail with EBUSY.
|
||||||
|
|
||||||
Monitors activate only when system enters stall state for the monitored
|
Monitors activate only when system enters stall state for the monitored
|
||||||
psi metric and deactivates upon exit from the stall state. While system is
|
psi metric and deactivates upon exit from the stall state. While system is
|
||||||
|
|
|
@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to);
|
||||||
|
|
||||||
struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
||||||
char *buf, size_t nbytes, enum psi_res res);
|
char *buf, size_t nbytes, enum psi_res res);
|
||||||
void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t);
|
void psi_trigger_destroy(struct psi_trigger *t);
|
||||||
|
|
||||||
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
|
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
|
||||||
poll_table *wait);
|
poll_table *wait);
|
||||||
|
|
|
@ -128,9 +128,6 @@ struct psi_trigger {
|
||||||
* events to one per window
|
* events to one per window
|
||||||
*/
|
*/
|
||||||
u64 last_event_time;
|
u64 last_event_time;
|
||||||
|
|
||||||
/* Refcounting to prevent premature destruction */
|
|
||||||
struct kref refcount;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct psi_group {
|
struct psi_group {
|
||||||
|
|
|
@ -3601,6 +3601,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
|
||||||
cgroup_get(cgrp);
|
cgroup_get(cgrp);
|
||||||
cgroup_kn_unlock(of->kn);
|
cgroup_kn_unlock(of->kn);
|
||||||
|
|
||||||
|
/* Allow only one trigger per file descriptor */
|
||||||
|
if (of->priv) {
|
||||||
|
cgroup_put(cgrp);
|
||||||
|
return -EBUSY;
|
||||||
|
}
|
||||||
|
|
||||||
psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
|
psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
|
||||||
new = psi_trigger_create(psi, buf, nbytes, res);
|
new = psi_trigger_create(psi, buf, nbytes, res);
|
||||||
if (IS_ERR(new)) {
|
if (IS_ERR(new)) {
|
||||||
|
@ -3608,8 +3614,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
|
||||||
return PTR_ERR(new);
|
return PTR_ERR(new);
|
||||||
}
|
}
|
||||||
|
|
||||||
psi_trigger_replace(&of->priv, new);
|
smp_store_release(&of->priv, new);
|
||||||
|
|
||||||
cgroup_put(cgrp);
|
cgroup_put(cgrp);
|
||||||
|
|
||||||
return nbytes;
|
return nbytes;
|
||||||
|
@ -3644,7 +3649,7 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
|
||||||
|
|
||||||
static void cgroup_pressure_release(struct kernfs_open_file *of)
|
static void cgroup_pressure_release(struct kernfs_open_file *of)
|
||||||
{
|
{
|
||||||
psi_trigger_replace(&of->priv, NULL);
|
psi_trigger_destroy(of->priv);
|
||||||
}
|
}
|
||||||
#endif /* CONFIG_PSI */
|
#endif /* CONFIG_PSI */
|
||||||
|
|
||||||
|
|
|
@ -1116,7 +1116,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
||||||
t->event = 0;
|
t->event = 0;
|
||||||
t->last_event_time = 0;
|
t->last_event_time = 0;
|
||||||
init_waitqueue_head(&t->event_wait);
|
init_waitqueue_head(&t->event_wait);
|
||||||
kref_init(&t->refcount);
|
|
||||||
|
|
||||||
mutex_lock(&group->trigger_lock);
|
mutex_lock(&group->trigger_lock);
|
||||||
|
|
||||||
|
@ -1145,15 +1144,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void psi_trigger_destroy(struct kref *ref)
|
void psi_trigger_destroy(struct psi_trigger *t)
|
||||||
{
|
{
|
||||||
struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount);
|
struct psi_group *group;
|
||||||
struct psi_group *group = t->group;
|
|
||||||
struct task_struct *task_to_destroy = NULL;
|
struct task_struct *task_to_destroy = NULL;
|
||||||
|
|
||||||
if (static_branch_likely(&psi_disabled))
|
/*
|
||||||
|
* We do not check psi_disabled since it might have been disabled after
|
||||||
|
* the trigger got created.
|
||||||
|
*/
|
||||||
|
if (!t)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
group = t->group;
|
||||||
/*
|
/*
|
||||||
* Wakeup waiters to stop polling. Can happen if cgroup is deleted
|
* Wakeup waiters to stop polling. Can happen if cgroup is deleted
|
||||||
* from under a polling process.
|
* from under a polling process.
|
||||||
|
@ -1189,9 +1192,9 @@ static void psi_trigger_destroy(struct kref *ref)
|
||||||
mutex_unlock(&group->trigger_lock);
|
mutex_unlock(&group->trigger_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait for both *trigger_ptr from psi_trigger_replace and
|
* Wait for psi_schedule_poll_work RCU to complete its read-side
|
||||||
* poll_task RCUs to complete their read-side critical sections
|
* critical section before destroying the trigger and optionally the
|
||||||
* before destroying the trigger and optionally the poll_task
|
* poll_task.
|
||||||
*/
|
*/
|
||||||
synchronize_rcu();
|
synchronize_rcu();
|
||||||
/*
|
/*
|
||||||
|
@ -1208,18 +1211,6 @@ static void psi_trigger_destroy(struct kref *ref)
|
||||||
kfree(t);
|
kfree(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new)
|
|
||||||
{
|
|
||||||
struct psi_trigger *old = *trigger_ptr;
|
|
||||||
|
|
||||||
if (static_branch_likely(&psi_disabled))
|
|
||||||
return;
|
|
||||||
|
|
||||||
rcu_assign_pointer(*trigger_ptr, new);
|
|
||||||
if (old)
|
|
||||||
kref_put(&old->refcount, psi_trigger_destroy);
|
|
||||||
}
|
|
||||||
|
|
||||||
__poll_t psi_trigger_poll(void **trigger_ptr,
|
__poll_t psi_trigger_poll(void **trigger_ptr,
|
||||||
struct file *file, poll_table *wait)
|
struct file *file, poll_table *wait)
|
||||||
{
|
{
|
||||||
|
@ -1229,24 +1220,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
|
||||||
if (static_branch_likely(&psi_disabled))
|
if (static_branch_likely(&psi_disabled))
|
||||||
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
|
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
|
||||||
|
|
||||||
rcu_read_lock();
|
t = smp_load_acquire(trigger_ptr);
|
||||||
|
if (!t)
|
||||||
t = rcu_dereference(*(void __rcu __force **)trigger_ptr);
|
|
||||||
if (!t) {
|
|
||||||
rcu_read_unlock();
|
|
||||||
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
|
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
|
||||||
}
|
|
||||||
kref_get(&t->refcount);
|
|
||||||
|
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
poll_wait(file, &t->event_wait, wait);
|
poll_wait(file, &t->event_wait, wait);
|
||||||
|
|
||||||
if (cmpxchg(&t->event, 1, 0) == 1)
|
if (cmpxchg(&t->event, 1, 0) == 1)
|
||||||
ret |= EPOLLPRI;
|
ret |= EPOLLPRI;
|
||||||
|
|
||||||
kref_put(&t->refcount, psi_trigger_destroy);
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1270,14 +1252,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
|
||||||
|
|
||||||
buf[buf_size - 1] = '\0';
|
buf[buf_size - 1] = '\0';
|
||||||
|
|
||||||
new = psi_trigger_create(&psi_system, buf, nbytes, res);
|
|
||||||
if (IS_ERR(new))
|
|
||||||
return PTR_ERR(new);
|
|
||||||
|
|
||||||
seq = file->private_data;
|
seq = file->private_data;
|
||||||
|
|
||||||
/* Take seq->lock to protect seq->private from concurrent writes */
|
/* Take seq->lock to protect seq->private from concurrent writes */
|
||||||
mutex_lock(&seq->lock);
|
mutex_lock(&seq->lock);
|
||||||
psi_trigger_replace(&seq->private, new);
|
|
||||||
|
/* Allow only one trigger per file descriptor */
|
||||||
|
if (seq->private) {
|
||||||
|
mutex_unlock(&seq->lock);
|
||||||
|
return -EBUSY;
|
||||||
|
}
|
||||||
|
|
||||||
|
new = psi_trigger_create(&psi_system, buf, nbytes, res);
|
||||||
|
if (IS_ERR(new)) {
|
||||||
|
mutex_unlock(&seq->lock);
|
||||||
|
return PTR_ERR(new);
|
||||||
|
}
|
||||||
|
|
||||||
|
smp_store_release(&seq->private, new);
|
||||||
mutex_unlock(&seq->lock);
|
mutex_unlock(&seq->lock);
|
||||||
|
|
||||||
return nbytes;
|
return nbytes;
|
||||||
|
@ -1312,7 +1304,7 @@ static int psi_fop_release(struct inode *inode, struct file *file)
|
||||||
{
|
{
|
||||||
struct seq_file *seq = file->private_data;
|
struct seq_file *seq = file->private_data;
|
||||||
|
|
||||||
psi_trigger_replace(&seq->private, NULL);
|
psi_trigger_destroy(seq->private);
|
||||||
return single_release(inode, file);
|
return single_release(inode, file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user