forked from luck/tmp_suning_uos_patched
proc: Ensure we see the exit of each process tid exactly
In the work to remove proc_mnt I noticed that we were calling proc_flush_task now proc_flush_pid possibly multiple times for the same pid because of how de_thread works. This is a bare minimal patchset to sort out de_thread, by introducing exchange_tids and the helper of exchange_tids hlists_swap_heads_rcu. The actual call of exchange_tids should be slowpath so I have prioritized readability over getting every last drop of performance. I have also read through a bunch of the code to see if I could find anything that would be affected by this change. Users of has_group_leader_pid were a good canidates. But I also looked at other cases that might have a pid->task->pid transition. I ignored other sources of races with de_thread and exec as those are preexisting. I found a close call with send_signals user of task_active_pid_ns, but all pids of a thread group are guaranteeds to be in the same pid namespace so there is not a problem. I found a few pieces of debugging code that do: task = pid_task(pid, PIDTYPE_PID); if (task) { printk("%u\n", task->pid); } But I can't see how we care if it happens at the wrong moment that task->pid might not match pid_nr(pid); Similarly because the code in posix-cpu-timers goes pid->task->pid it feels like there should be a problem. But as the code that works with PIDTYPE_PID is only available within the thread group, and as de_thread kills all of the other threads before it makes any changes of this kind the race can not happen. In short I don't think this change will introduce any regressions. Eric W. Biederman (2): rculist: Add hlists_swap_heads_rcu proc: Ensure we see the exit of each process tid exactly once fs/exec.c | 5 +---- include/linux/pid.h | 1 + include/linux/rculist.h | 21 +++++++++++++++++++++ kernel/pid.c | 19 +++++++++++++++++++ 4 files changed, 42 insertions(+), 4 deletions(-) Link: https://lore.kernel.org/lkml/87sggnajpv.fsf_-_@x220.int.ebiederm.org/ Acked-by: Linus Torvalds <torvalds@linux-foundation.org> Acked-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
This commit is contained in:
commit
507122805e
|
@ -1186,11 +1186,8 @@ static int de_thread(struct task_struct *tsk)
|
|||
|
||||
/* Become a process group leader with the old leader's pid.
|
||||
* The old leader becomes a thread of the this thread group.
|
||||
* Note: The old leader also uses this pid until release_task
|
||||
* is called. Odd but simple and correct.
|
||||
*/
|
||||
tsk->pid = leader->pid;
|
||||
change_pid(tsk, PIDTYPE_PID, task_pid(leader));
|
||||
exchange_tids(tsk, leader);
|
||||
transfer_pid(leader, tsk, PIDTYPE_TGID);
|
||||
transfer_pid(leader, tsk, PIDTYPE_PGID);
|
||||
transfer_pid(leader, tsk, PIDTYPE_SID);
|
||||
|
|
|
@ -102,6 +102,7 @@ extern void attach_pid(struct task_struct *task, enum pid_type);
|
|||
extern void detach_pid(struct task_struct *task, enum pid_type);
|
||||
extern void change_pid(struct task_struct *task, enum pid_type,
|
||||
struct pid *pid);
|
||||
extern void exchange_tids(struct task_struct *task, struct task_struct *old);
|
||||
extern void transfer_pid(struct task_struct *old, struct task_struct *new,
|
||||
enum pid_type);
|
||||
|
||||
|
|
|
@ -506,6 +506,27 @@ static inline void hlist_replace_rcu(struct hlist_node *old,
|
|||
WRITE_ONCE(old->pprev, LIST_POISON2);
|
||||
}
|
||||
|
||||
/**
|
||||
* hlists_swap_heads_rcu - swap the lists the hlist heads point to
|
||||
* @left: The hlist head on the left
|
||||
* @right: The hlist head on the right
|
||||
*
|
||||
* The lists start out as [@left ][node1 ... ] and
|
||||
[@right ][node2 ... ]
|
||||
* The lists end up as [@left ][node2 ... ]
|
||||
* [@right ][node1 ... ]
|
||||
*/
|
||||
static inline void hlists_swap_heads_rcu(struct hlist_head *left, struct hlist_head *right)
|
||||
{
|
||||
struct hlist_node *node1 = left->first;
|
||||
struct hlist_node *node2 = right->first;
|
||||
|
||||
rcu_assign_pointer(left->first, node2);
|
||||
rcu_assign_pointer(right->first, node1);
|
||||
WRITE_ONCE(node2->pprev, &left->first);
|
||||
WRITE_ONCE(node1->pprev, &right->first);
|
||||
}
|
||||
|
||||
/*
|
||||
* return the first or the next element in an RCU protected hlist
|
||||
*/
|
||||
|
|
19
kernel/pid.c
19
kernel/pid.c
|
@ -363,6 +363,25 @@ void change_pid(struct task_struct *task, enum pid_type type,
|
|||
attach_pid(task, type);
|
||||
}
|
||||
|
||||
void exchange_tids(struct task_struct *left, struct task_struct *right)
|
||||
{
|
||||
struct pid *pid1 = left->thread_pid;
|
||||
struct pid *pid2 = right->thread_pid;
|
||||
struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
|
||||
struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
|
||||
|
||||
/* Swap the single entry tid lists */
|
||||
hlists_swap_heads_rcu(head1, head2);
|
||||
|
||||
/* Swap the per task_struct pid */
|
||||
rcu_assign_pointer(left->thread_pid, pid2);
|
||||
rcu_assign_pointer(right->thread_pid, pid1);
|
||||
|
||||
/* Swap the cached value */
|
||||
WRITE_ONCE(left->pid, pid_nr(pid2));
|
||||
WRITE_ONCE(right->pid, pid_nr(pid1));
|
||||
}
|
||||
|
||||
/* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
|
||||
void transfer_pid(struct task_struct *old, struct task_struct *new,
|
||||
enum pid_type type)
|
||||
|
|
Loading…
Reference in New Issue
Block a user