sched/core: Optimize __schedule()
Oleg noted that by making do_exit() use __schedule() for the TASK_DEAD
context switch, we can avoid the TASK_DEAD special case currently in
__schedule(), because that avoids the extra preempt_disable() from
schedule().

In order to facilitate this, create a do_task_dead() helper which we
place in the scheduler code, such that it can access __schedule().

Also add some __noreturn annotations to the functions; there's no
coming back from do_exit().

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Cheng Chao <cs.os.kernel@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: chris@chris-wilson.co.uk
Cc: tj@kernel.org
Link: http://lkml.kernel.org/r/20160913163729.GB5012@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 9af6528ee9
parent bf89a30472
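For context: the "extra preempt_disable() from schedule()" the message refers to comes from schedule() being a thin wrapper around __schedule(). In kernels of this era it looks roughly like the following (a sketch of kernel/sched/core.c around v4.8, quoted from memory and lightly trimmed):

    asmlinkage __visible void __sched schedule(void)
    {
            struct task_struct *tsk = current;

            sched_submit_work(tsk);
            do {
                    preempt_disable();
                    __schedule(false);
                    sched_preempt_enable_no_resched();
            } while (need_resched());
    }

A task that dies inside __schedule() never returns to execute the matching sched_preempt_enable_no_resched(), leaving the preempt count one too high for the next task; the TASK_DEAD special case removed below existed only to compensate for that. Having do_task_dead() call __schedule(false) directly means the extra preempt_disable() never happens, so no fixup is needed.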
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -259,17 +259,14 @@ static inline void might_fault(void) { }
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 __printf(1, 2)
-void panic(const char *fmt, ...)
-        __noreturn __cold;
+void panic(const char *fmt, ...) __noreturn __cold;
 void nmi_panic(struct pt_regs *regs, const char *msg);
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-void do_exit(long error_code)
-        __noreturn;
-void complete_and_exit(struct completion *, long)
-        __noreturn;
+void do_exit(long error_code) __noreturn;
+void complete_and_exit(struct completion *, long) __noreturn;
 
 /* Internal, do not use. */
 int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
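A quick aside on what the __noreturn annotations buy. Here is a minimal userspace sketch (plain C, not kernel code) of the underlying GCC/Clang attribute, which the kernel's __noreturn macro expands to: the compiler verifies the function really cannot return, drops unreachable code after calls to it, and stops warning about paths that end in such a call.

    #include <stdio.h>
    #include <stdlib.h>

    #define __noreturn __attribute__((__noreturn__))

    static __noreturn void die(const char *msg)
    {
            fprintf(stderr, "fatal: %s\n", msg);
            exit(1);        /* exit() is itself noreturn, so the promise holds */
    }

    static int must_be_positive(int v)
    {
            if (v <= 0)
                    die("value must be positive");
            return v;       /* no "control reaches end" warning on the die() path */
    }

    int main(void)
    {
            printf("%d\n", must_be_positive(42));
            return 0;
    }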
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -448,6 +448,8 @@ static inline void io_schedule(void)
         io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
 }
 
+void __noreturn do_task_dead(void);
+
 struct nsproxy;
 struct user_namespace;
 
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -725,7 +725,7 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-void do_exit(long code)
+void __noreturn do_exit(long code)
 {
         struct task_struct *tsk = current;
         int group_dead;
@@ -882,29 +882,7 @@ void do_exit(long code)
         exit_rcu();
         TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
 
-        /*
-         * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
-         * when the following two conditions become true.
-         *   - There is race condition of mmap_sem (It is acquired by
-         *     exit_mm()), and
-         *   - SMI occurs before setting TASK_RUNINNG.
-         *     (or hypervisor of virtual machine switches to other guest)
-         *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
-         *
-         * To avoid it, we have to wait for releasing tsk->pi_lock which
-         * is held by try_to_wake_up()
-         */
-        smp_mb();
-        raw_spin_unlock_wait(&tsk->pi_lock);
-
-        /* causes final put_task_struct in finish_task_switch(). */
-        tsk->state = TASK_DEAD;
-        tsk->flags |= PF_NOFREEZE;      /* tell freezer to ignore us */
-        schedule();
-        BUG();
-        /* Avoid "noreturn function does return". */
-        for (;;)
-                cpu_relax();    /* For when BUG is null */
+        do_task_dead();
 }
 EXPORT_SYMBOL_GPL(do_exit);
 
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3331,17 +3331,6 @@ static void __sched notrace __schedule(bool preempt)
         rq = cpu_rq(cpu);
         prev = rq->curr;
 
-        /*
-         * do_exit() calls schedule() with preemption disabled as an exception;
-         * however we must fix that up, otherwise the next task will see an
-         * inconsistent (higher) preempt count.
-         *
-         * It also avoids the below schedule_debug() test from complaining
-         * about this.
-         */
-        if (unlikely(prev->state == TASK_DEAD))
-                preempt_enable_no_resched_notrace();
-
         schedule_debug(prev);
 
         if (sched_feat(HRTICK))
@@ -3409,6 +3398,33 @@ static void __sched notrace __schedule(bool preempt)
 }
 STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
 
+void __noreturn do_task_dead(void)
+{
+        /*
+         * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+         * when the following two conditions become true.
+         *   - There is race condition of mmap_sem (It is acquired by
+         *     exit_mm()), and
+         *   - SMI occurs before setting TASK_RUNINNG.
+         *     (or hypervisor of virtual machine switches to other guest)
+         *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
+         *
+         * To avoid it, we have to wait for releasing tsk->pi_lock which
+         * is held by try_to_wake_up()
+         */
+        smp_mb();
+        raw_spin_unlock_wait(&current->pi_lock);
+
+        /* causes final put_task_struct in finish_task_switch(). */
+        __set_current_state(TASK_DEAD);
+        current->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
+        __schedule(false);
+        BUG();
+        /* Avoid "noreturn function does return". */
+        for (;;)
+                cpu_relax();    /* For when BUG is null */
+}
+
 static inline void sched_submit_work(struct task_struct *tsk)
 {
         if (!tsk->state || tsk_is_pi_blocked(tsk))
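The comment block that moves into do_task_dead() is worth unpacking. The interleaving it guards against looks roughly like this (an illustrative timeline, not actual kernel code; ttwu stands for try_to_wake_up(), and p is the dying task as seen by the waker):

    CPU 0 (dying task)                     CPU 1 (waker)
    ------------------                     -------------
                                           ttwu: raw_spin_lock(&p->pi_lock)
                                           ttwu: commits to waking p
    __set_current_state(TASK_DEAD)
                                           ttwu: p->state = TASK_RUNNING   <- TASK_DEAD lost
                                           ttwu: raw_spin_unlock(&p->pi_lock)
    __schedule(false)

If the wakeup's TASK_RUNNING store lands after the TASK_DEAD store, finish_task_switch() never sees TASK_DEAD and the final put_task_struct() never happens. The smp_mb() plus raw_spin_unlock_wait(&current->pi_lock) pair makes the dying task wait until any such in-flight wakeup has dropped pi_lock before it writes TASK_DEAD, so a stale TASK_RUNNING can no longer overwrite the final state.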