kernel_optimize_test/arch/hexagon/kernel/smp.c
Peter Zijlstra 5fbd036b55 sched: Cleanup cpu_active madness
Stepan found:

CPU0		CPUn

_cpu_up()
  __cpu_up()

		boostrap()
		  notify_cpu_starting()
		  set_cpu_online()
		  while (!cpu_active())
		    cpu_relax()

<PREEMPT-out>

smp_call_function(.wait=1)
  /* we find cpu_online() is true */
  arch_send_call_function_ipi_mask()

  /* wait-forever-more */

<PREEMPT-in>
		  local_irq_enable()

  cpu_notify(CPU_ONLINE)
    sched_cpu_active()
      set_cpu_active()

Now the purpose of cpu_active is mostly with bringing down a cpu, where
we mark it !active to avoid the load-balancer from moving tasks to it
while we tear down the cpu. This is required because we only update the
sched_domain tree after we brought the cpu-down. And this is needed so
that some tasks can still run while we bring it down, we just don't want
new tasks to appear.

On cpu-up however the sched_domain tree doesn't yet include the new cpu,
so its invisible to the load-balancer, regardless of the active state.
So instead of setting the active state after we boot the new cpu (and
consequently having to wait for it before enabling interrupts) set the
cpu active before we set it online and avoid the whole mess.

Reported-by: Stepan Moskovchenko <stepanm@codeaurora.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1323965362.18942.71.camel@twins
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2012-03-12 20:43:15 +01:00

275 lines
5.7 KiB
C

/*
* SMP support for Hexagon
*
* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
* only version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <asm/system.h> /* xchg */
#include <asm/time.h> /* timer_interrupt */
#include <asm/hexagon_vm.h>
#define BASE_IPI_IRQ 26
/*
* cpu_possible_map needs to be filled out prior to setup_per_cpu_areas
* (which is prior to any of our smp_prepare_cpu crap), in order to set
* up the... per_cpu areas.
*/
struct ipi_data {
unsigned long bits;
};
static DEFINE_PER_CPU(struct ipi_data, ipi_data);
static inline void __handle_ipi(unsigned long *ops, struct ipi_data *ipi,
int cpu)
{
unsigned long msg = 0;
do {
msg = find_next_bit(ops, BITS_PER_LONG, msg+1);
switch (msg) {
case IPI_TIMER:
ipi_timer();
break;
case IPI_CALL_FUNC:
generic_smp_call_function_interrupt();
break;
case IPI_CALL_FUNC_SINGLE:
generic_smp_call_function_single_interrupt();
break;
case IPI_CPU_STOP:
/*
* call vmstop()
*/
__vmstop();
break;
case IPI_RESCHEDULE:
scheduler_ipi();
break;
}
} while (msg < BITS_PER_LONG);
}
/* Used for IPI call from other CPU's to unmask int */
void smp_vm_unmask_irq(void *info)
{
__vmintop_locen((long) info);
}
/*
* This is based on Alpha's IPI stuff.
* Supposed to take (int, void*) as args now.
* Specifically, first arg is irq, second is the irq_desc.
*/
irqreturn_t handle_ipi(int irq, void *desc)
{
int cpu = smp_processor_id();
struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
unsigned long ops;
while ((ops = xchg(&ipi->bits, 0)) != 0)
__handle_ipi(&ops, ipi, cpu);
return IRQ_HANDLED;
}
void send_ipi(const struct cpumask *cpumask, enum ipi_message_type msg)
{
unsigned long flags;
unsigned long cpu;
unsigned long retval;
local_irq_save(flags);
for_each_cpu(cpu, cpumask) {
struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
set_bit(msg, &ipi->bits);
/* Possible barrier here */
retval = __vmintop_post(BASE_IPI_IRQ+cpu);
if (retval != 0) {
printk(KERN_ERR "interrupt %ld not configured?\n",
BASE_IPI_IRQ+cpu);
}
}
local_irq_restore(flags);
}
static struct irqaction ipi_intdesc = {
.handler = handle_ipi,
.flags = IRQF_TRIGGER_RISING,
.name = "ipi_handler"
};
void __init smp_prepare_boot_cpu(void)
{
}
/*
* interrupts should already be disabled from the VM
* SP should already be correct; need to set THREADINFO_REG
* to point to current thread info
*/
void __cpuinit start_secondary(void)
{
unsigned int cpu;
unsigned long thread_ptr;
/* Calculate thread_info pointer from stack pointer */
__asm__ __volatile__(
"%0 = SP;\n"
: "=r" (thread_ptr)
);
thread_ptr = thread_ptr & ~(THREAD_SIZE-1);
__asm__ __volatile__(
QUOTED_THREADINFO_REG " = %0;\n"
:
: "r" (thread_ptr)
);
/* Set the memory struct */
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
cpu = smp_processor_id();
setup_irq(BASE_IPI_IRQ + cpu, &ipi_intdesc);
/* Register the clock_event dummy */
setup_percpu_clockdev();
printk(KERN_INFO "%s cpu %d\n", __func__, current_thread_info()->cpu);
set_cpu_online(cpu, true);
local_irq_enable();
cpu_idle();
}
/*
* called once for each present cpu
* apparently starts up the CPU and then
* maintains control until "cpu_online(cpu)" is set.
*/
int __cpuinit __cpu_up(unsigned int cpu)
{
struct task_struct *idle;
struct thread_info *thread;
void *stack_start;
/* Create new init task for the CPU */
idle = fork_idle(cpu);
if (IS_ERR(idle))
panic(KERN_ERR "fork_idle failed\n");
thread = (struct thread_info *)idle->stack;
thread->cpu = cpu;
/* Boot to the head. */
stack_start = ((void *) thread) + THREAD_SIZE;
__vmstart(start_secondary, stack_start);
while (!cpu_isset(cpu, cpu_online_map))
barrier();
return 0;
}
void __init smp_cpus_done(unsigned int max_cpus)
{
}
void __init smp_prepare_cpus(unsigned int max_cpus)
{
int i;
/*
* should eventually have some sort of machine
* descriptor that has this stuff
*/
/* Right now, let's just fake it. */
for (i = 0; i < max_cpus; i++)
cpu_set(i, cpu_present_map);
/* Also need to register the interrupts for IPI */
if (max_cpus > 1)
setup_irq(BASE_IPI_IRQ, &ipi_intdesc);
}
void smp_send_reschedule(int cpu)
{
send_ipi(cpumask_of(cpu), IPI_RESCHEDULE);
}
void smp_send_stop(void)
{
struct cpumask targets;
cpumask_copy(&targets, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &targets);
send_ipi(&targets, IPI_CPU_STOP);
}
void arch_send_call_function_single_ipi(int cpu)
{
send_ipi(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
}
void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
send_ipi(mask, IPI_CALL_FUNC);
}
int setup_profiling_timer(unsigned int multiplier)
{
return -EINVAL;
}
void smp_start_cpus(void)
{
int i;
for (i = 0; i < NR_CPUS; i++)
cpu_set(i, cpu_possible_map);
}