kernel_optimize_test/arch/x86/mm/srat.c
Yasuaki Ishimatsu 4af463d28f x86/numa: Set numa_nodes_parsed at acpi_numa_memory_affinity_init()
When hot-adding a CPU, the system outputs following messages
since node_to_cpumask_map[2] was not allocated memory.

Booting Node 2 Processor 32 APIC 0xc0
node_to_cpumask_map[2] NULL
Pid: 0, comm: swapper/32 Tainted: G       A     3.3.5-acd #21
Call Trace:
 [<ffffffff81048845>] debug_cpumask_set_cpu+0x155/0x160
 [<ffffffff8105e28a>] ? add_timer_on+0xaa/0x120
 [<ffffffff8150665f>] numa_add_cpu+0x1e/0x22
 [<ffffffff815020bb>] identify_cpu+0x1df/0x1e4
 [<ffffffff815020d6>] identify_econdary_cpu+0x16/0x1d
 [<ffffffff81504614>] smp_store_cpu_info+0x3c/0x3e
 [<ffffffff81505263>] smp_callin+0x139/0x1be
 [<ffffffff815052fb>] start_secondary+0x13/0xeb

The reason is that the bit of node 2 was not set at
numa_nodes_parsed. numa_nodes_parsed is set by only
acpi_numa_processor_affinity_init /
acpi_numa_x2apic_affinity_init. Thus even if hot-added memory
which is same PXM as hot-added CPU is written in ACPI SRAT
Table, if the hot-added CPU is not written in ACPI SRAT table,
numa_nodes_parsed is not set.

But according to ACPI Spec Rev 5.0, it says about ACPI SRAT
table as follows: This optional table provides information that
allows OSPM to associate processors and memory ranges, including
ranges of memory provided by hot-added memory devices, with
system localities / proximity domains and clock domains.

It means that ACPI SRAT table only provides information for CPUs
present at boot time and for memory including hot-added memory.
So hot-added memory is written in ACPI SRAT table, but hot-added
CPU is not written in it. Thus numa_nodes_parsed should be set
by not only acpi_numa_processor_affinity_init /
acpi_numa_x2apic_affinity_init but also
acpi_numa_memory_affinity_init for the case.

Additionally, if system has cpuless memory node,
acpi_numa_processor_affinity_init /
acpi_numa_x2apic_affinity_init cannot set numa_nodes_parseds
since these functions cannot find cpu description for the node.
In this case, numa_nodes_parsed needs to be set by
acpi_numa_memory_affinity_init.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: liuj97@gmail.com
Cc: kosaki.motohiro@gmail.com
Link: http://lkml.kernel.org/r/4FCC2098.4030007@jp.fujitsu.com
[ merged it ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2012-06-06 11:58:39 +02:00

197 lines
4.6 KiB
C

/*
* ACPI 3.0 based NUMA setup
* Copyright 2004 Andi Kleen, SuSE Labs.
*
* Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
*
* Called from acpi_numa_init while reading the SRAT and SLIT tables.
* Assumes all memory regions belonging to a single proximity domain
* are in one chunk. Holes between them will be included in the node.
*/
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/mmzone.h>
#include <linux/bitmap.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <asm/proto.h>
#include <asm/numa.h>
#include <asm/e820.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
int acpi_numa __initdata;
static __init int setup_node(int pxm)
{
return acpi_map_pxm_to_node(pxm);
}
static __init void bad_srat(void)
{
printk(KERN_ERR "SRAT: SRAT not used.\n");
acpi_numa = -1;
}
static __init inline int srat_disabled(void)
{
return acpi_numa < 0;
}
/* Callback for SLIT parsing */
void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
{
int i, j;
for (i = 0; i < slit->locality_count; i++)
for (j = 0; j < slit->locality_count; j++)
numa_set_distance(pxm_to_node(i), pxm_to_node(j),
slit->entry[slit->locality_count * i + j]);
}
/* Callback for Proximity Domain -> x2APIC mapping */
void __init
acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
{
int pxm, node;
int apic_id;
if (srat_disabled())
return;
if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
bad_srat();
return;
}
if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return;
pxm = pa->proximity_domain;
apic_id = pa->apic_id;
if (!apic->apic_id_valid(apic_id)) {
printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
pxm, apic_id);
return;
}
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
return;
}
if (apic_id >= MAX_LOCAL_APIC) {
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
return;
}
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
pxm, apic_id, node);
}
/* Callback for Proximity Domain -> LAPIC mapping */
void __init
acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
{
int pxm, node;
int apic_id;
if (srat_disabled())
return;
if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
bad_srat();
return;
}
if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
return;
pxm = pa->proximity_domain_lo;
if (acpi_srat_revision >= 2)
pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
bad_srat();
return;
}
if (get_uv_system_type() >= UV_X2APIC)
apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
else
apic_id = pa->apic_id;
if (apic_id >= MAX_LOCAL_APIC) {
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
return;
}
set_apicid_to_node(apic_id, node);
node_set(node, numa_nodes_parsed);
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
pxm, apic_id, node);
}
#ifdef CONFIG_MEMORY_HOTPLUG
static inline int save_add_info(void) {return 1;}
#else
static inline int save_add_info(void) {return 0;}
#endif
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
void __init
acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
{
u64 start, end;
int node, pxm;
if (srat_disabled())
return;
if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
bad_srat();
return;
}
if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
return;
if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
return;
start = ma->base_address;
end = start + ma->length;
pxm = ma->proximity_domain;
if (acpi_srat_revision <= 1)
pxm &= 0xff;
node = setup_node(pxm);
if (node < 0) {
printk(KERN_ERR "SRAT: Too many proximity domains.\n");
bad_srat();
return;
}
if (numa_add_memblk(node, start, end) < 0) {
bad_srat();
return;
}
node_set(node, numa_nodes_parsed);
printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n",
node, pxm,
(unsigned long long) start, (unsigned long long) end - 1);
}
void __init acpi_numa_arch_fixup(void) {}
int __init x86_acpi_numa_init(void)
{
int ret;
ret = acpi_numa_init();
if (ret < 0)
return ret;
return srat_disabled() ? -EINVAL : 0;
}