kernel_optimize_test/mm/memtest.c
Tony Luck fc6daaf931 mm/memblock: add extra "flags" to memblock to allow selection of memory based on attribute
Some high end Intel Xeon systems report uncorrectable memory errors as a
recoverable machine check.  Linux has included code for some time to
process these and just signal the affected processes (or even recover
completely if the error was in a read only page that can be replaced by
reading from disk).

But we have no recovery path for errors encountered during kernel code
execution.  Except for some very specific cases, we are unlikely to ever
be able to recover.

Enter memory mirroring.  Actually the 3rd generation of memory mirroring.

Gen1: All memory is mirrored
	Pro: No s/w enabling - h/w just gets good data from other side of the
	     mirror
	Con: Halves effective memory capacity available to OS/applications

Gen2: Partial memory mirror - just mirror memory behind some memory controllers
	Pro: Keep more of the capacity
	Con: Nightmare to enable. Have to choose between allocating from
	     mirrored memory for safety vs. NUMA local memory for performance

Gen3: Address range partial memory mirror - some mirror on each memory
      controller
	Pro: Can tune the amount of mirror and keep NUMA performance
	Con: I have to write memory management code to implement

The current plan is just to use mirrored memory for kernel allocations.
This has been broken into two phases:

1) This patch series - find the mirrored memory, use it for boot time
   allocations

2) Wade into mm/page_alloc.c and define a ZONE_MIRROR to pick up the
   unused mirrored memory from mm/memblock.c and only give it out to
   select kernel allocations (this is still being scoped because
   page_alloc.c is scary).

This patch (of 3):

Add extra "flags" to memblock to allow selection of memory based on
attribute.  No functional changes.
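
As a hedged illustration of the new calling convention (not part of the
patch text itself): callers select memory by attribute through the added
flags argument.  The iterator and MEMBLOCK_NONE are taken from the file
below; the function name here is made up for the sketch.

	/* Hypothetical caller, for illustration only. */
	static void __init walk_plain_ranges(void)
	{
		u64 i;
		phys_addr_t start, end;

		/* The third argument is the new "flags" attribute selector. */
		for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
					&start, &end, NULL) {
			/* start..end is a free range matching the flags */
		}
	}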

Signed-off-by: Tony Luck <tony.luck@intel.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Xiexiuqi <xiexiuqi@huawei.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-06-24 17:49:44 -07:00

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pfn.h>
#include <linux/memblock.h>

static u64 patterns[] __initdata = {
	/* The first entry has to be 0 to leave memtest with zeroed memory */
	0,
	0xffffffffffffffffULL,
	0x5555555555555555ULL,
	0xaaaaaaaaaaaaaaaaULL,
	0x1111111111111111ULL,
	0x2222222222222222ULL,
	0x4444444444444444ULL,
	0x8888888888888888ULL,
	0x3333333333333333ULL,
	0x6666666666666666ULL,
	0x9999999999999999ULL,
	0xccccccccccccccccULL,
	0x7777777777777777ULL,
	0xbbbbbbbbbbbbbbbbULL,
	0xddddddddddddddddULL,
	0xeeeeeeeeeeeeeeeeULL,
	0x7a6c7258554e494cULL, /* yeah ;-) ("LINUXrlz" in little-endian ASCII) */
};

static void __init reserve_bad_mem(u64 pattern, phys_addr_t start_bad, phys_addr_t end_bad)
{
	printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n",
	       (unsigned long long) pattern,
	       (unsigned long long) start_bad,
	       (unsigned long long) end_bad);
	memblock_reserve(start_bad, end_bad - start_bad);
}

static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size)
{
	u64 *p, *start, *end;
	phys_addr_t start_bad, last_bad;
	phys_addr_t start_phys_aligned;
	const size_t incr = sizeof(pattern);

	start_phys_aligned = ALIGN(start_phys, incr);
	start = __va(start_phys_aligned);
	end = start + (size - (start_phys_aligned - start_phys)) / incr;
	start_bad = 0;
	last_bad = 0;

	/* Fill the whole range with the pattern... */
	for (p = start; p < end; p++)
		*p = pattern;
	/* ...then read it back, coalescing adjacent bad words into ranges. */
	for (p = start; p < end; p++, start_phys_aligned += incr) {
		if (*p == pattern)
			continue;
		/* Contiguous with the current bad range: just extend it. */
		if (start_phys_aligned == last_bad + incr) {
			last_bad += incr;
			continue;
		}
		/* New bad range: reserve the previous one, if any. */
		if (start_bad)
			reserve_bad_mem(pattern, start_bad, last_bad + incr);
		start_bad = last_bad = start_phys_aligned;
	}
	if (start_bad)
		reserve_bad_mem(pattern, start_bad, last_bad + incr);
}
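
/*
 * Worked example of the coalescing above: if the 8-byte words at
 * 0x1000 and 0x1008 both fail to read back, they merge into a single
 * reserved range [0x1000, 0x1010); a later miss at 0x2000 first
 * reserves that range, then starts a new one.
 */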

static void __init do_one_pass(u64 pattern, phys_addr_t start, phys_addr_t end)
{
	u64 i;
	phys_addr_t this_start, this_end;

	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &this_start,
				&this_end, NULL) {
		/* Restrict each free range to the window we were asked to test. */
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);
		if (this_start < this_end) {
			printk(KERN_INFO " %010llx - %010llx pattern %016llx\n",
			       (unsigned long long)this_start,
			       (unsigned long long)this_end,
			       (unsigned long long)cpu_to_be64(pattern));
			memtest(pattern, this_start, this_end - this_start);
		}
	}
}

/* default is disabled */
static int memtest_pattern __initdata;

static int __init parse_memtest(char *arg)
{
	if (arg)
		memtest_pattern = simple_strtoul(arg, NULL, 0);
	else
		memtest_pattern = ARRAY_SIZE(patterns);

	return 0;
}

early_param("memtest", parse_memtest);

void __init early_memtest(phys_addr_t start, phys_addr_t end)
{
	unsigned int i;
	unsigned int idx = 0;

	if (!memtest_pattern)
		return;

	printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern);
	/*
	 * Count down from memtest_pattern - 1; the unsigned wraparound
	 * after i reaches 0 (i becomes UINT_MAX) terminates the loop.
	 */
	for (i = memtest_pattern - 1; i < UINT_MAX; --i) {
		idx = i % ARRAY_SIZE(patterns);
		do_one_pass(patterns[idx], start, end);
	}
}
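
/*
 * Hedged sketch (not part of this file): an architecture calls
 * early_memtest() during boot, once the ranges to be tested are
 * mapped.  example_arch_memtest() and the memblock_end_of_DRAM()
 * bound are illustrative assumptions, not taken from any real arch.
 */
static void __init example_arch_memtest(void)
{
	early_memtest(0, memblock_end_of_DRAM());
}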