Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  amd64_edac: check NB MCE bank enable on the current node properly
  amd64_edac: Rewrite unganged mode code of f10_early_channel_count
  amd64_edac: cleanup amd64_check_ecc_enabled
  x86, EDAC: Provide function to return NodeId of a CPU
  amd64_edac: build driver only on AMD hardware
This commit is contained in:
Linus Torvalds 2009-09-17 09:55:52 -07:00
commit de55a8958f
5 changed files with 88 additions and 103 deletions

View File

@ -1020,4 +1020,6 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
extern int get_tsc_mode(unsigned long adr); extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val); extern int set_tsc_mode(unsigned int val);
extern int amd_get_nb_id(int cpu);
#endif /* _ASM_X86_PROCESSOR_H */ #endif /* _ASM_X86_PROCESSOR_H */

View File

@ -333,6 +333,16 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
#endif #endif
} }
/*
 * amd_get_nb_id - return the node id recorded for a CPU
 * @cpu: logical CPU number to look up
 *
 * On SMP builds this is the per-CPU cpu_llc_id value of @cpu; on non-SMP
 * builds the id is always 0.
 */
int amd_get_nb_id(int cpu)
{
#ifdef CONFIG_SMP
	return per_cpu(cpu_llc_id, cpu);
#else
	return 0;
#endif
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{ {
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)

View File

@ -59,7 +59,7 @@ config EDAC_MM_EDAC
config EDAC_AMD64 config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && CPU_SUP_AMD
help help
Support for error detection and correction on the AMD 64 Support for error detection and correction on the AMD 64
Families of Memory Controllers (K8, F10h and F11h) Families of Memory Controllers (K8, F10h and F11h)

View File

@ -1255,7 +1255,9 @@ static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map)
*/ */
static int f10_early_channel_count(struct amd64_pvt *pvt) static int f10_early_channel_count(struct amd64_pvt *pvt)
{ {
int dbams[] = { DBAM0, DBAM1 };
int err = 0, channels = 0; int err = 0, channels = 0;
int i, j;
u32 dbam; u32 dbam;
err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
@ -1288,46 +1290,19 @@ static int f10_early_channel_count(struct amd64_pvt *pvt)
* is more than just one DIMM present in unganged mode. Need to check * is more than just one DIMM present in unganged mode. Need to check
* both controllers since DIMMs can be placed in either one. * both controllers since DIMMs can be placed in either one.
*/ */
channels = 0; for (i = 0; i < ARRAY_SIZE(dbams); i++) {
err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM0, &dbam); err = pci_read_config_dword(pvt->dram_f2_ctl, dbams[i], &dbam);
if (err)
goto err_reg;
if (DBAM_DIMM(0, dbam) > 0)
channels++;
if (DBAM_DIMM(1, dbam) > 0)
channels++;
if (DBAM_DIMM(2, dbam) > 0)
channels++;
if (DBAM_DIMM(3, dbam) > 0)
channels++;
/* If more than 2 DIMMs are present, then we have 2 channels */
if (channels > 2)
channels = 2;
else if (channels == 0) {
/* No DIMMs on DCT0, so look at DCT1 */
err = pci_read_config_dword(pvt->dram_f2_ctl, DBAM1, &dbam);
if (err) if (err)
goto err_reg; goto err_reg;
if (DBAM_DIMM(0, dbam) > 0) for (j = 0; j < 4; j++) {
channels++; if (DBAM_DIMM(j, dbam) > 0) {
if (DBAM_DIMM(1, dbam) > 0) channels++;
channels++; break;
if (DBAM_DIMM(2, dbam) > 0) }
channels++; }
if (DBAM_DIMM(3, dbam) > 0)
channels++;
if (channels > 2)
channels = 2;
} }
/* If we found ALL 0 values, then assume just ONE DIMM-ONE Channel */
if (channels == 0)
channels = 1;
debugf0("MCT channel count: %d\n", channels); debugf0("MCT channel count: %d\n", channels);
return channels; return channels;
@ -2766,30 +2741,53 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs); wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, msrs);
} }
static void check_mcg_ctl(void *ret) /* get all cores on this DCT */
static void get_cpus_on_this_dct_cpumask(cpumask_t *mask, int nid)
{ {
u64 msr_val = 0; int cpu;
u8 nbe;
rdmsrl(MSR_IA32_MCG_CTL, msr_val); for_each_online_cpu(cpu)
nbe = msr_val & K8_MSR_MCGCTL_NBE; if (amd_get_nb_id(cpu) == nid)
cpumask_set_cpu(cpu, mask);
debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
raw_smp_processor_id(), msr_val,
(nbe ? "enabled" : "disabled"));
if (!nbe)
*(int *)ret = 0;
} }
/* check MCG_CTL on all the cpus on this node */ /* check MCG_CTL on all the cpus on this node */
static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask) static bool amd64_nb_mce_bank_enabled_on_node(int nid)
{ {
int ret = 1; cpumask_t mask;
preempt_disable(); struct msr *msrs;
smp_call_function_many(mask, check_mcg_ctl, &ret, 1); int cpu, nbe, idx = 0;
preempt_enable(); bool ret = false;
cpumask_clear(&mask);
get_cpus_on_this_dct_cpumask(&mask, nid);
msrs = kzalloc(sizeof(struct msr) * cpumask_weight(&mask), GFP_KERNEL);
if (!msrs) {
amd64_printk(KERN_WARNING, "%s: error allocating msrs\n",
__func__);
return false;
}
rdmsr_on_cpus(&mask, MSR_IA32_MCG_CTL, msrs);
for_each_cpu(cpu, &mask) {
nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE;
debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
cpu, msrs[idx].q,
(nbe ? "enabled" : "disabled"));
if (!nbe)
goto out;
idx++;
}
ret = true;
out:
kfree(msrs);
return ret; return ret;
} }
@ -2799,71 +2797,46 @@ static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
* the memory system completely. A command line option allows to force-enable * the memory system completely. A command line option allows to force-enable
* hardware ECC later in amd64_enable_ecc_error_reporting(). * hardware ECC later in amd64_enable_ecc_error_reporting().
*/ */
static const char *ecc_warning =
"WARNING: ECC is disabled by BIOS. Module will NOT be loaded.\n"
" Either Enable ECC in the BIOS, or set 'ecc_enable_override'.\n"
" Also, use of the override can cause unknown side effects.\n";
static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
{ {
u32 value; u32 value;
int err = 0, ret = 0; int err = 0;
u8 ecc_enabled = 0; u8 ecc_enabled = 0;
bool nb_mce_en = false;
err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value); err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
if (err) if (err)
debugf0("Reading K8_NBCTL failed\n"); debugf0("Reading K8_NBCTL failed\n");
ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
if (!ecc_enabled)
amd64_printk(KERN_WARNING, "This node reports that Memory ECC "
"is currently disabled, set F3x%x[22] (%s).\n",
K8_NBCFG, pci_name(pvt->misc_f3_ctl));
else
amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n");
ret = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id)); nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
if (!nb_mce_en)
amd64_printk(KERN_WARNING, "NB MCE bank disabled, set MSR "
"0x%08x[4] on node %d to enable.\n",
MSR_IA32_MCG_CTL, pvt->mc_node_id);
debugf0("K8_NBCFG=0x%x, DRAM ECC is %s\n", value, if (!ecc_enabled || !nb_mce_en) {
(value & K8_NBCFG_ECC_ENABLE ? "enabled" : "disabled"));
if (!ecc_enabled || !ret) {
if (!ecc_enabled) {
amd64_printk(KERN_WARNING, "This node reports that "
"Memory ECC is currently "
"disabled.\n");
amd64_printk(KERN_WARNING, "bit 0x%lx in register "
"F3x%x of the MISC_CONTROL device (%s) "
"should be enabled\n", K8_NBCFG_ECC_ENABLE,
K8_NBCFG, pci_name(pvt->misc_f3_ctl));
}
if (!ret) {
amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x "
"of node %d should be enabled\n",
K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL,
pvt->mc_node_id);
}
if (!ecc_enable_override) { if (!ecc_enable_override) {
amd64_printk(KERN_WARNING, "WARNING: ECC is NOT " amd64_printk(KERN_WARNING, "%s", ecc_warning);
"currently enabled by the BIOS. Module " return -ENODEV;
"will NOT be loaded.\n" }
" Either Enable ECC in the BIOS, " } else
"or use the 'ecc_enable_override' "
"parameter.\n"
" Might be a BIOS bug, if BIOS says "
"ECC is enabled\n"
" Use of the override can cause "
"unknown side effects.\n");
ret = -ENODEV;
} else
/*
* enable further driver loading if ECC enable is
* overridden.
*/
ret = 0;
} else {
amd64_printk(KERN_INFO,
"ECC is enabled by BIOS, Proceeding "
"with EDAC module initialization\n");
/* Signal good ECC status */
ret = 0;
/* CLEAR the override, since BIOS controlled it */ /* CLEAR the override, since BIOS controlled it */
ecc_enable_override = 0; ecc_enable_override = 0;
}
return ret; return 0;
} }
struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +

View File

@ -405,7 +405,7 @@ void decode_mce(struct mce *m)
regs.nbsh = (u32)(m->status >> 32); regs.nbsh = (u32)(m->status >> 32);
regs.nbeal = (u32) m->addr; regs.nbeal = (u32) m->addr;
regs.nbeah = (u32)(m->addr >> 32); regs.nbeah = (u32)(m->addr >> 32);
node = per_cpu(cpu_llc_id, m->extcpu); node = amd_get_nb_id(m->extcpu);
amd_decode_nb_mce(node, &regs, 1); amd_decode_nb_mce(node, &regs, 1);
break; break;