Merge branch 'for-4.8/libnvdimm' into libnvdimm-for-next

Dan Williams 2016-07-24 08:05:44 -07:00
commit 0606263f24
59 changed files with 1369 additions and 1009 deletions


@ -395,7 +395,7 @@ prototypes:
int (*release) (struct gendisk *, fmode_t);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*direct_access) (struct block_device *, sector_t, void __pmem **,
int (*direct_access) (struct block_device *, sector_t, void **,
unsigned long *);
int (*media_changed) (struct gendisk *);
void (*unlock_native_capacity) (struct gendisk *);


@ -256,28 +256,18 @@ If any of these error conditions are encountered, the arena is put into a read
only state using a flag in the info block.
5. In-kernel usage
==================
5. Usage
========
Any block driver that supports byte granularity IO to the storage may register
with the BTT. It will have to provide the rw_bytes interface in its
block_device_operations struct:
The BTT can be set up on any disk (namespace) exposed by the libnvdimm subsystem
(pmem, or blk mode). The easiest way to set up such a namespace is using the
'ndctl' utility [1]:
int (*rw_bytes)(struct gendisk *, void *, size_t, off_t, int rw);
For example, the ndctl command line to setup a btt with a 4k sector size is:
It may register with the BTT after it adds its own gendisk, using btt_init:
ndctl create-namespace -f -e namespace0.0 -m sector -l 4k
struct btt *btt_init(struct gendisk *disk, unsigned long long rawsize,
u32 lbasize, u8 uuid[], int maxlane);
See ndctl create-namespace --help for more options.
note that maxlane is the maximum amount of concurrency the driver wishes to
allow the BTT to use.
The BTT 'disk' appears as a stacked block device that grabs the underlying block
device in the O_EXCL mode.
When the driver wishes to remove the backing disk, it should similarly call
btt_fini using the same struct btt* handle that was provided to it by btt_init.
void btt_fini(struct btt *btt);
[1]: https://github.com/pmem/ndctl


@ -143,12 +143,12 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
*/
static long
axon_ram_direct_access(struct block_device *device, sector_t sector,
void __pmem **kaddr, pfn_t *pfn, long size)
void **kaddr, pfn_t *pfn, long size)
{
struct axon_ram_bank *bank = device->bd_disk->private_data;
loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
*kaddr = (void __pmem __force *) bank->io_addr + offset;
*kaddr = (void *) bank->io_addr + offset;
*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
return bank->size - offset;
}


@ -225,7 +225,6 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */


@ -26,13 +26,11 @@
* @n: length of the copy in bytes
*
* Copy data to persistent memory media via non-temporal stores so that
* a subsequent arch_wmb_pmem() can flush cpu and memory controller
* write buffers to guarantee durability.
* a subsequent pmem driver flush operation will drain posted write queues.
*/
static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
size_t n)
static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
{
int unwritten;
int rem;
/*
* We are copying between two kernel buffers, if
@ -40,59 +38,36 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
* fault) we would have already reported a general protection fault
* before the WARN+BUG.
*/
unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
(void __user *) src, n);
if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
__func__, dst, src, unwritten))
rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
__func__, dst, src, rem))
BUG();
}
static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
size_t n)
static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
{
if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
return memcpy_mcsafe(dst, (void __force *) src, n);
memcpy(dst, (void __force *) src, n);
return memcpy_mcsafe(dst, src, n);
memcpy(dst, src, n);
return 0;
}
/**
* arch_wmb_pmem - synchronize writes to persistent memory
*
* After a series of arch_memcpy_to_pmem() operations this drains data
* from cpu write buffers and any platform (memory controller) buffers
* to ensure that written data is durable on persistent memory media.
*/
static inline void arch_wmb_pmem(void)
{
/*
* wmb() to 'sfence' all previous writes such that they are
* architecturally visible to 'pcommit'. Note, that we've
* already arranged for pmem writes to avoid the cache via
* arch_memcpy_to_pmem().
*/
wmb();
pcommit_sfence();
}
/**
* arch_wb_cache_pmem - write back a cache range with CLWB
* @vaddr: virtual start address
* @size: number of bytes to write back
*
* Write back a cache range using the CLWB (cache line write back)
* instruction. This function requires explicit ordering with an
* arch_wmb_pmem() call.
* instruction.
*/
static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
unsigned long clflush_mask = x86_clflush_size - 1;
void *vaddr = (void __force *)addr;
void *vend = vaddr + size;
void *vend = addr + size;
void *p;
for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
for (p = (void *)((unsigned long)addr & ~clflush_mask);
p < vend; p += x86_clflush_size)
clwb(p);
}
@ -113,16 +88,14 @@ static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
* @i: iterator with source data
*
* Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
* This function requires explicit ordering with an arch_wmb_pmem() call.
*/
static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
void *vaddr = (void __force *)addr;
size_t len;
/* TODO: skip the write-back by always using non-temporal stores */
len = copy_from_iter_nocache(vaddr, bytes, i);
len = copy_from_iter_nocache(addr, bytes, i);
if (__iter_needs_pmem_wb(i))
arch_wb_cache_pmem(addr, bytes);
@ -136,28 +109,16 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
* @size: number of bytes to zero
*
* Write zeros into the memory range starting at 'addr' for 'size' bytes.
* This function requires explicit ordering with an arch_wmb_pmem() call.
*/
static inline void arch_clear_pmem(void __pmem *addr, size_t size)
static inline void arch_clear_pmem(void *addr, size_t size)
{
void *vaddr = (void __force *)addr;
memset(vaddr, 0, size);
memset(addr, 0, size);
arch_wb_cache_pmem(addr, size);
}
static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
static inline void arch_invalidate_pmem(void *addr, size_t size)
{
clflush_cache_range((void __force *) addr, size);
}
static inline bool __arch_has_wmb_pmem(void)
{
/*
* We require that wmb() be an 'sfence', that is only guaranteed on
* 64-bit builds
*/
return static_cpu_has(X86_FEATURE_PCOMMIT);
clflush_cache_range(addr, size);
}
#endif /* CONFIG_ARCH_HAS_PMEM_API */
#endif /* __ASM_X86_PMEM_H__ */
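With arch_wmb_pmem() and pcommit gone, durability is handled at the region
level: drivers pair the cache-bypassing copy with nvdimm_flush() on the
owning nd_region (see the nsio_rw_bytes() and pmem_make_request() changes
further down in this merge). A minimal sketch of the new write path, assuming
the caller already holds an nd_region pointer; pmem_write_example() is an
illustrative name, not a function added by this commit:

	static void pmem_write_example(struct nd_region *nd_region,
			void *pmem_dst, const void *src, size_t n)
	{
		/* non-temporal copy that bypasses the cpu cache */
		memcpy_to_pmem(pmem_dst, src, n);
		/* write the flush-hint registers to drain posted write queues */
		nvdimm_flush(nd_region);
	}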


@ -253,52 +253,6 @@ static inline void clwb(volatile void *__p)
: [pax] "a" (p));
}
/**
* pcommit_sfence() - persistent commit and fence
*
* The PCOMMIT instruction ensures that data that has been flushed from the
* processor's cache hierarchy with CLWB, CLFLUSHOPT or CLFLUSH is accepted to
* memory and is durable on the DIMM. The primary use case for this is
* persistent memory.
*
* This function shows how to properly use CLWB/CLFLUSHOPT/CLFLUSH and PCOMMIT
* with appropriate fencing.
*
* Example:
* void flush_and_commit_buffer(void *vaddr, unsigned int size)
* {
* unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1;
* void *vend = vaddr + size;
* void *p;
*
* for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
* p < vend; p += boot_cpu_data.x86_clflush_size)
* clwb(p);
*
* // SFENCE to order CLWB/CLFLUSHOPT/CLFLUSH cache flushes
* // MFENCE via mb() also works
* wmb();
*
* // PCOMMIT and the required SFENCE for ordering
* pcommit_sfence();
* }
*
* After this function completes the data pointed to by 'vaddr' has been
* accepted to memory and will be durable if the 'vaddr' points to persistent
* memory.
*
* PCOMMIT must always be ordered by an MFENCE or SFENCE, so to help simplify
* things we include both the PCOMMIT and the required SFENCE in the
* alternatives generated by pcommit_sfence().
*/
static inline void pcommit_sfence(void)
{
alternative(ASM_NOP7,
".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
"sfence",
X86_FEATURE_PCOMMIT);
}
#define nop() asm volatile ("nop")


@ -72,7 +72,6 @@
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
#define SECONDARY_EXEC_PCOMMIT 0x00200000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
#define PIN_BASED_EXT_INTR_MASK 0x00000001


@ -78,7 +78,6 @@
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
#define EXIT_REASON_PCOMMIT 65
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@ -127,8 +126,7 @@
{ EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
{ EXIT_REASON_PCOMMIT, "PCOMMIT" }
{ EXIT_REASON_XRSTORS, "XRSTORS" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4


@ -366,7 +366,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB);
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =


@ -144,14 +144,6 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
static inline bool guest_cpuid_has_pcommit(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_PCOMMIT));
}
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;


@ -2707,8 +2707,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_PCOMMIT;
SECONDARY_EXEC_XSAVES;
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
@ -3270,7 +3269,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_PML |
SECONDARY_EXEC_PCOMMIT |
SECONDARY_EXEC_TSC_SCALING;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
@ -4858,9 +4856,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
if (!enable_pml)
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
/* Currently, we allow L1 guest to directly run pcommit instruction. */
exec_control &= ~SECONDARY_EXEC_PCOMMIT;
return exec_control;
}
@ -4904,9 +4899,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
if (cpu_has_secondary_exec_ctrls())
if (cpu_has_secondary_exec_ctrls()) {
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
vmx_secondary_exec_control(vmx));
}
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
vmcs_write64(EOI_EXIT_BITMAP0, 0);
@ -7558,13 +7554,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
return 1;
}
static int handle_pcommit(struct kvm_vcpu *vcpu)
{
/* we never catch pcommit instruct for L1 guest. */
WARN_ON(1);
return 1;
}
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@ -7615,7 +7604,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
[EXIT_REASON_PCOMMIT] = handle_pcommit,
};
static const int kvm_vmx_max_exit_handlers =
@ -7924,8 +7912,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* the XSS exit bitmap in vmcs12.
*/
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
case EXIT_REASON_PCOMMIT:
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
default:
return true;
}
@ -9086,15 +9072,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (cpu_has_secondary_exec_ctrls())
vmcs_set_secondary_exec_control(secondary_exec_ctl);
if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
if (guest_cpuid_has_pcommit(vcpu))
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_PCOMMIT;
else
vmx->nested.nested_vmx_secondary_ctls_high &=
~SECONDARY_EXEC_PCOMMIT;
}
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@ -9707,8 +9684,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_PCOMMIT);
SECONDARY_EXEC_APIC_REGISTER_VIRT);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12->secondary_vm_exec_control;


@ -947,7 +947,7 @@ GrpTable: Grp15
4: XSAVE
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
GrpTable: Grp16


@ -447,32 +447,7 @@ config ACPI_REDUCED_HARDWARE_ONLY
If you are unsure what to do, do not enable this option.
config ACPI_NFIT
tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
depends on PHYS_ADDR_T_64BIT
depends on BLK_DEV
depends on ARCH_HAS_MMIO_FLUSH
select LIBNVDIMM
help
Infrastructure to probe ACPI 6 compliant platforms for
NVDIMMs (NFIT) and register a libnvdimm device tree. In
addition to storage devices this also enables libnvdimm to pass
ACPI._DSM messages for platform/dimm configuration.
To compile this driver as a module, choose M here:
the module will be called nfit.
config ACPI_NFIT_DEBUG
bool "NFIT DSM debug"
depends on ACPI_NFIT
depends on DYNAMIC_DEBUG
default n
help
Enabling this option causes the nfit driver to dump the
input and output buffers of _DSM operations on the ACPI0012
device and its children. This can be very verbose, so leave
it disabled unless you are debugging a hardware / firmware
issue.
source "drivers/acpi/nfit/Kconfig"
source "drivers/acpi/apei/Kconfig"


@ -70,7 +70,7 @@ obj-$(CONFIG_ACPI_PCI_SLOT) += pci_slot.o
obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
obj-$(CONFIG_ACPI) += container.o
obj-$(CONFIG_ACPI_THERMAL) += thermal.o
obj-$(CONFIG_ACPI_NFIT) += nfit.o
obj-$(CONFIG_ACPI_NFIT) += nfit/
obj-$(CONFIG_ACPI) += acpi_memhotplug.o
obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
obj-$(CONFIG_ACPI_BATTERY) += battery.o

drivers/acpi/nfit/Kconfig (new file, 26 lines)

@ -0,0 +1,26 @@
config ACPI_NFIT
tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
depends on PHYS_ADDR_T_64BIT
depends on BLK_DEV
depends on ARCH_HAS_MMIO_FLUSH
select LIBNVDIMM
help
Infrastructure to probe ACPI 6 compliant platforms for
NVDIMMs (NFIT) and register a libnvdimm device tree. In
addition to storage devices this also enables libnvdimm to pass
ACPI._DSM messages for platform/dimm configuration.
To compile this driver as a module, choose M here:
the module will be called nfit.
config ACPI_NFIT_DEBUG
bool "NFIT DSM debug"
depends on ACPI_NFIT
depends on DYNAMIC_DEBUG
default n
help
Enabling this option causes the nfit driver to dump the
input and output buffers of _DSM operations on the ACPI0012
device and its children. This can be very verbose, so leave
it disabled unless you are debugging a hardware / firmware
issue.


@ -0,0 +1,3 @@
obj-$(CONFIG_ACPI_NFIT) := nfit.o
nfit-y := core.o
nfit-$(CONFIG_X86_MCE) += mce.o

File diff suppressed because it is too large

drivers/acpi/nfit/mce.c (new file, 89 lines)

@ -0,0 +1,89 @@
/*
* NFIT - Machine Check Handler
*
* Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/notifier.h>
#include <linux/acpi.h>
#include <asm/mce.h>
#include "nfit.h"
static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *mce = (struct mce *)data;
struct acpi_nfit_desc *acpi_desc;
struct nfit_spa *nfit_spa;
/* We only care about memory errors */
if (!(mce->status & MCACOD))
return NOTIFY_DONE;
/*
* mce->addr contains the physical addr accessed that caused the
* machine check. We need to walk through the list of NFITs, and see
* if any of them matches that address, and only then start a scrub.
*/
mutex_lock(&acpi_desc_lock);
list_for_each_entry(acpi_desc, &acpi_descs, list) {
struct device *dev = acpi_desc->dev;
int found_match = 0;
mutex_lock(&acpi_desc->init_mutex);
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
struct acpi_nfit_system_address *spa = nfit_spa->spa;
if (nfit_spa_type(spa) == NFIT_SPA_PM)
continue;
/* find the spa that covers the mce addr */
if (spa->address > mce->addr)
continue;
if ((spa->address + spa->length - 1) < mce->addr)
continue;
found_match = 1;
dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
__func__, spa->range_index, spa->address,
spa->length);
/*
* We can break at the first match because we're going
* to rescan all the SPA ranges. There shouldn't be any
* aliasing anyway.
*/
break;
}
mutex_unlock(&acpi_desc->init_mutex);
/*
* We can ignore an -EBUSY here because if an ARS is already
* in progress, just let that be the last authoritative one
*/
if (found_match)
acpi_nfit_ars_rescan(acpi_desc);
}
mutex_unlock(&acpi_desc_lock);
return NOTIFY_DONE;
}
static struct notifier_block nfit_mce_dec = {
.notifier_call = nfit_handle_mce,
};
void nfit_mce_register(void)
{
mce_register_decode_chain(&nfit_mce_dec);
}
void nfit_mce_unregister(void)
{
mce_unregister_decode_chain(&nfit_mce_dec);
}


@ -16,6 +16,7 @@
#define __NFIT_H__
#include <linux/workqueue.h>
#include <linux/libnvdimm.h>
#include <linux/ndctl.h>
#include <linux/types.h>
#include <linux/uuid.h>
#include <linux/acpi.h>
@ -31,6 +32,9 @@
#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6"
#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e"
/* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
#define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
| ACPI_NFIT_MEM_NOT_ARMED)
@ -40,6 +44,7 @@ enum nfit_uuids {
NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL,
NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
NFIT_SPA_VOLATILE,
NFIT_SPA_PM,
NFIT_SPA_DCR,
@ -74,37 +79,37 @@ enum {
};
struct nfit_spa {
struct acpi_nfit_system_address *spa;
struct list_head list;
struct nd_region *nd_region;
unsigned int ars_done:1;
unsigned int ars_required:1;
u32 clear_err_unit;
u32 max_ars;
struct acpi_nfit_system_address spa[0];
};
struct nfit_dcr {
struct acpi_nfit_control_region *dcr;
struct list_head list;
struct acpi_nfit_control_region dcr[0];
};
struct nfit_bdw {
struct acpi_nfit_data_region *bdw;
struct list_head list;
struct acpi_nfit_data_region bdw[0];
};
struct nfit_idt {
struct acpi_nfit_interleave *idt;
struct list_head list;
struct acpi_nfit_interleave idt[0];
};
struct nfit_flush {
struct acpi_nfit_flush_address *flush;
struct list_head list;
struct acpi_nfit_flush_address flush[0];
};
struct nfit_memdev {
struct acpi_nfit_memory_map *memdev;
struct list_head list;
struct acpi_nfit_memory_map memdev[0];
};
/* assembled tables for a given dimm/memory-device */
@ -123,6 +128,7 @@ struct nfit_mem {
struct list_head list;
struct acpi_device *adev;
struct acpi_nfit_desc *acpi_desc;
struct resource *flush_wpq;
unsigned long dsm_mask;
int family;
};
@ -130,10 +136,7 @@ struct nfit_mem {
struct acpi_nfit_desc {
struct nvdimm_bus_descriptor nd_desc;
struct acpi_table_header acpi_header;
struct acpi_nfit_header *nfit;
struct mutex spa_map_mutex;
struct mutex init_mutex;
struct list_head spa_maps;
struct list_head memdevs;
struct list_head flushes;
struct list_head dimms;
@ -146,6 +149,9 @@ struct acpi_nfit_desc {
struct nd_cmd_ars_status *ars_status;
size_t ars_status_size;
struct work_struct work;
struct list_head list;
struct kernfs_node *scrub_count_state;
unsigned int scrub_count;
unsigned int cancel:1;
unsigned long dimm_cmd_force_en;
unsigned long bus_cmd_force_en;
@ -161,7 +167,7 @@ enum nd_blk_mmio_selector {
struct nd_blk_addr {
union {
void __iomem *base;
void __pmem *aperture;
void *aperture;
};
};
@ -180,28 +186,26 @@ struct nfit_blk {
u64 bdw_offset; /* post interleave offset */
u64 stat_offset;
u64 cmd_offset;
void __iomem *nvdimm_flush;
u32 dimm_flags;
};
enum spa_map_type {
SPA_MAP_CONTROL,
SPA_MAP_APERTURE,
};
extern struct list_head acpi_descs;
extern struct mutex acpi_desc_lock;
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
struct nfit_spa_mapping {
struct acpi_nfit_desc *acpi_desc;
struct acpi_nfit_system_address *spa;
struct list_head list;
struct kref kref;
enum spa_map_type type;
struct nd_blk_addr addr;
};
static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
#ifdef CONFIG_X86_MCE
void nfit_mce_register(void);
void nfit_mce_unregister(void);
#else
static inline void nfit_mce_register(void)
{
return container_of(kref, struct nfit_spa_mapping, kref);
}
static inline void nfit_mce_unregister(void)
{
}
#endif
int nfit_spa_type(struct acpi_nfit_system_address *spa);
static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
struct nfit_mem *nfit_mem)
@ -218,6 +222,6 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
}
const u8 *to_nfit_uuid(enum nfit_uuids id);
int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
#endif /* __NFIT_H__ */
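The nfit_* wrapper structs above now embed a copy of their ACPI table entry
as a trailing array rather than pointing into the firmware-provided NFIT
buffer, which fits the new acpi_nfit_init() signature that accepts a caller
supplied table. A sketch of how such a wrapper might be populated, assuming a
devm-managed allocation; the corresponding parsing code lives in the core.c
diff that was suppressed above, so treat the details here as illustrative:

	struct nfit_spa *nfit_spa;

	nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
			GFP_KERNEL);
	if (!nfit_spa)
		return false;
	INIT_LIST_HEAD(&nfit_spa->list);
	/* own a private copy that outlives the transient NFIT buffer */
	memcpy(nfit_spa->spa, spa, sizeof(*spa));
	list_add_tail(&nfit_spa->list, &acpi_desc->spas);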


@ -381,7 +381,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
#ifdef CONFIG_BLK_DEV_RAM_DAX
static long brd_direct_access(struct block_device *bdev, sector_t sector,
void __pmem **kaddr, pfn_t *pfn, long size)
void **kaddr, pfn_t *pfn, long size)
{
struct brd_device *brd = bdev->bd_disk->private_data;
struct page *page;
@ -391,7 +391,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
page = brd_insert_page(brd, sector);
if (!page)
return -ENOSPC;
*kaddr = (void __pmem *)page_address(page);
*kaddr = page_address(page);
*pfn = page_to_pfn_t(page);
return PAGE_SIZE;


@ -211,11 +211,9 @@ int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
}
dax_dev->dev = dev;
rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev);
if (rc) {
unregister_dax_dev(dev);
rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
if (rc)
return rc;
}
return 0;
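
This is the first of several conversions in this merge from devm_add_action()
plus a manual unwind to devm_add_action_or_reset(), which runs the release
action itself when the devres registration fails (the same pattern appears in
dax/pmem.c, nvdimm/blk.c and nvdimm/pmem.c below). A before/after sketch with
a hypothetical my_teardown() callback and ctx argument, neither of which is a
name from this commit:

	/* before: the caller had to invoke the cleanup by hand on failure */
	rc = devm_add_action(dev, my_teardown, ctx);
	if (rc) {
		my_teardown(ctx);
		return rc;
	}

	/* after: the helper calls my_teardown(ctx) itself if registration fails */
	rc = devm_add_action_or_reset(dev, my_teardown, ctx);
	if (rc)
		return rc;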


@ -102,21 +102,19 @@ static int dax_pmem_probe(struct device *dev)
if (rc)
return rc;
rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref);
if (rc) {
dax_pmem_percpu_exit(&dax_pmem->ref);
rc = devm_add_action_or_reset(dev, dax_pmem_percpu_exit,
&dax_pmem->ref);
if (rc)
return rc;
}
addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref);
if (rc) {
dax_pmem_percpu_kill(&dax_pmem->ref);
rc = devm_add_action_or_reset(dev, dax_pmem_percpu_kill,
&dax_pmem->ref);
if (rc)
return rc;
}
nd_region = to_nd_region(dev->parent);
dax_region = alloc_dax_region(dev, nd_region->id, &res,


@ -1,6 +1,7 @@
menuconfig LIBNVDIMM
tristate "NVDIMM (Non-Volatile Memory Device) Support"
depends on PHYS_ADDR_T_64BIT
depends on HAS_IOMEM
depends on BLK_DEV
help
Generic support for non-volatile memory devices including
@ -19,7 +20,6 @@ if LIBNVDIMM
config BLK_DEV_PMEM
tristate "PMEM: Persistent memory block device support"
default LIBNVDIMM
depends on HAS_IOMEM
select ND_BTT if BTT
select ND_PFN if NVDIMM_PFN
help


@ -267,10 +267,8 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return -ENOMEM;
if (devm_add_action(dev, nd_blk_release_queue, q)) {
blk_cleanup_queue(q);
if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
return -ENOMEM;
}
blk_queue_make_request(q, nd_blk_make_request);
blk_queue_max_hw_sectors(q, UINT_MAX);
@ -282,10 +280,6 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
disk = alloc_disk(0);
if (!disk)
return -ENOMEM;
if (devm_add_action(dev, nd_blk_release_disk, disk)) {
put_disk(disk);
return -ENOMEM;
}
disk->driverfs_dev = dev;
disk->first_minor = 0;
@ -296,6 +290,9 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
set_capacity(disk, 0);
add_disk(disk);
if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
return -ENOMEM;
if (nsblk_meta_size(nsblk)) {
int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));


@ -198,8 +198,7 @@ struct device *nd_btt_create(struct nd_region *nd_region)
{
struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);
if (dev)
__nd_device_register(dev);
__nd_device_register(dev);
return dev;
}


@ -31,6 +31,7 @@
int nvdimm_major;
static int nvdimm_bus_major;
static struct class *nd_class;
static DEFINE_IDA(nd_ida);
static int to_nd_device_type(struct device *dev)
{
@ -60,20 +61,13 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
to_nd_device_type(dev));
}
static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
{
struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
}
static struct module *to_bus_provider(struct device *dev)
{
/* pin bus providers while regions are enabled */
if (is_nd_pmem(dev) || is_nd_blk(dev)) {
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
return nvdimm_bus->module;
return nvdimm_bus->nd_desc->module;
}
return NULL;
}
@ -136,6 +130,21 @@ static int nvdimm_bus_remove(struct device *dev)
return rc;
}
static void nvdimm_bus_shutdown(struct device *dev)
{
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
struct nd_device_driver *nd_drv = NULL;
if (dev->driver)
nd_drv = to_nd_device_driver(dev->driver);
if (nd_drv && nd_drv->shutdown) {
nd_drv->shutdown(dev);
dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
dev->driver->name, dev_name(dev));
}
}
void nd_device_notify(struct device *dev, enum nvdimm_event event)
{
device_lock(dev);
@ -208,14 +217,187 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
}
EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);
static struct bus_type nvdimm_bus_type = {
.name = "nd",
.uevent = nvdimm_bus_uevent,
.match = nvdimm_bus_match,
.probe = nvdimm_bus_probe,
.remove = nvdimm_bus_remove,
.shutdown = nvdimm_bus_shutdown,
};
static void nvdimm_bus_release(struct device *dev)
{
struct nvdimm_bus *nvdimm_bus;
nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
ida_simple_remove(&nd_ida, nvdimm_bus->id);
kfree(nvdimm_bus);
}
static bool is_nvdimm_bus(struct device *dev)
{
return dev->release == nvdimm_bus_release;
}
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
{
struct device *dev;
for (dev = nd_dev; dev; dev = dev->parent)
if (is_nvdimm_bus(dev))
break;
dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
if (dev)
return to_nvdimm_bus(dev);
return NULL;
}
struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
{
struct nvdimm_bus *nvdimm_bus;
nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
WARN_ON(!is_nvdimm_bus(dev));
return nvdimm_bus;
}
EXPORT_SYMBOL_GPL(to_nvdimm_bus);
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nd_desc)
{
struct nvdimm_bus *nvdimm_bus;
int rc;
nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
if (!nvdimm_bus)
return NULL;
INIT_LIST_HEAD(&nvdimm_bus->list);
INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
INIT_LIST_HEAD(&nvdimm_bus->poison_list);
init_waitqueue_head(&nvdimm_bus->probe_wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
mutex_init(&nvdimm_bus->reconfig_mutex);
if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus);
return NULL;
}
nvdimm_bus->nd_desc = nd_desc;
nvdimm_bus->dev.parent = parent;
nvdimm_bus->dev.release = nvdimm_bus_release;
nvdimm_bus->dev.groups = nd_desc->attr_groups;
nvdimm_bus->dev.bus = &nvdimm_bus_type;
dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
rc = device_register(&nvdimm_bus->dev);
if (rc) {
dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
goto err;
}
return nvdimm_bus;
err:
put_device(&nvdimm_bus->dev);
return NULL;
}
EXPORT_SYMBOL_GPL(nvdimm_bus_register);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
{
if (!nvdimm_bus)
return;
device_unregister(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
static int child_unregister(struct device *dev, void *data)
{
/*
* the singular ndctl class device per bus needs to be
* "device_destroy"ed, so skip it here
*
* i.e. remove classless children
*/
if (dev->class)
/* pass */;
else
nd_device_unregister(dev, ND_SYNC);
return 0;
}
static void free_poison_list(struct list_head *poison_list)
{
struct nd_poison *pl, *next;
list_for_each_entry_safe(pl, next, poison_list, list) {
list_del(&pl->list);
kfree(pl);
}
list_del_init(poison_list);
}
static int nd_bus_remove(struct device *dev)
{
struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
mutex_lock(&nvdimm_bus_list_mutex);
list_del_init(&nvdimm_bus->list);
mutex_unlock(&nvdimm_bus_list_mutex);
nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
nvdimm_bus_lock(&nvdimm_bus->dev);
free_poison_list(&nvdimm_bus->poison_list);
nvdimm_bus_unlock(&nvdimm_bus->dev);
nvdimm_bus_destroy_ndctl(nvdimm_bus);
return 0;
}
static int nd_bus_probe(struct device *dev)
{
struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
int rc;
rc = nvdimm_bus_create_ndctl(nvdimm_bus);
if (rc)
return rc;
mutex_lock(&nvdimm_bus_list_mutex);
list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
mutex_unlock(&nvdimm_bus_list_mutex);
/* enable bus provider attributes to look up their local context */
dev_set_drvdata(dev, nvdimm_bus->nd_desc);
return 0;
}
static struct nd_device_driver nd_bus_driver = {
.probe = nd_bus_probe,
.remove = nd_bus_remove,
.drv = {
.name = "nd_bus",
.suppress_bind_attrs = true,
.bus = &nvdimm_bus_type,
.owner = THIS_MODULE,
.mod_name = KBUILD_MODNAME,
},
};
static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
{
struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
return true;
return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
}
static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
void nd_synchronize(void)
@ -395,12 +577,10 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
"ndctl%d", nvdimm_bus->id);
if (IS_ERR(dev)) {
if (IS_ERR(dev))
dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
nvdimm_bus->id, PTR_ERR(dev));
return PTR_ERR(dev);
}
return 0;
return PTR_ERR_OR_ZERO(dev);
}
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
@ -850,8 +1030,14 @@ int __init nvdimm_bus_init(void)
goto err_class;
}
rc = driver_register(&nd_bus_driver.drv);
if (rc)
goto err_nd_bus;
return 0;
err_nd_bus:
class_destroy(nd_class);
err_class:
unregister_chrdev(nvdimm_major, "dimmctl");
err_dimm_chrdev:
@ -864,8 +1050,10 @@ int __init nvdimm_bus_init(void)
void nvdimm_bus_exit(void)
{
driver_unregister(&nd_bus_driver.drv);
class_destroy(nd_class);
unregister_chrdev(nvdimm_bus_major, "ndctl");
unregister_chrdev(nvdimm_major, "dimmctl");
bus_unregister(&nvdimm_bus_type);
ida_destroy(&nd_ida);
}


@ -240,7 +240,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
return memcpy_from_pmem(buf, nsio->addr + offset, size);
} else {
memcpy_to_pmem(nsio->addr + offset, buf, size);
wmb_pmem();
nvdimm_flush(to_nd_region(ndns->dev.parent));
}
return 0;
@ -266,9 +266,8 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio)
nsio->addr = devm_memremap(dev, res->start, resource_size(res),
ARCH_MEMREMAP_PMEM);
if (IS_ERR(nsio->addr))
return PTR_ERR(nsio->addr);
return 0;
return PTR_ERR_OR_ZERO(nsio->addr);
}
EXPORT_SYMBOL_GPL(devm_nsio_enable);


@ -20,12 +20,12 @@
#include <linux/ndctl.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/io.h>
#include "nd-core.h"
#include "nd.h"
LIST_HEAD(nvdimm_bus_list);
DEFINE_MUTEX(nvdimm_bus_list_mutex);
static DEFINE_IDA(nd_ida);
void nvdimm_bus_lock(struct device *dev)
{
@ -57,6 +57,127 @@ bool is_nvdimm_bus_locked(struct device *dev)
}
EXPORT_SYMBOL(is_nvdimm_bus_locked);
struct nvdimm_map {
struct nvdimm_bus *nvdimm_bus;
struct list_head list;
resource_size_t offset;
unsigned long flags;
size_t size;
union {
void *mem;
void __iomem *iomem;
};
struct kref kref;
};
static struct nvdimm_map *find_nvdimm_map(struct device *dev,
resource_size_t offset)
{
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
struct nvdimm_map *nvdimm_map;
list_for_each_entry(nvdimm_map, &nvdimm_bus->mapping_list, list)
if (nvdimm_map->offset == offset)
return nvdimm_map;
return NULL;
}
static struct nvdimm_map *alloc_nvdimm_map(struct device *dev,
resource_size_t offset, size_t size, unsigned long flags)
{
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
struct nvdimm_map *nvdimm_map;
nvdimm_map = kzalloc(sizeof(*nvdimm_map), GFP_KERNEL);
if (!nvdimm_map)
return NULL;
INIT_LIST_HEAD(&nvdimm_map->list);
nvdimm_map->nvdimm_bus = nvdimm_bus;
nvdimm_map->offset = offset;
nvdimm_map->flags = flags;
nvdimm_map->size = size;
kref_init(&nvdimm_map->kref);
if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev)))
goto err_request_region;
if (flags)
nvdimm_map->mem = memremap(offset, size, flags);
else
nvdimm_map->iomem = ioremap(offset, size);
if (!nvdimm_map->mem)
goto err_map;
dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev), "%s: bus unlocked!",
__func__);
list_add(&nvdimm_map->list, &nvdimm_bus->mapping_list);
return nvdimm_map;
err_map:
release_mem_region(offset, size);
err_request_region:
kfree(nvdimm_map);
return NULL;
}
static void nvdimm_map_release(struct kref *kref)
{
struct nvdimm_bus *nvdimm_bus;
struct nvdimm_map *nvdimm_map;
nvdimm_map = container_of(kref, struct nvdimm_map, kref);
nvdimm_bus = nvdimm_map->nvdimm_bus;
dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
list_del(&nvdimm_map->list);
if (nvdimm_map->flags)
memunmap(nvdimm_map->mem);
else
iounmap(nvdimm_map->iomem);
release_mem_region(nvdimm_map->offset, nvdimm_map->size);
kfree(nvdimm_map);
}
static void nvdimm_map_put(void *data)
{
struct nvdimm_map *nvdimm_map = data;
struct nvdimm_bus *nvdimm_bus = nvdimm_map->nvdimm_bus;
nvdimm_bus_lock(&nvdimm_bus->dev);
kref_put(&nvdimm_map->kref, nvdimm_map_release);
nvdimm_bus_unlock(&nvdimm_bus->dev);
}
/**
* devm_nvdimm_memremap - map a resource that is shared across regions
* @dev: device that will own a reference to the shared mapping
* @offset: physical base address of the mapping
* @size: mapping size
* @flags: memremap flags, or, if zero, perform an ioremap instead
*/
void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
size_t size, unsigned long flags)
{
struct nvdimm_map *nvdimm_map;
nvdimm_bus_lock(dev);
nvdimm_map = find_nvdimm_map(dev, offset);
if (!nvdimm_map)
nvdimm_map = alloc_nvdimm_map(dev, offset, size, flags);
else
kref_get(&nvdimm_map->kref);
nvdimm_bus_unlock(dev);
if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map))
return NULL;
return nvdimm_map->mem;
}
EXPORT_SYMBOL_GPL(devm_nvdimm_memremap);
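A short usage sketch for the new helper: mappings are keyed by physical
offset, reference counted per bus, and released automatically through the
devres action above. The resource and flags below are placeholders; passing
zero flags yields an ioremap() instead, which appears to be the case that
devm_nvdimm_ioremap() (used for the flush-hint pages later in this merge)
wraps:

	void *aperture;

	/* share one mapping of an aperture or control region across regions */
	aperture = devm_nvdimm_memremap(dev, res->start, resource_size(res),
			ARCH_MEMREMAP_PMEM);
	if (!aperture)
		return -ENOMEM;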
u64 nd_fletcher64(void *addr, size_t len, bool le)
{
u32 *buf = addr;
@ -73,25 +194,6 @@ u64 nd_fletcher64(void *addr, size_t len, bool le)
}
EXPORT_SYMBOL_GPL(nd_fletcher64);
static void nvdimm_bus_release(struct device *dev)
{
struct nvdimm_bus *nvdimm_bus;
nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
ida_simple_remove(&nd_ida, nvdimm_bus->id);
kfree(nvdimm_bus);
}
struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
{
struct nvdimm_bus *nvdimm_bus;
nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release);
return nvdimm_bus;
}
EXPORT_SYMBOL_GPL(to_nvdimm_bus);
struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
{
/* struct nvdimm_bus definition is private to libnvdimm */
@ -99,18 +201,12 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus)
}
EXPORT_SYMBOL_GPL(to_nd_desc);
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
{
struct device *dev;
for (dev = nd_dev; dev; dev = dev->parent)
if (dev->release == nvdimm_bus_release)
break;
dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
if (dev)
return to_nvdimm_bus(dev);
return NULL;
/* struct nvdimm_bus definition is private to libnvdimm */
return &nvdimm_bus->dev;
}
EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);
static bool is_uuid_sep(char sep)
{
@ -325,51 +421,6 @@ struct attribute_group nvdimm_bus_attribute_group = {
};
EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nd_desc, struct module *module)
{
struct nvdimm_bus *nvdimm_bus;
int rc;
nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
if (!nvdimm_bus)
return NULL;
INIT_LIST_HEAD(&nvdimm_bus->list);
INIT_LIST_HEAD(&nvdimm_bus->poison_list);
init_waitqueue_head(&nvdimm_bus->probe_wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
mutex_init(&nvdimm_bus->reconfig_mutex);
if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus);
return NULL;
}
nvdimm_bus->nd_desc = nd_desc;
nvdimm_bus->module = module;
nvdimm_bus->dev.parent = parent;
nvdimm_bus->dev.release = nvdimm_bus_release;
nvdimm_bus->dev.groups = nd_desc->attr_groups;
dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
rc = device_register(&nvdimm_bus->dev);
if (rc) {
dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
goto err;
}
rc = nvdimm_bus_create_ndctl(nvdimm_bus);
if (rc)
goto err;
mutex_lock(&nvdimm_bus_list_mutex);
list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
mutex_unlock(&nvdimm_bus_list_mutex);
return nvdimm_bus;
err:
put_device(&nvdimm_bus->dev);
return NULL;
}
EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
static void set_badblock(struct badblocks *bb, sector_t s, int num)
{
dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n",
@ -545,54 +596,6 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
static void free_poison_list(struct list_head *poison_list)
{
struct nd_poison *pl, *next;
list_for_each_entry_safe(pl, next, poison_list, list) {
list_del(&pl->list);
kfree(pl);
}
list_del_init(poison_list);
}
static int child_unregister(struct device *dev, void *data)
{
/*
* the singular ndctl class device per bus needs to be
* "device_destroy"ed, so skip it here
*
* i.e. remove classless children
*/
if (dev->class)
/* pass */;
else
nd_device_unregister(dev, ND_SYNC);
return 0;
}
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
{
if (!nvdimm_bus)
return;
mutex_lock(&nvdimm_bus_list_mutex);
list_del_init(&nvdimm_bus->list);
mutex_unlock(&nvdimm_bus_list_mutex);
nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
nvdimm_bus_lock(&nvdimm_bus->dev);
free_poison_list(&nvdimm_bus->poison_list);
nvdimm_bus_unlock(&nvdimm_bus->dev);
nvdimm_bus_destroy_ndctl(nvdimm_bus);
device_unregister(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
#ifdef CONFIG_BLK_DEV_INTEGRITY
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
@ -601,7 +604,8 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
if (meta_size == 0)
return 0;
bi.profile = NULL;
memset(&bi, 0, sizeof(bi));
bi.tuple_size = meta_size;
bi.tag_size = meta_size;
@ -650,7 +654,6 @@ static __exit void libnvdimm_exit(void)
nvdimm_bus_exit();
nd_region_devs_exit();
nvdimm_devs_exit();
ida_destroy(&nd_ida);
}
MODULE_LICENSE("GPL v2");


@ -346,7 +346,8 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
const struct attribute_group **groups, unsigned long flags,
unsigned long cmd_mask)
unsigned long cmd_mask, int num_flush,
struct resource *flush_wpq)
{
struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
struct device *dev;
@ -362,6 +363,8 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
nvdimm->provider_data = provider_data;
nvdimm->flags = flags;
nvdimm->cmd_mask = cmd_mask;
nvdimm->num_flush = num_flush;
nvdimm->flush_wpq = flush_wpq;
atomic_set(&nvdimm->busy, 0);
dev = &nvdimm->dev;
dev_set_name(dev, "nmem%d", nvdimm->id);


@ -47,6 +47,7 @@ static int e820_pmem_probe(struct platform_device *pdev)
nd_desc.attr_groups = e820_pmem_attribute_groups;
nd_desc.provider_name = "e820";
nd_desc.module = THIS_MODULE;
nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
if (!nvdimm_bus)
goto err;


@ -26,11 +26,11 @@ extern int nvdimm_major;
struct nvdimm_bus {
struct nvdimm_bus_descriptor *nd_desc;
wait_queue_head_t probe_wait;
struct module *module;
struct list_head list;
struct device dev;
int id, probe_active;
struct list_head poison_list;
struct list_head mapping_list;
struct mutex reconfig_mutex;
};
@ -40,7 +40,8 @@ struct nvdimm {
unsigned long cmd_mask;
struct device dev;
atomic_t busy;
int id;
int id, num_flush;
struct resource *flush_wpq;
};
bool is_nvdimm(struct device *dev);


@ -49,9 +49,11 @@ struct nvdimm_drvdata {
struct kref kref;
};
struct nd_region_namespaces {
int count;
int active;
struct nd_region_data {
int ns_count;
int ns_active;
unsigned int flush_mask;
void __iomem *flush_wpq[0][0];
};
static inline struct nd_namespace_index *to_namespace_index(
@ -119,7 +121,6 @@ struct nd_region {
struct nd_blk_region {
int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
void *iobuf, u64 len, int rw);
void *blk_provider_data;
@ -325,6 +326,7 @@ static inline void devm_nsio_disable(struct device *dev,
}
#endif
int nd_blk_region_init(struct nd_region *nd_region);
int nd_region_activate(struct nd_region *nd_region);
void __nd_iostat_start(struct bio *bio, unsigned long *start);
static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
{


@ -29,27 +29,28 @@
#include <linux/slab.h>
#include <linux/pmem.h>
#include <linux/nd.h>
#include "pmem.h"
#include "pfn.h"
#include "nd.h"
struct pmem_device {
/* One contiguous memory region per device */
phys_addr_t phys_addr;
/* when non-zero this device is hosting a 'pfn' instance */
phys_addr_t data_offset;
u64 pfn_flags;
void __pmem *virt_addr;
/* immutable base size of the namespace */
size_t size;
/* trim size when namespace capacity has been section aligned */
u32 pfn_pad;
struct badblocks bb;
};
static struct device *to_dev(struct pmem_device *pmem)
{
/*
* nvdimm bus services need a 'dev' parameter, and we record the device
* at init in bb.dev.
*/
return pmem->bb.dev;
}
static struct nd_region *to_region(struct pmem_device *pmem)
{
return to_nd_region(to_dev(pmem)->parent);
}
static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
unsigned int len)
{
struct device *dev = pmem->bb.dev;
struct device *dev = to_dev(pmem);
sector_t sector;
long cleared;
@ -57,7 +58,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
if (cleared > 0 && cleared / 512) {
dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
dev_dbg(dev, "%s: %#llx clear %ld sector%s\n",
__func__, (unsigned long long) sector,
cleared / 512, cleared / 512 > 1 ? "s" : "");
badblocks_clear(&pmem->bb, sector, cleared / 512);
@ -73,7 +74,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
bool bad_pmem = false;
void *mem = kmap_atomic(page);
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
void *pmem_addr = pmem->virt_addr + pmem_off;
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
bad_pmem = true;
@ -112,6 +113,11 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
return rc;
}
/* account for REQ_FLUSH rename, replace with REQ_PREFLUSH after v4.8-rc1 */
#ifndef REQ_FLUSH
#define REQ_FLUSH REQ_PREFLUSH
#endif
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
int rc = 0;
@ -120,6 +126,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
struct bio_vec bvec;
struct bvec_iter iter;
struct pmem_device *pmem = q->queuedata;
struct nd_region *nd_region = to_region(pmem);
if (bio->bi_rw & REQ_FLUSH)
nvdimm_flush(nd_region);
do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) {
@ -134,8 +144,8 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
if (do_acct)
nd_iostat_end(bio, start);
if (bio_data_dir(bio))
wmb_pmem();
if (bio->bi_rw & REQ_FUA)
nvdimm_flush(nd_region);
bio_endio(bio);
return BLK_QC_T_NONE;
@ -148,8 +158,6 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
int rc;
rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
if (rw & WRITE)
wmb_pmem();
/*
* The ->rw_page interface is subtle and tricky. The core
@ -163,8 +171,9 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
return rc;
}
static long pmem_direct_access(struct block_device *bdev, sector_t sector,
void __pmem **kaddr, pfn_t *pfn, long size)
/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
__weak long pmem_direct_access(struct block_device *bdev, sector_t sector,
void **kaddr, pfn_t *pfn, long size)
{
struct pmem_device *pmem = bdev->bd_queue->queuedata;
resource_size_t offset = sector * 512 + pmem->data_offset;
@ -195,7 +204,7 @@ static void pmem_release_queue(void *q)
blk_cleanup_queue(q);
}
void pmem_release_disk(void *disk)
static void pmem_release_disk(void *disk)
{
del_gendisk(disk);
put_disk(disk);
@ -205,6 +214,7 @@ static int pmem_attach_disk(struct device *dev,
struct nd_namespace_common *ndns)
{
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
struct vmem_altmap __altmap, *altmap = NULL;
struct resource *res = &nsio->res;
struct nd_pfn *nd_pfn = NULL;
@ -234,7 +244,7 @@ static int pmem_attach_disk(struct device *dev,
dev_set_drvdata(dev, pmem);
pmem->phys_addr = res->start;
pmem->size = resource_size(res);
if (!arch_has_wmb_pmem())
if (nvdimm_has_flush(nd_region) < 0)
dev_warn(dev, "unable to guarantee persistence of writes\n");
if (!devm_request_mem_region(dev, res->start, resource_size(res),
@ -269,15 +279,14 @@ static int pmem_attach_disk(struct device *dev,
* At release time the queue must be dead before
* devm_memremap_pages is unwound
*/
if (devm_add_action(dev, pmem_release_queue, q)) {
blk_cleanup_queue(q);
if (devm_add_action_or_reset(dev, pmem_release_queue, q))
return -ENOMEM;
}
if (IS_ERR(addr))
return PTR_ERR(addr);
pmem->virt_addr = (void __pmem *) addr;
pmem->virt_addr = addr;
blk_queue_write_cache(q, true, true);
blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_max_hw_sectors(q, UINT_MAX);
@ -288,10 +297,6 @@ static int pmem_attach_disk(struct device *dev,
disk = alloc_disk_node(0, nid);
if (!disk)
return -ENOMEM;
if (devm_add_action(dev, pmem_release_disk, disk)) {
put_disk(disk);
return -ENOMEM;
}
disk->fops = &pmem_fops;
disk->queue = q;
@ -302,9 +307,13 @@ static int pmem_attach_disk(struct device *dev,
/ 512);
if (devm_init_badblocks(dev, &pmem->bb))
return -ENOMEM;
nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res);
nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
disk->bb = &pmem->bb;
add_disk(disk);
if (devm_add_action_or_reset(dev, pmem_release_disk, disk))
return -ENOMEM;
revalidate_disk(disk);
return 0;
@ -340,13 +349,20 @@ static int nd_pmem_remove(struct device *dev)
{
if (is_nd_btt(dev))
nvdimm_namespace_detach_btt(to_nd_btt(dev));
nvdimm_flush(to_nd_region(dev->parent));
return 0;
}
static void nd_pmem_shutdown(struct device *dev)
{
nvdimm_flush(to_nd_region(dev->parent));
}
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
struct pmem_device *pmem = dev_get_drvdata(dev);
struct nd_region *nd_region = to_region(pmem);
resource_size_t offset = 0, end_trunc = 0;
struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio;
@ -382,6 +398,7 @@ static struct nd_device_driver nd_pmem_driver = {
.probe = nd_pmem_probe,
.remove = nd_pmem_remove,
.notify = nd_pmem_notify,
.shutdown = nd_pmem_shutdown,
.drv = {
.name = "nd_pmem",
},

drivers/nvdimm/pmem.h (new file, 24 lines)

@ -0,0 +1,24 @@
#ifndef __NVDIMM_PMEM_H__
#define __NVDIMM_PMEM_H__
#include <linux/badblocks.h>
#include <linux/types.h>
#include <linux/pfn_t.h>
#include <linux/fs.h>
long pmem_direct_access(struct block_device *bdev, sector_t sector,
void **kaddr, pfn_t *pfn, long size);
/* this definition is in its own header for tools/testing/nvdimm to consume */
struct pmem_device {
/* One contiguous memory region per device */
phys_addr_t phys_addr;
/* when non-zero this device is hosting a 'pfn' instance */
phys_addr_t data_offset;
u64 pfn_flags;
void *virt_addr;
/* immutable base size of the namespace */
size_t size;
/* trim size when namespace capacity has been section aligned */
u32 pfn_pad;
struct badblocks bb;
};
#endif /* __NVDIMM_PMEM_H__ */


@ -20,7 +20,7 @@ static int nd_region_probe(struct device *dev)
{
int err, rc;
static unsigned long once;
struct nd_region_namespaces *num_ns;
struct nd_region_data *ndrd;
struct nd_region *nd_region = to_nd_region(dev);
if (nd_region->num_lanes > num_online_cpus()
@ -33,21 +33,21 @@ static int nd_region_probe(struct device *dev)
nd_region->num_lanes);
}
rc = nd_region_activate(nd_region);
if (rc)
return rc;
rc = nd_blk_region_init(nd_region);
if (rc)
return rc;
rc = nd_region_register_namespaces(nd_region, &err);
num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
if (!num_ns)
return -ENOMEM;
if (rc < 0)
return rc;
num_ns->active = rc;
num_ns->count = rc + err;
dev_set_drvdata(dev, num_ns);
ndrd = dev_get_drvdata(dev);
ndrd->ns_active = rc;
ndrd->ns_count = rc + err;
if (rc && err && rc == err)
return -ENODEV;
@ -82,6 +82,8 @@ static int nd_region_remove(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev);
device_for_each_child(dev, NULL, child_unregister);
/* flush attribute readers and disable */
nvdimm_bus_lock(dev);
nd_region->ns_seed = NULL;
@ -91,7 +93,6 @@ static int nd_region_remove(struct device *dev)
dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev);
device_for_each_child(dev, NULL, child_unregister);
return 0;
}


@ -14,13 +14,97 @@
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/pmem.h>
#include <linux/sort.h>
#include <linux/io.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"
/*
* For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
* irrelevant.
*/
#include <linux/io-64-nonatomic-hi-lo.h>
static DEFINE_IDA(region_ida);
static DEFINE_PER_CPU(int, flush_idx);
static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
struct nd_region_data *ndrd)
{
int i, j;
dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
for (i = 0; i < nvdimm->num_flush; i++) {
struct resource *res = &nvdimm->flush_wpq[i];
unsigned long pfn = PHYS_PFN(res->start);
void __iomem *flush_page;
/* check if flush hints share a page */
for (j = 0; j < i; j++) {
struct resource *res_j = &nvdimm->flush_wpq[j];
unsigned long pfn_j = PHYS_PFN(res_j->start);
if (pfn == pfn_j)
break;
}
if (j < i)
flush_page = (void __iomem *) ((unsigned long)
ndrd->flush_wpq[dimm][j] & PAGE_MASK);
else
flush_page = devm_nvdimm_ioremap(dev,
PFN_PHYS(pfn), PAGE_SIZE);
if (!flush_page)
return -ENXIO;
ndrd->flush_wpq[dimm][i] = flush_page
+ (res->start & ~PAGE_MASK);
}
return 0;
}
int nd_region_activate(struct nd_region *nd_region)
{
int i, num_flush = 0;
struct nd_region_data *ndrd;
struct device *dev = &nd_region->dev;
size_t flush_data_size = sizeof(void *);
nvdimm_bus_lock(&nd_region->dev);
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
/* at least one null hint slot per-dimm for the "no-hint" case */
flush_data_size += sizeof(void *);
num_flush = min_not_zero(num_flush, nvdimm->num_flush);
if (!nvdimm->num_flush)
continue;
flush_data_size += nvdimm->num_flush * sizeof(void *);
}
nvdimm_bus_unlock(&nd_region->dev);
ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
if (!ndrd)
return -ENOMEM;
dev_set_drvdata(dev, ndrd);
ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);
if (rc)
return rc;
}
return 0;
}
static void nd_region_release(struct device *dev)
{
@ -242,12 +326,12 @@ static DEVICE_ATTR_RO(available_size);
static ssize_t init_namespaces_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
struct nd_region_data *ndrd = dev_get_drvdata(dev);
ssize_t rc;
nvdimm_bus_lock(dev);
if (num_ns)
rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
if (ndrd)
rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
else
rc = -ENXIO;
nvdimm_bus_unlock(dev);
@ -433,8 +517,6 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
if (is_nd_pmem(dev))
return;
to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
}
if (dev->parent && is_nd_blk(dev->parent) && probe) {
nd_region = to_nd_region(dev->parent);
@ -698,7 +780,6 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
if (ndbr) {
nd_region = &ndbr->nd_region;
ndbr->enable = ndbr_desc->enable;
ndbr->disable = ndbr_desc->disable;
ndbr->do_io = ndbr_desc->do_io;
}
region_buf = ndbr;
@ -794,6 +875,67 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
/**
* nvdimm_flush - flush any posted write queues between the cpu and pmem media
* @nd_region: blk or interleaved pmem region
*/
void nvdimm_flush(struct nd_region *nd_region)
{
struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
int i, idx;
/*
* Try to encourage some diversity in flush hint addresses
* across cpus assuming a limited number of flush hints.
*/
idx = this_cpu_read(flush_idx);
idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
/*
* The first wmb() is needed to 'sfence' all previous writes
* such that they are architecturally visible for the platform
* buffer flush. Note that we've already arranged for pmem
* writes to avoid the cache via arch_memcpy_to_pmem(). The
* final wmb() ensures ordering for the NVDIMM flush write.
*/
wmb();
for (i = 0; i < nd_region->ndr_mappings; i++)
if (ndrd->flush_wpq[i][0])
writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
wmb();
}
EXPORT_SYMBOL_GPL(nvdimm_flush);
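As a usage sketch (not lifted from an in-tree driver), a byte-addressable driver would pair the cache-bypassing copy helpers with this region-level flush; pmem_write_range and its parameters are illustrative names.

#include <linux/libnvdimm.h>
#include <linux/pmem.h>

/*
 * Illustrative only: copy into a pmem mapping, then drain any writes
 * still posted in memory-controller buffers.  On regions without flush
 * hints the writeq() is skipped and only the ordering wmb()s run.
 */
static void pmem_write_range(struct nd_region *nd_region,
		void *pmem_dst, const void *buf, size_t len)
{
	memcpy_to_pmem(pmem_dst, buf, len);	/* non-temporal when the arch supports it */
	nvdimm_flush(nd_region);		/* replaces the old wmb_pmem() */
}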
/**
* nvdimm_has_flush - determine write flushing requirements
* @nd_region: blk or interleaved pmem region
*
* Returns 1 if writes require flushing
* Returns 0 if writes do not require flushing
* Returns -ENXIO if flushing capability cannot be determined
*/
int nvdimm_has_flush(struct nd_region *nd_region)
{
struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
int i;
/* no nvdimm == flushing capability unknown */
if (nd_region->ndr_mappings == 0)
return -ENXIO;
for (i = 0; i < nd_region->ndr_mappings; i++)
/* flush hints present, flushing required */
if (ndrd->flush_wpq[i][0])
return 1;
/*
* The platform defines dimm devices without hints; assume a
* platform persistence mechanism like ADR (Asynchronous DRAM Refresh).
*/
return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_has_flush);
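A small decision sketch for the tri-state result; region_needs_flush is a hypothetical helper, not part of the libnvdimm API.

#include <linux/types.h>
#include <linux/libnvdimm.h>

/* Illustrative only: decide at attach time whether nvdimm_flush() is needed. */
static bool region_needs_flush(struct nd_region *nd_region)
{
	int rc = nvdimm_has_flush(nd_region);

	if (rc < 0)		/* -ENXIO: capability unknown, no mappings */
		return true;	/* flush conservatively */
	return rc > 0;		/* 1: hints present; 0: rely on ADR */
}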
void __exit nd_region_devs_exit(void)
{
ida_destroy(&region_ida);

View File

@ -31,7 +31,7 @@ static void dcssblk_release(struct gendisk *disk, fmode_t mode);
static blk_qc_t dcssblk_make_request(struct request_queue *q,
struct bio *bio);
static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
void __pmem **kaddr, pfn_t *pfn, long size);
void **kaddr, pfn_t *pfn, long size);
static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
@ -884,7 +884,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio)
static long
dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
void __pmem **kaddr, pfn_t *pfn, long size)
void **kaddr, pfn_t *pfn, long size)
{
struct dcssblk_dev_info *dev_info;
unsigned long offset, dev_sz;
@ -894,7 +894,7 @@ dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
return -ENODEV;
dev_sz = dev_info->end - dev_info->start;
offset = secnum * 512;
*kaddr = (void __pmem *) (dev_info->start + offset);
*kaddr = (void *) dev_info->start + offset;
*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV);
return dev_sz - offset;

View File

@ -75,13 +75,13 @@ static long dax_map_atomic(struct block_device *bdev, struct blk_dax_ctl *dax)
struct request_queue *q = bdev->bd_queue;
long rc = -EIO;
dax->addr = (void __pmem *) ERR_PTR(-EIO);
dax->addr = ERR_PTR(-EIO);
if (blk_queue_enter(q, true) != 0)
return rc;
rc = bdev_direct_access(bdev, dax);
if (rc < 0) {
dax->addr = (void __pmem *) ERR_PTR(rc);
dax->addr = ERR_PTR(rc);
blk_queue_exit(q);
return rc;
}
@ -147,12 +147,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
struct buffer_head *bh)
{
loff_t pos = start, max = start, bh_max = start;
bool hole = false, need_wmb = false;
bool hole = false;
struct block_device *bdev = NULL;
int rw = iov_iter_rw(iter), rc;
long map_len = 0;
struct blk_dax_ctl dax = {
.addr = (void __pmem *) ERR_PTR(-EIO),
.addr = ERR_PTR(-EIO),
};
unsigned blkbits = inode->i_blkbits;
sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1)
@ -218,7 +218,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
if (iov_iter_rw(iter) == WRITE) {
len = copy_from_iter_pmem(dax.addr, max - pos, iter);
need_wmb = true;
} else if (!hole)
len = copy_to_iter((void __force *) dax.addr, max - pos,
iter);
@ -235,8 +234,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
dax.addr += len;
}
if (need_wmb)
wmb_pmem();
dax_unmap_atomic(bdev, &dax);
return (pos == start) ? rc : pos - start;
@ -788,7 +785,6 @@ int dax_writeback_mapping_range(struct address_space *mapping,
return ret;
}
}
wmb_pmem();
return 0;
}
EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
@ -1232,7 +1228,6 @@ int __dax_zero_page_range(struct block_device *bdev, sector_t sector,
if (dax_map_atomic(bdev, &dax) < 0)
return PTR_ERR(dax.addr);
clear_pmem(dax.addr + offset, length);
wmb_pmem();
dax_unmap_atomic(bdev, &dax);
}
return 0;

View File

@ -1659,7 +1659,7 @@ static inline bool integrity_req_gap_front_merge(struct request *req,
*/
struct blk_dax_ctl {
sector_t sector;
void __pmem *addr;
void *addr;
long size;
pfn_t pfn;
};
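For reference, a hedged sketch of how a caller fills blk_dax_ctl and asks the driver for a mapping via bdev_direct_access(), as fs/dax.c does above; example_map_sector is an illustrative name.

#include <linux/blkdev.h>
#include <linux/pfn_t.h>

/* Illustrative only: request a direct mapping for 'size' bytes at 'sector'. */
static long example_map_sector(struct block_device *bdev, sector_t sector,
		long size, void **kaddr, pfn_t *pfn)
{
	struct blk_dax_ctl dax = {
		.sector = sector,
		.size = size,
	};
	long avail = bdev_direct_access(bdev, &dax);

	if (avail < 0)
		return avail;	/* e.g. -EIO over known-bad blocks */
	*kaddr = dax.addr;	/* plain void * now that __pmem is gone */
	*pfn = dax.pfn;
	return avail;		/* contiguous bytes valid at *kaddr */
}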
@ -1670,8 +1670,8 @@ struct block_device_operations {
int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
long (*direct_access)(struct block_device *, sector_t, void __pmem **,
pfn_t *, long);
long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
long);
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
/* ->media_changed() is DEPRECATED, use ->check_events() instead */

View File

@ -17,7 +17,6 @@
# define __release(x) __context__(x,-1)
# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
# define __percpu __attribute__((noderef, address_space(3)))
# define __pmem __attribute__((noderef, address_space(5)))
#ifdef CONFIG_SPARSE_RCU_POINTER
# define __rcu __attribute__((noderef, address_space(4)))
#else /* CONFIG_SPARSE_RCU_POINTER */
@ -45,7 +44,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
# define __cond_lock(x,c) (c)
# define __percpu
# define __rcu
# define __pmem
# define __private
# define ACCESS_PRIVATE(p, member) ((p)->member)
#endif /* __CHECKER__ */

View File

@ -52,6 +52,7 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
struct nd_namespace_label;
struct nvdimm_drvdata;
struct nd_mapping {
struct nvdimm *nvdimm;
struct nd_namespace_label **labels;
@ -69,6 +70,7 @@ struct nd_mapping {
struct nvdimm_bus_descriptor {
const struct attribute_group **attr_groups;
unsigned long cmd_mask;
struct module *module;
char *provider_name;
ndctl_fn ndctl;
int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
@ -99,13 +101,21 @@ struct nd_region_desc {
unsigned long flags;
};
struct device;
void *devm_nvdimm_memremap(struct device *dev, resource_size_t offset,
size_t size, unsigned long flags);
static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
resource_size_t offset, size_t size)
{
return (void __iomem *) devm_nvdimm_memremap(dev, offset, size, 0);
}
struct nvdimm_bus;
struct module;
struct device;
struct nd_blk_region;
struct nd_blk_region_desc {
int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
void *iobuf, u64 len, int rw);
struct nd_region_desc ndr_desc;
@ -119,22 +129,22 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
#define nvdimm_bus_register(parent, desc) \
__nvdimm_bus_register(parent, desc, THIS_MODULE)
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nfit_desc);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
struct nvdimm *to_nvdimm(struct device *dev);
struct nd_region *to_nd_region(struct device *dev);
struct nd_blk_region *to_nd_blk_region(struct device *dev);
struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
const char *nvdimm_name(struct nvdimm *nvdimm);
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
void *nvdimm_provider_data(struct nvdimm *nvdimm);
struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
const struct attribute_group **groups, unsigned long flags,
unsigned long cmd_mask);
unsigned long cmd_mask, int num_flush,
struct resource *flush_wpq);
const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
@ -156,4 +166,6 @@ struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr);
unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
u64 nd_fletcher64(void *addr, size_t len, bool le);
void nvdimm_flush(struct nd_region *nd_region);
int nvdimm_has_flush(struct nd_region *nd_region);
#endif /* __LIBNVDIMM_H__ */
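A hedged provider-side sketch of the updated registration flow, mirroring the nfit_test changes later in this series; example_register_dimm and its parameters are illustrative, and nd_desc is assumed to already carry a valid ->ndctl callback.

#include <linux/module.h>
#include <linux/libnvdimm.h>

static struct nvdimm *example_register_dimm(struct device *parent,
		struct nvdimm_bus_descriptor *nd_desc,
		struct resource *flush_hints, int num_hints)
{
	struct nvdimm_bus *bus;

	nd_desc->provider_name = NULL;
	nd_desc->module = THIS_MODULE;
	bus = nvdimm_bus_register(parent, nd_desc);
	if (!bus)
		return NULL;

	/* the two trailing arguments (hint count and resources) are new */
	return nvdimm_create(bus, NULL /* provider_data */,
			NULL /* attribute groups */, 0 /* flags */,
			0 /* cmd_mask */, num_hints, flush_hints);
}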

View File

@ -26,6 +26,7 @@ struct nd_device_driver {
unsigned long type;
int (*probe)(struct device *dev);
int (*remove)(struct device *dev);
void (*shutdown)(struct device *dev);
void (*notify)(struct device *dev, enum nvdimm_event event);
};
@ -67,7 +68,7 @@ struct nd_namespace_io {
struct nd_namespace_common common;
struct resource res;
resource_size_t size;
void __pmem *addr;
void *addr;
struct badblocks bb;
};

View File

@ -28,7 +28,10 @@ static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
return __pfn_to_pfn_t(pfn, 0);
}
extern pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags);
static inline pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
{
return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
}
static inline bool pfn_t_has_page(pfn_t pfn)
{

View File

@ -26,47 +26,35 @@
* calling these symbols with arch_has_pmem_api() and redirect to the
* implementation in asm/pmem.h.
*/
static inline bool __arch_has_wmb_pmem(void)
{
return false;
}
static inline void arch_wmb_pmem(void)
static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
{
BUG();
}
static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
size_t n)
{
BUG();
}
static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src,
size_t n)
static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
{
BUG();
return -EFAULT;
}
static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
BUG();
return 0;
}
static inline void arch_clear_pmem(void __pmem *addr, size_t size)
static inline void arch_clear_pmem(void *addr, size_t size)
{
BUG();
}
static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
BUG();
}
static inline void arch_invalidate_pmem(void __pmem *addr, size_t size)
static inline void arch_invalidate_pmem(void *addr, size_t size)
{
BUG();
}
@ -77,13 +65,6 @@ static inline bool arch_has_pmem_api(void)
return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
}
static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
size_t size)
{
memcpy(dst, (void __force *) src, size);
return 0;
}
/*
* memcpy_from_pmem - read from persistent memory with error handling
* @dst: destination buffer
@ -92,54 +73,13 @@ static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src,
*
* Returns 0 on success, negative error code on failure.
*/
static inline int memcpy_from_pmem(void *dst, void __pmem const *src,
size_t size)
static inline int memcpy_from_pmem(void *dst, void const *src, size_t size)
{
if (arch_has_pmem_api())
return arch_memcpy_from_pmem(dst, src, size);
else
return default_memcpy_from_pmem(dst, src, size);
}
/**
* arch_has_wmb_pmem - true if wmb_pmem() ensures durability
*
* For a given cpu implementation within an architecture it is possible
* that wmb_pmem() resolves to a nop. In the case this returns
* false, pmem api users are unable to ensure durability and may want to
* fall back to a different data consistency model, or otherwise notify
* the user.
*/
static inline bool arch_has_wmb_pmem(void)
{
return arch_has_pmem_api() && __arch_has_wmb_pmem();
}
/*
* These defaults seek to offer decent performance and minimize the
* window between i/o completion and writes being durable on media.
* However, it is undefined / architecture specific whether
* ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
* making data durable relative to i/o completion.
*/
static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
size_t size)
{
memcpy((void __force *) dst, src, size);
}
static inline size_t default_copy_from_iter_pmem(void __pmem *addr,
size_t bytes, struct iov_iter *i)
{
return copy_from_iter_nocache((void __force *)addr, bytes, i);
}
static inline void default_clear_pmem(void __pmem *addr, size_t size)
{
if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0)
clear_page((void __force *)addr);
else
memset((void __force *)addr, 0, size);
memcpy(dst, src, size);
return 0;
}
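A short consumer sketch of the error-returning read; example_read is a hypothetical helper, not a kernel function.

#include <linux/string.h>
#include <linux/pmem.h>

/* Illustrative only: zero the destination rather than pass up poisoned data. */
static int example_read(void *dst, const void *pmem_src, size_t len)
{
	int rc = memcpy_from_pmem(dst, pmem_src, len);

	if (rc)
		memset(dst, 0, len);
	return rc;	/* 0 on success, negative error code on a media error */
}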
/**
@ -152,29 +92,14 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size)
* being effectively evicted from, or never written to, the processor
* cache hierarchy after the copy completes. After memcpy_to_pmem()
* data may still reside in cpu or platform buffers, so this operation
* must be followed by a wmb_pmem().
* must be followed by a blkdev_issue_flush() on the pmem block device.
*/
static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
{
if (arch_has_pmem_api())
arch_memcpy_to_pmem(dst, src, n);
else
default_memcpy_to_pmem(dst, src, n);
}
/**
* wmb_pmem - synchronize writes to persistent memory
*
* After a series of memcpy_to_pmem() operations this drains data from
* cpu write buffers and any platform (memory controller) buffers to
* ensure that written data is durable on persistent memory media.
*/
static inline void wmb_pmem(void)
{
if (arch_has_wmb_pmem())
arch_wmb_pmem();
else
wmb();
memcpy(dst, src, n);
}
/**
@ -184,14 +109,14 @@ static inline void wmb_pmem(void)
* @i: iterator with source data
*
* Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
* This function requires explicit ordering with a wmb_pmem() call.
* See blkdev_issue_flush() note for memcpy_to_pmem().
*/
static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
static inline size_t copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
if (arch_has_pmem_api())
return arch_copy_from_iter_pmem(addr, bytes, i);
return default_copy_from_iter_pmem(addr, bytes, i);
return copy_from_iter_nocache(addr, bytes, i);
}
/**
@ -200,14 +125,14 @@ static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
* @size: number of bytes to zero
*
* Write zeros into the memory range starting at 'addr' for 'size' bytes.
* This function requires explicit ordering with a wmb_pmem() call.
* See blkdev_issue_flush() note for memcpy_to_pmem().
*/
static inline void clear_pmem(void __pmem *addr, size_t size)
static inline void clear_pmem(void *addr, size_t size)
{
if (arch_has_pmem_api())
arch_clear_pmem(addr, size);
else
default_clear_pmem(addr, size);
memset(addr, 0, size);
}
/**
@ -218,7 +143,7 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
* For platforms that support clearing poison, this flushes any poisoned
* ranges out of the cache.
*/
static inline void invalidate_pmem(void __pmem *addr, size_t size)
static inline void invalidate_pmem(void *addr, size_t size)
{
if (arch_has_pmem_api())
arch_invalidate_pmem(addr, size);
@ -230,9 +155,9 @@ static inline void invalidate_pmem(void __pmem *addr, size_t size)
* @size: number of bytes to write back
*
* Write back the processor cache range starting at 'addr' for 'size' bytes.
* This function requires explicit ordering with a wmb_pmem() call.
* See blkdev_issue_flush() note for memcpy_to_pmem().
*/
static inline void wb_cache_pmem(void __pmem *addr, size_t size)
static inline void wb_cache_pmem(void *addr, size_t size)
{
if (arch_has_pmem_api())
arch_wb_cache_pmem(addr, size);

View File

@ -298,6 +298,7 @@ struct nd_cmd_pkg {
#define NVDIMM_FAMILY_INTEL 0
#define NVDIMM_FAMILY_HPE1 1
#define NVDIMM_FAMILY_HPE2 2
#define NVDIMM_FAMILY_MSFT 3
#define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\
struct nd_cmd_pkg)

View File

@ -169,12 +169,6 @@ void devm_memunmap(struct device *dev, void *addr)
}
EXPORT_SYMBOL(devm_memunmap);
pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
{
return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
}
EXPORT_SYMBOL(phys_to_pfn_t);
#ifdef CONFIG_ZONE_DEVICE
static DEFINE_MUTEX(pgmap_lock);
static RADIX_TREE(pgmap_radix, GFP_KERNEL);

View File

@ -313,7 +313,6 @@ our $Sparse = qr{
__kernel|
__force|
__iomem|
__pmem|
__must_check|
__init_refok|
__kprobes|

View File

@ -947,7 +947,7 @@ GrpTable: Grp15
4: XSAVE
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
GrpTable: Grp16

View File

@ -654,5 +654,3 @@
"0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
"66 0f ae f8 \tpcommit ",},

View File

@ -764,5 +764,3 @@
"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
{{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
{{0x66, 0x0f, 0xae, 0xf8, }, 4, 0, "", "",
"66 0f ae f8 \tpcommit ",},

View File

@ -866,10 +866,6 @@ int main(void)
#endif /* #ifndef __x86_64__ */
/* pcommit */
asm volatile("pcommit");
/* Following line is a marker for the awk script - do not change */
asm volatile("rdtsc"); /* Stop here */

View File

@ -947,7 +947,7 @@ GrpTable: Grp15
4: XSAVE
5: XRSTOR | lfence (11B)
6: XSAVEOPT | clwb (66) | mfence (11B)
7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
GrpTable: Grp16

View File

@ -11,12 +11,14 @@ ldflags-y += --wrap=__devm_release_region
ldflags-y += --wrap=__request_region
ldflags-y += --wrap=__release_region
ldflags-y += --wrap=devm_memremap_pages
ldflags-y += --wrap=phys_to_pfn_t
ldflags-y += --wrap=insert_resource
ldflags-y += --wrap=remove_resource
DRIVERS := ../../../drivers
NVDIMM_SRC := $(DRIVERS)/nvdimm
ACPI_SRC := $(DRIVERS)/acpi
ACPI_SRC := $(DRIVERS)/acpi/nfit
DAX_SRC := $(DRIVERS)/dax
ccflags-y := -I$(src)/$(NVDIMM_SRC)/
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
@ -27,10 +29,12 @@ obj-$(CONFIG_ACPI_NFIT) += nfit.o
obj-$(CONFIG_DEV_DAX) += dax.o
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
nfit-y := $(ACPI_SRC)/nfit.o
nfit-y := $(ACPI_SRC)/core.o
nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
nfit-y += config_check.o
nd_pmem-y := $(NVDIMM_SRC)/pmem.o
nd_pmem-y += pmem-dax.o
nd_pmem-y += config_check.o
nd_btt-y := $(NVDIMM_SRC)/btt.o

View File

@ -10,6 +10,7 @@ void check(void)
BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2014-2016, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include "test/nfit_test.h"
#include <linux/blkdev.h>
#include <pmem.h>
#include <nd.h>
long pmem_direct_access(struct block_device *bdev, sector_t sector,
void **kaddr, pfn_t *pfn, long size)
{
struct pmem_device *pmem = bdev->bd_queue->queuedata;
resource_size_t offset = sector * 512 + pmem->data_offset;
if (unlikely(is_bad_pmem(&pmem->bb, sector, size)))
return -EIO;
/*
* Limit dax to a single page at a time given the vmalloc()-backed
* memory in the nfit_test case.
*/
if (get_nfit_res(pmem->phys_addr + offset)) {
struct page *page;
*kaddr = pmem->virt_addr + offset;
page = vmalloc_to_page(pmem->virt_addr + offset);
*pfn = page_to_pfn_t(page);
dev_dbg_ratelimited(disk_to_dev(bdev->bd_disk)->parent,
"%s: sector: %#llx pfn: %#lx\n", __func__,
(unsigned long long) sector, page_to_pfn(page));
return PAGE_SIZE;
}
*kaddr = pmem->virt_addr + offset;
*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
/*
* If badblocks are present, limit known good range to the
* requested range.
*/
if (unlikely(pmem->bb.count))
return size;
return pmem->size - pmem->pfn_pad - offset;
}

View File

@ -1,5 +1,5 @@
ccflags-y := -I$(src)/../../../../drivers/nvdimm/
ccflags-y += -I$(src)/../../../../drivers/acpi/
ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/
obj-m += nfit_test.o
obj-m += nfit_test_iomap.o

View File

@ -10,11 +10,13 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/memremap.h>
#include <linux/rculist.h>
#include <linux/export.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/pfn_t.h>
#include <linux/io.h>
#include <linux/mm.h>
#include "nfit_test.h"
@ -52,7 +54,7 @@ static struct nfit_test_resource *__get_nfit_res(resource_size_t resource)
return NULL;
}
static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
struct nfit_test_resource *get_nfit_res(resource_size_t resource)
{
struct nfit_test_resource *res;
@ -62,6 +64,7 @@ static struct nfit_test_resource *get_nfit_res(resource_size_t resource)
return res;
}
EXPORT_SYMBOL(get_nfit_res);
void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
@ -97,10 +100,6 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
}
EXPORT_SYMBOL(__wrap_devm_memremap);
#ifdef __HAVE_ARCH_PTE_DEVMAP
#include <linux/memremap.h>
#include <linux/pfn_t.h>
void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap)
{
@ -122,19 +121,6 @@ pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
return phys_to_pfn_t(addr, flags);
}
EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
#else
/* to be removed post 4.5-rc1 */
void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res)
{
resource_size_t offset = res->start;
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
return nfit_res->buf + offset - nfit_res->res->start;
return devm_memremap_pages(dev, res);
}
EXPORT_SYMBOL(__wrap_devm_memremap_pages);
#endif
void *__wrap_memremap(resource_size_t offset, size_t size,
unsigned long flags)
@ -229,6 +215,22 @@ struct resource *__wrap___request_region(struct resource *parent,
}
EXPORT_SYMBOL(__wrap___request_region);
int __wrap_insert_resource(struct resource *parent, struct resource *res)
{
if (get_nfit_res(res->start))
return 0;
return insert_resource(parent, res);
}
EXPORT_SYMBOL(__wrap_insert_resource);
int __wrap_remove_resource(struct resource *res)
{
if (get_nfit_res(res->start))
return 0;
return remove_resource(res);
}
EXPORT_SYMBOL(__wrap_remove_resource);
struct resource *__wrap___devm_request_region(struct device *dev,
struct resource *parent, resource_size_t start,
resource_size_t n, const char *name)

View File

@ -98,11 +98,13 @@
enum {
NUM_PM = 3,
NUM_DCR = 5,
NUM_HINTS = 8,
NUM_BDW = NUM_DCR,
NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
DIMM_SIZE = SZ_32M,
LABEL_SIZE = SZ_128K,
SPA_VCD_SIZE = SZ_4M,
SPA0_SIZE = DIMM_SIZE,
SPA1_SIZE = DIMM_SIZE*2,
SPA2_SIZE = DIMM_SIZE,
@ -470,11 +472,7 @@ static void release_nfit_res(void *data)
list_del(&nfit_res->list);
spin_unlock(&nfit_test_lock);
if (is_vmalloc_addr(nfit_res->buf))
vfree(nfit_res->buf);
else
dma_free_coherent(nfit_res->dev, resource_size(res),
nfit_res->buf, res->start);
vfree(nfit_res->buf);
kfree(res);
kfree(nfit_res);
}
@ -507,9 +505,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
return nfit_res->buf;
err:
if (buf && !is_vmalloc_addr(buf))
dma_free_coherent(dev, size, buf, *dma);
else if (buf)
if (buf)
vfree(buf);
kfree(res);
kfree(nfit_res);
@ -524,15 +520,6 @@ static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
return __test_alloc(t, size, dma, buf);
}
static void *test_alloc_coherent(struct nfit_test *t, size_t size,
dma_addr_t *dma)
{
struct device *dev = &t->pdev.dev;
void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
return __test_alloc(t, size, dma, buf);
}
static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
{
int i;
@ -584,7 +571,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
+ offsetof(struct acpi_nfit_control_region,
window_size) * NUM_DCR
+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
+ sizeof(struct acpi_nfit_flush_address) * NUM_DCR;
+ (sizeof(struct acpi_nfit_flush_address)
+ sizeof(u64) * NUM_HINTS) * NUM_DCR;
int i;
t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@ -592,15 +580,15 @@ static int nfit_test0_alloc(struct nfit_test *t)
return -ENOMEM;
t->nfit_size = nfit_size;
t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]);
t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]);
if (!t->spa_set[0])
return -ENOMEM;
t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]);
t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]);
if (!t->spa_set[1])
return -ENOMEM;
t->spa_set[2] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[2]);
t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]);
if (!t->spa_set[2])
return -ENOMEM;
@ -614,7 +602,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
return -ENOMEM;
sprintf(t->label[i], "label%d", i);
t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]);
t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS,
&t->flush_dma[i]);
if (!t->flush[i])
return -ENOMEM;
}
@ -630,7 +619,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
static int nfit_test1_alloc(struct nfit_test *t)
{
size_t nfit_size = sizeof(struct acpi_nfit_system_address)
size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+ sizeof(struct acpi_nfit_memory_map)
+ offsetof(struct acpi_nfit_control_region, window_size);
@ -639,15 +628,31 @@ static int nfit_test1_alloc(struct nfit_test *t)
return -ENOMEM;
t->nfit_size = nfit_size;
t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]);
t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]);
if (!t->spa_set[0])
return -ENOMEM;
t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
if (!t->spa_set[1])
return -ENOMEM;
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
static void dcr_common_init(struct acpi_nfit_control_region *dcr)
{
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr->valid_fields = 1;
dcr->manufacturing_location = 0xa;
dcr->manufacturing_date = cpu_to_be16(2016);
}
static void nfit_test0_setup(struct nfit_test *t)
{
const int flush_hint_size = sizeof(struct acpi_nfit_flush_address)
+ (sizeof(u64) * NUM_HINTS);
struct acpi_nfit_desc *acpi_desc;
struct acpi_nfit_memory_map *memdev;
void *nfit_buf = t->nfit_buf;
@ -655,7 +660,7 @@ static void nfit_test0_setup(struct nfit_test *t)
struct acpi_nfit_control_region *dcr;
struct acpi_nfit_data_region *bdw;
struct acpi_nfit_flush_address *flush;
unsigned int offset;
unsigned int offset, i;
/*
* spa0 (interleave first half of dimm0 and dimm1, note storage
@ -972,9 +977,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 0+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[0];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
@ -989,9 +992,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 1+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[1];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
@ -1006,9 +1007,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 2+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[2];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
@ -1023,9 +1022,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 3+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[3];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
@ -1042,9 +1039,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 4+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[0];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
@ -1056,9 +1051,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 5+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[1];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
@ -1070,9 +1063,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 6+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[2];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
@ -1084,9 +1075,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 7+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[3];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
@ -1141,45 +1130,47 @@ static void nfit_test0_setup(struct nfit_test *t)
/* flush0 (dimm0) */
flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
flush->header.length = sizeof(struct acpi_nfit_flush_address);
flush->header.length = flush_hint_size;
flush->device_handle = handle[0];
flush->hint_count = 1;
flush->hint_address[0] = t->flush_dma[0];
flush->hint_count = NUM_HINTS;
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
/* flush1 (dimm1) */
flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1;
flush = nfit_buf + offset + flush_hint_size * 1;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
flush->header.length = sizeof(struct acpi_nfit_flush_address);
flush->header.length = flush_hint_size;
flush->device_handle = handle[1];
flush->hint_count = 1;
flush->hint_address[0] = t->flush_dma[1];
flush->hint_count = NUM_HINTS;
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
/* flush2 (dimm2) */
flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2;
flush = nfit_buf + offset + flush_hint_size * 2;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
flush->header.length = sizeof(struct acpi_nfit_flush_address);
flush->header.length = flush_hint_size;
flush->device_handle = handle[2];
flush->hint_count = 1;
flush->hint_address[0] = t->flush_dma[2];
flush->hint_count = NUM_HINTS;
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
/* flush3 (dimm3) */
flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3;
flush = nfit_buf + offset + flush_hint_size * 3;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
flush->header.length = sizeof(struct acpi_nfit_flush_address);
flush->header.length = flush_hint_size;
flush->device_handle = handle[3];
flush->hint_count = 1;
flush->hint_address[0] = t->flush_dma[3];
flush->hint_count = NUM_HINTS;
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
if (t->setup_hotplug) {
offset = offset + sizeof(struct acpi_nfit_flush_address) * 4;
offset = offset + flush_hint_size * 4;
/* dcr-descriptor4: blk */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
dcr->region_index = 8+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[4];
dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
@ -1196,9 +1187,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 9+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~handle[4];
dcr->code = NFIT_FIC_BYTEN;
dcr->windows = 0;
@ -1300,10 +1289,12 @@ static void nfit_test0_setup(struct nfit_test *t)
/* flush3 (dimm4) */
flush = nfit_buf + offset;
flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
flush->header.length = sizeof(struct acpi_nfit_flush_address);
flush->header.length = flush_hint_size;
flush->device_handle = handle[4];
flush->hint_count = 1;
flush->hint_address[0] = t->flush_dma[4];
flush->hint_count = NUM_HINTS;
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[4]
+ i * sizeof(u64);
}
post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
@ -1339,7 +1330,16 @@ static void nfit_test1_setup(struct nfit_test *t)
spa->address = t->spa_set_dma[0];
spa->length = SPA2_SIZE;
offset += sizeof(*spa);
/* virtual cd region */
spa = nfit_buf + sizeof(*spa);
spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
spa->header.length = sizeof(*spa);
memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
spa->range_index = 0;
spa->address = t->spa_set_dma[1];
spa->length = SPA_VCD_SIZE;
offset += sizeof(*spa) * 2;
/* mem-region0 (spa0, dimm0) */
memdev = nfit_buf + offset;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@ -1365,9 +1365,7 @@ static void nfit_test1_setup(struct nfit_test *t)
dcr->header.length = offsetof(struct acpi_nfit_control_region,
window_size);
dcr->region_index = 0+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr_common_init(dcr);
dcr->serial_number = ~0;
dcr->code = NFIT_FIC_BYTE;
dcr->windows = 0;
@ -1462,20 +1460,16 @@ static int nfit_test_probe(struct platform_device *pdev)
nfit_test->setup(nfit_test);
acpi_desc = &nfit_test->acpi_desc;
acpi_nfit_desc_init(acpi_desc, &pdev->dev);
acpi_desc->nfit = nfit_test->nfit_buf;
acpi_desc->blk_do_io = nfit_test_blk_do_io;
nd_desc = &acpi_desc->nd_desc;
nd_desc->provider_name = NULL;
nd_desc->module = THIS_MODULE;
nd_desc->ndctl = nfit_test_ctl;
acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc);
if (!acpi_desc->nvdimm_bus)
return -ENXIO;
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
if (rc) {
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
nfit_test->nfit_size);
if (rc)
return rc;
}
if (nfit_test->setup != nfit_test0_setup)
return 0;
@ -1483,22 +1477,16 @@ static int nfit_test_probe(struct platform_device *pdev)
nfit_test->setup_hotplug = 1;
nfit_test->setup(nfit_test);
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
if (rc) {
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
nfit_test->nfit_size);
if (rc)
return rc;
}
return 0;
}
static int nfit_test_remove(struct platform_device *pdev)
{
struct nfit_test *nfit_test = to_nfit_test(&pdev->dev);
struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc;
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
return 0;
}
@ -1523,12 +1511,6 @@ static struct platform_driver nfit_test_driver = {
.id_table = nfit_test_id,
};
#ifdef CONFIG_CMA_SIZE_MBYTES
#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
#else
#define CMA_SIZE_MBYTES 0
#endif
static __init int nfit_test_init(void)
{
int rc, i;
@ -1538,7 +1520,6 @@ static __init int nfit_test_init(void)
for (i = 0; i < NUM_NFITS; i++) {
struct nfit_test *nfit_test;
struct platform_device *pdev;
static int once;
nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
if (!nfit_test) {
@ -1577,20 +1558,6 @@ static __init int nfit_test_init(void)
goto err_register;
instances[i] = nfit_test;
if (!once++) {
dma_addr_t dma;
void *buf;
buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma,
GFP_KERNEL);
if (!buf) {
rc = -ENOMEM;
dev_warn(&pdev->dev, "need 128M of free cma\n");
goto err_register;
}
dma_free_coherent(&pdev->dev, SZ_128M, buf, dma);
}
}
rc = platform_driver_register(&nfit_test_driver);

View File

@ -12,6 +12,7 @@
*/
#ifndef __NFIT_TEST_H__
#define __NFIT_TEST_H__
#include <linux/list.h>
struct nfit_test_resource {
struct list_head list;
@ -26,4 +27,5 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
void __wrap_iounmap(volatile void __iomem *addr);
void nfit_test_setup(nfit_test_lookup_fn lookup);
void nfit_test_teardown(void);
struct nfit_test_resource *get_nfit_res(resource_size_t resource);
#endif