2005-04-17 06:20:36 +08:00
|
|
|
#ifndef _LINUX_HUGETLB_H
|
|
|
|
#define _LINUX_HUGETLB_H
|
|
|
|
|
|
|
|
#ifdef CONFIG_HUGETLB_PAGE
|
|
|
|
|
|
|
|
#include <linux/mempolicy.h>
|
2005-06-22 08:14:44 +08:00
|
|
|
#include <asm/tlbflush.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
struct ctl_table;
|
|
|
|
|
|
|
|
static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
|
|
|
|
{
|
|
|
|
return vma->vm_flags & VM_HUGETLB;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Core hugetlb entry points available when CONFIG_HUGETLB_PAGE is set. */
int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *,
			   void __user *, size_t *, loff_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
			    struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
			struct page **, struct vm_area_struct **,
			unsigned long *, int *, int);
void unmap_hugepage_range(struct vm_area_struct *, unsigned long,
			  unsigned long);
void __unmap_hugepage_range(struct vm_area_struct *, unsigned long,
			    unsigned long);
int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
int hugetlb_report_meminfo(char *);
int hugetlb_report_node_meminfo(int, char *);
unsigned long hugetlb_total_pages(void);
int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
		  unsigned long address, int write_access);
int hugetlb_reserve_pages(struct inode *inode, long from, long to);
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* Hugetlb globals; only declared here, the definitions live elsewhere. */
extern unsigned long max_huge_pages;
extern const unsigned long hugetlb_zero, hugetlb_infinity;
extern int sysctl_hugetlb_shm_group;
|
|
|
|
|
2005-06-22 08:14:44 +08:00
|
|
|
/* arch callbacks */
|
|
|
|
|
|
|
|
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr);
|
|
|
|
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr);
|
|
|
|
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
|
|
|
|
int write);
|
|
|
|
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
|
|
|
|
pmd_t *pmd, int write);
|
|
|
|
int pmd_huge(pmd_t pmd);
|
2006-03-22 16:08:50 +08:00
|
|
|
void hugetlb_change_protection(struct vm_area_struct *vma,
|
|
|
|
unsigned long address, unsigned long end, pgprot_t newprot);
|
2005-06-22 08:14:44 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * is_hugepage_only_range() reports whether a range intersects an address
 * region reserved for hugepages.  Architectures that do not define
 * ARCH_HAS_HUGEPAGE_ONLY_RANGE get this default, which is always false.
 *
 * Because the arch versions return true on any intersection (the range
 * need not be hugepage aligned), this is not a reliable "is this a
 * hugepage VMA" test; use is_vm_hugetlb_page() for that.
 */
#ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
#define is_hugepage_only_range(mm, addr, len)	0
#endif
|
|
|
|
|
|
|
|
/*
 * Unless the architecture supplies its own hugetlb_free_pgd_range()
 * (ARCH_HAS_HUGETLB_FREE_PGD_RANGE), the name is simply an alias for the
 * ordinary free_pgd_range().
 */
#ifndef ARCH_HAS_HUGETLB_FREE_PGD_RANGE
#define hugetlb_free_pgd_range	free_pgd_range
#else
void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
			    unsigned long end, unsigned long floor,
			    unsigned long ceiling);
#endif
|
|
|
|
|
|
|
|
#ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE
|
2006-03-22 16:09:01 +08:00
|
|
|
/*
|
|
|
|
* If the arch doesn't supply something else, assume that hugepage
|
|
|
|
* size aligned regions are ok without further preparation.
|
|
|
|
*/
|
[PATCH] hugetlb: prepare_hugepage_range check offset too
(David:)
If hugetlbfs_file_mmap() returns a failure to do_mmap_pgoff() - for example,
because the given file offset is not hugepage aligned - then do_mmap_pgoff
will go to the unmap_and_free_vma backout path.
But at this stage the vma hasn't been marked as hugepage, and the backout path
will call unmap_region() on it. That will eventually call down to the
non-hugepage version of unmap_page_range(). On ppc64, at least, that will
cause serious problems if there are any existing hugepage pagetable entries in
the vicinity - for example if there are any other hugepage mappings under the
same PUD. unmap_page_range() will trigger a bad_pud() on the hugepage pud
entries. I suspect this will also cause bad problems on ia64, though I don't
have a machine to test it on.
(Hugh:)
prepare_hugepage_range() should check file offset alignment when it checks
virtual address and length, to stop MAP_FIXED with a bad huge offset from
unmapping before it fails further down. PowerPC should apply the same
prepare_hugepage_range alignment checks as ia64 and all the others do.
Then none of the alignment checks in hugetlbfs_file_mmap are required (nor
is the check for too small a mapping); but even so, move up setting of
VM_HUGETLB and add a comment to warn of what David Gibson discovered - if
hugetlbfs_file_mmap fails before setting it, do_mmap_pgoff's unmap_region
when unwinding from error will go the non-huge way, which may cause bad
behaviour on architectures (powerpc and ia64) which segregate their huge
mappings into a separate region of the address space.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-11-14 18:03:32 +08:00
|
|
|
static inline int prepare_hugepage_range(unsigned long addr, unsigned long len,
|
|
|
|
pgoff_t pgoff)
|
2006-03-22 16:09:01 +08:00
|
|
|
{
|
[PATCH] hugetlb: prepare_hugepage_range check offset too
(David:)
If hugetlbfs_file_mmap() returns a failure to do_mmap_pgoff() - for example,
because the given file offset is not hugepage aligned - then do_mmap_pgoff
will go to the unmap_and_free_vma backout path.
But at this stage the vma hasn't been marked as hugepage, and the backout path
will call unmap_region() on it. That will eventually call down to the
non-hugepage version of unmap_page_range(). On ppc64, at least, that will
cause serious problems if there are any existing hugepage pagetable entries in
the vicinity - for example if there are any other hugepage mappings under the
same PUD. unmap_page_range() will trigger a bad_pud() on the hugepage pud
entries. I suspect this will also cause bad problems on ia64, though I don't
have a machine to test it on.
(Hugh:)
prepare_hugepage_range() should check file offset alignment when it checks
virtual address and length, to stop MAP_FIXED with a bad huge offset from
unmapping before it fails further down. PowerPC should apply the same
prepare_hugepage_range alignment checks as ia64 and all the others do.
Then none of the alignment checks in hugetlbfs_file_mmap are required (nor
is the check for too small a mapping); but even so, move up setting of
VM_HUGETLB and add a comment to warn of what David Gibson discovered - if
hugetlbfs_file_mmap fails before setting it, do_mmap_pgoff's unmap_region
when unwinding from error will go the non-huge way, which may cause bad
behaviour on architectures (powerpc and ia64) which segregate their huge
mappings into a separate region of the address space.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-11-14 18:03:32 +08:00
|
|
|
if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT))
|
|
|
|
return -EINVAL;
|
2006-03-22 16:09:01 +08:00
|
|
|
if (len & ~HPAGE_MASK)
|
|
|
|
return -EINVAL;
|
|
|
|
if (addr & ~HPAGE_MASK)
|
|
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
#else
|
[PATCH] hugetlb: prepare_hugepage_range check offset too
(David:)
If hugetlbfs_file_mmap() returns a failure to do_mmap_pgoff() - for example,
because the given file offset is not hugepage aligned - then do_mmap_pgoff
will go to the unmap_and_free_vma backout path.
But at this stage the vma hasn't been marked as hugepage, and the backout path
will call unmap_region() on it. That will eventually call down to the
non-hugepage version of unmap_page_range(). On ppc64, at least, that will
cause serious problems if there are any existing hugepage pagetable entries in
the vicinity - for example if there are any other hugepage mappings under the
same PUD. unmap_page_range() will trigger a bad_pud() on the hugepage pud
entries. I suspect this will also cause bad problems on ia64, though I don't
have a machine to test it on.
(Hugh:)
prepare_hugepage_range() should check file offset alignment when it checks
virtual address and length, to stop MAP_FIXED with a bad huge offset from
unmapping before it fails further down. PowerPC should apply the same
prepare_hugepage_range alignment checks as ia64 and all the others do.
Then none of the alignment checks in hugetlbfs_file_mmap are required (nor
is the check for too small a mapping); but even so, move up setting of
VM_HUGETLB and add a comment to warn of what David Gibson discovered - if
hugetlbfs_file_mmap fails before setting it, do_mmap_pgoff's unmap_region
when unwinding from error will go the non-huge way, which may cause bad
behaviour on architectures (powerpc and ia64) which segregate their huge
mappings into a separate region of the address space.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Adam Litke <agl@us.ibm.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-11-14 18:03:32 +08:00
|
|
|
int prepare_hugepage_range(unsigned long addr, unsigned long len,
|
|
|
|
pgoff_t pgoff);
|
2005-04-17 06:20:36 +08:00
|
|
|
#endif
|
|
|
|
|
2005-06-22 08:14:44 +08:00
|
|
|
/*
 * Huge PTE set/clear helpers.  Without ARCH_HAS_SETCLEAR_HUGE_PTE they map
 * straight onto the normal set_pte_at() / ptep_get_and_clear(); otherwise
 * the architecture provides real functions.
 */
#ifndef ARCH_HAS_SETCLEAR_HUGE_PTE
#define set_huge_pte_at(mm, addr, ptep, pte)	set_pte_at(mm, addr, ptep, pte)
#define huge_ptep_get_and_clear(mm, addr, ptep)	ptep_get_and_clear(mm, addr, ptep)
#else
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte);
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep);
#endif
|
|
|
|
|
|
|
|
/*
 * Optional per-architecture prefault hook.  It is a no-op statement unless
 * the architecture defines ARCH_HAS_HUGETLB_PREFAULT_HOOK and supplies the
 * function itself.
 */
#ifndef ARCH_HAS_HUGETLB_PREFAULT_HOOK
#define hugetlb_prefault_arch_hook(mm)		do { } while (0)
#else
void hugetlb_prefault_arch_hook(struct mm_struct *mm);
#endif
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
#else /* !CONFIG_HUGETLB_PAGE */
|
|
|
|
|
|
|
|
/* Without CONFIG_HUGETLB_PAGE no VMA is ever a hugetlb mapping. */
static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
{
	return 0;
}
|
|
|
|
/* Without CONFIG_HUGETLB_PAGE the reported huge page total is always 0. */
static inline unsigned long hugetlb_total_pages(void)
{
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Stubs used when CONFIG_HUGETLB_PAGE is not set.  Operations that only
 * make sense on an existing hugetlb mapping expand to BUG(); queries
 * expand to a constant "nothing here" result.
 */
#define follow_hugetlb_page(m,v,p,vs,a,b,i)	({ BUG(); 0; })
#define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
#define hugetlb_prefault(mapping, vma)		({ BUG(); 0; })
#define unmap_hugepage_range(vma, start, end)	BUG()
#define hugetlb_report_meminfo(buf)		0
#define hugetlb_report_node_meminfo(n, buf)	0
#define follow_huge_pmd(mm, addr, pmd, write)	NULL
#define prepare_hugepage_range(addr,len,pgoff)	(-EINVAL)
#define pmd_huge(x)	0
#define is_hugepage_only_range(mm, addr, len)	0
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })

/*
 * Expands to an empty statement rather than to nothing, so the macro is
 * safe as the sole body of an if/else and matches the other no-op hooks
 * in this header.
 */
#define hugetlb_change_protection(vma, address, end, newprot)	do { } while (0)
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
 * Fallback so that code mentioning HPAGE_MASK / HPAGE_SIZE still compiles
 * when nothing else has defined HPAGE_MASK: use the normal page values.
 */
#ifndef HPAGE_MASK
#define HPAGE_MASK	PAGE_MASK		/* Keep the compiler happy */
#define HPAGE_SIZE	PAGE_SIZE
#endif
|
|
|
|
|
|
|
|
#endif /* !CONFIG_HUGETLB_PAGE */
|
|
|
|
|
|
|
|
#ifdef CONFIG_HUGETLBFS
|
|
|
|
struct hugetlbfs_config {
|
|
|
|
uid_t uid;
|
|
|
|
gid_t gid;
|
|
|
|
umode_t mode;
|
|
|
|
long nr_blocks;
|
|
|
|
long nr_inodes;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct hugetlbfs_sb_info {
|
|
|
|
long max_blocks; /* blocks allowed */
|
|
|
|
long free_blocks; /* blocks free */
|
|
|
|
long max_inodes; /* inodes allowed */
|
|
|
|
long free_inodes; /* inodes free */
|
|
|
|
spinlock_t stat_lock;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
struct hugetlbfs_inode_info {
|
|
|
|
struct shared_policy policy;
|
|
|
|
struct inode vfs_inode;
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
|
|
|
|
{
|
|
|
|
return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
|
|
|
|
{
|
|
|
|
return sb->s_fs_info;
|
|
|
|
}
|
|
|
|
|
2006-03-28 17:56:42 +08:00
|
|
|
/* hugetlbfs objects and entry points; only declared in this header. */
extern const struct file_operations hugetlbfs_file_operations;
extern struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_zero_setup(size_t);
int hugetlb_get_quota(struct address_space *mapping);
void hugetlb_put_quota(struct address_space *mapping);
|
|
|
|
|
|
|
|
static inline int is_file_hugepages(struct file *file)
|
|
|
|
{
|
|
|
|
return file->f_op == &hugetlbfs_file_operations;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void set_file_hugepages(struct file *file)
|
|
|
|
{
|
|
|
|
file->f_op = &hugetlbfs_file_operations;
|
|
|
|
}
|
|
|
|
#else /* !CONFIG_HUGETLBFS */
|
|
|
|
|
|
|
|
/*
 * Stubs used when CONFIG_HUGETLBFS is not set: no file is a hugepage
 * file, marking one is a bug, and the zero-setup call reports -ENOSYS.
 */
#define is_file_hugepages(file)		0
#define set_file_hugepages(file)	BUG()
#define hugetlb_zero_setup(size)	ERR_PTR(-ENOSYS)
|
|
|
|
|
|
|
|
#endif /* !CONFIG_HUGETLBFS */
|
|
|
|
|
|
|
|
#endif /* _LINUX_HUGETLB_H */
|