kernel_optimize_test/arch/arm64/mm/mmap.c
Kees Cook 8f2af155b5 exec: pass stack rlimit into mm layout functions
Patch series "exec: Pin stack limit during exec".

Attempts to solve problems with the stack limit changing during exec
continue to be frustrated[1][2].  In addition to the specific issues
around the Stack Clash family of flaws, Andy Lutomirski pointed out[3]
other places during exec where the stack limit is used and is assumed to
be unchanging.  Given the many places it gets used and the fact that it
can be manipulated/raced via setrlimit() and prlimit(), I think the only
way to handle this is to move away from the "current" view of the stack
limit and instead attach it to the bprm, and plumb this down into the
functions that need to know the stack limits.  This series implements
the approach.

[1] 04e35f4495 ("exec: avoid RLIMIT_STACK races with prlimit()")
[2] 779f4e1c6c ("Revert "exec: avoid RLIMIT_STACK races with prlimit()"")
[3] to security@kernel.org, "Subject: existing rlimit races?"

This patch (of 3):

Since it is possible that the stack rlimit can change externally during
exec (either via another thread calling setrlimit() or another process
calling prlimit()), provide a way to pass the rlimit down into the
per-architecture mm layout functions so that the rlimit can stay in the
bprm structure instead of sitting in the signal structure until exec is
finalized.

Link: http://lkml.kernel.org/r/1518638796-20819-2-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Ben Hutchings <ben@decadent.org.uk>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Hugh Dickins <hughd@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Greg KH <greg@kroah.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ben Hutchings <ben.hutchings@codethink.co.uk>
Cc: Brad Spengler <spender@grsecurity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-04-11 10:28:37 -07:00

154 lines
4.1 KiB
C

/*
* Based on arch/arm/mm/mmap.c
*
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/elf.h>
#include <linux/fs.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/export.h>
#include <linux/shm.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/io.h>
#include <linux/personality.h>
#include <linux/random.h>
#include <asm/cputype.h>
/*
* Leave enough space between the mmap area and the stack to honour ulimit in
* the face of randomisation.
*/
#define MIN_GAP (SZ_128M)
#define MAX_GAP (STACK_TOP/6*5)
static int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
}
unsigned long arch_mmap_rnd(void)
{
unsigned long rnd;
#ifdef CONFIG_COMPAT
if (test_thread_flag(TIF_32BIT))
rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
else
#endif
rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
return rnd << PAGE_SHIFT;
}
static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
{
unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap;
/* Values close to RLIM_INFINITY can overflow. */
if (gap + pad > gap)
gap += pad;
if (gap < MIN_GAP)
gap = MIN_GAP;
else if (gap > MAX_GAP)
gap = MAX_GAP;
return PAGE_ALIGN(STACK_TOP - gap - rnd);
}
/*
* This function, called very early during the creation of a new process VM
* image, sets up which VM layout function to use:
*/
void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
if (current->flags & PF_RANDOMIZE)
random_factor = arch_mmap_rnd();
/*
* Fall back to the standard layout if the personality bit is set, or
* if the expected stack growth is unlimited:
*/
if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
/*
* You really shouldn't be using read() or write() on /dev/mem. This might go
* away in the future.
*/
int valid_phys_addr_range(phys_addr_t addr, size_t size)
{
/*
* Check whether addr is covered by a memory region without the
* MEMBLOCK_NOMAP attribute, and whether that region covers the
* entire range. In theory, this could lead to false negatives
* if the range is covered by distinct but adjacent memory regions
* that only differ in other attributes. However, few of such
* attributes have been defined, and it is debatable whether it
* follows that /dev/mem read() calls should be able traverse
* such boundaries.
*/
return memblock_is_region_memory(addr, size) &&
memblock_is_map_memory(addr);
}
/*
* Do not allow /dev/mem mappings beyond the supported physical range.
*/
int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
return !(((pfn << PAGE_SHIFT) + size) & ~PHYS_MASK);
}
#ifdef CONFIG_STRICT_DEVMEM
#include <linux/ioport.h>
/*
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
* is valid. The argument is a physical page number. We mimic x86 here by
* disallowing access to system RAM as well as device-exclusive MMIO regions.
* This effectively disable read()/write() on /dev/mem.
*/
int devmem_is_allowed(unsigned long pfn)
{
if (iomem_is_exclusive(pfn << PAGE_SHIFT))
return 0;
if (!page_is_ram(pfn))
return 1;
return 0;
}
#endif