kernel_optimize_test/arch/sh/kernel/hw_breakpoint.c

409 lines
8.2 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* arch/sh/kernel/hw_breakpoint.c
*
* Unified kernel/user-space hardware breakpoint facility for the on-chip UBC.
*
* Copyright (C) 2009 - 2010 Paul Mundt
*/
#include <linux/init.h>
#include <linux/perf_event.h>
#include <linux/sched/signal.h>
#include <linux/hw_breakpoint.h>
#include <linux/percpu.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/io.h>
#include <linux/clk.h>
#include <asm/hw_breakpoint.h>
#include <asm/mmu_context.h>
#include <asm/ptrace.h>
#include <asm/traps.h>
/*
* Per-CPU table of the breakpoints currently installed, indexed by UBC
* channel: bp_per_reg[i] holds the perf event occupying channel i on that
* CPU, or NULL when the channel is free.
*/
static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
/*
* A dummy placeholder for early accesses until the CPUs get a chance to
* register their UBCs later in the boot process. With num_events set to 0
* the per-channel loops in this file iterate zero times.
*/
static struct sh_ubc ubc_dummy = { .num_events = 0 };
/* The UBC description in use; switched away from the dummy by register_sh_ubc(). */
static struct sh_ubc *sh_ubc __read_mostly = &ubc_dummy;
/*
* Install a perf counter breakpoint.
*
* We seek a free UBC channel and use it for this breakpoint.
*
* Atomic: we hold the counter->ctx->lock and we only handle variables
* and registers local to this cpu.
*/
int arch_install_hw_breakpoint(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
int i;
for (i = 0; i < sh_ubc->num_events; i++) {
sh: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. 
DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Signed-off-by: Christoph Lameter <cl@linux.com> Tested-by: Geert Uytterhoeven <geert@linux-m68k.org> [compilation only] Cc: Paul Mundt <lethal@linux-sh.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-06-05 07:05:51 +08:00
struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
if (!*slot) {
*slot = bp;
break;
}
}
if (WARN_ONCE(i == sh_ubc->num_events, "Can't find any breakpoint slot"))
return -EBUSY;
clk_enable(sh_ubc->clk);
sh_ubc->enable(info, i);
return 0;
}
/*
* Uninstall the breakpoint contained in the given counter.
*
* First we search the debug address register it uses and then we disable
* it.
*
* Atomic: we hold the counter->ctx->lock and we only handle variables
* and registers local to this cpu.
*/
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
int i;
for (i = 0; i < sh_ubc->num_events; i++) {
sh: Replace __get_cpu_var uses __get_cpu_var() is used for multiple purposes in the kernel source. One of them is address calculation via the form &__get_cpu_var(x). This calculates the address for the instance of the percpu variable of the current processor based on an offset. Other use cases are for storing and retrieving data from the current processors percpu area. __get_cpu_var() can be used as an lvalue when writing data or on the right side of an assignment. __get_cpu_var() is defined as : #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) __get_cpu_var() always only does an address determination. However, store and retrieve operations could use a segment prefix (or global register on other platforms) to avoid the address calculation. this_cpu_write() and this_cpu_read() can directly take an offset into a percpu area and use optimized assembly code to read and write per cpu variables. This patch converts __get_cpu_var into either an explicit address calculation using this_cpu_ptr() or into a use of this_cpu operations that use the offset. Thereby address calculations are avoided and less registers are used when code is generated. At the end of the patch set all uses of __get_cpu_var have been removed so the macro is removed too. The patch set includes passes over all arches as well. Once these operations are used throughout then specialized macros can be defined in non -x86 arches as well in order to optimize per cpu access by f.e. using a global register that may be set to the per cpu base. Transformations done to __get_cpu_var() 1. Determine the address of the percpu instance of the current processor. DEFINE_PER_CPU(int, y); int *x = &__get_cpu_var(y); Converts to int *x = this_cpu_ptr(&y); 2. Same as #1 but this time an array structure is involved. DEFINE_PER_CPU(int, y[20]); int *x = __get_cpu_var(y); Converts to int *x = this_cpu_ptr(y); 3. Retrieve the content of the current processors instance of a per cpu variable. 
DEFINE_PER_CPU(int, y); int x = __get_cpu_var(y) Converts to int x = __this_cpu_read(y); 4. Retrieve the content of a percpu struct DEFINE_PER_CPU(struct mystruct, y); struct mystruct x = __get_cpu_var(y); Converts to memcpy(&x, this_cpu_ptr(&y), sizeof(x)); 5. Assignment to a per cpu variable DEFINE_PER_CPU(int, y) __get_cpu_var(y) = x; Converts to __this_cpu_write(y, x); 6. Increment/Decrement etc of a per cpu variable DEFINE_PER_CPU(int, y); __get_cpu_var(y)++ Converts to __this_cpu_inc(y) Signed-off-by: Christoph Lameter <cl@linux.com> Tested-by: Geert Uytterhoeven <geert@linux-m68k.org> [compilation only] Cc: Paul Mundt <lethal@linux-sh.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-06-05 07:05:51 +08:00
struct perf_event **slot = this_cpu_ptr(&bp_per_reg[i]);
if (*slot == bp) {
*slot = NULL;
break;
}
}
if (WARN_ONCE(i == sh_ubc->num_events, "Can't find any breakpoint slot"))
return;
sh_ubc->disable(info, i);
clk_disable(sh_ubc->clk);
}
/*
 * Translate an SH_BREAKPOINT_LEN_* encoding into a length in bytes.
 *
 * Returns 1, 2, 4 or 8 for the matching encoding, and 0 for any value
 * that is not one of the four known encodings.
 */
static int get_hbp_len(u16 hbp_len)
{
	switch (hbp_len) {
	case SH_BREAKPOINT_LEN_1:
		return 1;
	case SH_BREAKPOINT_LEN_2:
		return 2;
	case SH_BREAKPOINT_LEN_4:
		return 4;
	case SH_BREAKPOINT_LEN_8:
		return 8;
	}

	/* Unknown encoding. */
	return 0;
}
/*
* Check for virtual address in kernel space.
*/
int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
unsigned int len;
hw-breakpoints: Change/Enforce some breakpoints policies The current policies of breakpoints in x86 and SH are the following: - task bound breakpoints can only break on userspace addresses - cpu wide breakpoints can only break on kernel addresses The former rule prevents ptrace breakpoints to be set to trigger on kernel addresses, which is good. But as a side effect, we can't breakpoint on kernel addresses for task bound breakpoints. The latter rule simply makes no sense, there is no reason why we can't set breakpoints on userspace while performing cpu bound profiles. We want the following new policies: - task bound breakpoint can set userspace address breakpoints, with no particular privilege required. - task bound breakpoints can set kernelspace address breakpoints but must be privileged to do that. - cpu bound breakpoints can do what they want as they are privileged already. To implement these new policies, this patch checks if we are dealing with a kernel address breakpoint, if so and if the exclude_kernel parameter is set, we tell the user that the breakpoint is invalid, which makes a good generic ptrace protection. If we don't have exclude_kernel, ensure the user has the right privileges as kernel breakpoints are quite sensitive (risk of trap recursion attacks and global performance impacts). [ Paul Mundt: keep addr space check for sh signal delivery and fix double function declaration] Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Cc: K. Prasad <prasad@linux.vnet.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Jason Wessel <jason.wessel@windriver.com> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Paul Mundt <lethal@linux-sh.org>
2010-04-19 00:11:53 +08:00
unsigned long va;
va = hw->address;
len = get_hbp_len(hw->len);
return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
}
/*
 * Convert SH-specific breakpoint length/type encodings into the generic
 * HW_BREAKPOINT_* values.
 *
 * @sh_len:   one of SH_BREAKPOINT_LEN_{1,2,4,8}
 * @sh_type:  one of SH_BREAKPOINT_{READ,WRITE,RW}
 * @gen_len:  receives the generic length
 * @gen_type: receives the generic type
 *
 * Returns 0 on success or -EINVAL for an unrecognised encoding. The length
 * is translated first, so *gen_len has already been written when only the
 * type turns out to be invalid.
 */
int arch_bp_generic_fields(int sh_len, int sh_type,
			   int *gen_len, int *gen_type)
{
	int len, type;

	switch (sh_len) {
	case SH_BREAKPOINT_LEN_1:
		len = HW_BREAKPOINT_LEN_1;
		break;
	case SH_BREAKPOINT_LEN_2:
		len = HW_BREAKPOINT_LEN_2;
		break;
	case SH_BREAKPOINT_LEN_4:
		len = HW_BREAKPOINT_LEN_4;
		break;
	case SH_BREAKPOINT_LEN_8:
		len = HW_BREAKPOINT_LEN_8;
		break;
	default:
		return -EINVAL;
	}
	*gen_len = len;

	switch (sh_type) {
	case SH_BREAKPOINT_READ:
		type = HW_BREAKPOINT_R;
		break;
	case SH_BREAKPOINT_WRITE:
		type = HW_BREAKPOINT_W;
		break;
	case SH_BREAKPOINT_RW:
		type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
		break;
	default:
		return -EINVAL;
	}
	*gen_type = type;

	return 0;
}
/*
 * Fill @hw from the generic perf attributes in @attr.
 *
 * The address is copied unconditionally, then the generic length and type
 * are mapped to their SH_BREAKPOINT_* encodings, in that order. @bp is not
 * used.
 *
 * Returns 0 on success or -EINVAL when the length or type is not one of
 * the supported values; fields translated before the failure stay written.
 */
static int arch_build_bp_info(struct perf_event *bp,
			      const struct perf_event_attr *attr,
			      struct arch_hw_breakpoint *hw)
{
	hw->address = attr->bp_addr;

	/* Length: 1, 2, 4 or 8 bytes. */
	if (attr->bp_len == HW_BREAKPOINT_LEN_1)
		hw->len = SH_BREAKPOINT_LEN_1;
	else if (attr->bp_len == HW_BREAKPOINT_LEN_2)
		hw->len = SH_BREAKPOINT_LEN_2;
	else if (attr->bp_len == HW_BREAKPOINT_LEN_4)
		hw->len = SH_BREAKPOINT_LEN_4;
	else if (attr->bp_len == HW_BREAKPOINT_LEN_8)
		hw->len = SH_BREAKPOINT_LEN_8;
	else
		return -EINVAL;

	/* Type: read, write, or both. */
	if (attr->bp_type == HW_BREAKPOINT_R)
		hw->type = SH_BREAKPOINT_READ;
	else if (attr->bp_type == HW_BREAKPOINT_W)
		hw->type = SH_BREAKPOINT_WRITE;
	else if (attr->bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
		hw->type = SH_BREAKPOINT_RW;
	else
		return -EINVAL;

	return 0;
}
/*
 * Validate the arch-specific HW Breakpoint register settings
 *
 * Fills @hw from @attr via arch_build_bp_info() and then checks that the
 * address is aligned to the breakpoint length.
 *
 * Returns 0 when the settings are usable, -EINVAL otherwise.
 */
int hw_breakpoint_arch_parse(struct perf_event *bp,
			     const struct perf_event_attr *attr,
			     struct arch_hw_breakpoint *hw)
{
	unsigned int mask;
	int err;

	err = arch_build_bp_info(bp, attr, hw);
	if (err)
		return err;

	/* Low-order address bits that must be clear for each length. */
	switch (hw->len) {
	case SH_BREAKPOINT_LEN_1:
		mask = 0;
		break;
	case SH_BREAKPOINT_LEN_2:
		mask = 1;
		break;
	case SH_BREAKPOINT_LEN_4:
		mask = 3;
		break;
	case SH_BREAKPOINT_LEN_8:
		mask = 7;
		break;
	default:
		return -EINVAL;
	}

	return (hw->address & mask) ? -EINVAL : 0;
}
/*
 * Release the user breakpoints used by ptrace
 *
 * Unregisters every entry of @tsk's ptrace_bps[] up to the number of UBC
 * channels and clears the entry afterwards.
 */
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
	struct thread_struct *thread = &tsk->thread;
	int slot = 0;

	while (slot < sh_ubc->num_events) {
		unregister_hw_breakpoint(thread->ptrace_bps[slot]);
		thread->ptrace_bps[slot] = NULL;
		slot++;
	}
}
/*
* Service a UBC breakpoint exception.
*
* For every channel whose bit is set in the triggered mask, the condition
* match flag is cleared, the perf event stored in that channel's per-CPU
* slot is notified through perf_bp_event(), and force_sig_fault() is
* called with SIGTRAP/TRAP_HWBKPT when the breakpoint address is not in
* kernel space. All channels are disabled while this runs; the channels
* left in resume_mask are re-enabled before returning.
*
* Returns NOTIFY_DONE if no channel triggered, or if at least one
* triggered channel visited by the loop had a breakpoint installed;
* otherwise returns NOTIFY_STOP.
*/
static int __kprobes hw_breakpoint_handler(struct die_args *args)
{
int cpu, i, rc = NOTIFY_STOP;
struct perf_event *bp;
unsigned int cmf, resume_mask;
/*
* Do an early return if none of the channels triggered.
*/
cmf = sh_ubc->triggered_mask();
if (unlikely(!cmf))
return NOTIFY_DONE;
/*
* By default, resume all of the active channels.
*/
resume_mask = sh_ubc->active_mask();
/*
* Disable breakpoints during exception handling.
*/
sh_ubc->disable_all();
/* CPU id used to index the per-CPU slot table; released by put_cpu() below. */
cpu = get_cpu();
for (i = 0; i < sh_ubc->num_events; i++) {
unsigned long event_mask = (1 << i);
/* Skip channels that did not raise this exception. */
if (likely(!(cmf & event_mask)))
continue;
/*
* The counter may be concurrently released but that can only
* occur from a call_rcu() path. We can then safely fetch
* the breakpoint, use its callback, touch its counter
* while we are in an rcu_read_lock() path.
*/
rcu_read_lock();
bp = per_cpu(bp_per_reg[i], cpu);
if (bp)
rc = NOTIFY_DONE;
/*
* Reset the condition match flag to denote completion of
* exception handling.
*/
sh_ubc->clear_triggered_mask(event_mask);
/*
* bp can be NULL due to concurrent removal of the perf
* counter. Note that the break leaves the loop, so any
* higher-numbered triggered channels are not examined.
*/
if (!bp) {
rcu_read_unlock();
break;
}
/*
* Don't restore the channel if the breakpoint is from
* ptrace, as it always operates in one-shot mode.
*/
if (bp->overflow_handler == ptrace_triggered)
resume_mask &= ~(1 << i);
perf_bp_event(bp, args->regs);
/* Deliver the signal to userspace */
if (!arch_check_bp_in_kernelspace(&bp->hw.info)) {
force_sig_fault(SIGTRAP, TRAP_HWBKPT,
(void __user *)NULL);
}
rcu_read_unlock();
}
/*
* NOTE(review): cmf is not written after the early return above, so
* this test looks unreachable -- confirm before removing.
*/
if (cmf == 0)
rc = NOTIFY_DONE;
sh_ubc->enable_all(resume_mask);
put_cpu();
return rc;
}
/*
* Trap entry for the breakpoint exception: looks up the exception vector
* and reports the event through notify_die() as DIE_BREAKPOINT with
* SIGTRAP.
*
* NOTE(review): `regs` is not declared in this block; presumably it is
* provided by BUILD_TRAP_HANDLER()/TRAP_HANDLER_DECL -- confirm.
*/
BUILD_TRAP_HANDLER(breakpoint)
{
unsigned long ex = lookup_exception_vector();
TRAP_HANDLER_DECL;
/* The vector is passed along so notifiers can tell trap sources apart. */
notify_die(DIE_BREAKPOINT, "breakpoint", regs, 0, ex, SIGTRAP);
}
/*
 * Handle debug exception notifications.
 *
 * Notifier callback: only DIE_BREAKPOINT events whose trap number matches
 * the registered UBC are forwarded to hw_breakpoint_handler(); everything
 * else is passed over with NOTIFY_DONE.
 */
int __kprobes hw_breakpoint_exceptions_notify(struct notifier_block *unused,
				    unsigned long val, void *data)
{
	struct die_args *die = data;

	/*
	 * If the breakpoint hasn't been triggered by the UBC, it's
	 * probably from a debugger, so don't do anything more here.
	 *
	 * This also permits the UBC interface clock to remain off for
	 * non-UBC breakpoints, as we don't need to check the triggered
	 * or active channel masks.
	 */
	if (val == DIE_BREAKPOINT && die->trapnr == sh_ubc->trap_nr)
		return hw_breakpoint_handler(die);

	return NOTIFY_DONE;
}
/*
* PMU read hook for breakpoint events. Currently a stub: it does nothing
* with @bp.
*/
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
int register_sh_ubc(struct sh_ubc *ubc)
{
/* Bail if it's already assigned */
if (sh_ubc != &ubc_dummy)
return -EBUSY;
sh_ubc = ubc;
pr_info("HW Breakpoints: %s UBC support registered\n", ubc->name);
WARN_ON(ubc->num_events > HBP_NUM);
return 0;
}