forked from luck/tmp_suning_uos_patched
perf/core, x86: Add PERF_SAMPLE_PHYS_ADDR
For understanding how the workload maps to memory channels and hardware behavior, it's very important to collect address maps with physical addresses. For example, 3D XPoint access can only be found by filtering the physical address. Add a new sample type for physical address. perf already has a facility to collect data virtual addresses. This patch introduces a function to convert a virtual address to a physical address. The function is quite generic and can be extended to any architecture as long as a virtual address is provided. - For kernel direct mapping addresses, virt_to_phys is used to convert the virtual addresses to physical addresses. - For user virtual addresses, __get_user_pages_fast is used to walk the page tables to find the user physical address. - This does not work for vmalloc addresses right now. These are not resolved, but code to do that could be added. The new sample type requires collecting the virtual address. The virtual address will not be output unless SAMPLE_ADDR is applied. For security, the physical address can only be exposed to root or a privileged user. Tested-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com> Signed-off-by: Kan Liang <kan.liang@intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Cc: acme@kernel.org Cc: mpe@ellerman.id.au Link: http://lkml.kernel.org/r/1503967969-48278-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
8d4e6c4caa
commit
fc7ce9c74c
|
@ -2039,7 +2039,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
|
||||||
|
|
||||||
perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
|
perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
|
||||||
|
|
||||||
if (event->attr.sample_type & PERF_SAMPLE_ADDR)
|
if (event->attr.sample_type &
|
||||||
|
(PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
|
||||||
perf_get_data_addr(regs, &data.addr);
|
perf_get_data_addr(regs, &data.addr);
|
||||||
|
|
||||||
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||||
|
|
|
@ -1185,7 +1185,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
|
||||||
else
|
else
|
||||||
regs->flags &= ~PERF_EFLAGS_EXACT;
|
regs->flags &= ~PERF_EFLAGS_EXACT;
|
||||||
|
|
||||||
if ((sample_type & PERF_SAMPLE_ADDR) &&
|
if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
|
||||||
x86_pmu.intel_cap.pebs_format >= 1)
|
x86_pmu.intel_cap.pebs_format >= 1)
|
||||||
data->addr = pebs->dla;
|
data->addr = pebs->dla;
|
||||||
|
|
||||||
|
|
|
@ -91,7 +91,7 @@ struct amd_nb {
|
||||||
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
|
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
|
||||||
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
|
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
|
||||||
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
|
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
|
||||||
PERF_SAMPLE_TRANSACTION)
|
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A debug store configuration.
|
* A debug store configuration.
|
||||||
|
|
|
@ -943,6 +943,8 @@ struct perf_sample_data {
|
||||||
|
|
||||||
struct perf_regs regs_intr;
|
struct perf_regs regs_intr;
|
||||||
u64 stack_user_size;
|
u64 stack_user_size;
|
||||||
|
|
||||||
|
u64 phys_addr;
|
||||||
} ____cacheline_aligned;
|
} ____cacheline_aligned;
|
||||||
|
|
||||||
/* default value for data source */
|
/* default value for data source */
|
||||||
|
|
|
@ -139,8 +139,9 @@ enum perf_event_sample_format {
|
||||||
PERF_SAMPLE_IDENTIFIER = 1U << 16,
|
PERF_SAMPLE_IDENTIFIER = 1U << 16,
|
||||||
PERF_SAMPLE_TRANSACTION = 1U << 17,
|
PERF_SAMPLE_TRANSACTION = 1U << 17,
|
||||||
PERF_SAMPLE_REGS_INTR = 1U << 18,
|
PERF_SAMPLE_REGS_INTR = 1U << 18,
|
||||||
|
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
|
||||||
|
|
||||||
PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
|
PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -814,6 +815,7 @@ enum perf_event_type {
|
||||||
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
|
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
|
||||||
* { u64 abi; # enum perf_sample_regs_abi
|
* { u64 abi; # enum perf_sample_regs_abi
|
||||||
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
|
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
|
||||||
|
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
|
||||||
* };
|
* };
|
||||||
*/
|
*/
|
||||||
PERF_RECORD_SAMPLE = 9,
|
PERF_RECORD_SAMPLE = 9,
|
||||||
|
|
|
@ -1575,6 +1575,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
|
||||||
if (sample_type & PERF_SAMPLE_TRANSACTION)
|
if (sample_type & PERF_SAMPLE_TRANSACTION)
|
||||||
size += sizeof(data->txn);
|
size += sizeof(data->txn);
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
|
||||||
|
size += sizeof(data->phys_addr);
|
||||||
|
|
||||||
event->header_size = size;
|
event->header_size = size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6017,6 +6020,9 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
|
||||||
|
perf_output_put(handle, data->phys_addr);
|
||||||
|
|
||||||
if (!event->attr.watermark) {
|
if (!event->attr.watermark) {
|
||||||
int wakeup_events = event->attr.wakeup_events;
|
int wakeup_events = event->attr.wakeup_events;
|
||||||
|
|
||||||
|
@ -6032,6 +6038,38 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * perf_virt_to_phys - translate a sampled virtual address to a physical one.
 * @virt: virtual address to translate; 0 is treated as "no address".
 *
 * Returns the physical address, or 0 when @virt is 0 or no translation
 * was made.
 *
 * Addresses at or above TASK_SIZE are translated with virt_to_phys(), but
 * only when virt_addr_valid() accepts them and they lie outside
 * [VMALLOC_START, VMALLOC_END). Addresses below TASK_SIZE are looked up
 * with __get_user_pages_fast() for a single page, and only when
 * current->mm is non-NULL; the result is the page's physical base plus the
 * offset of @virt within the page. Any page obtained is released with
 * put_page() before returning.
 */
static u64 perf_virt_to_phys(u64 virt)
|
||||||
|
{
|
||||||
|
u64 phys_addr = 0;
|
||||||
|
struct page *p = NULL;
|
||||||
|
|
||||||
|
if (!virt)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (virt >= TASK_SIZE) {
|
||||||
|
/* If it's vmalloc()d memory, leave phys_addr as 0 */
|
||||||
|
if (virt_addr_valid((void *)(uintptr_t)virt) &&
|
||||||
|
!(virt >= VMALLOC_START && virt < VMALLOC_END))
|
||||||
|
phys_addr = (u64)virt_to_phys((void *)(uintptr_t)virt);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* Walk the page tables for a user address.
|
||||||
|
* Interrupts are disabled, so it prevents any tear down
|
||||||
|
* of the page tables.
|
||||||
|
* Try IRQ-safe __get_user_pages_fast first.
|
||||||
|
* If that fails, leave phys_addr as 0.
|
||||||
|
*/
|
||||||
|
if ((current->mm != NULL) &&
|
||||||
|
(__get_user_pages_fast(virt, 1, 0, &p) == 1))
|
||||||
|
phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
|
||||||
|
|
||||||
|
if (p)
|
||||||
|
put_page(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
return phys_addr;
|
||||||
|
}
|
||||||
|
|
||||||
void perf_prepare_sample(struct perf_event_header *header,
|
void perf_prepare_sample(struct perf_event_header *header,
|
||||||
struct perf_sample_data *data,
|
struct perf_sample_data *data,
|
||||||
struct perf_event *event,
|
struct perf_event *event,
|
||||||
|
@ -6150,6 +6188,9 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||||
|
|
||||||
header->size += size;
|
header->size += size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
|
||||||
|
data->phys_addr = perf_virt_to_phys(data->addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __always_inline
|
static void __always_inline
|
||||||
|
@ -9909,6 +9950,11 @@ SYSCALL_DEFINE5(perf_event_open,
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Only privileged users can get physical addresses */
|
||||||
|
if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
|
||||||
|
perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
|
||||||
|
return -EACCES;
|
||||||
|
|
||||||
if (!attr.sample_max_stack)
|
if (!attr.sample_max_stack)
|
||||||
attr.sample_max_stack = sysctl_perf_event_max_stack;
|
attr.sample_max_stack = sysctl_perf_event_max_stack;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user