forked from luck/tmp_suning_uos_patched
These are the performance events changes for v5.10:
x86 Intel updates: - Add Jasper Lake support - Add support for TopDown metrics on Ice Lake - Fix Ice Lake & Tiger Lake uncore support, add Snow Ridge support - Add a PCI sub driver to support uncore PMUs where the PCI resources have been claimed already - extending the range of supported systems. x86 AMD updates: - Restore 'perf stat -a' behaviour to program the uncore PMU to count all CPU threads. - Fix setting the proper count when sampling Large Increment per Cycle events / 'paired' events. - Fix IBS Fetch sampling on F17h and some other IBS fine tuning, greatly reducing the number of interrupts when large sample periods are specified. - Extends Family 17h RAPL support to also work on compatible F19h machines. Core code updates: - Fix race in perf_mmap_close() - Add PERF_EV_CAP_SIBLING, to denote that sibling events should be closed if the leader is removed. - Smaller fixes and updates. Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAl+Ef40RHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1h7NQ//ZdQ26Yg79ZaxBX1QSINJ9AgXDi6rXs75 qU9qNwr/6EF+633RZoPQGAE0Iy5v6h7iLFokcJzM9+kK/rE3ax44tSnPlcMa0+6N SHXKCa5iL+hH7o2Spo2MZwCYseH79rloX3TSH7ajnN3X8PvwgWshF0lUE3WEWtCs eHSojdCk43IuL9TpusuNOBM2FvgnheFYWiMbFHd0MTBUMxul30sLVCG8IIWCPA+q TwG4RJS3X42VbL3SuAGFmOv4OmqNsfkvHvjpDs4NF07tRB9zjXzGrxmGhgSw0NAN 2KK25qbmrpKATIb4Eqsgk/yikX/SCrDEXrjhg3r8FnyPvRfctq1crZjjf672PI2E bDda76dH6Lq9jv5fsyJjas5OsYdMKBCnA+tGQxXPGbmTXeEcYMRbDnwhYnevI/Q/ 8pP+xstF0pmBA3tvpDPrQnYH72Qt7CLJSdcTB15NqZftU2tJxaAyJGx4gJy33jxQ wu6BIEGHQ7onQYiIyTwsBHyz6xNsF/CRHwAPcGdYrRRbXB5K5nxHiXNb4awciTMx 2HF31/S4OqURNpfcpxOQo+1fb/cLqj3loGqE4jCTwkbS3lrHcAcfxyv9QNn77l1f hdQ0jworbUNVLUYEUQz1bkZ06GD3LSSas2ZlY1NNdHo62mjyXMQmgirNcZmrFgWl tl2gNFAU9x4= =2fuY -----END PGP SIGNATURE----- Merge tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull performance events updates from Ingo Molnar: "x86 Intel updates: - Add Jasper Lake support - Add 
support for TopDown metrics on Ice Lake - Fix Ice Lake & Tiger Lake uncore support, add Snow Ridge support - Add a PCI sub driver to support uncore PMUs where the PCI resources have been claimed already - extending the range of supported systems. x86 AMD updates: - Restore 'perf stat -a' behaviour to program the uncore PMU to count all CPU threads. - Fix setting the proper count when sampling Large Increment per Cycle events / 'paired' events. - Fix IBS Fetch sampling on F17h and some other IBS fine tuning, greatly reducing the number of interrupts when large sample periods are specified. - Extends Family 17h RAPL support to also work on compatible F19h machines. Core code updates: - Fix race in perf_mmap_close() - Add PERF_EV_CAP_SIBLING, to denote that sibling events should be closed if the leader is removed. - Smaller fixes and updates" * tag 'perf-core-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits) perf/core: Fix race in the perf_mmap_close() function perf/x86: Fix n_metric for cancelled txn perf/x86: Fix n_pair for cancelled txn x86/events/amd/iommu: Fix sizeof mismatch perf/x86/intel: Check perf metrics feature for each CPU perf/x86/intel: Fix Ice Lake event constraint table perf/x86/intel/uncore: Fix the scale of the IMC free-running events perf/x86/intel/uncore: Fix for iio mapping on Skylake Server perf/x86/msr: Add Jasper Lake support perf/x86/intel: Add Jasper Lake support perf/x86/intel/uncore: Reduce the number of CBOX counters perf/x86/intel/uncore: Update Ice Lake uncore units perf/x86/intel/uncore: Split the Ice Lake and Tiger Lake MSR uncore support perf/x86/intel/uncore: Support PCIe3 unit on Snow Ridge perf/x86/intel/uncore: Generic support for the PCI sub driver perf/x86/intel/uncore: Factor out uncore_pci_pmu_unregister() perf/x86/intel/uncore: Factor out uncore_pci_pmu_register() perf/x86/intel/uncore: Factor out uncore_pci_find_dev_pmu() perf/x86/intel/uncore: Factor out uncore_pci_get_dev_die_info() 
perf/amd/uncore: Inform the user how many counters each uncore PMU has ...
This commit is contained in:
commit
3bff6112c8
|
@ -89,6 +89,7 @@ struct perf_ibs {
|
|||
u64 max_period;
|
||||
unsigned long offset_mask[1];
|
||||
int offset_max;
|
||||
unsigned int fetch_count_reset_broken : 1;
|
||||
struct cpu_perf_ibs __percpu *pcpu;
|
||||
|
||||
struct attribute **format_attrs;
|
||||
|
@ -334,11 +335,18 @@ static u64 get_ibs_op_count(u64 config)
|
|||
{
|
||||
u64 count = 0;
|
||||
|
||||
if (config & IBS_OP_VAL)
|
||||
count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */
|
||||
|
||||
if (ibs_caps & IBS_CAPS_RDWROPCNT)
|
||||
count += (config & IBS_OP_CUR_CNT) >> 32;
|
||||
/*
|
||||
* If the internal 27-bit counter rolled over, the count is MaxCnt
|
||||
* and the lower 7 bits of CurCnt are randomized.
|
||||
* Otherwise CurCnt has the full 27-bit current counter value.
|
||||
*/
|
||||
if (config & IBS_OP_VAL) {
|
||||
count = (config & IBS_OP_MAX_CNT) << 4;
|
||||
if (ibs_caps & IBS_CAPS_OPCNTEXT)
|
||||
count += config & IBS_OP_MAX_CNT_EXT_MASK;
|
||||
} else if (ibs_caps & IBS_CAPS_RDWROPCNT) {
|
||||
count = (config & IBS_OP_CUR_CNT) >> 32;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
@ -363,7 +371,12 @@ perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
|
|||
static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
|
||||
struct hw_perf_event *hwc, u64 config)
|
||||
{
|
||||
wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
|
||||
u64 tmp = hwc->config | config;
|
||||
|
||||
if (perf_ibs->fetch_count_reset_broken)
|
||||
wrmsrl(hwc->config_base, tmp & ~perf_ibs->enable_mask);
|
||||
|
||||
wrmsrl(hwc->config_base, tmp | perf_ibs->enable_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -394,7 +407,7 @@ static void perf_ibs_start(struct perf_event *event, int flags)
|
|||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
|
||||
struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
|
||||
u64 period;
|
||||
u64 period, config = 0;
|
||||
|
||||
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
|
||||
return;
|
||||
|
@ -403,13 +416,19 @@ static void perf_ibs_start(struct perf_event *event, int flags)
|
|||
hwc->state = 0;
|
||||
|
||||
perf_ibs_set_period(perf_ibs, hwc, &period);
|
||||
if (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_OPCNTEXT)) {
|
||||
config |= period & IBS_OP_MAX_CNT_EXT_MASK;
|
||||
period &= ~IBS_OP_MAX_CNT_EXT_MASK;
|
||||
}
|
||||
config |= period >> 4;
|
||||
|
||||
/*
|
||||
* Set STARTED before enabling the hardware, such that a subsequent NMI
|
||||
* must observe it.
|
||||
*/
|
||||
set_bit(IBS_STARTED, pcpu->state);
|
||||
clear_bit(IBS_STOPPING, pcpu->state);
|
||||
perf_ibs_enable_event(perf_ibs, hwc, period >> 4);
|
||||
perf_ibs_enable_event(perf_ibs, hwc, config);
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
}
|
||||
|
@ -577,7 +596,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
|
|||
struct perf_ibs_data ibs_data;
|
||||
int offset, size, check_rip, offset_max, throttle = 0;
|
||||
unsigned int msr;
|
||||
u64 *buf, *config, period;
|
||||
u64 *buf, *config, period, new_config = 0;
|
||||
|
||||
if (!test_bit(IBS_STARTED, pcpu->state)) {
|
||||
fail:
|
||||
|
@ -626,12 +645,13 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
|
|||
perf_ibs->offset_max,
|
||||
offset + 1);
|
||||
} while (offset < offset_max);
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
/*
|
||||
* Read IbsBrTarget and IbsOpData4 separately
|
||||
* Read IbsBrTarget, IbsOpData4, and IbsExtdCtl separately
|
||||
* depending on their availability.
|
||||
* Can't add to offset_max as they are staggered
|
||||
*/
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
if (perf_ibs == &perf_ibs_op) {
|
||||
if (ibs_caps & IBS_CAPS_BRNTRGT) {
|
||||
rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
|
||||
size++;
|
||||
|
@ -641,6 +661,11 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
|
|||
size++;
|
||||
}
|
||||
}
|
||||
if (perf_ibs == &perf_ibs_fetch && (ibs_caps & IBS_CAPS_FETCHCTLEXTD)) {
|
||||
rdmsrl(MSR_AMD64_ICIBSEXTDCTL, *buf++);
|
||||
size++;
|
||||
}
|
||||
}
|
||||
ibs_data.size = sizeof(u64) * size;
|
||||
|
||||
regs = *iregs;
|
||||
|
@ -666,13 +691,17 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
|
|||
if (throttle) {
|
||||
perf_ibs_stop(event, 0);
|
||||
} else {
|
||||
period >>= 4;
|
||||
if (perf_ibs == &perf_ibs_op) {
|
||||
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
|
||||
new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
|
||||
period &= ~IBS_OP_MAX_CNT_EXT_MASK;
|
||||
}
|
||||
if ((ibs_caps & IBS_CAPS_RDWROPCNT) && (*config & IBS_OP_CNT_CTL))
|
||||
new_config |= *config & IBS_OP_CUR_CNT_RAND;
|
||||
}
|
||||
new_config |= period >> 4;
|
||||
|
||||
if ((ibs_caps & IBS_CAPS_RDWROPCNT) &&
|
||||
(*config & IBS_OP_CNT_CTL))
|
||||
period |= *config & IBS_OP_CUR_CNT_RAND;
|
||||
|
||||
perf_ibs_enable_event(perf_ibs, hwc, period);
|
||||
perf_ibs_enable_event(perf_ibs, hwc, new_config);
|
||||
}
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
|
@ -733,12 +762,26 @@ static __init void perf_event_ibs_init(void)
|
|||
{
|
||||
struct attribute **attr = ibs_op_format_attrs;
|
||||
|
||||
/*
|
||||
* Some chips fail to reset the fetch count when it is written; instead
|
||||
* they need a 0-1 transition of IbsFetchEn.
|
||||
*/
|
||||
if (boot_cpu_data.x86 >= 0x16 && boot_cpu_data.x86 <= 0x18)
|
||||
perf_ibs_fetch.fetch_count_reset_broken = 1;
|
||||
|
||||
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
|
||||
|
||||
if (ibs_caps & IBS_CAPS_OPCNT) {
|
||||
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
|
||||
*attr++ = &format_attr_cnt_ctl.attr;
|
||||
}
|
||||
|
||||
if (ibs_caps & IBS_CAPS_OPCNTEXT) {
|
||||
perf_ibs_op.max_period |= IBS_OP_MAX_CNT_EXT_MASK;
|
||||
perf_ibs_op.config_mask |= IBS_OP_MAX_CNT_EXT_MASK;
|
||||
perf_ibs_op.cnt_mask |= IBS_OP_MAX_CNT_EXT_MASK;
|
||||
}
|
||||
|
||||
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
|
||||
|
||||
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
|
||||
|
|
|
@ -379,7 +379,7 @@ static __init int _init_events_attrs(void)
|
|||
while (amd_iommu_v2_event_descs[i].attr.attr.name)
|
||||
i++;
|
||||
|
||||
attrs = kcalloc(i + 1, sizeof(struct attribute **), GFP_KERNEL);
|
||||
attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
|
||||
if (!attrs)
|
||||
return -ENOMEM;
|
||||
|
||||
|
|
|
@ -181,28 +181,28 @@ static void amd_uncore_del(struct perf_event *event, int flags)
|
|||
}
|
||||
|
||||
/*
|
||||
* Convert logical CPU number to L3 PMC Config ThreadMask format
|
||||
* Return a full thread and slice mask unless user
|
||||
* has provided them
|
||||
*/
|
||||
static u64 l3_thread_slice_mask(int cpu)
|
||||
static u64 l3_thread_slice_mask(u64 config)
|
||||
{
|
||||
u64 thread_mask, core = topology_core_id(cpu);
|
||||
unsigned int shift, thread = 0;
|
||||
if (boot_cpu_data.x86 <= 0x18)
|
||||
return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) |
|
||||
((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK);
|
||||
|
||||
if (topology_smt_supported() && !topology_is_primary_thread(cpu))
|
||||
thread = 1;
|
||||
/*
|
||||
* If the user doesn't specify a threadmask, they're not trying to
|
||||
* count core 0, so we enable all cores & threads.
|
||||
* We'll also assume that they want to count slice 0 if they specify
|
||||
* a threadmask and leave sliceid and enallslices unpopulated.
|
||||
*/
|
||||
if (!(config & AMD64_L3_F19H_THREAD_MASK))
|
||||
return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES |
|
||||
AMD64_L3_EN_ALL_CORES;
|
||||
|
||||
if (boot_cpu_data.x86 <= 0x18) {
|
||||
shift = AMD64_L3_THREAD_SHIFT + 2 * (core % 4) + thread;
|
||||
thread_mask = BIT_ULL(shift);
|
||||
|
||||
return AMD64_L3_SLICE_MASK | thread_mask;
|
||||
}
|
||||
|
||||
core = (core << AMD64_L3_COREID_SHIFT) & AMD64_L3_COREID_MASK;
|
||||
shift = AMD64_L3_THREAD_SHIFT + thread;
|
||||
thread_mask = BIT_ULL(shift);
|
||||
|
||||
return AMD64_L3_EN_ALL_SLICES | core | thread_mask;
|
||||
return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK |
|
||||
AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES |
|
||||
AMD64_L3_COREID_MASK);
|
||||
}
|
||||
|
||||
static int amd_uncore_event_init(struct perf_event *event)
|
||||
|
@ -232,7 +232,7 @@ static int amd_uncore_event_init(struct perf_event *event)
|
|||
* For other events, the two fields do not affect the count.
|
||||
*/
|
||||
if (l3_mask && is_llc_event(event))
|
||||
hwc->config |= l3_thread_slice_mask(event->cpu);
|
||||
hwc->config |= l3_thread_slice_mask(event->attr.config);
|
||||
|
||||
uncore = event_to_amd_uncore(event);
|
||||
if (!uncore)
|
||||
|
@ -274,47 +274,72 @@ static struct attribute_group amd_uncore_attr_group = {
|
|||
.attrs = amd_uncore_attrs,
|
||||
};
|
||||
|
||||
/*
|
||||
* Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based
|
||||
* on family
|
||||
*/
|
||||
#define AMD_FORMAT_ATTR(_dev, _name, _format) \
|
||||
static ssize_t \
|
||||
_dev##_show##_name(struct device *dev, \
|
||||
struct device_attribute *attr, \
|
||||
#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
|
||||
static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
|
||||
struct kobj_attribute *attr, \
|
||||
char *page) \
|
||||
{ \
|
||||
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
|
||||
return sprintf(page, _format "\n"); \
|
||||
} \
|
||||
static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev);
|
||||
static struct kobj_attribute format_attr_##_var = \
|
||||
__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
|
||||
|
||||
/* Used for each uncore counter type */
|
||||
#define AMD_ATTRIBUTE(_name) \
|
||||
static struct attribute *amd_uncore_format_attr_##_name[] = { \
|
||||
&format_attr_event_##_name.attr, \
|
||||
&format_attr_umask.attr, \
|
||||
NULL, \
|
||||
}; \
|
||||
static struct attribute_group amd_uncore_format_group_##_name = { \
|
||||
.name = "format", \
|
||||
.attrs = amd_uncore_format_attr_##_name, \
|
||||
}; \
|
||||
static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \
|
||||
&amd_uncore_attr_group, \
|
||||
&amd_uncore_format_group_##_name, \
|
||||
NULL, \
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */
|
||||
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */
|
||||
DEFINE_UNCORE_FORMAT_ATTR(slicemask, slicemask, "config:48-51"); /* F17h L3 */
|
||||
DEFINE_UNCORE_FORMAT_ATTR(threadmask8, threadmask, "config:56-63"); /* F17h L3 */
|
||||
DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L3 */
|
||||
DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */
|
||||
DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */
|
||||
DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */
|
||||
|
||||
static struct attribute *amd_uncore_df_format_attr[] = {
|
||||
&format_attr_event12.attr, /* event14 if F17h+ */
|
||||
&format_attr_umask.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
AMD_FORMAT_ATTR(event, , "config:0-7,32-35");
|
||||
AMD_FORMAT_ATTR(umask, , "config:8-15");
|
||||
AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60");
|
||||
AMD_FORMAT_ATTR(event, _l3, "config:0-7");
|
||||
AMD_ATTRIBUTE(df);
|
||||
AMD_ATTRIBUTE(l3);
|
||||
static struct attribute *amd_uncore_l3_format_attr[] = {
|
||||
&format_attr_event12.attr, /* event8 if F17h+ */
|
||||
&format_attr_umask.attr,
|
||||
NULL, /* slicemask if F17h, coreid if F19h */
|
||||
NULL, /* threadmask8 if F17h, enallslices if F19h */
|
||||
NULL, /* enallcores if F19h */
|
||||
NULL, /* sliceid if F19h */
|
||||
NULL, /* threadmask2 if F19h */
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_uncore_df_format_group = {
|
||||
.name = "format",
|
||||
.attrs = amd_uncore_df_format_attr,
|
||||
};
|
||||
|
||||
static struct attribute_group amd_uncore_l3_format_group = {
|
||||
.name = "format",
|
||||
.attrs = amd_uncore_l3_format_attr,
|
||||
};
|
||||
|
||||
static const struct attribute_group *amd_uncore_df_attr_groups[] = {
|
||||
&amd_uncore_attr_group,
|
||||
&amd_uncore_df_format_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group *amd_uncore_l3_attr_groups[] = {
|
||||
&amd_uncore_attr_group,
|
||||
&amd_uncore_l3_format_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct pmu amd_nb_pmu = {
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.attr_groups = amd_uncore_df_attr_groups,
|
||||
.name = "amd_nb",
|
||||
.event_init = amd_uncore_event_init,
|
||||
.add = amd_uncore_add,
|
||||
.del = amd_uncore_del,
|
||||
|
@ -326,6 +351,8 @@ static struct pmu amd_nb_pmu = {
|
|||
|
||||
static struct pmu amd_llc_pmu = {
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.attr_groups = amd_uncore_l3_attr_groups,
|
||||
.name = "amd_l2",
|
||||
.event_init = amd_uncore_event_init,
|
||||
.add = amd_uncore_add,
|
||||
.del = amd_uncore_del,
|
||||
|
@ -529,6 +556,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu)
|
|||
|
||||
static int __init amd_uncore_init(void)
|
||||
{
|
||||
struct attribute **df_attr = amd_uncore_df_format_attr;
|
||||
struct attribute **l3_attr = amd_uncore_l3_format_attr;
|
||||
int ret = -ENODEV;
|
||||
|
||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
|
||||
|
@ -538,6 +567,8 @@ static int __init amd_uncore_init(void)
|
|||
if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
|
||||
return -ENODEV;
|
||||
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L2;
|
||||
if (boot_cpu_data.x86 >= 0x17) {
|
||||
/*
|
||||
* For F17h and above, the Northbridge counters are
|
||||
|
@ -545,27 +576,16 @@ static int __init amd_uncore_init(void)
|
|||
* counters are supported too. The PMUs are exported
|
||||
* based on family as either L2 or L3 and NB or DF.
|
||||
*/
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L3;
|
||||
amd_nb_pmu.name = "amd_df";
|
||||
amd_llc_pmu.name = "amd_l3";
|
||||
format_attr_event_df.show = &event_show_df;
|
||||
format_attr_event_l3.show = &event_show_l3;
|
||||
l3_mask = true;
|
||||
} else {
|
||||
num_counters_nb = NUM_COUNTERS_NB;
|
||||
num_counters_llc = NUM_COUNTERS_L2;
|
||||
amd_nb_pmu.name = "amd_nb";
|
||||
amd_llc_pmu.name = "amd_l2";
|
||||
format_attr_event_df = format_attr_event;
|
||||
format_attr_event_l3 = format_attr_event;
|
||||
l3_mask = false;
|
||||
}
|
||||
|
||||
amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
|
||||
amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) {
|
||||
if (boot_cpu_data.x86 >= 0x17)
|
||||
*df_attr = &format_attr_event14.attr;
|
||||
|
||||
amd_uncore_nb = alloc_percpu(struct amd_uncore *);
|
||||
if (!amd_uncore_nb) {
|
||||
ret = -ENOMEM;
|
||||
|
@ -575,13 +595,29 @@ static int __init amd_uncore_init(void)
|
|||
if (ret)
|
||||
goto fail_nb;
|
||||
|
||||
pr_info("%s NB counters detected\n",
|
||||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
|
||||
"HYGON" : "AMD");
|
||||
pr_info("%d %s %s counters detected\n", num_counters_nb,
|
||||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
|
||||
amd_nb_pmu.name);
|
||||
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) {
|
||||
if (boot_cpu_data.x86 >= 0x19) {
|
||||
*l3_attr++ = &format_attr_event8.attr;
|
||||
*l3_attr++ = &format_attr_umask.attr;
|
||||
*l3_attr++ = &format_attr_coreid.attr;
|
||||
*l3_attr++ = &format_attr_enallslices.attr;
|
||||
*l3_attr++ = &format_attr_enallcores.attr;
|
||||
*l3_attr++ = &format_attr_sliceid.attr;
|
||||
*l3_attr++ = &format_attr_threadmask2.attr;
|
||||
} else if (boot_cpu_data.x86 >= 0x17) {
|
||||
*l3_attr++ = &format_attr_event8.attr;
|
||||
*l3_attr++ = &format_attr_umask.attr;
|
||||
*l3_attr++ = &format_attr_slicemask.attr;
|
||||
*l3_attr++ = &format_attr_threadmask8.attr;
|
||||
}
|
||||
|
||||
amd_uncore_llc = alloc_percpu(struct amd_uncore *);
|
||||
if (!amd_uncore_llc) {
|
||||
ret = -ENOMEM;
|
||||
|
@ -591,9 +627,9 @@ static int __init amd_uncore_init(void)
|
|||
if (ret)
|
||||
goto fail_llc;
|
||||
|
||||
pr_info("%s LLC counters detected\n",
|
||||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ?
|
||||
"HYGON" : "AMD");
|
||||
pr_info("%d %s %s counters detected\n", num_counters_llc,
|
||||
boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "",
|
||||
amd_llc_pmu.name);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -105,6 +105,9 @@ u64 x86_perf_event_update(struct perf_event *event)
|
|||
if (unlikely(!hwc->event_base))
|
||||
return 0;
|
||||
|
||||
if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
|
||||
return x86_pmu.update_topdown_event(event);
|
||||
|
||||
/*
|
||||
* Careful: an NMI might modify the previous event value.
|
||||
*
|
||||
|
@ -1056,6 +1059,45 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
|
|||
return unsched ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
static int add_nr_metric_event(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (is_metric_event(event)) {
|
||||
if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
|
||||
return -EINVAL;
|
||||
cpuc->n_metric++;
|
||||
cpuc->n_txn_metric++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void del_nr_metric_event(struct cpu_hw_events *cpuc,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (is_metric_event(event))
|
||||
cpuc->n_metric--;
|
||||
}
|
||||
|
||||
static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
|
||||
int max_count, int n)
|
||||
{
|
||||
|
||||
if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
|
||||
return -EINVAL;
|
||||
|
||||
if (n >= max_count + cpuc->n_metric)
|
||||
return -EINVAL;
|
||||
|
||||
cpuc->event_list[n] = event;
|
||||
if (is_counter_pair(&event->hw)) {
|
||||
cpuc->n_pair++;
|
||||
cpuc->n_txn_pair++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* dogrp: true if the sibling events (the whole group) must be collected too
|
||||
* returns total number of events and error code
|
||||
|
@ -1092,28 +1134,22 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
|
|||
}
|
||||
|
||||
if (is_x86_event(leader)) {
|
||||
if (n >= max_count)
|
||||
if (collect_event(cpuc, leader, max_count, n))
|
||||
return -EINVAL;
|
||||
cpuc->event_list[n] = leader;
|
||||
n++;
|
||||
if (is_counter_pair(&leader->hw))
|
||||
cpuc->n_pair++;
|
||||
}
|
||||
|
||||
if (!dogrp)
|
||||
return n;
|
||||
|
||||
for_each_sibling_event(event, leader) {
|
||||
if (!is_x86_event(event) ||
|
||||
event->state <= PERF_EVENT_STATE_OFF)
|
||||
if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF)
|
||||
continue;
|
||||
|
||||
if (n >= max_count)
|
||||
if (collect_event(cpuc, event, max_count, n))
|
||||
return -EINVAL;
|
||||
|
||||
cpuc->event_list[n] = event;
|
||||
n++;
|
||||
if (is_counter_pair(&event->hw))
|
||||
cpuc->n_pair++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
@ -1135,11 +1171,16 @@ static inline void x86_assign_hw_event(struct perf_event *event,
|
|||
hwc->event_base = 0;
|
||||
break;
|
||||
|
||||
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
|
||||
/* All the metric events are mapped onto the fixed counter 3. */
|
||||
idx = INTEL_PMC_IDX_FIXED_SLOTS;
|
||||
/* fall through */
|
||||
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
|
||||
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
|
||||
hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
|
||||
(idx - INTEL_PMC_IDX_FIXED);
|
||||
hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) | 1<<30;
|
||||
hwc->event_base_rdpmc = (idx - INTEL_PMC_IDX_FIXED) |
|
||||
INTEL_PMC_FIXED_RDPMC_BASE;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -1270,6 +1311,10 @@ int x86_perf_event_set_period(struct perf_event *event)
|
|||
if (unlikely(!hwc->event_base))
|
||||
return 0;
|
||||
|
||||
if (unlikely(is_topdown_count(event)) &&
|
||||
x86_pmu.set_topdown_event_period)
|
||||
return x86_pmu.set_topdown_event_period(event);
|
||||
|
||||
/*
|
||||
* If we are way outside a reasonable range then just skip forward:
|
||||
*/
|
||||
|
@ -1309,11 +1354,11 @@ int x86_perf_event_set_period(struct perf_event *event)
|
|||
wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
|
||||
|
||||
/*
|
||||
* Clear the Merge event counter's upper 16 bits since
|
||||
* Sign extend the Merge event counter's upper 16 bits since
|
||||
* we currently declare a 48-bit counter width
|
||||
*/
|
||||
if (is_counter_pair(hwc))
|
||||
wrmsrl(x86_pmu_event_addr(idx + 1), 0);
|
||||
wrmsrl(x86_pmu_event_addr(idx + 1), 0xffff);
|
||||
|
||||
/*
|
||||
* Due to an erratum on certain CPUs we need
|
||||
|
@ -1551,6 +1596,8 @@ static void x86_pmu_del(struct perf_event *event, int flags)
|
|||
}
|
||||
cpuc->event_constraint[i-1] = NULL;
|
||||
--cpuc->n_events;
|
||||
if (x86_pmu.intel_cap.perf_metrics)
|
||||
del_nr_metric_event(cpuc, event);
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
|
@ -2018,6 +2065,8 @@ static void x86_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
|
|||
|
||||
perf_pmu_disable(pmu);
|
||||
__this_cpu_write(cpu_hw_events.n_txn, 0);
|
||||
__this_cpu_write(cpu_hw_events.n_txn_pair, 0);
|
||||
__this_cpu_write(cpu_hw_events.n_txn_metric, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2043,6 +2092,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu)
|
|||
*/
|
||||
__this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
|
||||
__this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
|
||||
__this_cpu_sub(cpu_hw_events.n_pair, __this_cpu_read(cpu_hw_events.n_txn_pair));
|
||||
__this_cpu_sub(cpu_hw_events.n_metric, __this_cpu_read(cpu_hw_events.n_txn_metric));
|
||||
perf_pmu_enable(pmu);
|
||||
}
|
||||
|
||||
|
@ -2264,17 +2315,15 @@ static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *m
|
|||
|
||||
static int x86_pmu_event_idx(struct perf_event *event)
|
||||
{
|
||||
int idx = event->hw.idx;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
|
||||
if (!(hwc->flags & PERF_X86_EVENT_RDPMC_ALLOWED))
|
||||
return 0;
|
||||
|
||||
if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
|
||||
idx -= INTEL_PMC_IDX_FIXED;
|
||||
idx |= 1 << 30;
|
||||
}
|
||||
|
||||
return idx + 1;
|
||||
if (is_metric_idx(hwc->idx))
|
||||
return INTEL_PMC_FIXED_RDPMC_METRICS + 1;
|
||||
else
|
||||
return hwc->event_base_rdpmc + 1;
|
||||
}
|
||||
|
||||
static ssize_t get_attr_rdpmc(struct device *cdev,
|
||||
|
|
|
@ -243,10 +243,14 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
|
|||
|
||||
static struct event_constraint intel_icl_event_constraints[] = {
|
||||
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
|
||||
INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */
|
||||
FIXED_EVENT_CONSTRAINT(0x01c0, 0), /* INST_RETIRED.PREC_DIST */
|
||||
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
|
||||
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
|
||||
FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FE_BOUND, 2),
|
||||
METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BE_BOUND, 3),
|
||||
INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
|
||||
INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
|
||||
INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
|
||||
|
@ -309,6 +313,12 @@ EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
|
|||
EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
|
||||
"4", "2");
|
||||
|
||||
EVENT_ATTR_STR(slots, slots, "event=0x00,umask=0x4");
|
||||
EVENT_ATTR_STR(topdown-retiring, td_retiring, "event=0x00,umask=0x80");
|
||||
EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec, "event=0x00,umask=0x81");
|
||||
EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound, "event=0x00,umask=0x82");
|
||||
EVENT_ATTR_STR(topdown-be-bound, td_be_bound, "event=0x00,umask=0x83");
|
||||
|
||||
static struct attribute *snb_events_attrs[] = {
|
||||
EVENT_PTR(td_slots_issued),
|
||||
EVENT_PTR(td_slots_retired),
|
||||
|
@ -2165,11 +2175,24 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
|
|||
static void intel_pmu_disable_fixed(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
|
||||
u64 ctrl_val, mask;
|
||||
int idx = hwc->idx;
|
||||
|
||||
mask = 0xfULL << (idx * 4);
|
||||
if (is_topdown_idx(idx)) {
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
/*
|
||||
* When there are other active TopDown events,
|
||||
* don't disable the fixed counter 3.
|
||||
*/
|
||||
if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
|
||||
return;
|
||||
idx = INTEL_PMC_IDX_FIXED_SLOTS;
|
||||
}
|
||||
|
||||
intel_clear_masks(event, idx);
|
||||
|
||||
mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
|
||||
rdmsrl(hwc->config_base, ctrl_val);
|
||||
ctrl_val &= ~mask;
|
||||
wrmsrl(hwc->config_base, ctrl_val);
|
||||
|
@ -2180,17 +2203,28 @@ static void intel_pmu_disable_event(struct perf_event *event)
|
|||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (idx < INTEL_PMC_IDX_FIXED) {
|
||||
switch (idx) {
|
||||
case 0 ... INTEL_PMC_IDX_FIXED - 1:
|
||||
intel_clear_masks(event, idx);
|
||||
x86_pmu_disable_event(event);
|
||||
} else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
|
||||
intel_clear_masks(event, idx);
|
||||
break;
|
||||
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
|
||||
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
|
||||
intel_pmu_disable_fixed(event);
|
||||
} else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
|
||||
break;
|
||||
case INTEL_PMC_IDX_FIXED_BTS:
|
||||
intel_pmu_disable_bts();
|
||||
intel_pmu_drain_bts_buffer();
|
||||
} else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
|
||||
return;
|
||||
case INTEL_PMC_IDX_FIXED_VLBR:
|
||||
intel_clear_masks(event, idx);
|
||||
break;
|
||||
default:
|
||||
intel_clear_masks(event, idx);
|
||||
pr_warn("Failed to disable the event with invalid index %d\n",
|
||||
idx);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Needs to be called after x86_pmu_disable_event,
|
||||
|
@ -2208,10 +2242,189 @@ static void intel_pmu_del_event(struct perf_event *event)
|
|||
intel_pmu_pebs_del(event);
|
||||
}
|
||||
|
||||
static int icl_set_topdown_event_period(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
s64 left = local64_read(&hwc->period_left);
|
||||
|
||||
/*
|
||||
* The values in PERF_METRICS MSR are derived from fixed counter 3.
|
||||
* Software should start both registers, PERF_METRICS and fixed
|
||||
* counter 3, from zero.
|
||||
* Clear PERF_METRICS and Fixed counter 3 in initialization.
|
||||
* After that, both MSRs will be cleared for each read.
|
||||
* Don't need to clear them again.
|
||||
*/
|
||||
if (left == x86_pmu.max_period) {
|
||||
wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
|
||||
wrmsrl(MSR_PERF_METRICS, 0);
|
||||
hwc->saved_slots = 0;
|
||||
hwc->saved_metric = 0;
|
||||
}
|
||||
|
||||
if ((hwc->saved_slots) && is_slots_event(event)) {
|
||||
wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
|
||||
wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
|
||||
}
|
||||
|
||||
perf_event_update_userpage(event);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Convert one metric field of @metric into a slots count.
 *
 * The metric is reported as an 8-bit integer fraction, with all fractions
 * summing up to 0xff:
 *
 *	slots-in-metric = (Metric / 0xff) * slots
 *
 * @idx selects the byte, counted from INTEL_PMC_IDX_METRIC_BASE.
 */
static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
{
	int shift = (idx - INTEL_PMC_IDX_METRIC_BASE) * 8;
	u32 fraction = (metric >> shift) & 0xff;

	return mul_u64_u32_div(slots, fraction, 0xff);
}
|
||||
|
||||
/*
 * Return the value @event represents for the given register snapshot:
 * the raw @slots for a non-metric index, otherwise the share of @slots
 * described by the event's field in @metrics.
 */
static u64 icl_get_topdown_value(struct perf_event *event,
				 u64 slots, u64 metrics)
{
	int idx = event->hw.idx;

	if (!is_metric_idx(idx))
		return slots;

	return icl_get_metrics_event_value(metrics, slots, idx);
}
|
||||
|
||||
/*
 * Add to event->count the growth of @event's value between the previous
 * snapshot (@last_slots/@last_metrics) and the current one
 * (@slots/@metrics). A zero @last_slots means there is no previous
 * snapshot, i.e. the previous value is taken as 0.
 */
static void __icl_update_topdown_event(struct perf_event *event,
				       u64 slots, u64 metrics,
				       u64 last_slots, u64 last_metrics)
{
	u64 now = icl_get_topdown_value(event, slots, metrics);
	u64 prev = 0;

	if (last_slots)
		prev = icl_get_topdown_value(event, last_slots, last_metrics);

	/*
	 * The 8-bit integer fraction of a metric may not be accurate,
	 * especially when the change is very small.
	 * For example, if only a few bad_spec events happen, the fraction
	 * may be reduced from 1 to 0. If so, the bad_spec event value
	 * will be 0, which is definitely less than the last value.
	 * Do not update event->count in this case.
	 */
	if (now <= prev)
		return;

	local64_add(now - prev, &event->count);
}
|
||||
|
||||
/*
 * Record @slots and @metrics as the saved register values of @event and
 * of every topdown event currently set in this CPU's active_mask.
 */
static void update_saved_topdown_regs(struct perf_event *event,
				      u64 slots, u64 metrics)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc;
	int bit;

	event->hw.saved_slots = slots;
	event->hw.saved_metric = metrics;

	for_each_set_bit(bit, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
		if (is_topdown_idx(bit)) {
			hwc = &cpuc->events[bit]->hw;
			hwc->saved_slots = slots;
			hwc->saved_metric = metrics;
		}
	}
}
|
||||
|
||||
/*
|
||||
* Update all active Topdown events.
|
||||
*
|
||||
* The PERF_METRICS and Fixed counter 3 are read separately. The values may be
|
||||
* modify by a NMI. PMU has to be disabled before calling this function.
|
||||
*/
|
||||
static u64 icl_update_topdown_event(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
struct perf_event *other;
|
||||
u64 slots, metrics;
|
||||
bool reset = true;
|
||||
int idx;
|
||||
|
||||
/* read Fixed counter 3 */
|
||||
rdpmcl((3 | INTEL_PMC_FIXED_RDPMC_BASE), slots);
|
||||
if (!slots)
|
||||
return 0;
|
||||
|
||||
/* read PERF_METRICS */
|
||||
rdpmcl(INTEL_PMC_FIXED_RDPMC_METRICS, metrics);
|
||||
|
||||
for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
|
||||
if (!is_topdown_idx(idx))
|
||||
continue;
|
||||
other = cpuc->events[idx];
|
||||
__icl_update_topdown_event(other, slots, metrics,
|
||||
event ? event->hw.saved_slots : 0,
|
||||
event ? event->hw.saved_metric : 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check and update this event, which may have been cleared
|
||||
* in active_mask e.g. x86_pmu_stop()
|
||||
*/
|
||||
if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
|
||||
__icl_update_topdown_event(event, slots, metrics,
|
||||
event->hw.saved_slots,
|
||||
event->hw.saved_metric);
|
||||
|
||||
/*
|
||||
* In x86_pmu_stop(), the event is cleared in active_mask first,
|
||||
* then drain the delta, which indicates context switch for
|
||||
* counting.
|
||||
* Save metric and slots for context switch.
|
||||
* Don't need to reset the PERF_METRICS and Fixed counter 3.
|
||||
* Because the values will be restored in next schedule in.
|
||||
*/
|
||||
update_saved_topdown_regs(event, slots, metrics);
|
||||
reset = false;
|
||||
}
|
||||
|
||||
if (reset) {
|
||||
/* The fixed counter 3 has to be written before the PERF_METRICS. */
|
||||
wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
|
||||
wrmsrl(MSR_PERF_METRICS, 0);
|
||||
if (event)
|
||||
update_saved_topdown_regs(event, 0, 0);
|
||||
}
|
||||
|
||||
return slots;
|
||||
}
|
||||
|
||||
static void intel_pmu_read_topdown_event(struct perf_event *event)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
|
||||
/* Only need to call update_topdown_event() once for group read. */
|
||||
if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
|
||||
!is_slots_event(event))
|
||||
return;
|
||||
|
||||
perf_pmu_disable(event->pmu);
|
||||
x86_pmu.update_topdown_event(event);
|
||||
perf_pmu_enable(event->pmu);
|
||||
}
|
||||
|
||||
static void intel_pmu_read_event(struct perf_event *event)
|
||||
{
|
||||
if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
|
||||
intel_pmu_auto_reload_read(event);
|
||||
else if (is_topdown_count(event) && x86_pmu.update_topdown_event)
|
||||
intel_pmu_read_topdown_event(event);
|
||||
else
|
||||
x86_perf_event_update(event);
|
||||
}
|
||||
|
@ -2219,8 +2432,22 @@ static void intel_pmu_read_event(struct perf_event *event)
|
|||
static void intel_pmu_enable_fixed(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
|
||||
u64 ctrl_val, mask, bits = 0;
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (is_topdown_idx(idx)) {
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
/*
|
||||
* When there are other active TopDown events,
|
||||
* don't enable the fixed counter 3 again.
|
||||
*/
|
||||
if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
|
||||
return;
|
||||
|
||||
idx = INTEL_PMC_IDX_FIXED_SLOTS;
|
||||
}
|
||||
|
||||
intel_set_masks(event, idx);
|
||||
|
||||
/*
|
||||
* Enable IRQ generation (0x8), if not PEBS,
|
||||
|
@ -2240,6 +2467,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
|
|||
if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
|
||||
bits |= 0x4;
|
||||
|
||||
idx -= INTEL_PMC_IDX_FIXED;
|
||||
bits <<= (idx * 4);
|
||||
mask = 0xfULL << (idx * 4);
|
||||
|
||||
|
@ -2262,18 +2490,27 @@ static void intel_pmu_enable_event(struct perf_event *event)
|
|||
if (unlikely(event->attr.precise_ip))
|
||||
intel_pmu_pebs_enable(event);
|
||||
|
||||
if (idx < INTEL_PMC_IDX_FIXED) {
|
||||
switch (idx) {
|
||||
case 0 ... INTEL_PMC_IDX_FIXED - 1:
|
||||
intel_set_masks(event, idx);
|
||||
__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
|
||||
} else if (idx < INTEL_PMC_IDX_FIXED_BTS) {
|
||||
intel_set_masks(event, idx);
|
||||
break;
|
||||
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
|
||||
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
|
||||
intel_pmu_enable_fixed(event);
|
||||
} else if (idx == INTEL_PMC_IDX_FIXED_BTS) {
|
||||
break;
|
||||
case INTEL_PMC_IDX_FIXED_BTS:
|
||||
if (!__this_cpu_read(cpu_hw_events.enabled))
|
||||
return;
|
||||
intel_pmu_enable_bts(hwc->config);
|
||||
} else if (idx == INTEL_PMC_IDX_FIXED_VLBR)
|
||||
break;
|
||||
case INTEL_PMC_IDX_FIXED_VLBR:
|
||||
intel_set_masks(event, idx);
|
||||
break;
|
||||
default:
|
||||
pr_warn("Failed to enable the event with invalid index %d\n",
|
||||
idx);
|
||||
}
|
||||
}
|
||||
|
||||
static void intel_pmu_add_event(struct perf_event *event)
|
||||
|
@ -2389,7 +2626,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
|||
/*
|
||||
* PEBS overflow sets bit 62 in the global status register
|
||||
*/
|
||||
if (__test_and_clear_bit(62, (unsigned long *)&status)) {
|
||||
if (__test_and_clear_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, (unsigned long *)&status)) {
|
||||
u64 pebs_enabled = cpuc->pebs_enabled;
|
||||
|
||||
handled++;
|
||||
|
@ -2410,7 +2647,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
|||
/*
|
||||
* Intel PT
|
||||
*/
|
||||
if (__test_and_clear_bit(55, (unsigned long *)&status)) {
|
||||
if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) {
|
||||
handled++;
|
||||
if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
|
||||
perf_guest_cbs->handle_intel_pt_intr))
|
||||
|
@ -2419,6 +2656,15 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
|||
intel_pt_interrupt();
|
||||
}
|
||||
|
||||
/*
|
||||
* Intel Perf metrics
|
||||
*/
|
||||
if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
|
||||
handled++;
|
||||
if (x86_pmu.update_topdown_event)
|
||||
x86_pmu.update_topdown_event(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Checkpointed counters can lead to 'spurious' PMIs because the
|
||||
* rollback caused by the PMI will have cleared the overflow status
|
||||
|
@ -3355,6 +3601,56 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
|||
if (event->attr.type != PERF_TYPE_RAW)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Config Topdown slots and metric events
|
||||
*
|
||||
* The slots event on Fixed Counter 3 can support sampling,
|
||||
* which will be handled normally in x86_perf_event_update().
|
||||
*
|
||||
* Metric events don't support sampling and require being paired
|
||||
* with a slots event as group leader. When the slots event
|
||||
* is used in a metrics group, it too cannot support sampling.
|
||||
*/
|
||||
if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
|
||||
if (event->attr.config1 || event->attr.config2)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* The TopDown metrics events and slots event don't
|
||||
* support any filters.
|
||||
*/
|
||||
if (event->attr.config & X86_ALL_EVENT_FLAGS)
|
||||
return -EINVAL;
|
||||
|
||||
if (is_metric_event(event)) {
|
||||
struct perf_event *leader = event->group_leader;
|
||||
|
||||
/* The metric events don't support sampling. */
|
||||
if (is_sampling_event(event))
|
||||
return -EINVAL;
|
||||
|
||||
/* The metric events require a slots group leader. */
|
||||
if (!is_slots_event(leader))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* The leader/SLOTS must not be a sampling event for
|
||||
* metric use; hardware requires it starts at 0 when used
|
||||
* in conjunction with MSR_PERF_METRICS.
|
||||
*/
|
||||
if (is_sampling_event(leader))
|
||||
return -EINVAL;
|
||||
|
||||
event->event_caps |= PERF_EV_CAP_SIBLING;
|
||||
/*
|
||||
* Only once we have a METRICs sibling do we
|
||||
* need TopDown magic.
|
||||
*/
|
||||
leader->hw.flags |= PERF_X86_EVENT_TOPDOWN;
|
||||
event->hw.flags |= PERF_X86_EVENT_TOPDOWN;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
|
||||
return 0;
|
||||
|
||||
|
@ -3787,6 +4083,17 @@ static void intel_pmu_cpu_starting(int cpu)
|
|||
if (x86_pmu.counter_freezing)
|
||||
enable_counter_freeze();
|
||||
|
||||
/* Disable perf metrics if any added CPU doesn't support it. */
|
||||
if (x86_pmu.intel_cap.perf_metrics) {
|
||||
union perf_capabilities perf_cap;
|
||||
|
||||
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
|
||||
if (!perf_cap.perf_metrics) {
|
||||
x86_pmu.intel_cap.perf_metrics = 0;
|
||||
x86_pmu.intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS);
|
||||
}
|
||||
}
|
||||
|
||||
if (!cpuc->shared_regs)
|
||||
return;
|
||||
|
||||
|
@ -4355,6 +4662,15 @@ static struct attribute *icl_events_attrs[] = {
|
|||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *icl_td_events_attrs[] = {
|
||||
EVENT_PTR(slots),
|
||||
EVENT_PTR(td_retiring),
|
||||
EVENT_PTR(td_bad_spec),
|
||||
EVENT_PTR(td_fe_bound),
|
||||
EVENT_PTR(td_be_bound),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *icl_tsx_events_attrs[] = {
|
||||
EVENT_PTR(tx_start),
|
||||
EVENT_PTR(tx_abort),
|
||||
|
@ -4830,6 +5146,7 @@ __init int intel_pmu_init(void)
|
|||
|
||||
case INTEL_FAM6_ATOM_TREMONT_D:
|
||||
case INTEL_FAM6_ATOM_TREMONT:
|
||||
case INTEL_FAM6_ATOM_TREMONT_L:
|
||||
x86_pmu.late_ack = true;
|
||||
memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
|
||||
sizeof(hw_cache_event_ids));
|
||||
|
@ -5139,10 +5456,13 @@ __init int intel_pmu_init(void)
|
|||
hsw_format_attr : nhm_format_attr;
|
||||
extra_skl_attr = skl_format_attr;
|
||||
mem_attr = icl_events_attrs;
|
||||
td_attr = icl_td_events_attrs;
|
||||
tsx_attr = icl_tsx_events_attrs;
|
||||
x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
|
||||
x86_pmu.lbr_pt_coexist = true;
|
||||
intel_pmu_pebs_data_source_skl(pmem);
|
||||
x86_pmu.update_topdown_event = icl_update_topdown_event;
|
||||
x86_pmu.set_topdown_event_period = icl_set_topdown_event_period;
|
||||
pr_cont("Icelake events, ");
|
||||
name = "icelake";
|
||||
break;
|
||||
|
@ -5198,6 +5518,15 @@ __init int intel_pmu_init(void)
|
|||
* counter, so do not extend mask to generic counters
|
||||
*/
|
||||
for_each_event_constraint(c, x86_pmu.event_constraints) {
|
||||
/*
|
||||
* Don't extend the topdown slots and metrics
|
||||
* events to the generic counters.
|
||||
*/
|
||||
if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
|
||||
c->weight = hweight64(c->idxmsk64);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c->cmask == FIXED_EVENT_FLAGS
|
||||
&& c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
|
||||
c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
|
||||
|
@ -5253,6 +5582,9 @@ __init int intel_pmu_init(void)
|
|||
if (x86_pmu.counter_freezing)
|
||||
x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
|
||||
|
||||
if (x86_pmu.intel_cap.perf_metrics)
|
||||
x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -670,9 +670,7 @@ int intel_pmu_drain_bts_buffer(void)
|
|||
|
||||
static inline void intel_pmu_drain_pebs_buffer(void)
|
||||
{
|
||||
struct pt_regs regs;
|
||||
|
||||
x86_pmu.drain_pebs(®s);
|
||||
x86_pmu.drain_pebs(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1737,6 +1735,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
|||
struct x86_perf_regs perf_regs;
|
||||
struct pt_regs *regs = &perf_regs.regs;
|
||||
void *at = get_next_pebs_record_by_bit(base, top, bit);
|
||||
struct pt_regs dummy_iregs;
|
||||
|
||||
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
|
||||
/*
|
||||
|
@ -1749,6 +1748,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
|||
} else if (!intel_pmu_save_and_restart(event))
|
||||
return;
|
||||
|
||||
if (!iregs)
|
||||
iregs = &dummy_iregs;
|
||||
|
||||
while (count > 1) {
|
||||
setup_sample(event, iregs, at, &data, regs);
|
||||
perf_event_output(event, &data, regs);
|
||||
|
@ -1758,16 +1760,22 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
|||
}
|
||||
|
||||
setup_sample(event, iregs, at, &data, regs);
|
||||
|
||||
if (iregs == &dummy_iregs) {
|
||||
/*
|
||||
* The PEBS records may be drained in the non-overflow context,
|
||||
* e.g., large PEBS + context switch. Perf should treat the
|
||||
* last record the same as other PEBS records, and must not
|
||||
* invoke the generic overflow handler.
|
||||
*/
|
||||
perf_event_output(event, &data, regs);
|
||||
} else {
|
||||
/*
|
||||
* All but the last records are processed.
|
||||
* The last one is left to be able to call the overflow handler.
|
||||
*/
|
||||
if (perf_event_overflow(event, &data, regs)) {
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
|
||||
|
|
|
@ -12,6 +12,8 @@ struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
|
|||
|
||||
static bool pcidrv_registered;
|
||||
struct pci_driver *uncore_pci_driver;
|
||||
/* The PCI driver for the device which the uncore doesn't own. */
|
||||
struct pci_driver *uncore_pci_sub_driver;
|
||||
/* pci bus to socket mapping */
|
||||
DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
|
||||
struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
|
||||
|
@ -989,66 +991,72 @@ uncore_types_init(struct intel_uncore_type **types, bool setid)
|
|||
}
|
||||
|
||||
/*
|
||||
* add a pci uncore device
|
||||
* Get the die information of a PCI device.
|
||||
* @pdev: The PCI device.
|
||||
* @phys_id: The physical socket id which the device maps to.
|
||||
* @die: The die id which the device maps to.
|
||||
*/
|
||||
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
static int uncore_pci_get_dev_die_info(struct pci_dev *pdev,
|
||||
int *phys_id, int *die)
|
||||
{
|
||||
struct intel_uncore_type *type;
|
||||
struct intel_uncore_pmu *pmu = NULL;
|
||||
struct intel_uncore_box *box;
|
||||
int phys_id, die, ret;
|
||||
|
||||
phys_id = uncore_pcibus_to_physid(pdev->bus);
|
||||
if (phys_id < 0)
|
||||
*phys_id = uncore_pcibus_to_physid(pdev->bus);
|
||||
if (*phys_id < 0)
|
||||
return -ENODEV;
|
||||
|
||||
die = (topology_max_die_per_package() > 1) ? phys_id :
|
||||
topology_phys_to_logical_pkg(phys_id);
|
||||
if (die < 0)
|
||||
*die = (topology_max_die_per_package() > 1) ? *phys_id :
|
||||
topology_phys_to_logical_pkg(*phys_id);
|
||||
if (*die < 0)
|
||||
return -EINVAL;
|
||||
|
||||
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
|
||||
int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
|
||||
|
||||
uncore_extra_pci_dev[die].dev[idx] = pdev;
|
||||
pci_set_drvdata(pdev, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
|
||||
|
||||
/*
|
||||
* Some platforms, e.g. Knights Landing, use a common PCI device ID
|
||||
* for multiple instances of an uncore PMU device type. We should check
|
||||
* PCI slot and func to indicate the uncore box.
|
||||
* Find the PMU of a PCI device.
|
||||
* @pdev: The PCI device.
|
||||
* @ids: The ID table of the available PCI devices with a PMU.
|
||||
*/
|
||||
if (id->driver_data & ~0xffff) {
|
||||
struct pci_driver *pci_drv = pdev->driver;
|
||||
const struct pci_device_id *ids = pci_drv->id_table;
|
||||
static struct intel_uncore_pmu *
|
||||
uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
|
||||
{
|
||||
struct intel_uncore_pmu *pmu = NULL;
|
||||
struct intel_uncore_type *type;
|
||||
kernel_ulong_t data;
|
||||
unsigned int devfn;
|
||||
|
||||
while (ids && ids->vendor) {
|
||||
if ((ids->vendor == pdev->vendor) &&
|
||||
(ids->device == pdev->device)) {
|
||||
devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
|
||||
UNCORE_PCI_DEV_FUNC(ids->driver_data));
|
||||
data = ids->driver_data;
|
||||
devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
|
||||
UNCORE_PCI_DEV_FUNC(data));
|
||||
if (devfn == pdev->devfn) {
|
||||
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
|
||||
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
|
||||
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
|
||||
break;
|
||||
}
|
||||
}
|
||||
ids++;
|
||||
}
|
||||
if (pmu == NULL)
|
||||
return -ENODEV;
|
||||
} else {
|
||||
/*
|
||||
* for performance monitoring unit with multiple boxes,
|
||||
* each box has a different function id.
|
||||
*/
|
||||
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
|
||||
return pmu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Register the PMU for a PCI device
|
||||
* @pdev: The PCI device.
|
||||
* @type: The corresponding PMU type of the device.
|
||||
* @pmu: The corresponding PMU of the device.
|
||||
* @phys_id: The physical socket id which the device maps to.
|
||||
* @die: The die id which the device maps to.
|
||||
*/
|
||||
static int uncore_pci_pmu_register(struct pci_dev *pdev,
|
||||
struct intel_uncore_type *type,
|
||||
struct intel_uncore_pmu *pmu,
|
||||
int phys_id, int die)
|
||||
{
|
||||
struct intel_uncore_box *box;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -1067,7 +1075,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
|
|||
box->pci_dev = pdev;
|
||||
box->pmu = pmu;
|
||||
uncore_box_init(box);
|
||||
pci_set_drvdata(pdev, box);
|
||||
|
||||
pmu->boxes[die] = box;
|
||||
if (atomic_inc_return(&pmu->activeboxes) > 1)
|
||||
|
@ -1076,7 +1083,6 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
|
|||
/* First active box registers the pmu */
|
||||
ret = uncore_pmu_register(pmu);
|
||||
if (ret) {
|
||||
pci_set_drvdata(pdev, NULL);
|
||||
pmu->boxes[die] = NULL;
|
||||
uncore_box_exit(box);
|
||||
kfree(box);
|
||||
|
@ -1084,18 +1090,87 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* add a pci uncore device
|
||||
*/
|
||||
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
{
|
||||
struct intel_uncore_type *type;
|
||||
struct intel_uncore_pmu *pmu = NULL;
|
||||
int phys_id, die, ret;
|
||||
|
||||
ret = uncore_pci_get_dev_die_info(pdev, &phys_id, &die);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
|
||||
int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
|
||||
|
||||
uncore_extra_pci_dev[die].dev[idx] = pdev;
|
||||
pci_set_drvdata(pdev, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
|
||||
|
||||
/*
|
||||
* Some platforms, e.g. Knights Landing, use a common PCI device ID
|
||||
* for multiple instances of an uncore PMU device type. We should check
|
||||
* PCI slot and func to indicate the uncore box.
|
||||
*/
|
||||
if (id->driver_data & ~0xffff) {
|
||||
struct pci_driver *pci_drv = pdev->driver;
|
||||
|
||||
pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
|
||||
if (pmu == NULL)
|
||||
return -ENODEV;
|
||||
} else {
|
||||
/*
|
||||
* for performance monitoring unit with multiple boxes,
|
||||
* each box has a different function id.
|
||||
*/
|
||||
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
|
||||
}
|
||||
|
||||
ret = uncore_pci_pmu_register(pdev, type, pmu, phys_id, die);
|
||||
|
||||
pci_set_drvdata(pdev, pmu->boxes[die]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unregister the PMU of a PCI device
|
||||
* @pmu: The corresponding PMU is unregistered.
|
||||
* @phys_id: The physical socket id which the device maps to.
|
||||
* @die: The die id which the device maps to.
|
||||
*/
|
||||
static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu,
|
||||
int phys_id, int die)
|
||||
{
|
||||
struct intel_uncore_box *box = pmu->boxes[die];
|
||||
|
||||
if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
|
||||
return;
|
||||
|
||||
pmu->boxes[die] = NULL;
|
||||
if (atomic_dec_return(&pmu->activeboxes) == 0)
|
||||
uncore_pmu_unregister(pmu);
|
||||
uncore_box_exit(box);
|
||||
kfree(box);
|
||||
}
|
||||
|
||||
static void uncore_pci_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct intel_uncore_box *box;
|
||||
struct intel_uncore_pmu *pmu;
|
||||
int i, phys_id, die;
|
||||
|
||||
phys_id = uncore_pcibus_to_physid(pdev->bus);
|
||||
if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
|
||||
return;
|
||||
|
||||
box = pci_get_drvdata(pdev);
|
||||
if (!box) {
|
||||
die = (topology_max_die_per_package() > 1) ? phys_id :
|
||||
topology_phys_to_logical_pkg(phys_id);
|
||||
for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
|
||||
if (uncore_extra_pci_dev[die].dev[i] == pdev) {
|
||||
uncore_extra_pci_dev[die].dev[i] = NULL;
|
||||
|
@ -1107,15 +1182,84 @@ static void uncore_pci_remove(struct pci_dev *pdev)
|
|||
}
|
||||
|
||||
pmu = box->pmu;
|
||||
if (WARN_ON_ONCE(phys_id != box->pci_phys_id))
|
||||
return;
|
||||
|
||||
pci_set_drvdata(pdev, NULL);
|
||||
pmu->boxes[box->dieid] = NULL;
|
||||
if (atomic_dec_return(&pmu->activeboxes) == 0)
|
||||
uncore_pmu_unregister(pmu);
|
||||
uncore_box_exit(box);
|
||||
kfree(box);
|
||||
|
||||
uncore_pci_pmu_unregister(pmu, phys_id, die);
|
||||
}
|
||||
|
||||
static int uncore_bus_notify(struct notifier_block *nb,
|
||||
unsigned long action, void *data)
|
||||
{
|
||||
struct device *dev = data;
|
||||
struct pci_dev *pdev = to_pci_dev(dev);
|
||||
struct intel_uncore_pmu *pmu;
|
||||
int phys_id, die;
|
||||
|
||||
/* Unregister the PMU when the device is going to be deleted. */
|
||||
if (action != BUS_NOTIFY_DEL_DEVICE)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
pmu = uncore_pci_find_dev_pmu(pdev, uncore_pci_sub_driver->id_table);
|
||||
if (!pmu)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
if (uncore_pci_get_dev_die_info(pdev, &phys_id, &die))
|
||||
return NOTIFY_DONE;
|
||||
|
||||
uncore_pci_pmu_unregister(pmu, phys_id, die);
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block uncore_notifier = {
|
||||
.notifier_call = uncore_bus_notify,
|
||||
};
|
||||
|
||||
static void uncore_pci_sub_driver_init(void)
|
||||
{
|
||||
const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
|
||||
struct intel_uncore_type *type;
|
||||
struct intel_uncore_pmu *pmu;
|
||||
struct pci_dev *pci_sub_dev;
|
||||
bool notify = false;
|
||||
unsigned int devfn;
|
||||
int phys_id, die;
|
||||
|
||||
while (ids && ids->vendor) {
|
||||
pci_sub_dev = NULL;
|
||||
type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
|
||||
/*
|
||||
* Search the available device, and register the
|
||||
* corresponding PMU.
|
||||
*/
|
||||
while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
|
||||
ids->device, pci_sub_dev))) {
|
||||
devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
|
||||
UNCORE_PCI_DEV_FUNC(ids->driver_data));
|
||||
if (devfn != pci_sub_dev->devfn)
|
||||
continue;
|
||||
|
||||
pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
|
||||
if (!pmu)
|
||||
continue;
|
||||
|
||||
if (uncore_pci_get_dev_die_info(pci_sub_dev,
|
||||
&phys_id, &die))
|
||||
continue;
|
||||
|
||||
if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
|
||||
phys_id, die))
|
||||
notify = true;
|
||||
}
|
||||
ids++;
|
||||
}
|
||||
|
||||
if (notify && bus_register_notifier(&pci_bus_type, &uncore_notifier))
|
||||
notify = false;
|
||||
|
||||
if (!notify)
|
||||
uncore_pci_sub_driver = NULL;
|
||||
}
|
||||
|
||||
static int __init uncore_pci_init(void)
|
||||
|
@ -1141,6 +1285,9 @@ static int __init uncore_pci_init(void)
|
|||
if (ret)
|
||||
goto errtype;
|
||||
|
||||
if (uncore_pci_sub_driver)
|
||||
uncore_pci_sub_driver_init();
|
||||
|
||||
pcidrv_registered = true;
|
||||
return 0;
|
||||
|
||||
|
@ -1158,6 +1305,8 @@ static void uncore_pci_exit(void)
|
|||
{
|
||||
if (pcidrv_registered) {
|
||||
pcidrv_registered = false;
|
||||
if (uncore_pci_sub_driver)
|
||||
bus_unregister_notifier(&pci_bus_type, &uncore_notifier);
|
||||
pci_unregister_driver(uncore_pci_driver);
|
||||
uncore_types_exit(uncore_pci_uncores);
|
||||
kfree(uncore_extra_pci_dev);
|
||||
|
@ -1478,12 +1627,12 @@ static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
|
|||
};
|
||||
|
||||
static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
|
||||
.cpu_init = icl_uncore_cpu_init,
|
||||
.cpu_init = tgl_uncore_cpu_init,
|
||||
.mmio_init = tgl_uncore_mmio_init,
|
||||
};
|
||||
|
||||
static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
|
||||
.cpu_init = icl_uncore_cpu_init,
|
||||
.cpu_init = tgl_uncore_cpu_init,
|
||||
.mmio_init = tgl_l_uncore_mmio_init,
|
||||
};
|
||||
|
||||
|
|
|
@ -552,6 +552,7 @@ extern struct intel_uncore_type **uncore_msr_uncores;
|
|||
extern struct intel_uncore_type **uncore_pci_uncores;
|
||||
extern struct intel_uncore_type **uncore_mmio_uncores;
|
||||
extern struct pci_driver *uncore_pci_driver;
|
||||
extern struct pci_driver *uncore_pci_sub_driver;
|
||||
extern raw_spinlock_t pci2phy_map_lock;
|
||||
extern struct list_head pci2phy_map_head;
|
||||
extern struct pci_extra_dev *uncore_extra_pci_dev;
|
||||
|
@ -567,6 +568,7 @@ void snb_uncore_cpu_init(void);
|
|||
void nhm_uncore_cpu_init(void);
|
||||
void skl_uncore_cpu_init(void);
|
||||
void icl_uncore_cpu_init(void);
|
||||
void tgl_uncore_cpu_init(void);
|
||||
void tgl_uncore_mmio_init(void);
|
||||
void tgl_l_uncore_mmio_init(void);
|
||||
int snb_pci2phy_map_init(int devid);
|
||||
|
|
|
@ -126,6 +126,10 @@
|
|||
#define ICL_UNC_CBO_0_PER_CTR0 0x702
|
||||
#define ICL_UNC_CBO_MSR_OFFSET 0x8
|
||||
|
||||
/* ICL ARB register */
|
||||
#define ICL_UNC_ARB_PER_CTR 0x3b1
|
||||
#define ICL_UNC_ARB_PERFEVTSEL 0x3b3
|
||||
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
|
||||
|
@ -313,15 +317,21 @@ void skl_uncore_cpu_init(void)
|
|||
snb_uncore_arb.ops = &skl_uncore_msr_ops;
|
||||
}
|
||||
|
||||
static struct intel_uncore_ops icl_uncore_msr_ops = {
|
||||
.disable_event = snb_uncore_msr_disable_event,
|
||||
.enable_event = snb_uncore_msr_enable_event,
|
||||
.read_counter = uncore_msr_read_counter,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icl_uncore_cbox = {
|
||||
.name = "cbox",
|
||||
.num_counters = 4,
|
||||
.num_counters = 2,
|
||||
.perf_ctr_bits = 44,
|
||||
.perf_ctr = ICL_UNC_CBO_0_PER_CTR0,
|
||||
.event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
|
||||
.event_mask = SNB_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = ICL_UNC_CBO_MSR_OFFSET,
|
||||
.ops = &skl_uncore_msr_ops,
|
||||
.ops = &icl_uncore_msr_ops,
|
||||
.format_group = &snb_uncore_format_group,
|
||||
};
|
||||
|
||||
|
@ -350,13 +360,25 @@ static struct intel_uncore_type icl_uncore_clockbox = {
|
|||
.single_fixed = 1,
|
||||
.event_mask = SNB_UNC_CTL_EV_SEL_MASK,
|
||||
.format_group = &icl_uncore_clock_format_group,
|
||||
.ops = &skl_uncore_msr_ops,
|
||||
.ops = &icl_uncore_msr_ops,
|
||||
.event_descs = icl_uncore_events,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icl_uncore_arb = {
|
||||
.name = "arb",
|
||||
.num_counters = 1,
|
||||
.num_boxes = 1,
|
||||
.perf_ctr_bits = 44,
|
||||
.perf_ctr = ICL_UNC_ARB_PER_CTR,
|
||||
.event_ctl = ICL_UNC_ARB_PERFEVTSEL,
|
||||
.event_mask = SNB_UNC_RAW_EVENT_MASK,
|
||||
.ops = &icl_uncore_msr_ops,
|
||||
.format_group = &snb_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *icl_msr_uncores[] = {
|
||||
&icl_uncore_cbox,
|
||||
&snb_uncore_arb,
|
||||
&icl_uncore_arb,
|
||||
&icl_uncore_clockbox,
|
||||
NULL,
|
||||
};
|
||||
|
@ -374,6 +396,21 @@ void icl_uncore_cpu_init(void)
|
|||
{
|
||||
uncore_msr_uncores = icl_msr_uncores;
|
||||
icl_uncore_cbox.num_boxes = icl_get_cbox_num();
|
||||
}
|
||||
|
||||
static struct intel_uncore_type *tgl_msr_uncores[] = {
|
||||
&icl_uncore_cbox,
|
||||
&snb_uncore_arb,
|
||||
&icl_uncore_clockbox,
|
||||
NULL,
|
||||
};
|
||||
|
||||
void tgl_uncore_cpu_init(void)
|
||||
{
|
||||
uncore_msr_uncores = tgl_msr_uncores;
|
||||
icl_uncore_cbox.num_boxes = icl_get_cbox_num();
|
||||
icl_uncore_cbox.ops = &skl_uncore_msr_ops;
|
||||
icl_uncore_clockbox.ops = &skl_uncore_msr_ops;
|
||||
snb_uncore_arb.ops = &skl_uncore_msr_ops;
|
||||
}
|
||||
|
||||
|
|
|
@ -393,6 +393,11 @@
|
|||
#define SNR_M2M_PCI_PMON_BOX_CTL 0x438
|
||||
#define SNR_M2M_PCI_PMON_UMASK_EXT 0xff
|
||||
|
||||
/* SNR PCIE3 */
|
||||
#define SNR_PCIE3_PCI_PMON_CTL0 0x508
|
||||
#define SNR_PCIE3_PCI_PMON_CTR0 0x4e8
|
||||
#define SNR_PCIE3_PCI_PMON_BOX_CTL 0x4e0
|
||||
|
||||
/* SNR IMC */
|
||||
#define SNR_IMC_MMIO_PMON_FIXED_CTL 0x54
|
||||
#define SNR_IMC_MMIO_PMON_FIXED_CTR 0x38
|
||||
|
@ -3749,7 +3754,9 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
|
|||
|
||||
ret = skx_iio_get_topology(type);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto clear_attr_update;
|
||||
|
||||
ret = -ENOMEM;
|
||||
|
||||
/* One more for NULL. */
|
||||
attrs = kcalloc((uncore_max_dies() + 1), sizeof(*attrs), GFP_KERNEL);
|
||||
|
@ -3781,8 +3788,9 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
|
|||
kfree(eas);
|
||||
kfree(attrs);
|
||||
kfree(type->topology);
|
||||
clear_attr_update:
|
||||
type->attr_update = NULL;
|
||||
return -ENOMEM;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
|
||||
|
@ -4551,12 +4559,46 @@ static struct intel_uncore_type snr_uncore_m2m = {
|
|||
.format_group = &snr_m2m_uncore_format_group,
|
||||
};
|
||||
|
||||
static void snr_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
struct pci_dev *pdev = box->pci_dev;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
pci_write_config_dword(pdev, hwc->config_base, (u32)(hwc->config | SNBEP_PMON_CTL_EN));
|
||||
pci_write_config_dword(pdev, hwc->config_base + 4, (u32)(hwc->config >> 32));
|
||||
}
|
||||
|
||||
static struct intel_uncore_ops snr_pcie3_uncore_pci_ops = {
|
||||
.init_box = snr_m2m_uncore_pci_init_box,
|
||||
.disable_box = snbep_uncore_pci_disable_box,
|
||||
.enable_box = snbep_uncore_pci_enable_box,
|
||||
.disable_event = snbep_uncore_pci_disable_event,
|
||||
.enable_event = snr_uncore_pci_enable_event,
|
||||
.read_counter = snbep_uncore_pci_read_counter,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type snr_uncore_pcie3 = {
|
||||
.name = "pcie3",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 1,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = SNR_PCIE3_PCI_PMON_CTR0,
|
||||
.event_ctl = SNR_PCIE3_PCI_PMON_CTL0,
|
||||
.event_mask = SKX_IIO_PMON_RAW_EVENT_MASK,
|
||||
.event_mask_ext = SKX_IIO_PMON_RAW_EVENT_MASK_EXT,
|
||||
.box_ctl = SNR_PCIE3_PCI_PMON_BOX_CTL,
|
||||
.ops = &snr_pcie3_uncore_pci_ops,
|
||||
.format_group = &skx_uncore_iio_format_group,
|
||||
};
|
||||
|
||||
enum {
|
||||
SNR_PCI_UNCORE_M2M,
|
||||
SNR_PCI_UNCORE_PCIE3,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *snr_pci_uncores[] = {
|
||||
[SNR_PCI_UNCORE_M2M] = &snr_uncore_m2m,
|
||||
[SNR_PCI_UNCORE_PCIE3] = &snr_uncore_pcie3,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -4573,6 +4615,19 @@ static struct pci_driver snr_uncore_pci_driver = {
|
|||
.id_table = snr_uncore_pci_ids,
|
||||
};
|
||||
|
||||
static const struct pci_device_id snr_uncore_pci_sub_ids[] = {
|
||||
{ /* PCIe3 RP */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x334a),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 0, SNR_PCI_UNCORE_PCIE3, 0),
|
||||
},
|
||||
{ /* end: all zeroes */ }
|
||||
};
|
||||
|
||||
static struct pci_driver snr_uncore_pci_sub_driver = {
|
||||
.name = "snr_uncore_sub",
|
||||
.id_table = snr_uncore_pci_sub_ids,
|
||||
};
|
||||
|
||||
int snr_uncore_pci_init(void)
|
||||
{
|
||||
/* SNR UBOX DID */
|
||||
|
@ -4584,6 +4639,7 @@ int snr_uncore_pci_init(void)
|
|||
|
||||
uncore_pci_uncores = snr_pci_uncores;
|
||||
uncore_pci_driver = &snr_uncore_pci_driver;
|
||||
uncore_pci_sub_driver = &snr_uncore_pci_sub_driver;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -4751,10 +4807,10 @@ static struct uncore_event_desc snr_uncore_imc_freerunning_events[] = {
|
|||
INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
|
||||
|
||||
INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
|
||||
INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
|
||||
INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
@ -5212,17 +5268,17 @@ static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = {
|
|||
INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
|
||||
|
||||
INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
|
||||
INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(read.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
|
||||
INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(write.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
|
||||
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "6.103515625e-5"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"),
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
|
|
@ -78,6 +78,7 @@ static bool test_intel(int idx, void *data)
|
|||
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
|
||||
case INTEL_FAM6_ATOM_TREMONT_D:
|
||||
case INTEL_FAM6_ATOM_TREMONT:
|
||||
case INTEL_FAM6_ATOM_TREMONT_L:
|
||||
|
||||
case INTEL_FAM6_XEON_PHI_KNL:
|
||||
case INTEL_FAM6_XEON_PHI_KNM:
|
||||
|
|
|
@ -79,6 +79,31 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
|
|||
#define PERF_X86_EVENT_PEBS_VIA_PT 0x0800 /* use PT buffer for PEBS */
|
||||
#define PERF_X86_EVENT_PAIR 0x1000 /* Large Increment per Cycle */
|
||||
#define PERF_X86_EVENT_LBR_SELECT 0x2000 /* Save/Restore MSR_LBR_SELECT */
|
||||
#define PERF_X86_EVENT_TOPDOWN 0x4000 /* Count Topdown slots/metrics events */
|
||||
|
||||
static inline bool is_topdown_count(struct perf_event *event)
|
||||
{
|
||||
return event->hw.flags & PERF_X86_EVENT_TOPDOWN;
|
||||
}
|
||||
|
||||
static inline bool is_metric_event(struct perf_event *event)
|
||||
{
|
||||
u64 config = event->attr.config;
|
||||
|
||||
return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) &&
|
||||
((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) &&
|
||||
((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX);
|
||||
}
|
||||
|
||||
static inline bool is_slots_event(struct perf_event *event)
|
||||
{
|
||||
return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS;
|
||||
}
|
||||
|
||||
static inline bool is_topdown_event(struct perf_event *event)
|
||||
{
|
||||
return is_metric_event(event) || is_slots_event(event);
|
||||
}
|
||||
|
||||
struct amd_nb {
|
||||
int nb_id; /* NorthBridge id */
|
||||
|
@ -210,6 +235,8 @@ struct cpu_hw_events {
|
|||
they've never been enabled yet */
|
||||
int n_txn; /* the # last events in the below arrays;
|
||||
added in the current transaction */
|
||||
int n_txn_pair;
|
||||
int n_txn_metric;
|
||||
int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
|
||||
u64 tags[X86_PMC_IDX_MAX];
|
||||
|
||||
|
@ -284,6 +311,12 @@ struct cpu_hw_events {
|
|||
*/
|
||||
u64 tfa_shadow;
|
||||
|
||||
/*
|
||||
* Perf Metrics
|
||||
*/
|
||||
/* number of accepted metrics events */
|
||||
int n_metric;
|
||||
|
||||
/*
|
||||
* AMD specific bits
|
||||
*/
|
||||
|
@ -375,6 +408,19 @@ struct cpu_hw_events {
|
|||
#define FIXED_EVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
|
||||
|
||||
/*
|
||||
* The special metric counters do not actually exist. They are calculated from
|
||||
* the combination of the FxCtr3 + MSR_PERF_METRICS.
|
||||
*
|
||||
* The special metric counters are mapped to a dummy offset for the scheduler.
|
||||
* The sharing between multiple users of the same metric without multiplexing
|
||||
* is not allowed, even though the hardware supports that in principle.
|
||||
*/
|
||||
|
||||
#define METRIC_EVENT_CONSTRAINT(c, n) \
|
||||
EVENT_CONSTRAINT(c, (1ULL << (INTEL_PMC_IDX_METRIC_BASE + n)), \
|
||||
INTEL_ARCH_EVENT_MASK)
|
||||
|
||||
/*
|
||||
* Constraint on the Event code + UMask
|
||||
*/
|
||||
|
@ -537,7 +583,7 @@ union perf_capabilities {
|
|||
*/
|
||||
u64 full_width_write:1;
|
||||
u64 pebs_baseline:1;
|
||||
u64 pebs_metrics_available:1;
|
||||
u64 perf_metrics:1;
|
||||
u64 pebs_output_pt_available:1;
|
||||
};
|
||||
u64 capabilities;
|
||||
|
@ -726,6 +772,12 @@ struct x86_pmu {
|
|||
*/
|
||||
atomic_t lbr_exclusive[x86_lbr_exclusive_max];
|
||||
|
||||
/*
|
||||
* Intel perf metrics
|
||||
*/
|
||||
u64 (*update_topdown_event)(struct perf_event *event);
|
||||
int (*set_topdown_event_period)(struct perf_event *event);
|
||||
|
||||
/*
|
||||
* perf task context (i.e. struct perf_event_context::task_ctx_data)
|
||||
* switch helper to bridge calls from perf/core to perf/x86.
|
||||
|
|
|
@ -815,6 +815,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
|
|||
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr),
|
||||
X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h),
|
||||
X86_MATCH_VENDOR_FAM(HYGON, 0x18, &model_amd_fam17h),
|
||||
X86_MATCH_VENDOR_FAM(AMD, 0x19, &model_amd_fam17h),
|
||||
{},
|
||||
};
|
||||
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
|
||||
|
|
|
@ -467,6 +467,7 @@
|
|||
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
|
||||
#define MSR_AMD64_IBSCTL 0xc001103a
|
||||
#define MSR_AMD64_IBSBRTARGET 0xc001103b
|
||||
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
|
||||
#define MSR_AMD64_IBSOPDATA4 0xc001103d
|
||||
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
|
||||
#define MSR_AMD64_SEV 0xc0010131
|
||||
|
@ -860,11 +861,14 @@
|
|||
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
|
||||
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
|
||||
#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
|
||||
#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c
|
||||
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
|
||||
#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
|
||||
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
|
||||
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
|
||||
|
||||
#define MSR_PERF_METRICS 0x00000329
|
||||
|
||||
/* PERF_GLOBAL_OVF_CTL bits */
|
||||
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
|
||||
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
|
||||
|
|
|
@ -196,13 +196,29 @@ struct x86_pmu_capability {
|
|||
* Fixed-purpose performance events:
|
||||
*/
|
||||
|
||||
/* RDPMC offset for Fixed PMCs */
|
||||
#define INTEL_PMC_FIXED_RDPMC_BASE (1 << 30)
|
||||
#define INTEL_PMC_FIXED_RDPMC_METRICS (1 << 29)
|
||||
|
||||
/*
|
||||
* All 3 fixed-mode PMCs are configured via this single MSR:
|
||||
* All the fixed-mode PMCs are configured via this single MSR:
|
||||
*/
|
||||
#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d
|
||||
|
||||
/*
|
||||
* The counts are available in three separate MSRs:
|
||||
* There is no event-code assigned to the fixed-mode PMCs.
|
||||
*
|
||||
* For a fixed-mode PMC, which has an equivalent event on a general-purpose
|
||||
* PMC, the event-code of the equivalent event is used for the fixed-mode PMC,
|
||||
* e.g., Instr_Retired.Any and CPU_CLK_Unhalted.Core.
|
||||
*
|
||||
* For a fixed-mode PMC, which doesn't have an equivalent event, a
|
||||
* pseudo-encoding is used, e.g., CPU_CLK_Unhalted.Ref and TOPDOWN.SLOTS.
|
||||
* The pseudo event-code for a fixed-mode PMC must be 0x00.
|
||||
* The pseudo umask-code is 0xX. The X equals the index of the fixed
|
||||
* counter + 1, e.g., the fixed counter 2 has the pseudo-encoding 0x0300.
|
||||
*
|
||||
* The counts are available in separate MSRs:
|
||||
*/
|
||||
|
||||
/* Instr_Retired.Any: */
|
||||
|
@ -213,29 +229,84 @@ struct x86_pmu_capability {
|
|||
#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a
|
||||
#define INTEL_PMC_IDX_FIXED_CPU_CYCLES (INTEL_PMC_IDX_FIXED + 1)
|
||||
|
||||
/* CPU_CLK_Unhalted.Ref: */
|
||||
/* CPU_CLK_Unhalted.Ref: event=0x00,umask=0x3 (pseudo-encoding) */
|
||||
#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b
|
||||
#define INTEL_PMC_IDX_FIXED_REF_CYCLES (INTEL_PMC_IDX_FIXED + 2)
|
||||
#define INTEL_PMC_MSK_FIXED_REF_CYCLES (1ULL << INTEL_PMC_IDX_FIXED_REF_CYCLES)
|
||||
|
||||
/* TOPDOWN.SLOTS: event=0x00,umask=0x4 (pseudo-encoding) */
|
||||
#define MSR_ARCH_PERFMON_FIXED_CTR3 0x30c
|
||||
#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
|
||||
#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
|
||||
|
||||
/*
|
||||
* We model BTS tracing as another fixed-mode PMC.
|
||||
*
|
||||
* We choose a value in the middle of the fixed event range, since lower
|
||||
* We choose the value 47 for the fixed index of BTS, since lower
|
||||
* values are used by actual fixed events and higher values are used
|
||||
* to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
|
||||
*/
|
||||
#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 16)
|
||||
#define INTEL_PMC_IDX_FIXED_BTS (INTEL_PMC_IDX_FIXED + 15)
|
||||
|
||||
/*
|
||||
* The PERF_METRICS MSR is modeled as several magic fixed-mode PMCs, one for
|
||||
* each TopDown metric event.
|
||||
*
|
||||
* Internally the TopDown metric events are mapped to the FxCtr 3 (SLOTS).
|
||||
*/
|
||||
#define INTEL_PMC_IDX_METRIC_BASE (INTEL_PMC_IDX_FIXED + 16)
|
||||
#define INTEL_PMC_IDX_TD_RETIRING (INTEL_PMC_IDX_METRIC_BASE + 0)
|
||||
#define INTEL_PMC_IDX_TD_BAD_SPEC (INTEL_PMC_IDX_METRIC_BASE + 1)
|
||||
#define INTEL_PMC_IDX_TD_FE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 2)
|
||||
#define INTEL_PMC_IDX_TD_BE_BOUND (INTEL_PMC_IDX_METRIC_BASE + 3)
|
||||
#define INTEL_PMC_IDX_METRIC_END INTEL_PMC_IDX_TD_BE_BOUND
|
||||
#define INTEL_PMC_MSK_TOPDOWN ((0xfull << INTEL_PMC_IDX_METRIC_BASE) | \
|
||||
INTEL_PMC_MSK_FIXED_SLOTS)
|
||||
|
||||
/*
|
||||
* There is no event-code assigned to the TopDown events.
|
||||
*
|
||||
* For the slots event, use the pseudo code of the fixed counter 3.
|
||||
*
|
||||
* For the metric events, the pseudo event-code is 0x00.
|
||||
* The pseudo umask-code starts from the middle of the pseudo event
|
||||
* space, 0x80.
|
||||
*/
|
||||
#define INTEL_TD_SLOTS 0x0400 /* TOPDOWN.SLOTS */
|
||||
/* Level 1 metrics */
|
||||
#define INTEL_TD_METRIC_RETIRING 0x8000 /* Retiring metric */
|
||||
#define INTEL_TD_METRIC_BAD_SPEC 0x8100 /* Bad speculation metric */
|
||||
#define INTEL_TD_METRIC_FE_BOUND 0x8200 /* FE bound metric */
|
||||
#define INTEL_TD_METRIC_BE_BOUND 0x8300 /* BE bound metric */
|
||||
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_BE_BOUND
|
||||
#define INTEL_TD_METRIC_NUM 4
|
||||
|
||||
static inline bool is_metric_idx(int idx)
|
||||
{
|
||||
return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
|
||||
}
|
||||
|
||||
static inline bool is_topdown_idx(int idx)
|
||||
{
|
||||
return is_metric_idx(idx) || idx == INTEL_PMC_IDX_FIXED_SLOTS;
|
||||
}
|
||||
|
||||
#define INTEL_PMC_OTHER_TOPDOWN_BITS(bit) \
|
||||
(~(0x1ull << bit) & INTEL_PMC_MSK_TOPDOWN)
|
||||
|
||||
#define GLOBAL_STATUS_COND_CHG BIT_ULL(63)
|
||||
#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(62)
|
||||
#define GLOBAL_STATUS_BUFFER_OVF_BIT 62
|
||||
#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT)
|
||||
#define GLOBAL_STATUS_UNC_OVF BIT_ULL(61)
|
||||
#define GLOBAL_STATUS_ASIF BIT_ULL(60)
|
||||
#define GLOBAL_STATUS_COUNTERS_FROZEN BIT_ULL(59)
|
||||
#define GLOBAL_STATUS_LBRS_FROZEN_BIT 58
|
||||
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
|
||||
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
|
||||
#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55
|
||||
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
|
||||
#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
|
||||
|
||||
#define GLOBAL_CTRL_EN_PERF_METRICS 48
|
||||
/*
|
||||
* We model guest LBR event tracing as another fixed-mode PMC like BTS.
|
||||
*
|
||||
|
@ -334,6 +405,7 @@ struct pebs_xmm {
|
|||
#define IBS_OP_ENABLE (1ULL<<17)
|
||||
#define IBS_OP_MAX_CNT 0x0000FFFFULL
|
||||
#define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */
|
||||
#define IBS_OP_MAX_CNT_EXT_MASK (0x7FULL<<20) /* separate upper 7 bits */
|
||||
#define IBS_RIP_INVALID (1ULL<<38)
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
|
|
|
@ -212,17 +212,26 @@ struct hw_perf_event {
|
|||
*/
|
||||
u64 sample_period;
|
||||
|
||||
union {
|
||||
struct { /* Sampling */
|
||||
/*
|
||||
* The period we started this sample with.
|
||||
*/
|
||||
u64 last_period;
|
||||
|
||||
/*
|
||||
* However much is left of the current period; note that this is
|
||||
* a full 64bit value and allows for generation of periods longer
|
||||
* However much is left of the current period;
|
||||
* note that this is a full 64bit value and
|
||||
* allows for generation of periods longer
|
||||
* than hardware might allow.
|
||||
*/
|
||||
local64_t period_left;
|
||||
};
|
||||
struct { /* Topdown events counting for context switch */
|
||||
u64 saved_metric;
|
||||
u64 saved_slots;
|
||||
};
|
||||
};
|
||||
|
||||
/*
|
||||
* State for throttling the event, see __perf_event_overflow() and
|
||||
|
@ -576,9 +585,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
|
|||
* PERF_EV_CAP_SOFTWARE: Is a software event.
|
||||
* PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
|
||||
* from any CPU in the package where it is active.
|
||||
* PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
|
||||
* cannot be a group leader. If an event with this flag is detached from the
|
||||
* group it is scheduled out and moved into an unrecoverable ERROR state.
|
||||
*/
|
||||
#define PERF_EV_CAP_SOFTWARE BIT(0)
|
||||
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
|
||||
#define PERF_EV_CAP_SIBLING BIT(2)
|
||||
|
||||
#define SWEVENT_HLIST_BITS 8
|
||||
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
|
||||
|
@ -859,7 +872,6 @@ struct perf_cpu_context {
|
|||
struct list_head cgrp_cpuctx_entry;
|
||||
#endif
|
||||
|
||||
struct list_head sched_cb_entry;
|
||||
int sched_cb_usage;
|
||||
|
||||
int online;
|
||||
|
|
|
@ -383,7 +383,6 @@ static DEFINE_MUTEX(perf_sched_mutex);
|
|||
static atomic_t perf_sched_count;
|
||||
|
||||
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
|
||||
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
|
||||
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
|
||||
|
||||
static atomic_t nr_mmap_events __read_mostly;
|
||||
|
@ -2134,8 +2133,24 @@ static inline struct list_head *get_event_list(struct perf_event *event)
|
|||
return event->attr.pinned ? &ctx->pinned_active : &ctx->flexible_active;
|
||||
}
|
||||
|
||||
/*
|
||||
* Events that have PERF_EV_CAP_SIBLING require being part of a group and
|
||||
* cannot exist on their own, schedule them out and move them into the ERROR
|
||||
* state. Also see _perf_event_enable(), it will not be able to recover
|
||||
* this ERROR state.
|
||||
*/
|
||||
static inline void perf_remove_sibling_event(struct perf_event *event)
|
||||
{
|
||||
struct perf_event_context *ctx = event->ctx;
|
||||
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
||||
|
||||
event_sched_out(event, cpuctx, ctx);
|
||||
perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
|
||||
}
|
||||
|
||||
static void perf_group_detach(struct perf_event *event)
|
||||
{
|
||||
struct perf_event *leader = event->group_leader;
|
||||
struct perf_event *sibling, *tmp;
|
||||
struct perf_event_context *ctx = event->ctx;
|
||||
|
||||
|
@ -2154,7 +2169,7 @@ static void perf_group_detach(struct perf_event *event)
|
|||
/*
|
||||
* If this is a sibling, remove it from its group.
|
||||
*/
|
||||
if (event->group_leader != event) {
|
||||
if (leader != event) {
|
||||
list_del_init(&event->sibling_list);
|
||||
event->group_leader->nr_siblings--;
|
||||
goto out;
|
||||
|
@ -2167,6 +2182,9 @@ static void perf_group_detach(struct perf_event *event)
|
|||
*/
|
||||
list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
|
||||
|
||||
if (sibling->event_caps & PERF_EV_CAP_SIBLING)
|
||||
perf_remove_sibling_event(sibling);
|
||||
|
||||
sibling->group_leader = sibling;
|
||||
list_del_init(&sibling->sibling_list);
|
||||
|
||||
|
@ -2184,10 +2202,10 @@ static void perf_group_detach(struct perf_event *event)
|
|||
}
|
||||
|
||||
out:
|
||||
perf_event__header_size(event->group_leader);
|
||||
|
||||
for_each_sibling_event(tmp, event->group_leader)
|
||||
for_each_sibling_event(tmp, leader)
|
||||
perf_event__header_size(tmp);
|
||||
|
||||
perf_event__header_size(leader);
|
||||
}
|
||||
|
||||
static bool is_orphaned_event(struct perf_event *event)
|
||||
|
@ -2980,6 +2998,7 @@ static void _perf_event_enable(struct perf_event *event)
|
|||
raw_spin_lock_irq(&ctx->lock);
|
||||
if (event->state >= PERF_EVENT_STATE_INACTIVE ||
|
||||
event->state < PERF_EVENT_STATE_ERROR) {
|
||||
out:
|
||||
raw_spin_unlock_irq(&ctx->lock);
|
||||
return;
|
||||
}
|
||||
|
@ -2991,8 +3010,16 @@ static void _perf_event_enable(struct perf_event *event)
|
|||
* has gone back into error state, as distinct from the task having
|
||||
* been scheduled away before the cross-call arrived.
|
||||
*/
|
||||
if (event->state == PERF_EVENT_STATE_ERROR)
|
||||
if (event->state == PERF_EVENT_STATE_ERROR) {
|
||||
/*
|
||||
* Detached SIBLING events cannot leave ERROR state.
|
||||
*/
|
||||
if (event->event_caps & PERF_EV_CAP_SIBLING &&
|
||||
event->group_leader == event)
|
||||
goto out;
|
||||
|
||||
event->state = PERF_EVENT_STATE_OFF;
|
||||
}
|
||||
raw_spin_unlock_irq(&ctx->lock);
|
||||
|
||||
event_function_call(event, __perf_event_enable, NULL);
|
||||
|
@ -3357,10 +3384,12 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
|
|||
struct perf_event_context *parent, *next_parent;
|
||||
struct perf_cpu_context *cpuctx;
|
||||
int do_switch = 1;
|
||||
struct pmu *pmu;
|
||||
|
||||
if (likely(!ctx))
|
||||
return;
|
||||
|
||||
pmu = ctx->pmu;
|
||||
cpuctx = __get_cpu_context(ctx);
|
||||
if (!cpuctx->task_ctx)
|
||||
return;
|
||||
|
@ -3390,11 +3419,15 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
|
|||
raw_spin_lock(&ctx->lock);
|
||||
raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
|
||||
if (context_equiv(ctx, next_ctx)) {
|
||||
struct pmu *pmu = ctx->pmu;
|
||||
|
||||
WRITE_ONCE(ctx->task, next);
|
||||
WRITE_ONCE(next_ctx->task, task);
|
||||
|
||||
perf_pmu_disable(pmu);
|
||||
|
||||
if (cpuctx->sched_cb_usage && pmu->sched_task)
|
||||
pmu->sched_task(ctx, false);
|
||||
|
||||
/*
|
||||
* PMU specific parts of task perf context can require
|
||||
* additional synchronization. As an example of such
|
||||
|
@ -3406,6 +3439,8 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
|
|||
else
|
||||
swap(ctx->task_ctx_data, next_ctx->task_ctx_data);
|
||||
|
||||
perf_pmu_enable(pmu);
|
||||
|
||||
/*
|
||||
* RCU_INIT_POINTER here is safe because we've not
|
||||
* modified the ctx and the above modification of
|
||||
|
@ -3428,21 +3463,22 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
|
|||
|
||||
if (do_switch) {
|
||||
raw_spin_lock(&ctx->lock);
|
||||
perf_pmu_disable(pmu);
|
||||
|
||||
if (cpuctx->sched_cb_usage && pmu->sched_task)
|
||||
pmu->sched_task(ctx, false);
|
||||
task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
|
||||
|
||||
perf_pmu_enable(pmu);
|
||||
raw_spin_unlock(&ctx->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct list_head, sched_cb_list);
|
||||
|
||||
void perf_sched_cb_dec(struct pmu *pmu)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
|
||||
|
||||
this_cpu_dec(perf_sched_cb_usages);
|
||||
|
||||
if (!--cpuctx->sched_cb_usage)
|
||||
list_del(&cpuctx->sched_cb_entry);
|
||||
--cpuctx->sched_cb_usage;
|
||||
}
|
||||
|
||||
|
||||
|
@ -3450,10 +3486,7 @@ void perf_sched_cb_inc(struct pmu *pmu)
|
|||
{
|
||||
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
|
||||
|
||||
if (!cpuctx->sched_cb_usage++)
|
||||
list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
|
||||
|
||||
this_cpu_inc(perf_sched_cb_usages);
|
||||
cpuctx->sched_cb_usage++;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3464,21 +3497,14 @@ void perf_sched_cb_inc(struct pmu *pmu)
|
|||
* PEBS requires this to provide PID/TID information. This requires we flush
|
||||
* all queued PEBS records before we context switch to a new task.
|
||||
*/
|
||||
static void perf_pmu_sched_task(struct task_struct *prev,
|
||||
struct task_struct *next,
|
||||
bool sched_in)
|
||||
static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
struct pmu *pmu;
|
||||
|
||||
if (prev == next)
|
||||
return;
|
||||
|
||||
list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
|
||||
pmu = cpuctx->ctx.pmu; /* software PMUs will not have sched_task */
|
||||
|
||||
if (WARN_ON_ONCE(!pmu->sched_task))
|
||||
continue;
|
||||
return;
|
||||
|
||||
perf_ctx_lock(cpuctx, cpuctx->task_ctx);
|
||||
perf_pmu_disable(pmu);
|
||||
|
@ -3488,7 +3514,6 @@ static void perf_pmu_sched_task(struct task_struct *prev,
|
|||
perf_pmu_enable(pmu);
|
||||
perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
|
||||
}
|
||||
}
|
||||
|
||||
static void perf_event_switch(struct task_struct *task,
|
||||
struct task_struct *next_prev, bool sched_in);
|
||||
|
@ -3512,9 +3537,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
|
|||
{
|
||||
int ctxn;
|
||||
|
||||
if (__this_cpu_read(perf_sched_cb_usages))
|
||||
perf_pmu_sched_task(task, next, false);
|
||||
|
||||
if (atomic_read(&nr_switch_events))
|
||||
perf_event_switch(task, next, false);
|
||||
|
||||
|
@ -3746,10 +3768,14 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
|
|||
struct task_struct *task)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
struct pmu *pmu = ctx->pmu;
|
||||
|
||||
cpuctx = __get_cpu_context(ctx);
|
||||
if (cpuctx->task_ctx == ctx)
|
||||
if (cpuctx->task_ctx == ctx) {
|
||||
if (cpuctx->sched_cb_usage)
|
||||
__perf_pmu_sched_task(cpuctx, true);
|
||||
return;
|
||||
}
|
||||
|
||||
perf_ctx_lock(cpuctx, ctx);
|
||||
/*
|
||||
|
@ -3759,7 +3785,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
|
|||
if (!ctx->nr_events)
|
||||
goto unlock;
|
||||
|
||||
perf_pmu_disable(ctx->pmu);
|
||||
perf_pmu_disable(pmu);
|
||||
/*
|
||||
* We want to keep the following priority order:
|
||||
* cpu pinned (that don't need to move), task pinned,
|
||||
|
@ -3771,7 +3797,11 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
|
|||
if (!RB_EMPTY_ROOT(&ctx->pinned_groups.tree))
|
||||
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
|
||||
perf_event_sched_in(cpuctx, ctx, task);
|
||||
perf_pmu_enable(ctx->pmu);
|
||||
|
||||
if (cpuctx->sched_cb_usage && pmu->sched_task)
|
||||
pmu->sched_task(cpuctx->task_ctx, true);
|
||||
|
||||
perf_pmu_enable(pmu);
|
||||
|
||||
unlock:
|
||||
perf_ctx_unlock(cpuctx, ctx);
|
||||
|
@ -3814,9 +3844,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
|
|||
|
||||
if (atomic_read(&nr_switch_events))
|
||||
perf_event_switch(task, prev, true);
|
||||
|
||||
if (__this_cpu_read(perf_sched_cb_usages))
|
||||
perf_pmu_sched_task(prev, task, true);
|
||||
}
|
||||
|
||||
static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
|
||||
|
@ -5869,11 +5896,11 @@ static void perf_pmu_output_stop(struct perf_event *event);
|
|||
static void perf_mmap_close(struct vm_area_struct *vma)
|
||||
{
|
||||
struct perf_event *event = vma->vm_file->private_data;
|
||||
|
||||
struct perf_buffer *rb = ring_buffer_get(event);
|
||||
struct user_struct *mmap_user = rb->mmap_user;
|
||||
int mmap_locked = rb->mmap_locked;
|
||||
unsigned long size = perf_data_size(rb);
|
||||
bool detach_rest = false;
|
||||
|
||||
if (event->pmu->event_unmapped)
|
||||
event->pmu->event_unmapped(event, vma->vm_mm);
|
||||
|
@ -5904,7 +5931,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
|
|||
mutex_unlock(&event->mmap_mutex);
|
||||
}
|
||||
|
||||
atomic_dec(&rb->mmap_count);
|
||||
if (atomic_dec_and_test(&rb->mmap_count))
|
||||
detach_rest = true;
|
||||
|
||||
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
|
||||
goto out_put;
|
||||
|
@ -5913,7 +5941,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
|
|||
mutex_unlock(&event->mmap_mutex);
|
||||
|
||||
/* If there's still other mmap()s of this buffer, we're done. */
|
||||
if (atomic_read(&rb->mmap_count))
|
||||
if (!detach_rest)
|
||||
goto out_put;
|
||||
|
||||
/*
|
||||
|
@ -12829,7 +12857,6 @@ static void __init perf_event_init_all_cpus(void)
|
|||
#ifdef CONFIG_CGROUP_PERF
|
||||
INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
|
||||
#endif
|
||||
INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -106,9 +106,10 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
|
|||
static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
|
||||
struct module *mod)
|
||||
{
|
||||
int len = strlen(mod->name);
|
||||
int len = strlen(module_name(mod));
|
||||
const char *name = trace_kprobe_symbol(tk);
|
||||
return strncmp(mod->name, name, len) == 0 && name[len] == ':';
|
||||
|
||||
return strncmp(module_name(mod), name, len) == 0 && name[len] == ':';
|
||||
}
|
||||
|
||||
static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
|
||||
|
@ -688,7 +689,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
|
|||
if (ret)
|
||||
pr_warn("Failed to re-register probe %s on %s: %d\n",
|
||||
trace_probe_name(&tk->tp),
|
||||
mod->name, ret);
|
||||
module_name(mod), ret);
|
||||
}
|
||||
}
|
||||
mutex_unlock(&event_mutex);
|
||||
|
|
Loading…
Reference in New Issue
Block a user