From 765047932f153265db6ef15be208d6cbfc03dc62 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Fri, 21 Feb 2020 19:52:05 -0500 Subject: [PATCH] sched/pelt: Add support to track thermal pressure Extrapolating on the existing framework to track rt/dl utilization using pelt signals, add a similar mechanism to track thermal pressure. The difference here from rt/dl utilization tracking is that, instead of tracking time spent by a CPU running a RT/DL task through util_avg, the average thermal pressure is tracked through load_avg. This is because the thermal pressure signal is time weighted "delta" capacity unlike util_avg which is binary. "delta capacity" here means the delta between the actual capacity of a CPU and the decreased capacity of a CPU due to a thermal event. In order to track average thermal pressure, a new sched_avg variable avg_thermal is introduced. Function update_thermal_load_avg can be called to do the periodic bookkeeping (accumulate, decay and average) of the thermal pressure. Reviewed-by: Vincent Guittot Signed-off-by: Thara Gopinath Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200222005213.3873-2-thara.gopinath@linaro.org --- include/trace/events/sched.h | 4 ++++ init/Kconfig | 4 ++++ kernel/sched/pelt.c | 31 +++++++++++++++++++++++++++++++ kernel/sched/pelt.h | 31 +++++++++++++++++++++++++++++++ kernel/sched/sched.h | 3 +++ 5 files changed, 73 insertions(+) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9c3ebb7c83a5..ed168b0e2c53 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -618,6 +618,10 @@ DECLARE_TRACE(pelt_dl_tp, TP_PROTO(struct rq *rq), TP_ARGS(rq)); +DECLARE_TRACE(pelt_thermal_tp, + TP_PROTO(struct rq *rq), + TP_ARGS(rq)); + DECLARE_TRACE(pelt_irq_tp, TP_PROTO(struct rq *rq), TP_ARGS(rq)); diff --git a/init/Kconfig b/init/Kconfig index 20a6ac33761c..275c848b02c6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -451,6 +451,10 @@ 
config HAVE_SCHED_AVG_IRQ depends on IRQ_TIME_ACCOUNTING || PARAVIRT_TIME_ACCOUNTING depends on SMP +config SCHED_THERMAL_PRESSURE + bool "Enable periodic averaging of thermal pressure" + depends on SMP + config BSD_PROCESS_ACCT bool "BSD Process Accounting" depends on MULTIUSER diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index c40d57a2a248..b647d04d9c8b 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -368,6 +368,37 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) return 0; } +#ifdef CONFIG_SCHED_THERMAL_PRESSURE +/* + * thermal: + * + * load_sum = \Sum se->avg.load_sum but se->avg.load_sum is not tracked + * + * util_avg and runnable_load_avg are not supported and meaningless. + * + * Unlike rt/dl utilization tracking, which tracks time spent by a cpu + * running a rt/dl task through util_avg, the average thermal pressure is + * tracked through load_avg. This is because the thermal pressure signal is + * time weighted "delta" capacity, unlike util_avg which is binary. + * "delta capacity" = actual capacity - + * capped capacity of a cpu due to a thermal event. + * + * update_thermal_load_avg() forwards @now and &rq->avg_thermal to + * ___update_load_sum(), passing @capacity for each of its three remaining + * arguments. When that call returns non-zero, + * ___update_load_avg(&rq->avg_thermal, 1) is called, the pelt_thermal_tp + * tracepoint is emitted and 1 is returned; otherwise 0 is returned. 
+ */ + +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) +{ + if (___update_load_sum(now, &rq->avg_thermal, + capacity, + capacity, + capacity)) { + ___update_load_avg(&rq->avg_thermal, 1); + trace_pelt_thermal_tp(rq); + return 1; + } + + return 0; +} +#endif + #ifdef CONFIG_HAVE_SCHED_AVG_IRQ /* * irq: diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index afff644da065..eb034d9f024d 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -7,6 +7,26 @@ int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); +#ifdef CONFIG_SCHED_THERMAL_PRESSURE +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity); + +static inline u64 thermal_load_avg(struct rq *rq) +{ + return READ_ONCE(rq->avg_thermal.load_avg); +} +#else +static inline int +update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) +{ + return 0; +} + +static inline u64 thermal_load_avg(struct rq *rq) +{ + return 0; +} +#endif + #ifdef CONFIG_HAVE_SCHED_AVG_IRQ int update_irq_load_avg(struct rq *rq, u64 running); #else @@ -158,6 +178,17 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) return 0; } +static inline int +update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) +{ + return 0; +} + +static inline u64 thermal_load_avg(struct rq *rq) +{ + return 0; +} + static inline int update_irq_load_avg(struct rq *rq, u64 running) { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 2a0caf394dd4..6c839f829a25 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -960,6 +960,9 @@ struct rq { struct sched_avg avg_dl; #ifdef CONFIG_HAVE_SCHED_AVG_IRQ struct sched_avg avg_irq; +#endif +#ifdef CONFIG_SCHED_THERMAL_PRESSURE + struct sched_avg avg_thermal; #endif u64 idle_stamp; u64 avg_idle;