From 45a41875fa20836f2839d4dda1f37d119a275542 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 12 May 2020 15:53:18 +0300 Subject: [PATCH 01/16] dt-bindings: opp: Introduce opp-peak-kBps and opp-avg-kBps bindings Interconnects often quantify their performance points in terms of bandwidth. So, add opp-peak-kBps (required) and opp-avg-kBps (optional) to allow specifying Bandwidth OPP tables in DT. opp-peak-kBps is a required property that replaces opp-hz for Bandwidth OPP tables. opp-avg-kBps is an optional property that can be used in Bandwidth OPP tables. Signed-off-by: Saravana Kannan Signed-off-by: Georgi Djakov Reviewed-by: Sibi Sankar Reviewed-by: Rob Herring Signed-off-by: Viresh Kumar --- Documentation/devicetree/bindings/opp/opp.txt | 17 ++++++++++++++--- .../devicetree/bindings/property-units.txt | 4 ++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 68592271461f..9d16d417e9be 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -83,9 +83,14 @@ properties. Required properties: - opp-hz: Frequency in Hz, expressed as a 64-bit big-endian integer. This is a - required property for all device nodes but devices like power domains. The - power domain nodes must have another (implementation dependent) property which - uniquely identifies the OPP nodes. + required property for all device nodes, unless another "required" property to + uniquely identify the OPP nodes exists. Devices like power domains must have + another (implementation dependent) property. + +- opp-peak-kBps: Peak bandwidth in kilobytes per second, expressed as an array + of 32-bit big-endian integers. Each element of the array represents the + peak bandwidth value of each interconnect path. The number of elements should + match the number of interconnect paths. 
Optional properties: - opp-microvolt: voltage in micro Volts. @@ -132,6 +137,12 @@ Optional properties: - opp-level: A value representing the performance level of the device, expressed as a 32-bit integer. +- opp-avg-kBps: Average bandwidth in kilobytes per second, expressed as an array + of 32-bit big-endian integers. Each element of the array represents the + average bandwidth value of each interconnect path. The number of elements + should match the number of interconnect paths. This property is only + meaningful in OPP tables where opp-peak-kBps is present. + - clock-latency-ns: Specifies the maximum possible transition latency (in nanoseconds) for switching to this OPP from any other OPP. diff --git a/Documentation/devicetree/bindings/property-units.txt b/Documentation/devicetree/bindings/property-units.txt index e9b8360b3288..c80a110c1e26 100644 --- a/Documentation/devicetree/bindings/property-units.txt +++ b/Documentation/devicetree/bindings/property-units.txt @@ -41,3 +41,7 @@ Temperature Pressure ---------------------------------------- -kpascal : kilopascal + +Throughput +---------------------------------------- +-kBps : kilobytes per second From 6c591eec67cbb4db988ab35b944f5cf9013c0714 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 12 May 2020 15:53:19 +0300 Subject: [PATCH 02/16] OPP: Add helpers for reading the binding properties The opp-hz DT property is not mandatory and we may use another property as a key in the OPP table. Add helper functions to simplify reading and comparing the keys. 
Signed-off-by: Saravana Kannan Signed-off-by: Georgi Djakov Reviewed-by: Matthias Kaehlcke Reviewed-by: Sibi Sankar [ Viresh: Removed an unnecessary comment ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 15 +++++++++++++-- drivers/opp/of.c | 44 ++++++++++++++++++++++++++------------------ drivers/opp/opp.h | 1 + 3 files changed, 40 insertions(+), 20 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index e4f01e7771a2..ce7e4103ec09 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1286,11 +1286,21 @@ static bool _opp_supported_by_regulators(struct dev_pm_opp *opp, return true; } +int _opp_compare_key(struct dev_pm_opp *opp1, struct dev_pm_opp *opp2) +{ + if (opp1->rate != opp2->rate) + return opp1->rate < opp2->rate ? -1 : 1; + if (opp1->level != opp2->level) + return opp1->level < opp2->level ? -1 : 1; + return 0; +} + static int _opp_is_duplicate(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table, struct list_head **head) { struct dev_pm_opp *opp; + int opp_cmp; /* * Insert new OPP in order of increasing frequency and discard if @@ -1301,12 +1311,13 @@ static int _opp_is_duplicate(struct device *dev, struct dev_pm_opp *new_opp, * loop. 
*/ list_for_each_entry(opp, &opp_table->opp_list, node) { - if (new_opp->rate > opp->rate) { + opp_cmp = _opp_compare_key(new_opp, opp); + if (opp_cmp > 0) { *head = &opp->node; continue; } - if (new_opp->rate < opp->rate) + if (opp_cmp < 0) return 0; /* Duplicate OPPs */ diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 9cd8f0adacae..303d2207e0ff 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -521,6 +521,28 @@ void dev_pm_opp_of_remove_table(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); +static int _read_opp_key(struct dev_pm_opp *new_opp, struct device_node *np, + bool *rate_not_available) +{ + u64 rate; + int ret; + + ret = of_property_read_u64(np, "opp-hz", &rate); + if (!ret) { + /* + * Rate is defined as an unsigned long in clk API, and so + * casting explicitly to its type. Must be fixed once rate is 64 + * bit guaranteed in clk API. + */ + new_opp->rate = (unsigned long)rate; + } + *rate_not_available = !!ret; + + of_property_read_u32(np, "opp-level", &new_opp->level); + + return ret; +} + /** * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings) * @opp_table: OPP table @@ -558,26 +580,12 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, if (!new_opp) return ERR_PTR(-ENOMEM); - ret = of_property_read_u64(np, "opp-hz", &rate); - if (ret < 0) { - /* "opp-hz" is optional for devices like power domains. */ - if (!opp_table->is_genpd) { - dev_err(dev, "%s: opp-hz not found\n", __func__); - goto free_opp; - } - - rate_not_available = true; - } else { - /* - * Rate is defined as an unsigned long in clk API, and so - * casting explicitly to its type. Must be fixed once rate is 64 - * bit guaranteed in clk API. 
- */ - new_opp->rate = (unsigned long)rate; + ret = _read_opp_key(new_opp, np, &rate_not_available); + if (ret < 0 && !opp_table->is_genpd) { + dev_err(dev, "%s: opp key field not found\n", __func__); + goto free_opp; } - of_property_read_u32(np, "opp-level", &new_opp->level); - /* Check if the OPP supports hardware's hierarchy of versions or not */ if (!_opp_is_supported(dev, opp_table, np)) { dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index d14e27102730..bcadb1e328a4 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -211,6 +211,7 @@ struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_ void _dev_pm_opp_find_and_remove_table(struct device *dev); struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table); void _opp_free(struct dev_pm_opp *opp); +int _opp_compare_key(struct dev_pm_opp *opp1, struct dev_pm_opp *opp2); int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table, bool rate_not_available); int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic); void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, int last_cpu); From 6cc3d0e9a097981c8a0317c65b8a2278593bd2b0 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Wed, 23 Oct 2019 11:27:03 +0200 Subject: [PATCH 03/16] cpufreq: tegra186: add CPUFREQ_NEED_INITIAL_FREQ_CHECK flag The driver doesn't provide ->get() method to read current frequency and the frequency is set to 0 at initialization which makes the driver fail at initialization time. Set the CPUFREQ_NEED_INITIAL_FREQ_CHECK flag for the driver, so the cpufreq core checks for the unlisted frequency and sets the CPU to a valid frequency from the frequency table. 
Signed-off-by: Mian Yousaf Kaukab [ Viresh: Massaged change log ] Signed-off-by: Viresh Kumar --- drivers/cpufreq/tegra186-cpufreq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c index 2e233ad72758..3d2f143748ef 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -93,7 +93,8 @@ static int tegra186_cpufreq_set_target(struct cpufreq_policy *policy, static struct cpufreq_driver tegra186_cpufreq_driver = { .name = "tegra186", - .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY, + .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = tegra186_cpufreq_set_target, .init = tegra186_cpufreq_init, From 6d3f922c46f2e91f63c92f8dd28381f097082912 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Tue, 12 May 2020 15:53:21 +0300 Subject: [PATCH 04/16] opp: Add support for parsing interconnect bandwidth The OPP bindings now support bandwidth values, so add support to parse it from device tree and store it into the new dev_pm_opp_icc_bw struct, which is part of the dev_pm_opp. Signed-off-by: Georgi Djakov Reviewed-by: Matthias Kaehlcke [ Viresh: Create _read_bw() and use it, renamed _of_find_icc_paths() to dev_pm_opp_of_find_icc_paths(), exported it and made opp_table argument optional. Also drop the depends on from Kconfig. 
] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 28 ++++++++-- drivers/opp/of.c | 114 ++++++++++++++++++++++++++++++++++++++++- drivers/opp/opp.h | 7 +++ include/linux/pm_opp.h | 18 +++++++ 4 files changed, 162 insertions(+), 5 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index ce7e4103ec09..d19cc7970643 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -999,6 +999,12 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) ret); } + /* Find interconnect path(s) for the device */ + ret = dev_pm_opp_of_find_icc_paths(dev, opp_table); + if (ret) + dev_warn(dev, "%s: Error finding interconnect paths: %d\n", + __func__, ret); + BLOCKING_INIT_NOTIFIER_HEAD(&opp_table->head); INIT_LIST_HEAD(&opp_table->opp_list); kref_init(&opp_table->kref); @@ -1057,6 +1063,7 @@ static void _opp_table_kref_release(struct kref *kref) { struct opp_table *opp_table = container_of(kref, struct opp_table, kref); struct opp_device *opp_dev, *temp; + int i; _of_clear_opp_table(opp_table); @@ -1064,6 +1071,12 @@ static void _opp_table_kref_release(struct kref *kref) if (!IS_ERR(opp_table->clk)) clk_put(opp_table->clk); + if (opp_table->paths) { + for (i = 0; i < opp_table->path_count; i++) + icc_put(opp_table->paths[i]); + kfree(opp_table->paths); + } + WARN_ON(!list_empty(&opp_table->opp_list)); list_for_each_entry_safe(opp_dev, temp, &opp_table->dev_list, node) { @@ -1243,19 +1256,23 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_remove_all_dynamic); struct dev_pm_opp *_opp_allocate(struct opp_table *table) { struct dev_pm_opp *opp; - int count, supply_size; + int supply_count, supply_size, icc_size; /* Allocate space for at least one supply */ - count = table->regulator_count > 0 ? table->regulator_count : 1; - supply_size = sizeof(*opp->supplies) * count; + supply_count = table->regulator_count > 0 ? 
table->regulator_count : 1; + supply_size = sizeof(*opp->supplies) * supply_count; + icc_size = sizeof(*opp->bandwidth) * table->path_count; /* allocate new OPP node and supplies structures */ - opp = kzalloc(sizeof(*opp) + supply_size, GFP_KERNEL); + opp = kzalloc(sizeof(*opp) + supply_size + icc_size, GFP_KERNEL); + if (!opp) return NULL; /* Put the supplies at the end of the OPP structure as an empty array */ opp->supplies = (struct dev_pm_opp_supply *)(opp + 1); + if (icc_size) + opp->bandwidth = (struct dev_pm_opp_icc_bw *)(opp->supplies + supply_count); INIT_LIST_HEAD(&opp->node); return opp; @@ -1290,6 +1307,9 @@ int _opp_compare_key(struct dev_pm_opp *opp1, struct dev_pm_opp *opp2) { if (opp1->rate != opp2->rate) return opp1->rate < opp2->rate ? -1 : 1; + if (opp1->bandwidth && opp2->bandwidth && + opp1->bandwidth[0].peak != opp2->bandwidth[0].peak) + return opp1->bandwidth[0].peak < opp2->bandwidth[0].peak ? -1 : 1; if (opp1->level != opp2->level) return opp1->level < opp2->level ? 
-1 : 1; return 0; diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 303d2207e0ff..0c55862f5624 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -332,6 +332,62 @@ static int _of_opp_alloc_required_opps(struct opp_table *opp_table, return ret; } +int dev_pm_opp_of_find_icc_paths(struct device *dev, + struct opp_table *opp_table) +{ + struct device_node *np; + int ret = 0, i, count, num_paths; + struct icc_path **paths; + + np = of_node_get(dev->of_node); + if (!np) + return 0; + + count = of_count_phandle_with_args(np, "interconnects", + "#interconnect-cells"); + of_node_put(np); + if (count < 0) + return 0; + + /* two phandles when #interconnect-cells = <1> */ + if (count % 2) { + dev_err(dev, "%s: Invalid interconnects values\n", __func__); + return -EINVAL; + } + + num_paths = count / 2; + paths = kcalloc(num_paths, sizeof(*paths), GFP_KERNEL); + if (!paths) + return -ENOMEM; + + for (i = 0; i < num_paths; i++) { + paths[i] = of_icc_get_by_index(dev, i); + if (IS_ERR(paths[i])) { + ret = PTR_ERR(paths[i]); + if (ret != -EPROBE_DEFER) { + dev_err(dev, "%s: Unable to get path%d: %d\n", + __func__, i, ret); + } + goto err; + } + } + + if (opp_table) { + opp_table->paths = paths; + opp_table->path_count = num_paths; + return 0; + } + +err: + while (i--) + icc_put(paths[i]); + + kfree(paths); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_of_find_icc_paths); + static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, struct device_node *np) { @@ -521,9 +577,45 @@ void dev_pm_opp_of_remove_table(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); +static int _read_bw(struct dev_pm_opp *new_opp, struct device_node *np, + bool peak) +{ + const char *name = peak ? 
"opp-peak-kBps" : "opp-avg-kBps"; + struct property *prop; + int i, count, ret; + u32 *bw; + + prop = of_find_property(np, name, NULL); + if (!prop) + return -ENODEV; + + count = prop->length / sizeof(u32); + bw = kmalloc_array(count, sizeof(*bw), GFP_KERNEL); + if (!bw) + return -ENOMEM; + + ret = of_property_read_u32_array(np, name, bw, count); + if (ret) { + pr_err("%s: Error parsing %s: %d\n", __func__, name, ret); + goto out; + } + + for (i = 0; i < count; i++) { + if (peak) + new_opp->bandwidth[i].peak = kBps_to_icc(bw[i]); + else + new_opp->bandwidth[i].avg = kBps_to_icc(bw[i]); + } + +out: + kfree(bw); + return ret; +} + static int _read_opp_key(struct dev_pm_opp *new_opp, struct device_node *np, bool *rate_not_available) { + bool found = false; u64 rate; int ret; @@ -535,10 +627,30 @@ static int _read_opp_key(struct dev_pm_opp *new_opp, struct device_node *np, * bit guaranteed in clk API. */ new_opp->rate = (unsigned long)rate; + found = true; } *rate_not_available = !!ret; - of_property_read_u32(np, "opp-level", &new_opp->level); + /* + * Bandwidth consists of peak and average (optional) values: + * opp-peak-kBps = ; + * opp-avg-kBps = ; + */ + ret = _read_bw(new_opp, np, true); + if (!ret) { + found = true; + ret = _read_bw(new_opp, np, false); + } + + /* The properties were found but we failed to parse them */ + if (ret && ret != -ENODEV) + return ret; + + if (!of_property_read_u32(np, "opp-level", &new_opp->level)) + found = true; + + if (found) + return 0; return ret; } diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index bcadb1e328a4..2b81ffef1ba4 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -12,6 +12,7 @@ #define __DRIVER_OPP_H__ #include +#include #include #include #include @@ -59,6 +60,7 @@ extern struct list_head opp_tables; * @rate: Frequency in hertz * @level: Performance level * @supplies: Power supplies voltage/current values + * @bandwidth: Interconnect bandwidth values * @clock_latency_ns: Latency (in nanoseconds) of 
switching to this OPP's * frequency from any other OPP's frequency. * @required_opps: List of OPPs that are required by this OPP. @@ -81,6 +83,7 @@ struct dev_pm_opp { unsigned int level; struct dev_pm_opp_supply *supplies; + struct dev_pm_opp_icc_bw *bandwidth; unsigned long clock_latency_ns; @@ -146,6 +149,8 @@ enum opp_table_access { * @regulator_count: Number of power supply regulators. Its value can be -1 * (uninitialized), 0 (no opp-microvolt property) or > 0 (has opp-microvolt * property). + * @paths: Interconnect path handles + * @path_count: Number of interconnect paths * @genpd_performance_state: Device's power domain support performance state. * @is_genpd: Marks if the OPP table belongs to a genpd. * @set_opp: Platform specific set_opp callback @@ -189,6 +194,8 @@ struct opp_table { struct clk *clk; struct regulator **regulators; int regulator_count; + struct icc_path **paths; + unsigned int path_count; bool genpd_performance_state; bool is_genpd; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 747861816f4f..d5c4a329321d 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -41,6 +41,18 @@ struct dev_pm_opp_supply { unsigned long u_amp; }; +/** + * struct dev_pm_opp_icc_bw - Interconnect bandwidth values + * @avg: Average bandwidth corresponding to this OPP (in icc units) + * @peak: Peak bandwidth corresponding to this OPP (in icc units) + * + * This structure stores the bandwidth values for a single interconnect path. 
+ */ +struct dev_pm_opp_icc_bw { + u32 avg; + u32 peak; +}; + /** * struct dev_pm_opp_info - OPP freq/voltage/current values * @rate: Target clk rate in hz @@ -360,6 +372,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpuma struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); int of_get_required_opp_performance_state(struct device_node *np, int index); +int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table); void dev_pm_opp_of_register_em(struct cpumask *cpus); #else static inline int dev_pm_opp_of_add_table(struct device *dev) @@ -408,6 +421,11 @@ static inline int of_get_required_opp_performance_state(struct device_node *np, { return -ENOTSUPP; } + +static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table) +{ + return -ENOTSUPP; +} #endif #endif /* __LINUX_OPP_H__ */ From 120e117bdc8483773fdae19dd326ef460b1ab4c2 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Tue, 12 May 2020 15:53:22 +0300 Subject: [PATCH 05/16] opp: Add sanity checks in _read_opp_key() When we read the OPP keys, it would be nice to do some sanity checks of the values we get from DT and see if they match the information that is populated in the OPP table. Let's pass a pointer to the table, so that we can do some validation. 
Signed-off-by: Georgi Djakov Reviewed-by: Matthias Kaehlcke Reviewed-by: Sibi Sankar [ Viresh: Fix rebase conflicts ] Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 0c55862f5624..61fce1284f01 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -577,8 +577,8 @@ void dev_pm_opp_of_remove_table(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); -static int _read_bw(struct dev_pm_opp *new_opp, struct device_node *np, - bool peak) +static int _read_bw(struct dev_pm_opp *new_opp, struct opp_table *table, + struct device_node *np, bool peak) { const char *name = peak ? "opp-peak-kBps" : "opp-avg-kBps"; struct property *prop; @@ -590,6 +590,12 @@ static int _read_bw(struct dev_pm_opp *new_opp, struct device_node *np, return -ENODEV; count = prop->length / sizeof(u32); + if (table->path_count != count) { + pr_err("%s: Mismatch between %s and paths (%d %d)\n", + __func__, name, count, table->path_count); + return -EINVAL; + } + bw = kmalloc_array(count, sizeof(*bw), GFP_KERNEL); if (!bw) return -ENOMEM; @@ -612,8 +618,8 @@ static int _read_bw(struct dev_pm_opp *new_opp, struct device_node *np, return ret; } -static int _read_opp_key(struct dev_pm_opp *new_opp, struct device_node *np, - bool *rate_not_available) +static int _read_opp_key(struct dev_pm_opp *new_opp, struct opp_table *table, + struct device_node *np, bool *rate_not_available) { bool found = false; u64 rate; @@ -636,10 +642,10 @@ static int _read_opp_key(struct dev_pm_opp *new_opp, struct device_node *np, * opp-peak-kBps = ; * opp-avg-kBps = ; */ - ret = _read_bw(new_opp, np, true); + ret = _read_bw(new_opp, table, np, true); if (!ret) { found = true; - ret = _read_bw(new_opp, np, false); + ret = _read_bw(new_opp, table, np, false); } /* The properties were found but we failed to parse them */ @@ -692,7 +698,7 @@ static struct dev_pm_opp 
*_opp_add_static_v2(struct opp_table *opp_table, if (!new_opp) return ERR_PTR(-ENOMEM); - ret = _read_opp_key(new_opp, np, &rate_not_available); + ret = _read_opp_key(new_opp, opp_table, np, &rate_not_available); if (ret < 0 && !opp_table->is_genpd) { dev_err(dev, "%s: opp key field not found\n", __func__); goto free_opp; From fe2af40250bfc3cca9120cd5c315e2ea7a1082ed Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Tue, 12 May 2020 15:53:23 +0300 Subject: [PATCH 06/16] opp: Update the bandwidth on OPP frequency changes If the OPP bandwidth values are populated, we want to switch also the interconnect bandwidth in addition to frequency and voltage. Signed-off-by: Georgi Djakov Reviewed-by: Matthias Kaehlcke Reviewed-by: Sibi Sankar Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index d19cc7970643..c3ce39c74de8 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -808,7 +808,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) unsigned long freq, old_freq, temp_freq; struct dev_pm_opp *old_opp, *opp; struct clk *clk; - int ret; + int ret, i; opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) { @@ -909,6 +909,17 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) dev_err(dev, "Failed to set required opps: %d\n", ret); } + if (!ret && opp_table->paths) { + for (i = 0; i < opp_table->path_count; i++) { + ret = icc_set_bw(opp_table->paths[i], + opp->bandwidth[i].avg, + opp->bandwidth[i].peak); + if (ret) + dev_err(dev, "Failed to set bandwidth[%d]: %d\n", + i, ret); + } + } + put_opp: dev_pm_opp_put(opp); put_old_opp: From 8b17f17a097bdbc9546f5fdde3098b5f21cbfbff Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Tue, 12 May 2020 15:53:24 +0300 Subject: [PATCH 07/16] cpufreq: dt: Add support for interconnect bandwidth scaling In addition to clocks and regulators, some devices can scale the 
bandwidth of their on-chip interconnect - for example between CPU and DDR memory. Add support for that, so that platforms which support it can make use of it. Signed-off-by: Georgi Djakov Reviewed-by: Matthias Kaehlcke [ Viresh: Reused dev_pm_opp_of_find_icc_paths(). Also drop the depends on from Kconfig. ] Signed-off-by: Viresh Kumar fixup! cpufreq: dt: Add support for interconnect bandwidth scaling --- drivers/cpufreq/cpufreq-dt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index 26fe8dfb9ce6..79742bbd221f 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -121,6 +121,10 @@ static int resources_available(void) clk_put(cpu_clk); + ret = dev_pm_opp_of_find_icc_paths(cpu_dev, NULL); + if (ret) + return ret; + name = find_supply_name(cpu_dev); /* Platform doesn't require regulator */ if (!name) From 0430b1d5704b0f0f1d237236dde9c143f8669e49 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 18 May 2020 14:25:32 +0300 Subject: [PATCH 08/16] opp: Expose bandwidth information via debugfs Expose the bandwidth information as well via debugfs. Signed-off-by: Viresh Kumar Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 18 ++++++++++++++++ drivers/opp/debugfs.c | 42 ++++++++++++++++++++++++++++++++++++ include/linux/interconnect.h | 6 ++++++ 3 files changed, 66 insertions(+) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 2d2e49780511..a56349c14985 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -514,6 +514,24 @@ void icc_set_tag(struct icc_path *path, u32 tag) } EXPORT_SYMBOL_GPL(icc_set_tag); +/** + * icc_get_name() - Get name of the icc path + * @path: reference to the path returned by icc_get() + * + * This function is used by an interconnect consumer to get the name of the icc + * path. + * + * Returns a valid pointer on success, or NULL otherwise. 
+ */ +const char *icc_get_name(struct icc_path *path) +{ + if (!path) + return NULL; + + return path->name; +} +EXPORT_SYMBOL_GPL(icc_get_name); + /** * icc_set_bw() - set bandwidth constraints on an interconnect path * @path: reference to the path returned by icc_get() diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 609665e339b6..596c185b5dda 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -32,6 +32,47 @@ void opp_debug_remove_one(struct dev_pm_opp *opp) debugfs_remove_recursive(opp->dentry); } +static ssize_t bw_name_read(struct file *fp, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct icc_path *path = fp->private_data; + char buf[64]; + int i; + + i = scnprintf(buf, sizeof(buf), "%.62s\n", icc_get_name(path)); + + return simple_read_from_buffer(userbuf, count, ppos, buf, i); +} + +static const struct file_operations bw_name_fops = { + .open = simple_open, + .read = bw_name_read, + .llseek = default_llseek, +}; + +static void opp_debug_create_bw(struct dev_pm_opp *opp, + struct opp_table *opp_table, + struct dentry *pdentry) +{ + struct dentry *d; + char name[11]; + int i; + + for (i = 0; i < opp_table->path_count; i++) { + snprintf(name, sizeof(name), "icc-path-%.1d", i); + + /* Create per-path directory */ + d = debugfs_create_dir(name, pdentry); + + debugfs_create_file("name", S_IRUGO, d, opp_table->paths[i], + &bw_name_fops); + debugfs_create_u32("peak_bw", S_IRUGO, d, + &opp->bandwidth[i].peak); + debugfs_create_u32("avg_bw", S_IRUGO, d, + &opp->bandwidth[i].avg); + } +} + static void opp_debug_create_supplies(struct dev_pm_opp *opp, struct opp_table *opp_table, struct dentry *pdentry) @@ -94,6 +135,7 @@ void opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table) &opp->clock_latency_ns); opp_debug_create_supplies(opp, opp_table, d); + opp_debug_create_bw(opp, opp_table, d); opp->dentry = d; } diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index 
34e97231a6ab..1ad09efd296e 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -32,6 +32,7 @@ struct icc_path *of_icc_get_by_index(struct device *dev, int idx); void icc_put(struct icc_path *path); int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw); void icc_set_tag(struct icc_path *path, u32 tag); +const char *icc_get_name(struct icc_path *path); #else @@ -65,6 +66,11 @@ static inline void icc_set_tag(struct icc_path *path, u32 tag) { } +static inline const char *icc_get_name(struct icc_path *path) +{ + return NULL; +} + #endif /* CONFIG_INTERCONNECT */ #endif /* __LINUX_INTERCONNECT_H */ From b23dfa3543f31fbb8c0098925bf90fc23193d17a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 15 May 2020 12:37:24 +0530 Subject: [PATCH 09/16] opp: Reorder the code for !target_freq case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reorder the code a bit to make it more readable. Add additional comment as well. Tested-by: Marek Szyprowski Acked-by: Clément Péron Tested-by: Clément Péron Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index c3ce39c74de8..5e1035a041ae 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -817,15 +817,21 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) } if (unlikely(!target_freq)) { - if (opp_table->required_opp_tables) { - ret = _set_required_opps(dev, opp_table, NULL); - } else if (!_get_opp_count(opp_table)) { + /* + * Some drivers need to support cases where some platforms may + * have OPP table for the device, while others don't and + * opp_set_rate() just needs to behave like clk_set_rate(). 
+ */ + if (!_get_opp_count(opp_table)) return 0; - } else { + + if (!opp_table->required_opp_tables) { dev_err(dev, "target frequency can't be 0\n"); ret = -EINVAL; + goto put_opp_table; } + ret = _set_required_opps(dev, opp_table, NULL); goto put_opp_table; } From 8d45719caaf56c859be0172447f8559c0df40f93 Mon Sep 17 00:00:00 2001 From: Kamil Konieczny Date: Fri, 19 Jul 2019 17:05:32 +0200 Subject: [PATCH 10/16] opp: core: add regulators enable and disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add enable regulators to dev_pm_opp_set_regulators() and disable regulators to dev_pm_opp_put_regulators(). Even if bootloader leaves regulators enabled, they should be enabled in kernel in order to increase the reference count. Tested-by: Marek Szyprowski Acked-by: Clément Péron Tested-by: Clément Péron Signed-off-by: Kamil Konieczny [ Viresh: Enable the regulator only after it is programmed and add a flag to track its status. ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 28 ++++++++++++++++++++++++++-- drivers/opp/opp.h | 2 ++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 5e1035a041ae..8efe12a90ce8 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -664,7 +664,7 @@ static inline int _generic_set_opp_clk_only(struct device *dev, struct clk *clk, return ret; } -static int _generic_set_opp_regulator(const struct opp_table *opp_table, +static int _generic_set_opp_regulator(struct opp_table *opp_table, struct device *dev, unsigned long old_freq, unsigned long freq, @@ -699,6 +699,18 @@ static int _generic_set_opp_regulator(const struct opp_table *opp_table, goto restore_freq; } + /* + * Enable the regulator after setting its voltages, otherwise it breaks + * some boot-enabled regulators. 
+ */ + if (unlikely(!opp_table->regulator_enabled)) { + ret = regulator_enable(reg); + if (ret < 0) + dev_warn(dev, "Failed to enable regulator: %d", ret); + else + opp_table->regulator_enabled = true; + } + return 0; restore_freq: @@ -825,12 +837,17 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) if (!_get_opp_count(opp_table)) return 0; - if (!opp_table->required_opp_tables) { + if (!opp_table->required_opp_tables && !opp_table->regulators) { dev_err(dev, "target frequency can't be 0\n"); ret = -EINVAL; goto put_opp_table; } + if (opp_table->regulator_enabled) { + regulator_disable(opp_table->regulators[0]); + opp_table->regulator_enabled = false; + } + ret = _set_required_opps(dev, opp_table, NULL); goto put_opp_table; } @@ -1718,6 +1735,13 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) /* Make sure there are no concurrent readers while updating opp_table */ WARN_ON(!list_empty(&opp_table->opp_list)); + if (opp_table->regulator_enabled) { + for (i = opp_table->regulator_count - 1; i >= 0; i--) + regulator_disable(opp_table->regulators[i]); + + opp_table->regulator_enabled = false; + } + for (i = opp_table->regulator_count - 1; i >= 0; i--) regulator_put(opp_table->regulators[i]); diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 2b81ffef1ba4..e51646ff279e 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -147,6 +147,7 @@ enum opp_table_access { * @clk: Device's clock handle * @regulators: Supply regulators * @regulator_count: Number of power supply regulators. Its value can be -1 + * @regulator_enabled: Set to true if regulators were previously enabled. * (uninitialized), 0 (no opp-microvolt property) or > 0 (has opp-microvolt * property). 
* @paths: Interconnect path handles @@ -194,6 +195,7 @@ struct opp_table { struct clk *clk; struct regulator **regulators; int regulator_count; + bool regulator_enabled; struct icc_path **paths; unsigned int path_count; bool genpd_performance_state; From b00e667a6d8b2b0f53098ec12ff1ffe7f7c9be20 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 27 May 2020 09:33:44 +0530 Subject: [PATCH 11/16] opp: Remove bandwidth votes when target_freq is zero We already drop several votes when target_freq is set to zero, drop bandwidth votes as well. Reported-by: Sibi Sankar Reviewed-by: Georgi Djakov Tested-by: Georgi Djakov Reviewed-by: Sibi Sankar Tested-by: Sibi Sankar Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 49 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 8efe12a90ce8..dfbd3d10410c 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -725,6 +725,34 @@ static int _generic_set_opp_regulator(struct opp_table *opp_table, return ret; } +static int _set_opp_bw(const struct opp_table *opp_table, + struct dev_pm_opp *opp, struct device *dev, bool remove) +{ + u32 avg, peak; + int i, ret; + + if (!opp_table->paths) + return 0; + + for (i = 0; i < opp_table->path_count; i++) { + if (remove) { + avg = 0; + peak = 0; + } else { + avg = opp->bandwidth[i].avg; + peak = opp->bandwidth[i].peak; + } + ret = icc_set_bw(opp_table->paths[i], avg, peak); + if (ret) { + dev_err(dev, "Failed to %s bandwidth[%d]: %d\n", + remove ? 
"remove" : "set", i, ret); + return ret; + } + } + + return 0; +} + static int _set_opp_custom(const struct opp_table *opp_table, struct device *dev, unsigned long old_freq, unsigned long freq, @@ -820,7 +848,7 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) unsigned long freq, old_freq, temp_freq; struct dev_pm_opp *old_opp, *opp; struct clk *clk; - int ret, i; + int ret; opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) { @@ -837,12 +865,17 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) if (!_get_opp_count(opp_table)) return 0; - if (!opp_table->required_opp_tables && !opp_table->regulators) { + if (!opp_table->required_opp_tables && !opp_table->regulators && + !opp_table->paths) { dev_err(dev, "target frequency can't be 0\n"); ret = -EINVAL; goto put_opp_table; } + ret = _set_opp_bw(opp_table, NULL, dev, true); + if (ret) + return ret; + if (opp_table->regulator_enabled) { regulator_disable(opp_table->regulators[0]); opp_table->regulator_enabled = false; @@ -932,16 +965,8 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) dev_err(dev, "Failed to set required opps: %d\n", ret); } - if (!ret && opp_table->paths) { - for (i = 0; i < opp_table->path_count; i++) { - ret = icc_set_bw(opp_table->paths[i], - opp->bandwidth[i].avg, - opp->bandwidth[i].peak); - if (ret) - dev_err(dev, "Failed to set bandwidth[%d]: %d\n", - i, ret); - } - } + if (!ret) + ret = _set_opp_bw(opp_table, opp, dev, false); put_opp: dev_pm_opp_put(opp); From 45679f9b508f10c12a1e93cf2bdccbc1c594aa39 Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Thu, 28 May 2020 00:54:18 +0530 Subject: [PATCH 12/16] opp: Don't parse icc paths unnecessarily The DT node of the device may contain interconnect paths while the OPP table doesn't have the bandwidth values. There is no need to parse the paths in such cases. 
Signed-off-by: Sibi Sankar Tested-by: Sibi Sankar Reviewed-by: Sibi Sankar [ Viresh: Support the case of !opp_table and massaged changelog ] Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 61fce1284f01..9a5873591a40 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -332,13 +332,56 @@ static int _of_opp_alloc_required_opps(struct opp_table *opp_table, return ret; } +static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table) +{ + struct device_node *np, *opp_np; + struct property *prop; + + if (!opp_table) { + np = of_node_get(dev->of_node); + if (!np) + return -ENODEV; + + opp_np = _opp_of_get_opp_desc_node(np, 0); + of_node_put(np); + } else { + opp_np = of_node_get(opp_table->np); + } + + /* Lets not fail in case we are parsing opp-v1 bindings */ + if (!opp_np) + return 0; + + /* Checking only first OPP is sufficient */ + np = of_get_next_available_child(opp_np, NULL); + if (!np) { + dev_err(dev, "OPP table empty\n"); + return -EINVAL; + } + of_node_put(opp_np); + + prop = of_find_property(np, "opp-peak-kBps", NULL); + of_node_put(np); + + if (!prop || !prop->length) + return 0; + + return 1; +} + int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table) { struct device_node *np; - int ret = 0, i, count, num_paths; + int ret, i, count, num_paths; struct icc_path **paths; + ret = _bandwidth_supported(dev, opp_table); + if (ret <= 0) + return ret; + + ret = 0; + np = of_node_get(dev->of_node); if (!np) return 0; From afd8d7c7f93681370f6fc2ec62f2705710eee62d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 31 May 2020 23:00:59 +0200 Subject: [PATCH 13/16] PM: hibernate: Add __init annotation to swsusp_header_init() 'swsusp_header_init()' is only called via 'core_initcall'. It can be marked as __init to save a few bytes of memory. 
Signed-off-by: Christophe JAILLET [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index ca0fcb5ced71..01e2858b5fe3 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -1590,7 +1590,7 @@ int swsusp_unmark(void) } #endif -static int swsusp_header_init(void) +static int __init swsusp_header_init(void) { swsusp_header = (struct swsusp_header*) __get_free_page(GFP_KERNEL); if (!swsusp_header) From cf6fada71543ceea0f6228ffdc0b85778f3f5a6e Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 30 May 2020 10:08:30 +0800 Subject: [PATCH 14/16] cpufreq: change '.set_boost' to act on one policy Macro 'for_each_active_policy()' is defined internally. To avoid some cpufreq driver needing this macro to iterate over all the policies in '.set_boost' callback, we redefine '.set_boost' to act on only one policy and pass the policy as an argument. 'cpufreq_boost_trigger_state()' iterates over all the policies to set boost for the system. This is preparation for adding SW BOOST support for CPPC. To protect Boost enable/disable by sysfs from CPU online/offline, add 'cpu_hotplug_lock' before calling '.set_boost' for each CPU. Also move the lock from 'set_boost()' to 'store_cpb()' in acpi_cpufreq. Signed-off-by: Xiongfeng Wang Suggested-by: Viresh Kumar Acked-by: Viresh Kumar [ rjw: Subject & changelog ] Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 14 +++++---- drivers/cpufreq/cpufreq.c | 57 ++++++++++++++++++---------------- include/linux/cpufreq.h | 2 +- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 289e8ce3fd13..429e5a36c08a 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -126,12 +126,12 @@ static void boost_set_msr_each(void *p_en) boost_set_msr(enable); } -static int set_boost(int val) +static int set_boost(struct cpufreq_policy *policy, int val) { - get_online_cpus(); - on_each_cpu(boost_set_msr_each, (void *)(long)val, 1); - put_online_cpus(); - pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis"); + on_each_cpu_mask(policy->cpus, boost_set_msr_each, + (void *)(long)val, 1); + pr_debug("CPU %*pbl: Core Boosting %sabled.\n", + cpumask_pr_args(policy->cpus), val ? "en" : "dis"); return 0; } @@ -162,7 +162,9 @@ static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, if (ret || val > 1) return -EINVAL; - set_boost(val); + get_online_cpus(); + set_boost(policy, val); + put_online_cpus(); return count; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d03f250f68e4..0128de3603df 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2532,34 +2532,29 @@ EXPORT_SYMBOL_GPL(cpufreq_update_limits); /********************************************************************* * BOOST * *********************************************************************/ -static int cpufreq_boost_set_sw(int state) +static int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) { - struct cpufreq_policy *policy; + int ret; - for_each_active_policy(policy) { - int ret; + if (!policy->freq_table) + return -ENXIO; - if (!policy->freq_table) - return -ENXIO; - - ret = cpufreq_frequency_table_cpuinfo(policy, - policy->freq_table); - if (ret) { - pr_err("%s: Policy frequency update failed\n", - 
__func__); - return ret; - } - - ret = freq_qos_update_request(policy->max_freq_req, policy->max); - if (ret < 0) - return ret; + ret = cpufreq_frequency_table_cpuinfo(policy, policy->freq_table); + if (ret) { + pr_err("%s: Policy frequency update failed\n", __func__); + return ret; } + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + return ret; + return 0; } int cpufreq_boost_trigger_state(int state) { + struct cpufreq_policy *policy; unsigned long flags; int ret = 0; @@ -2570,15 +2565,25 @@ int cpufreq_boost_trigger_state(int state) cpufreq_driver->boost_enabled = state; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - ret = cpufreq_driver->set_boost(state); - if (ret) { - write_lock_irqsave(&cpufreq_driver_lock, flags); - cpufreq_driver->boost_enabled = !state; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); - - pr_err("%s: Cannot %s BOOST\n", - __func__, state ? "enable" : "disable"); + get_online_cpus(); + for_each_active_policy(policy) { + ret = cpufreq_driver->set_boost(policy, state); + if (ret) + goto err_reset_state; } + put_online_cpus(); + + return 0; + +err_reset_state: + put_online_cpus(); + + write_lock_irqsave(&cpufreq_driver_lock, flags); + cpufreq_driver->boost_enabled = !state; + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + + pr_err("%s: Cannot %s BOOST\n", + __func__, state ? 
"enable" : "disable"); return ret; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 67d5950bd878..3494f6763597 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -367,7 +367,7 @@ struct cpufreq_driver { /* platform specific boost support code */ bool boost_enabled; - int (*set_boost)(int state); + int (*set_boost)(struct cpufreq_policy *policy, int state); }; /* flags */ From 54e74df5d76dea824c7c0c9d1b97150bf9b33793 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 30 May 2020 10:08:31 +0800 Subject: [PATCH 15/16] cpufreq: CPPC: add SW BOOST support To add SW BOOST support for CPPC, we need to get the max frequency of boost mode and non-boost mode. ACPI spec 6.2 section 8.4.7.1 describes the following two CPC registers. "Highest performance is the absolute maximum performance an individual processor may reach, assuming ideal conditions. This performance level may not be sustainable for long durations, and may only be achievable if other platform components are in a specific state; for example, it may require other processors be in an idle state. Nominal Performance is the maximum sustained performance level of the processor, assuming ideal operating conditions. In absence of an external constraint (power, thermal, etc.) this is the performance level the platform is expected to be able to maintain continuously. All processors are expected to be able to sustain their nominal performance state simultaneously." To add SW BOOST support for CPPC, we can use Highest Performance as the max performance in boost mode and Nominal Performance as the max performance in non-boost mode. If the Highest Performance is greater than the Nominal Performance, we assume SW BOOST is supported. The current CPPC driver does not support SW BOOST and use 'Highest Performance' as the max performance the CPU can achieve. 'Nominal Performance' is used to convert 'performance' to 'frequency'. 
That means, if firmware enable boost and provide a value for Highest Performance which is greater than Nominal Performance, boost feature is enabled by default. Because SW BOOST is disabled by default, so, after this patch, boost feature is disabled by default even if boost is enabled by firmware. Signed-off-by: Xiongfeng Wang Suggested-by: Viresh Kumar Acked-by: Viresh Kumar [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cppc_cpufreq.c | 39 ++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index bda0b2406fba..257d726a4456 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -37,6 +37,7 @@ * requested etc. */ static struct cppc_cpudata **all_cpu_data; +static bool boost_supported; struct cppc_workaround_oem_info { char oem_id[ACPI_OEM_ID_SIZE + 1]; @@ -310,7 +311,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * Section 8.4.7.1.1.5 of ACPI 6.1 spec) */ policy->min = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.lowest_nonlinear_perf); - policy->max = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.highest_perf); + policy->max = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.nominal_perf); /* * Set cpuinfo.min_freq to Lowest to make the full range of performance @@ -318,7 +319,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) * nonlinear perf */ policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.lowest_perf); - policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.highest_perf); + policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.nominal_perf); policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu_num); policy->shared_type = cpu->shared_type; @@ -343,6 +344,13 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu->cur_policy = policy; + /* + * If 'highest_perf' is greater than 
'nominal_perf', we assume CPU Boost + * is supported. + */ + if (cpu->perf_caps.highest_perf > cpu->perf_caps.nominal_perf) + boost_supported = true; + /* Set policy->cur to max now. The governors will adjust later. */ policy->cur = cppc_cpufreq_perf_to_khz(cpu, cpu->perf_caps.highest_perf); @@ -410,6 +418,32 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpunum) return cppc_get_rate_from_fbctrs(cpu, fb_ctrs_t0, fb_ctrs_t1); } +static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state) +{ + struct cppc_cpudata *cpudata; + int ret; + + if (!boost_supported) { + pr_err("BOOST not supported by CPU or firmware\n"); + return -EINVAL; + } + + cpudata = all_cpu_data[policy->cpu]; + if (state) + policy->max = cppc_cpufreq_perf_to_khz(cpudata, + cpudata->perf_caps.highest_perf); + else + policy->max = cppc_cpufreq_perf_to_khz(cpudata, + cpudata->perf_caps.nominal_perf); + policy->cpuinfo.max_freq = policy->max; + + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + return ret; + + return 0; +} + static struct cpufreq_driver cppc_cpufreq_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = cppc_verify_policy, @@ -417,6 +451,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = { .get = cppc_cpufreq_get_rate, .init = cppc_cpufreq_cpu_init, .stop_cpu = cppc_cpufreq_stop_cpu, + .set_boost = cppc_cpufreq_set_boost, .name = "cppc_cpufreq", }; From 956ad9d98b73f59e442cc119c98ba1e04e94fe6d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 4 Jun 2020 19:22:26 +0200 Subject: [PATCH 16/16] ACPI: PM: Avoid using power resources if there are none for D0 As recently reported, some platforms provide a list of power resources for device power state D3hot, through the _PR3 object, but they do not provide a list of power resources for device power state D0. 
Among other things, this causes acpi_device_get_power() to return D3hot as the current state of the device in question if all of the D3hot power resources are "on", because it sees the power_resources flag set and calls acpi_power_get_inferred_state() which finds that D3hot is the shallowest power state with all of the associated power resources turned "on", so that's what it returns. Moreover, that value takes precedence over the acpi_dev_pm_explicit_get() return value, because it means a deeper power state. The device may very well be in D0 physically at that point, however. Moreover, the presence of _PR3 without _PR0 for a given device means that only one D3-level power state can be supported by it. Namely, because there are no power resources to turn "off" when transitioning the device from D0 into D3cold (which should be supported since _PR3 is present), the evaluation of _PS3 should be sufficient to put it straight into D3cold, but this means that the effect of turning "on" the _PR3 power resources is unclear, so it is better to avoid doing that altogether. Consequently, there is no practical way to distinguish D3cold from D3hot for the device in question and the power states of it can be labeled so that D3hot is the deepest supported one (and Linux assumes that putting a device into D3hot via ACPI may cause power to be removed from it anyway, for legacy reasons). To work around the problem described above, modify the ACPI enumeration of devices so that power resources are only used for device power management if the list of D0 power resources is not empty and make it mark D3cold as supported only if that is the case and the D3hot list of power resources is not empty too. 
Fixes: ef85bdbec444 ("ACPI / scan: Consolidate extraction of power resources lists") Link: https://bugzilla.kernel.org/show_bug.cgi?id=205057 Link: https://lore.kernel.org/linux-acpi/20200603194659.185757-1-hdegoede@redhat.com/ Reported-by: Hans de Goede Tested-by: Hans de Goede Tested-by: youling257@gmail.com Cc: 3.10+ # 3.10+ Signed-off-by: Rafael J. Wysocki Reviewed-by: Hans de Goede --- drivers/acpi/device_pm.c | 2 +- drivers/acpi/scan.c | 28 +++++++++++++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 5832bc10aca8..95e200b618bd 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -186,7 +186,7 @@ int acpi_device_set_power(struct acpi_device *device, int state) * possibly drop references to the power resources in use. */ state = ACPI_STATE_D3_HOT; - /* If _PR3 is not available, use D3hot as the target state. */ + /* If D3cold is not supported, use D3hot as the target state. */ if (!device->power.states[ACPI_STATE_D3_COLD].flags.valid) target_state = state; } else if (!device->power.states[state].flags.valid) { diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 6d3448895382..1b255e98de4d 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -919,12 +919,9 @@ static void acpi_bus_init_power_state(struct acpi_device *device, int state) if (buffer.length && package && package->type == ACPI_TYPE_PACKAGE - && package->package.count) { - int err = acpi_extract_power_resources(package, 0, - &ps->resources); - if (!err) - device->power.flags.power_resources = 1; - } + && package->package.count) + acpi_extract_power_resources(package, 0, &ps->resources); + ACPI_FREE(buffer.pointer); } @@ -971,14 +968,27 @@ static void acpi_bus_get_power_flags(struct acpi_device *device) acpi_bus_init_power_state(device, i); INIT_LIST_HEAD(&device->power.states[ACPI_STATE_D3_COLD].resources); - if (!list_empty(&device->power.states[ACPI_STATE_D3_HOT].resources)) - 
device->power.states[ACPI_STATE_D3_COLD].flags.valid = 1; - /* Set defaults for D0 and D3hot states (always valid) */ + /* Set the defaults for D0 and D3hot (always supported). */ device->power.states[ACPI_STATE_D0].flags.valid = 1; device->power.states[ACPI_STATE_D0].power = 100; device->power.states[ACPI_STATE_D3_HOT].flags.valid = 1; + /* + * Use power resources only if the D0 list of them is populated, because + * some platforms may provide _PR3 only to indicate D3cold support and + * in those cases the power resources list returned by it may be bogus. + */ + if (!list_empty(&device->power.states[ACPI_STATE_D0].resources)) { + device->power.flags.power_resources = 1; + /* + * D3cold is supported if the D3hot list of power resources is + * not empty. + */ + if (!list_empty(&device->power.states[ACPI_STATE_D3_HOT].resources)) + device->power.states[ACPI_STATE_D3_COLD].flags.valid = 1; + } + if (acpi_bus_init_power(device)) device->flags.power_manageable = 0; }