From 069035c5db3459b9b5f12caf3bffed9a863fa5c4 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Tue, 22 May 2018 17:13:04 +0300
Subject: [PATCH 001/215] drm: Fix possible race conditions while unplugging
 DRM device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When unplugging a hotpluggable DRM device we first unregister it
with drm_dev_unregister and then set drm_device.unplugged flag which
is used to mark device critical sections with drm_dev_enter()/
drm_dev_exit() preventing access to device resources that are not
available after the device is gone.
But drm_dev_unregister may lead to hotplug uevent(s) fired to
user-space on card and/or connector removal, thus making it possible
for user-space to try accessing a disconnected device.

Fix this by first making sure device is properly marked as
disconnected and only then unregister it.

Fixes: bee330f3d672 ("drm: Use srcu to protect drm_device.unplugged")

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Reported-by: Andrii Chepurnyi <andrii_chepurnyi@epam.com>
Cc: "Noralf Trønnes" <noralf@tronnes.org>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20180522141304.18646-1-andr2000@gmail.com
---
 drivers/gpu/drm/drm_drv.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index f6910ebe4d0e..cc2675550e28 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -369,13 +369,6 @@ EXPORT_SYMBOL(drm_dev_exit);
  */
 void drm_dev_unplug(struct drm_device *dev)
 {
-	drm_dev_unregister(dev);
-
-	mutex_lock(&drm_global_mutex);
-	if (dev->open_count == 0)
-		drm_dev_put(dev);
-	mutex_unlock(&drm_global_mutex);
-
 	/*
 	 * After synchronizing any critical read section is guaranteed to see
 	 * the new value of ->unplugged, and any critical section which might
@@ -384,6 +377,13 @@ void drm_dev_unplug(struct drm_device *dev)
 	 */
 	dev->unplugged = true;
 	synchronize_srcu(&drm_unplug_srcu);
+
+	drm_dev_unregister(dev);
+
+	mutex_lock(&drm_global_mutex);
+	if (dev->open_count == 0)
+		drm_dev_put(dev);
+	mutex_unlock(&drm_global_mutex);
 }
 EXPORT_SYMBOL(drm_dev_unplug);
 

From 889ad63d41eea20184b0483e7e585e5b20fb6cfe Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Fri, 1 Jun 2018 16:05:32 -0400
Subject: [PATCH 002/215] drm/qxl: Call qxl_bo_unref outside atomic context

"qxl_bo_unref" may sleep, but calling "qxl_release_map" causes
"preempt_disable()" to be called and "preempt_enable()" isn't called
until "qxl_release_unmap" is used. Move the call to "qxl_bo_unref" out
from in between the two to avoid sleeping from an atomic context.

This issue can be demonstrated on a kernel with CONFIG_LOCKDEP=y by
creating a VM using QXL, using a desktop environment using Xorg, then
moving the cursor on or off a window.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1571128
Fixes: 9428088c90b6 ("drm/qxl: reapply cursor after resetting primary")
Cc: stable@vger.kernel.org
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20180601200532.13619-1-jcline@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 drivers/gpu/drm/qxl/qxl_display.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index ecb35ed0eac8..61e51516fec5 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -630,7 +630,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 	struct qxl_cursor_cmd *cmd;
 	struct qxl_cursor *cursor;
 	struct drm_gem_object *obj;
-	struct qxl_bo *cursor_bo = NULL, *user_bo = NULL;
+	struct qxl_bo *cursor_bo = NULL, *user_bo = NULL, *old_cursor_bo = NULL;
 	int ret;
 	void *user_ptr;
 	int size = 64*64*4;
@@ -684,7 +684,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 							   cursor_bo, 0);
 		cmd->type = QXL_CURSOR_SET;
 
-		qxl_bo_unref(&qcrtc->cursor_bo);
+		old_cursor_bo = qcrtc->cursor_bo;
 		qcrtc->cursor_bo = cursor_bo;
 		cursor_bo = NULL;
 	} else {
@@ -704,6 +704,9 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
 	qxl_release_fence_buffer_objects(release);
 
+	if (old_cursor_bo)
+		qxl_bo_unref(&old_cursor_bo);
+
 	qxl_bo_unref(&cursor_bo);
 
 	return;

From 41477acf092251eb0cfe83068f48dbcb2521478a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 Jun 2018 14:19:54 -0300
Subject: [PATCH 003/215] perf hists: Save the callchain_size in struct
 hist_entry

So that we can figure out the real size of the struct and also be able
to tell if callchains may be present in this histogram entry.

Since we can't always guarantee that from hist_entry->hists we can use
hists_to_evsel, to then look at evsel->attr.sample_type for
PERF_SAMPLE_CALLCHAIN, like with the 'perf c2c' tool, that uses plain
'struct hists' instances, we need another way of deciding if a specific
hist_entry instance has callchains associated with it, i.e. if its
hist_entry->callchain[0] has space allocated for.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-ptvndealxs1k7myluvu9flnq@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hist.c | 6 ++++--
 tools/perf/util/sort.h | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 52e8fda93a47..0441a92b855f 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -370,9 +370,11 @@ void hists__delete_entries(struct hists *hists)
 
 static int hist_entry__init(struct hist_entry *he,
 			    struct hist_entry *template,
-			    bool sample_self)
+			    bool sample_self,
+			    size_t callchain_size)
 {
 	*he = *template;
+	he->callchain_size = callchain_size;
 
 	if (symbol_conf.cumulate_callchain) {
 		he->stat_acc = malloc(sizeof(he->stat));
@@ -473,7 +475,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
 
 	he = ops->new(callchain_size);
 	if (he) {
-		err = hist_entry__init(he, template, sample_self);
+		err = hist_entry__init(he, template, sample_self, callchain_size);
 		if (err) {
 			ops->free(he);
 			he = NULL;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7cf2d5cc038e..9ab9257ed887 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -112,6 +112,8 @@ struct hist_entry {
 
 	char			level;
 	u8			filtered;
+
+	u16			callchain_size;
 	union {
 		/*
 		 * Since perf diff only supports the stdio output, TUI

From e5654455795f2f89328f7b301dacb6926e57e2b8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 Jun 2018 14:27:19 -0300
Subject: [PATCH 004/215] perf hists: Make hist_entry__has_callchains() work
 with 'perf c2c'

Since 'perf c2c' uses 'struct hists' not allocated together with a
'struct perf_evsel' instance, we can't go from a 'struct hist_entry'
pointer to a 'struct perf_evsel' via he->hists, so, instead, check if
space was set aside for hist_entry->callchain[0] at hist_entry__new()
time.

Reported-by: Jin Yao <yao.jin@linux.intel.com>
Reported-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: fabd37b837f6 ("perf hists: Check if a hist_entry has callchains before using them")
Link: https://lkml.kernel.org/n/tip-e8ife8djvvvwmeze3s4yodii@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 9ab9257ed887..8bf302cafcec 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -155,7 +155,7 @@ struct hist_entry {
 
 static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
 {
-	return hists__has_callchains(he->hists);
+	return he->callchain_size != 0;
 }
 
 static inline bool hist_entry__has_pairs(struct hist_entry *he)

From 29f9fcdd3f8edccad5809cf939ce921752460fe7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 Jun 2018 14:33:31 -0300
Subject: [PATCH 005/215] perf hists browser gtk: Use
 hist_entry__has_callchains()

Since we can't go from struct hists to struct evsel for all cases (c2c
is an exception) and we have access to the hist_entry, use
hist_entry__has_callchains() in the GTK+ hists browser to figure out
if callchains are available.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-8owkgrruzzi5emvblwh4e6le@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/gtk/hists.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index b085f1b3e34d..4ab663ec3e5e 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -382,7 +382,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
 			gtk_tree_store_set(store, &iter, col_idx++, s, -1);
 		}
 
-		if (hists__has_callchains(hists) &&
+		if (hist_entry__has_callchains(h) &&
 		    symbol_conf.use_callchain && hists__has(hists, sym)) {
 			if (callchain_param.mode == CHAIN_GRAPH_REL)
 				total = symbol_conf.cumulate_callchain ?

From c9d366287042489090da0391318df528bdce9941 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 Jun 2018 14:42:27 -0300
Subject: [PATCH 006/215] perf hists: Reimplement hists__has_callchains()

There are places where we have only access to struct hists and need to
know if any of its hist_entries has callchains, like when drawing
headers for the various output modes (stdio, TUI, etc), so, when adding
a new hist_entry, check if it has callchains, storing this info for
later use by hists__has_callchains().

This reimplementation is necessary because not always a 'struct hists'
is allocated together with a 'struct perf evsel', so we can't go from
'hists' to 'perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN'.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-hg5g7yddjio3ljwyqnnaj5dt@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hist.c | 6 ++++--
 tools/perf/util/hist.h | 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 0441a92b855f..828cb9794c76 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -621,9 +621,11 @@ __hists__add_entry(struct hists *hists,
 		.raw_data = sample->raw_data,
 		.raw_size = sample->raw_size,
 		.ops = ops,
-	};
+	}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
 
-	return hists__findnew_entry(hists, &entry, al, sample_self);
+	if (!hists->has_callchains && he && he->callchain_size != 0)
+		hists->has_callchains = true;
+	return he;
 }
 
 struct hist_entry *hists__add_entry(struct hists *hists,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 06607c434949..73049f7f0f60 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -85,6 +85,7 @@ struct hists {
 	struct events_stats	stats;
 	u64			event_stream;
 	u16			col_len[HISTC_NR_COLS];
+	bool			has_callchains;
 	int			socket_filter;
 	struct perf_hpp_list	*hpp_list;
 	struct list_head	hpp_formats;
@@ -222,8 +223,7 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel)
 
 static __pure inline bool hists__has_callchains(struct hists *hists)
 {
-	const struct perf_evsel *evsel = hists_to_evsel(hists);
-	return evsel__has_callchain(evsel);
+	return hists->has_callchains;
 }
 
 int hists__init(void);

From f7fa827f5f432a0b1f34e10fc49da93aeef9f817 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 7 Jun 2018 00:15:05 +0200
Subject: [PATCH 007/215] perf tools: Fix error index for pmu event parser

For events we provide specific error message we need to set error column
index, PMU parser is missing that, adding it.

Before:

  $ perf stat -e cycles,krava/cycles/ kill
  event syntax error: 'cycles,krava/cycles/'
                       \___ Cannot find PMU `krava'. Missing kernel support?

After:

  $ perf stat -e cycles,krava/cycles/ kill
  event syntax error: 'cycles,krava/cycles/'
                              \___ Cannot find PMU `krava'. Missing kernel support?

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20180606221513.11302-3-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.y | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 155d2570274f..da8fe57691b8 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -227,11 +227,16 @@ event_def: event_pmu |
 event_pmu:
 PE_NAME opt_pmu_config
 {
+	struct parse_events_state *parse_state = _parse_state;
+	struct parse_events_error *error = parse_state->error;
 	struct list_head *list, *orig_terms, *terms;
 
 	if (parse_events_copy_term_list($2, &orig_terms))
 		YYABORT;
 
+	if (error)
+		error->idx = @1.first_column;
+
 	ALLOC_LIST(list);
 	if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
 		struct perf_pmu *pmu = NULL;

From 9660e08ee8cbc94ac835f2c30576c6e51fbece8f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 7 Jun 2018 00:15:06 +0200
Subject: [PATCH 008/215] perf stat: Add --interval-clear option

Adding --interval-clear option to clear the screen before next interval.

Committer testing:

  # perf stat -I 1000 --interval-clear

And, as expected, it behaves almost like:

  # watch -n 0 perf stat -a sleep 1

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20180606221513.11302-4-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt |  3 +++
 tools/perf/builtin-stat.c              | 11 +++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 5dfe102fb5b5..b10a90b6a718 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -178,6 +178,9 @@ Print count deltas for fixed number of times.
 This option should be used together with "-I" option.
 	example: 'perf stat -I 1000 --interval-count 2 -e cycles -a'
 
+--interval-clear::
+Clear the screen before next interval.
+
 --timeout msecs::
 Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms).
 This option is not supported with the "-I" option.
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 096ccb25c11f..f1532e3ac7d7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,7 @@
 #include "util/tool.h"
 #include "util/string2.h"
 #include "util/metricgroup.h"
+#include "util/top.h"
 #include "asm/bug.h"
 
 #include <linux/time64.h>
@@ -173,6 +174,7 @@ static struct cpu_map		*aggr_map;
 static aggr_get_id_t		aggr_get_id;
 static bool			append_file;
 static bool			interval_count;
+static bool			interval_clear;
 static const char		*output_name;
 static int			output_fd;
 static int			print_free_counters_hint;
@@ -1704,9 +1706,12 @@ static void print_interval(char *prefix, struct timespec *ts)
 	FILE *output = stat_config.output;
 	static int num_print_interval;
 
+	if (interval_clear)
+		puts(CONSOLE_CLEAR);
+
 	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
 
-	if (num_print_interval == 0 && !csv_output) {
+	if ((num_print_interval == 0 && !csv_output) || interval_clear) {
 		switch (stat_config.aggr_mode) {
 		case AGGR_SOCKET:
 			fprintf(output, "#           time socket cpus");
@@ -1738,7 +1743,7 @@ static void print_interval(char *prefix, struct timespec *ts)
 		}
 	}
 
-	if (num_print_interval == 0 && metric_only)
+	if ((num_print_interval == 0 && metric_only) || interval_clear)
 		print_metric_headers(" ", true);
 	if (++num_print_interval == 25)
 		num_print_interval = 0;
@@ -2057,6 +2062,8 @@ static const struct option stat_options[] = {
 		    "(overhead is possible for values <= 100ms)"),
 	OPT_INTEGER(0, "interval-count", &stat_config.times,
 		    "print counts for fixed number of times"),
+	OPT_BOOLEAN(0, "interval-clear", &interval_clear,
+		    "clear screen in between new interval"),
 	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
 		    "stop workload and print counts after a timeout period in ms (>= 10ms)"),
 	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,

From b37d33edbf41b532ddd156707c037c6f4784e40b Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 7 Jun 2018 00:15:07 +0200
Subject: [PATCH 009/215] perf stat: Use only color_fprintf call in
 print_metric_only

We can call color_fprintf also for non color case, it's handled
properly. This change simplifies following patch.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20180606221513.11302-5-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f1532e3ac7d7..9e7b6f108956 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1008,10 +1008,7 @@ static void print_metric_only(void *ctx, const char *color, const char *fmt,
 	if (!valid_only_metric(unit))
 		return;
 	unit = fixunit(buf, os->evsel, unit);
-	if (color)
-		n = color_fprintf(out, color, fmt, val);
-	else
-		n = fprintf(out, fmt, val);
+	n = color_fprintf(out, color ?: "", fmt, val);
 	if (n > METRIC_ONLY_LEN)
 		n = METRIC_ONLY_LEN;
 	if (mlen < strlen(unit))

From f515572734fb323aa0efe9ea2c546cd7fee327f7 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 7 Jun 2018 00:15:08 +0200
Subject: [PATCH 010/215] perf stat: Fix metric column header display alignment

Make the metric only display aligned.

Before:
  # perf stat --topdown -I 1000
  #           time core         cpus retiring             bad speculation      frontend bound       backend bound
       1.000394323 S0-C0           2     37.4%               12.0%               31.4%               19.2%
       1.000394323 S0-C1           2     25.1%                9.2%               43.8%               21.9%
       2.001521204 S0-C0           2     36.4%               11.4%               32.4%               19.8%
       2.001521204 S0-C1           2     26.2%                9.4%               43.1%               21.3%
       3.001930208 S0-C0           2     35.1%               10.7%               33.6%               20.6%
       3.001930208 S0-C1           2     28.9%               10.0%               40.0%               21.1%

After:
  # perf stat --topdown -I 1000
  #           time core         cpus             retiring      bad speculation       frontend bound        backend bound
       1.000303722 S0-C0           2                34.2%                 7.6%                34.2%                24.0%
       1.000303722 S0-C1           2                33.1%                 6.4%                36.9%                23.6%
       2.001281055 S0-C0           2                34.6%                 6.7%                36.8%                21.8%
       2.001281055 S0-C1           2                32.8%                 7.1%                38.1%                22.0%
       3.001546080 S0-C0           2                39.3%                 5.5%                32.7%                22.5%
       3.001546080 S0-C1           2                37.8%                 6.0%                33.1%                23.1%

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20180606221513.11302-6-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9e7b6f108956..8f3fdc052728 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1001,19 +1001,20 @@ static void print_metric_only(void *ctx, const char *color, const char *fmt,
 {
 	struct outstate *os = ctx;
 	FILE *out = os->fh;
-	int n;
-	char buf[1024];
+	char buf[1024], str[1024];
 	unsigned mlen = METRIC_ONLY_LEN;
 
 	if (!valid_only_metric(unit))
 		return;
 	unit = fixunit(buf, os->evsel, unit);
-	n = color_fprintf(out, color ?: "", fmt, val);
-	if (n > METRIC_ONLY_LEN)
-		n = METRIC_ONLY_LEN;
 	if (mlen < strlen(unit))
 		mlen = strlen(unit) + 1;
-	fprintf(out, "%*s", mlen - n, "");
+
+	if (color)
+		mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
+
+	color_snprintf(str, sizeof(str), color ?: "", fmt, val);
+	fprintf(out, "%*s ", mlen, str);
 }
 
 static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
@@ -1053,7 +1054,7 @@ static void print_metric_header(void *ctx, const char *color __maybe_unused,
 	if (csv_output)
 		fprintf(os->fh, "%s%s", unit, csv_sep);
 	else
-		fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit);
+		fprintf(os->fh, "%*s ", METRIC_ONLY_LEN, unit);
 }
 
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
@@ -1721,7 +1722,7 @@ static void print_interval(char *prefix, struct timespec *ts)
 				fprintf(output, "             counts %*s events\n", unit_width, "unit");
 			break;
 		case AGGR_NONE:
-			fprintf(output, "#           time CPU");
+			fprintf(output, "#           time CPU    ");
 			if (!metric_only)
 				fprintf(output, "                counts %*s events\n", unit_width, "unit");
 			break;

From c1a1f5d9da800dc715d8c1d8a9692c63c70c2955 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 7 Jun 2018 00:15:09 +0200
Subject: [PATCH 011/215] perf stat: Allow to specify specific metric column
 len

The following change will introduce new metrics, that doesn't need such
wide hard coded spacing. Switch METRIC_ONLY_LEN macro usage with
metric_only_len variable.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20180606221513.11302-7-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8f3fdc052728..3fc1f5286d50 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -145,6 +145,8 @@ static struct target target = {
 
 typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
 
+#define METRIC_ONLY_LEN 20
+
 static int			run_count			=  1;
 static bool			no_inherit			= false;
 static volatile pid_t		child_pid			= -1;
@@ -182,6 +184,7 @@ static int			print_mixed_hw_group_error;
 static u64			*walltime_run;
 static bool			ru_display			= false;
 static struct rusage		ru_data;
+static unsigned int		metric_only_len			= METRIC_ONLY_LEN;
 
 struct perf_stat {
 	bool			 record;
@@ -969,8 +972,6 @@ static void print_metric_csv(void *ctx,
 	fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
 }
 
-#define METRIC_ONLY_LEN 20
-
 /* Filter out some columns that don't work well in metrics only mode */
 
 static bool valid_only_metric(const char *unit)
@@ -1002,7 +1003,7 @@ static void print_metric_only(void *ctx, const char *color, const char *fmt,
 	struct outstate *os = ctx;
 	FILE *out = os->fh;
 	char buf[1024], str[1024];
-	unsigned mlen = METRIC_ONLY_LEN;
+	unsigned mlen = metric_only_len;
 
 	if (!valid_only_metric(unit))
 		return;
@@ -1054,7 +1055,7 @@ static void print_metric_header(void *ctx, const char *color __maybe_unused,
 	if (csv_output)
 		fprintf(os->fh, "%s%s", unit, csv_sep);
 	else
-		fprintf(os->fh, "%*s ", METRIC_ONLY_LEN, unit);
+		fprintf(os->fh, "%*s ", metric_only_len, unit);
 }
 
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)

From a5cfa6217c94a1f1cfad4481fc14f5fc399abde3 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 7 Jun 2018 00:15:10 +0200
Subject: [PATCH 012/215] perf stat: Add event parsing error handling to
 add_default_attributes

Add missing error handling for parse_events calls in add_default_attributes
functions. The error handler displays error details, like for transactions (-T):

Before:
  $ perf stat -T
  Cannot set up transaction events

After:
  $ perf stat -T
  Cannot set up transaction events
  event syntax error: '..cycles,cpu/cycles-t/,cpu/tx-start/,cpu/el-start/,cpu/cycles-ct/}'
                                    \___ unknown term

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20180606221513.11302-8-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3fc1f5286d50..22547a490e1f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2442,14 +2442,13 @@ static int add_default_attributes(void)
 	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
 };
+	struct parse_events_error errinfo;
 
 	/* Set attrs if no event is selected and !null_run: */
 	if (null_run)
 		return 0;
 
 	if (transaction_run) {
-		struct parse_events_error errinfo;
-
 		if (pmu_have_event("cpu", "cycles-ct") &&
 		    pmu_have_event("cpu", "el-start"))
 			err = parse_events(evsel_list, transaction_attrs,
@@ -2460,6 +2459,7 @@ static int add_default_attributes(void)
 					   &errinfo);
 		if (err) {
 			fprintf(stderr, "Cannot set up transaction events\n");
+			parse_events_print_error(&errinfo, transaction_attrs);
 			return -1;
 		}
 		return 0;
@@ -2485,10 +2485,11 @@ static int add_default_attributes(void)
 		    pmu_have_event("msr", "smi")) {
 			if (!force_metric_only)
 				metric_only = true;
-			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
 		} else {
 			fprintf(stderr, "To measure SMI cost, it needs "
 				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			parse_events_print_error(&errinfo, smi_cost_attrs);
 			return -1;
 		}
 		if (err) {
@@ -2523,12 +2524,13 @@ static int add_default_attributes(void)
 		if (topdown_attrs[0] && str) {
 			if (warn)
 				arch_topdown_group_warn();
-			err = parse_events(evsel_list, str, NULL);
+			err = parse_events(evsel_list, str, &errinfo);
 			if (err) {
 				fprintf(stderr,
 					"Cannot set up top down events %s: %d\n",
 					str, err);
 				free(str);
+				parse_events_print_error(&errinfo, str);
 				return -1;
 			}
 		} else {

From c7d606f560e4c698884697fef503e4abacdd8c25 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 25 May 2018 14:41:39 -0700
Subject: [PATCH 013/215] x86/mce: Improve error message when kernel cannot
 recover

Since we added support to add recovery from some errors inside the kernel in:

commit b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")

we have done a less than stellar job at reporting the cause of recoverable
machine checks that occur in other parts of the kernel. The user just gets
the unhelpful message:

	mce: [Hardware Error]: Machine check: Action required: unknown MCACOD

doubly unhelpful when they check the manual for the reported IA32_MSR_STATUS.MCACOD
and see that it is listed as one of the standard recoverable values.

Add an extra rule to the MCE severity table to catch this case and report it
as:

	mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel

Fixes: b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries")
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: stable@vger.kernel.org # 4.6+
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/4cc7c465150a9a48b8b9f45d0b840278e77eb9b5.1527283897.git.tony.luck@intel.com
---
 arch/x86/kernel/cpu/mcheck/mce-severity.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5bbd06f38ff6..f34d89c01edc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -160,6 +160,11 @@ static struct severity {
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
 		USER
 		),
+	MCESEV(
+		PANIC, "Data load in unrecoverable area of kernel",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+		KERNEL
+		),
 #endif
 	MCESEV(
 		PANIC, "Action required: unknown MCACOD",

From 4c5717da1d021cf368eabb3cb1adcaead56c0d1e Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 25 May 2018 14:42:09 -0700
Subject: [PATCH 014/215] x86/mce: Check for alternate indication of machine
 check recovery on Skylake

Currently we just check the "CAPID0" register to see whether the CPU
can recover from machine checks.

But there are also some special SKUs which do not have all advanced
RAS features, but do enable machine check recovery for use with NVDIMMs.

Add a check for any of bits {8:5} in the "CAPID5" register (each
reports some NVDIMM mode available, if any of them are set, then
the system supports memory machine check recovery).

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: stable@vger.kernel.org # 4.9
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/03cbed6e99ddafb51c2eadf9a3b7c8d7a0cc204e.1527283897.git.tony.luck@intel.com
---
 arch/x86/kernel/quirks.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 697a4ce04308..736348ead421 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -645,12 +645,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
 /* Skylake */
 static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
 {
-	u32 capid0;
+	u32 capid0, capid5;
 
 	pci_read_config_dword(pdev, 0x84, &capid0);
+	pci_read_config_dword(pdev, 0x98, &capid5);
 
-	if ((capid0 & 0xc0) == 0xc0)
+	/*
+	 * CAPID0{7:6} indicate whether this is an advanced RAS SKU
+	 * CAPID5{8:5} indicate that various NVDIMM usage modes are
+	 * enabled, so memory machine check recovery is also enabled.
+	 */
+	if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
 		static_branch_inc(&mcsafe_key);
+
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);

From 4c8205273626f27b9e5a64bdc194ab483a8cce66 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 8 Jun 2018 02:22:11 +0200
Subject: [PATCH 015/215] perf c2c: Keep struct hist_entry at the end of struct
 c2c_hist_entry

Exactly as the comment just before 'struct c2c_hist_entry" says, i.e.
the last entry in struct hist_entry is a zero length array, that when
allocating space for hist_entry gets extra space if callchains are in
use, which, if hist_entry is not at the end of c2c_hist_entry, the
members after it gets corrupted when callchains get added to the rb
trees collecting them, etc.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jin Yao <yao.jin@linux.intel.com>
Fixes: 7f834c2e84bb ("perf c2c report: Display node for cacheline address")
Link: http://lkml.kernel.org/n/tip-bh0ke4fh2ygpj3yowna7o1di@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-c2c.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 307b3594525f..6a8738f7ead3 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -56,16 +56,16 @@ struct c2c_hist_entry {
 
 	struct compute_stats	 cstats;
 
+	unsigned long		 paddr;
+	unsigned long		 paddr_cnt;
+	bool			 paddr_zero;
+	char			*nodestr;
+
 	/*
 	 * must be at the end,
 	 * because of its callchain dynamic entry
 	 */
 	struct hist_entry	he;
-
-	unsigned long		 paddr;
-	unsigned long		 paddr_cnt;
-	bool			 paddr_zero;
-	char			*nodestr;
 };
 
 static char const *coalesce_default = "pid,iaddr";

From fad76d4333fe73cf3f73704aa34d4ce523b1c458 Mon Sep 17 00:00:00 2001
From: Seeteena Thoufeek <s1seetee@linux.vnet.ibm.com>
Date: Fri, 8 Jun 2018 16:32:28 +0530
Subject: [PATCH 016/215] perf script: Show hw-cache events

'perf script' fails to report hardware cache events (PERF_TYPE_HW_CACHE)
where as 'perf report' shows the samples. Fix it. Ex,

  # perf record -e L1-dcache-loads ./a.out
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.008 MB perf.data (11 samples)]

Before patch:

  # perf script | wc -l
  0

After patch:

  # perf script | wc -l
  11

Committer testing:

  [root@jouet ~]# perf script | head -30 | tail
        Timer 9803 [2] 8.963330:  1554 L1-dcache-loads: 7ffef89baae4 __vdso_clock_gettime+0xf4 ([vdso])
      swapper    0 [2] 8.963343:  5626 L1-dcache-loads: ffffffffa66f4f6b cpuidle_not_av+0xb (/lib/modules/4.17.0-rc5/build/vmlinux)
      firefox 4853 [2] 8.964070: 18935 L1-dcache-loads: 7f0b9a00dc30 xcb_poll_for_event+0x0 (/usr/lib64/libxcb.so.1.1.0)
  Softwar~cTh 4928 [2] 8.964548: 15928 L1-dcache-loads: ffffffffa60d795c update_curr+0x10c (/lib/modules/4.17.0-rc5/build/vmlinux)
      firefox 4853 [2] 8.964675: 14978 L1-dcache-loads: ffffffffa6897018 mutex_unlock+0x18 (/lib/modules/4.17.0-rc5/build/vmlinux)
  gnome-shell 2026 [3] 8.964693: 50670 L1-dcache-loads: 7fa08854de6d g_source_iter_next+0x6d (/usr/lib64/libglib-2.0.so.0.5400.3)
   Compositor 4929 [1] 8.964784: 71772 L1-dcache-loads: 7f0b936bf078 [unknown] (/usr/lib64/firefox/libxul.so)
     Xwayland 2096 [2] 8.964919: 16799 L1-dcache-loads: 7f68ce2fcb8a glXGetCurrentContext+0x1a (/usr/lib64/libGLX.so.0.0.0)
  gnome-shell 2026 [3] 8.964997: 50670 L1-dcache-loads: 7fa08854de6d g_source_iter_next+0x6d (/usr/lib64/libglib-2.0.so.0.5400.3)
  [root@jouet ~]#

Signed-off-by: Seeteena Thoufeek <s1seetee@linux.vnet.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1528455748-20087-1-git-send-email-s1seetee@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index b3bf35512d21..a31d7082188e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -180,6 +180,18 @@ static struct {
 				  PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
 	},
 
+	[PERF_TYPE_HW_CACHE] = {
+		.user_set = false,
+
+		.fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+			      PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+			      PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+			      PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
+
+		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+	},
+
 	[PERF_TYPE_RAW] = {
 		.user_set = false,
 

From c60c32a5775615d5456a09527aaa12e4a109c3da Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 4 May 2018 17:25:48 +0200
Subject: [PATCH 017/215] posix-cpu-timers: Remove
 lockdep_assert_irqs_disabled()

The lockdep_assert_irqs_disabled() was a BUG_ON() statement in the
beginning and it was added just before the "spin_lock(siglock)"
statement to ensure this lock was taken with disabled interrupts.

This is no longer the case: the siglock is acquired via
lock_task_sighand() and this function already disables the interrupts.
The lock is also acquired before this "lockdep_assert_irqs_disabled" so
it is best to remove it.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r20180504152548.7166-1-bigeasy@linutronix.de
---
 kernel/time/posix-cpu-timers.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 5a6251ac6f7a..9cdf54b04ca8 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -604,7 +604,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	/*
 	 * Disarm any old timer after extracting its expiry time.
 	 */
-	lockdep_assert_irqs_disabled();
 
 	ret = 0;
 	old_incr = timer->it.cpu.incr;
@@ -1049,7 +1048,6 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
 	/*
 	 * Now re-arm for the new expiry time.
 	 */
-	lockdep_assert_irqs_disabled();
 	arm_timer(timer);
 unlock:
 	unlock_task_sighand(p, &flags);

From f2ae67941138a1e53cb1bc6a1b5878a8bdc74d26 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 12 Jun 2018 18:16:19 +0200
Subject: [PATCH 018/215] alpha: Remove custom dec_and_lock() implementation

Alpha provides a custom implementation of dec_and_lock(). The functions
is split into two parts:
- atomic_add_unless() + return 0 (fast path in assembly)
- remaining part including locking (slow path in C)

Comparing the result of the alpha implementation with the generic
implementation compiled by gcc it looks like the fast path is optimized
by avoiding a stack frame (and reloading the GP), register store and all
this. This is only done in the slowpath.
After marking the slowpath (atomic_dec_and_lock_1()) as "noinline" and
doing the slowpath in C (the atomic_add_unless(atomic, -1, 1) part) I
noticed differences in the resulting assembly:
- the GP is still reloaded
- atomic_add_unless() adds more memory barriers compared to the custom
  assembly
- the custom assembly here does "load, sub, beq" while
  atomic_add_unless() does "load, cmpeq, add, bne". This is okay because
  it compares against zero after subtraction while the generic code
  compares against 1 before.

I'm not sure if avoiding the stack frame (and GP reloading) brings a lot
in terms of performance. Regarding the different barriers, Peter
Zijlstra says:

|refcount decrement needs to be a RELEASE operation, such that all the
|load/stores to the object happen before we decrement the refcount.
|
|Otherwise things like:
|
|        obj->foo = 5;
|        refcnt_dec(&obj->ref);
|
|can be re-ordered, which then allows fun scenarios like:
|
|        CPU0                            CPU1
|
|        refcnt_dec(&obj->ref);
|                                        if (dec_and_test(&obj->ref))
|                                                free(obj);
|        obj->foo = 5; // oops UaF
|
|
|This means (for alpha) that there should be a memory barrier _before_
|the decrement, however the dec_and_lock asm thing only has one _after_,
|which, per the above, is too late.
|
|The generic version using add_unless will result in memory barrier
|before and after (because that is the rule for atomic ops with a return
|value) which is strictly too many barriers for the refcount story, but
|who knows what other ordering requirements code has.

Remove the custom alpha implementation of dec_and_lock() and if it is an
issue (performance wise) then the fast path could still be inlined.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: linux-alpha@vger.kernel.org
Link: https://lkml.kernel.org/r/20180606115918.GG12198@hirez.programming.kicks-ass.net
Link: https://lkml.kernel.org/r20180612161621.22645-2-bigeasy@linutronix.de
---
 arch/alpha/Kconfig            |  5 ----
 arch/alpha/lib/Makefile       |  2 --
 arch/alpha/lib/dec_and_lock.c | 44 -----------------------------------
 lib/Makefile                  |  6 +----
 4 files changed, 1 insertion(+), 56 deletions(-)
 delete mode 100644 arch/alpha/lib/dec_and_lock.c

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 0c4805a572c8..04a4a138ed13 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -555,11 +555,6 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
-config HAVE_DEC_LOCK
-	bool
-	depends on SMP
-	default y
-
 config NR_CPUS
 	int "Maximum number of CPUs (2-32)"
 	range 2 32
diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile
index 04f9729de57c..854d5e79979e 100644
--- a/arch/alpha/lib/Makefile
+++ b/arch/alpha/lib/Makefile
@@ -35,8 +35,6 @@ lib-y =	__divqu.o __remqu.o __divlu.o __remlu.o \
 	callback_srm.o srm_puts.o srm_printk.o \
 	fls.o
 
-lib-$(CONFIG_SMP) += dec_and_lock.o
-
 # The division routines are built from single source, with different defines.
 AFLAGS___divqu.o = -DDIV
 AFLAGS___remqu.o =       -DREM
diff --git a/arch/alpha/lib/dec_and_lock.c b/arch/alpha/lib/dec_and_lock.c
deleted file mode 100644
index a117707f57fe..000000000000
--- a/arch/alpha/lib/dec_and_lock.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * arch/alpha/lib/dec_and_lock.c
- *
- * ll/sc version of atomic_dec_and_lock()
- * 
- */
-
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-#include <linux/export.h>
-
-  asm (".text					\n\
-	.global _atomic_dec_and_lock		\n\
-	.ent _atomic_dec_and_lock		\n\
-	.align	4				\n\
-_atomic_dec_and_lock:				\n\
-	.prologue 0				\n\
-1:	ldl_l	$1, 0($16)			\n\
-	subl	$1, 1, $1			\n\
-	beq	$1, 2f				\n\
-	stl_c	$1, 0($16)			\n\
-	beq	$1, 4f				\n\
-	mb					\n\
-	clr	$0				\n\
-	ret					\n\
-2:	br	$29, 3f				\n\
-3:	ldgp	$29, 0($29)			\n\
-	br	$atomic_dec_and_lock_1..ng	\n\
-	.subsection 2				\n\
-4:	br	1b				\n\
-	.previous				\n\
-	.end _atomic_dec_and_lock");
-
-static int __used atomic_dec_and_lock_1(atomic_t *atomic, spinlock_t *lock)
-{
-	/* Slow path */
-	spin_lock(lock);
-	if (atomic_dec_and_test(atomic))
-		return 1;
-	spin_unlock(lock);
-	return 0;
-}
-EXPORT_SYMBOL(_atomic_dec_and_lock);
diff --git a/lib/Makefile b/lib/Makefile
index 84c6dcb31fbb..8b59f4a7c0e2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -23,7 +23,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o seq_buf.o siphash.o \
+	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
 	 nmi_backtrace.o nodemask.o win_minmax.o
 
 lib-$(CONFIG_PRINTK) += dump_stack.o
@@ -98,10 +98,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
-ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
-  lib-y += dec_and_lock.o
-endif
-
 obj-$(CONFIG_BITREVERSE) += bitrev.o
 obj-$(CONFIG_RATIONAL)	+= rational.o
 obj-$(CONFIG_CRC_CCITT)	+= crc-ccitt.o

From ccfbb5bed407053b27492a9adc06064d949a9aa6 Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Tue, 12 Jun 2018 18:16:20 +0200
Subject: [PATCH 019/215] atomic: Add irqsave variant of atomic_dec_and_lock()

There are in-tree users of atomic_dec_and_lock() which must acquire the
spin lock with interrupts disabled. To workaround the lack of an irqsave
variant of atomic_dec_and_lock() they use local_irq_save() at the call
site. This causes extra code and creates in some places unneeded long
interrupt disabled times. These places need also extra treatment for
PREEMPT_RT due to the disconnect of the irq disabling and the lock
function.

Implement the missing irqsave variant of the function.

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r20180612161621.22645-3-bigeasy@linutronix.de
---
 include/linux/spinlock.h |  5 +++++
 lib/dec_and_lock.c       | 16 ++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 1e8a46435838..fd57888d4942 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -427,6 +427,11 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
 #define atomic_dec_and_lock(atomic, lock) \
 		__cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
 
+extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
+					unsigned long *flags);
+#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
+		__cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
+
 int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
 			   size_t max_size, unsigned int cpu_mult,
 			   gfp_t gfp);
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index 347fa7ac2e8a..9555b68bb774 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(_atomic_dec_and_lock);
+
+int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
+				 unsigned long *flags)
+{
+	/* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+	if (atomic_add_unless(atomic, -1, 1))
+		return 0;
+
+	/* Otherwise do it the slow way */
+	spin_lock_irqsave(lock, *flags);
+	if (atomic_dec_and_test(atomic))
+		return 1;
+	spin_unlock_irqrestore(lock, *flags);
+	return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);

From 7ea959c45769612aa92557fb6464679f5fec7d9e Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Tue, 12 Jun 2018 18:16:21 +0200
Subject: [PATCH 020/215] locking/refcounts: Implement
 refcount_dec_and_lock_irqsave()

There are in-tree users of refcount_dec_and_lock() which must acquire the
spin lock with interrupts disabled. To workaround the lack of an irqsave
variant of refcount_dec_and_lock() they use local_irq_save() at the call
site. This causes extra code and creates in some places unneeded long
interrupt disabled times. These places need also extra treatment for
PREEMPT_RT due to the disconnect of the irq disabling and the lock
function.

Implement the missing irqsave variant of the function.

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r20180612161621.22645-4-bigeasy@linutronix.de

[bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g]
---
 include/linux/refcount.h |  4 +++-
 lib/refcount.c           | 28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 4193c41e383a..a685da2c4522 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -98,5 +98,7 @@ extern __must_check bool refcount_dec_if_one(refcount_t *r);
 extern __must_check bool refcount_dec_not_one(refcount_t *r);
 extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock);
 extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock);
-
+extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
+						       spinlock_t *lock,
+						       unsigned long *flags);
 #endif /* _LINUX_REFCOUNT_H */
diff --git a/lib/refcount.c b/lib/refcount.c
index 0eb48353abe3..d3b81cefce91 100644
--- a/lib/refcount.c
+++ b/lib/refcount.c
@@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
 }
 EXPORT_SYMBOL(refcount_dec_and_lock);
 
+/**
+ * refcount_dec_and_lock_irqsave - return holding spinlock with disabled
+ *                                 interrupts if able to decrement refcount to 0
+ * @r: the refcount
+ * @lock: the spinlock to be locked
+ * @flags: saved IRQ-flags if the is acquired
+ *
+ * Same as refcount_dec_and_lock() above except that the spinlock is acquired
+ * with disabled interupts.
+ *
+ * Return: true and hold spinlock if able to decrement refcount to 0, false
+ *         otherwise
+ */
+bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
+				   unsigned long *flags)
+{
+	if (refcount_dec_not_one(r))
+		return false;
+
+	spin_lock_irqsave(lock, *flags);
+	if (!refcount_dec_and_test(r)) {
+		spin_unlock_irqrestore(lock, *flags);
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(refcount_dec_and_lock_irqsave);

From a26ed66c20f080c510fcf5bd448bce204f2c19d7 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sun, 10 Jun 2018 16:24:15 +0200
Subject: [PATCH 021/215] clocksource/drivers/stm32: Fix error return code

Return an error code on failure.

Problem found using Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: kernel-janitors@vger.kernel.org
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lkml.kernel.org/r1528640655-18948-3-git-send-email-Julia.Lawall@lip6.fr
---
 drivers/clocksource/timer-stm32.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c
index e5cdc3af684c..2717f88c7904 100644
--- a/drivers/clocksource/timer-stm32.c
+++ b/drivers/clocksource/timer-stm32.c
@@ -304,8 +304,10 @@ static int __init stm32_timer_init(struct device_node *node)
 
 	to->private_data = kzalloc(sizeof(struct stm32_timer_private),
 				   GFP_KERNEL);
-	if (!to->private_data)
+	if (!to->private_data) {
+		ret = -ENOMEM;
 		goto deinit;
+	}
 
 	rstc = of_reset_control_get(node, NULL);
 	if (!IS_ERR(rstc)) {

From 8efaac07d7e6694f39521f9fb8a5c848b712ecee Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 8 Jun 2018 08:04:57 +0200
Subject: [PATCH 022/215] drm/bridge/sii8620: simplify hardware reset procedure

There is no need to flip reset pin twice. Also delays can be changed to
values present in vendor's code.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Maciej Purski <m.purski@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180608060457.18357-1-a.hajda@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index 7ab36042a822..d1e780fba4b6 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -971,8 +971,17 @@ static int sii8620_hw_on(struct sii8620 *ctx)
 	ret = regulator_bulk_enable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
 	if (ret)
 		return ret;
+
 	usleep_range(10000, 20000);
-	return clk_prepare_enable(ctx->clk_xtal);
+	ret = clk_prepare_enable(ctx->clk_xtal);
+	if (ret)
+		return ret;
+
+	msleep(100);
+	gpiod_set_value(ctx->gpio_reset, 0);
+	msleep(100);
+
+	return 0;
 }
 
 static int sii8620_hw_off(struct sii8620 *ctx)
@@ -982,17 +991,6 @@ static int sii8620_hw_off(struct sii8620 *ctx)
 	return regulator_bulk_disable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
 }
 
-static void sii8620_hw_reset(struct sii8620 *ctx)
-{
-	usleep_range(10000, 20000);
-	gpiod_set_value(ctx->gpio_reset, 0);
-	usleep_range(5000, 20000);
-	gpiod_set_value(ctx->gpio_reset, 1);
-	usleep_range(10000, 20000);
-	gpiod_set_value(ctx->gpio_reset, 0);
-	msleep(300);
-}
-
 static void sii8620_cbus_reset(struct sii8620 *ctx)
 {
 	sii8620_write(ctx, REG_PWD_SRST, BIT_PWD_SRST_CBUS_RST
@@ -2112,7 +2110,6 @@ static void sii8620_cable_in(struct sii8620 *ctx)
 		dev_err(dev, "Error powering on, %d.\n", ret);
 		return;
 	}
-	sii8620_hw_reset(ctx);
 
 	sii8620_read_buf(ctx, REG_VND_IDL, ver, ARRAY_SIZE(ver));
 	ret = sii8620_clear_error(ctx);

From 8e627a1b1ce8feb3e1da4428b71b9b4905f04888 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Mon, 15 Jan 2018 18:33:57 +0100
Subject: [PATCH 023/215] drm/bridge/sii8620: fix loops in EDID fetch logic

Function should constantly check if cable is connected and finish
in finite time.

Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Maciej Purski <m.purski@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180115173357.31067-4-a.hajda@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 31 ++++++++++++++++++----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index d1e780fba4b6..720bc7c325e0 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -807,6 +807,7 @@ static void sii8620_burst_rx_all(struct sii8620 *ctx)
 static void sii8620_fetch_edid(struct sii8620 *ctx)
 {
 	u8 lm_ddc, ddc_cmd, int3, cbus;
+	unsigned long timeout;
 	int fetched, i;
 	int edid_len = EDID_LENGTH;
 	u8 *edid;
@@ -856,23 +857,31 @@ static void sii8620_fetch_edid(struct sii8620 *ctx)
 			REG_DDC_CMD, ddc_cmd | VAL_DDC_CMD_ENH_DDC_READ_NO_ACK
 		);
 
-		do {
-			int3 = sii8620_readb(ctx, REG_INTR3);
+		int3 = 0;
+		timeout = jiffies + msecs_to_jiffies(200);
+		for (;;) {
 			cbus = sii8620_readb(ctx, REG_CBUS_STATUS);
-
-			if (int3 & BIT_DDC_CMD_DONE)
-				break;
-
-			if (!(cbus & BIT_CBUS_STATUS_CBUS_CONNECTED)) {
+			if (~cbus & BIT_CBUS_STATUS_CBUS_CONNECTED) {
+				kfree(edid);
+				edid = NULL;
+				goto end;
+			}
+			if (int3 & BIT_DDC_CMD_DONE) {
+				if (sii8620_readb(ctx, REG_DDC_DOUT_CNT)
+				    >= FETCH_SIZE)
+					break;
+			} else {
+				int3 = sii8620_readb(ctx, REG_INTR3);
+			}
+			if (time_is_before_jiffies(timeout)) {
+				ctx->error = -ETIMEDOUT;
+				dev_err(ctx->dev, "timeout during EDID read\n");
 				kfree(edid);
 				edid = NULL;
 				goto end;
 			}
-		} while (1);
-
-		sii8620_readb(ctx, REG_DDC_STATUS);
-		while (sii8620_readb(ctx, REG_DDC_DOUT_CNT) < FETCH_SIZE)
 			usleep_range(10, 20);
+		}
 
 		sii8620_read_buf(ctx, REG_DDC_DATA, edid + fetched, FETCH_SIZE);
 		if (fetched + FETCH_SIZE == EDID_LENGTH) {

From ecba7cfa3afbe489288f2c819158b7402afd7ee9 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Fri, 2 Feb 2018 11:54:25 +0100
Subject: [PATCH 024/215] drm/bridge/sii8620: fix display modes validation

Current implementation of mode_valid() and mode_fixup() callbacks
handle packed pixel modes improperly.

Fix it by using proper maximum clock values from the documentation.

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1517568865-25219-1-git-send-email-m.purski@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 78 ++++++++++++++--------------
 1 file changed, 40 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index 720bc7c325e0..5267dc6551af 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -36,8 +36,11 @@
 
 #define SII8620_BURST_BUF_LEN 288
 #define VAL_RX_HDMI_CTRL2_DEFVAL VAL_RX_HDMI_CTRL2_IDLE_CNT(3)
-#define MHL1_MAX_LCLK 225000
-#define MHL3_MAX_LCLK 600000
+
+#define MHL1_MAX_PCLK 75000
+#define MHL1_MAX_PCLK_PP_MODE 150000
+#define MHL3_MAX_PCLK 200000
+#define MHL3_MAX_PCLK_PP_MODE 300000
 
 enum sii8620_mode {
 	CM_DISCONNECTED,
@@ -2274,17 +2277,43 @@ static void sii8620_detach(struct drm_bridge *bridge)
 	rc_unregister_device(ctx->rc_dev);
 }
 
+static int sii8620_is_packing_required(struct sii8620 *ctx,
+				       const struct drm_display_mode *mode)
+{
+	int max_pclk, max_pclk_pp_mode;
+
+	if (sii8620_is_mhl3(ctx)) {
+		max_pclk = MHL3_MAX_PCLK;
+		max_pclk_pp_mode = MHL3_MAX_PCLK_PP_MODE;
+	} else {
+		max_pclk = MHL1_MAX_PCLK;
+		max_pclk_pp_mode = MHL1_MAX_PCLK_PP_MODE;
+	}
+
+	if (mode->clock < max_pclk)
+		return 0;
+	else if (mode->clock < max_pclk_pp_mode)
+		return 1;
+	else
+		return -1;
+}
+
 static enum drm_mode_status sii8620_mode_valid(struct drm_bridge *bridge,
 					 const struct drm_display_mode *mode)
 {
 	struct sii8620 *ctx = bridge_to_sii8620(bridge);
+	int pack_required = sii8620_is_packing_required(ctx, mode);
 	bool can_pack = ctx->devcap[MHL_DCAP_VID_LINK_MODE] &
 			MHL_DCAP_VID_LINK_PPIXEL;
-	unsigned int max_pclk = sii8620_is_mhl3(ctx) ? MHL3_MAX_LCLK :
-						       MHL1_MAX_LCLK;
-	max_pclk /= can_pack ? 2 : 3;
 
-	return (mode->clock > max_pclk) ? MODE_CLOCK_HIGH : MODE_OK;
+	switch (pack_required) {
+	case 0:
+		return MODE_OK;
+	case 1:
+		return (can_pack) ? MODE_OK : MODE_CLOCK_HIGH;
+	default:
+		return MODE_CLOCK_HIGH;
+	}
 }
 
 static bool sii8620_mode_fixup(struct drm_bridge *bridge,
@@ -2292,43 +2321,16 @@ static bool sii8620_mode_fixup(struct drm_bridge *bridge,
 			       struct drm_display_mode *adjusted_mode)
 {
 	struct sii8620 *ctx = bridge_to_sii8620(bridge);
-	int max_lclk;
-	bool ret = true;
 
 	mutex_lock(&ctx->lock);
 
-	max_lclk = sii8620_is_mhl3(ctx) ? MHL3_MAX_LCLK : MHL1_MAX_LCLK;
-	if (max_lclk > 3 * adjusted_mode->clock) {
-		ctx->use_packed_pixel = 0;
-		goto end;
-	}
-	if ((ctx->devcap[MHL_DCAP_VID_LINK_MODE] & MHL_DCAP_VID_LINK_PPIXEL) &&
-	    max_lclk > 2 * adjusted_mode->clock) {
-		ctx->use_packed_pixel = 1;
-		goto end;
-	}
-	ret = false;
-end:
-	if (ret) {
-		u8 vic = drm_match_cea_mode(adjusted_mode);
+	ctx->use_packed_pixel = sii8620_is_packing_required(ctx, adjusted_mode);
+	ctx->video_code = drm_match_cea_mode(adjusted_mode);
+	ctx->pixel_clock = adjusted_mode->clock;
 
-		if (!vic) {
-			union hdmi_infoframe frm;
-			u8 mhl_vic[] = { 0, 95, 94, 93, 98 };
-
-			/* FIXME: We need the connector here */
-			drm_hdmi_vendor_infoframe_from_display_mode(
-				&frm.vendor.hdmi, NULL, adjusted_mode);
-			vic = frm.vendor.hdmi.vic;
-			if (vic >= ARRAY_SIZE(mhl_vic))
-				vic = 0;
-			vic = mhl_vic[vic];
-		}
-		ctx->video_code = vic;
-		ctx->pixel_clock = adjusted_mode->clock;
-	}
 	mutex_unlock(&ctx->lock);
-	return ret;
+
+	return true;
 }
 
 static const struct drm_bridge_funcs sii8620_bridge_funcs = {

From 9378cecb1ce5d618b8aff4d65113ddcf72fc1011 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Wed, 22 Nov 2017 10:08:38 +0100
Subject: [PATCH 025/215] drm/bridge/sii8620: fix potential buffer overflow

Buffer overflow error should not occur, as mode_fixup() callback
filters pixel clock value and it should never exceed 600000. However,
current implementation is not obviously safe and relies on
implementation of mode_fixup().

Make 'i' variable never reach unsafe value in order to avoid buffer
overflow error.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: bf1722ca ("drm/bridge/sii8620: rewrite hdmi start sequence")
Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1511341718-6974-1-git-send-email-m.purski@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index 5267dc6551af..61fd3e0a4ba6 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -1226,7 +1226,7 @@ static void sii8620_start_video(struct sii8620 *ctx)
 		int clk = ctx->pixel_clock * (ctx->use_packed_pixel ? 2 : 3);
 		int i;
 
-		for (i = 0; i < ARRAY_SIZE(clk_spec); ++i)
+		for (i = 0; i < ARRAY_SIZE(clk_spec) - 1; ++i)
 			if (clk < clk_spec[i].max_clk)
 				break;
 

From bbc05e172fad9affa388be35b78b9e5e5da76648 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Wed, 29 Nov 2017 12:48:50 +0100
Subject: [PATCH 026/215] drm/bridge/sii8620: start MHL transmission after HDMI
 signal detection

The vendor code waits for infoframe to detect video mode set by source.
We do not need to follow this pattern, because video mode information is
provided by drm core. As a result most of the infoframe handling
code can be removed.

Start transmission immediately after detecting stream on HDMI lines
in irq_scdt() function without waiting for infoframe interrupt.

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1511956130-24482-1-git-send-email-m.purski@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 53 ++--------------------------
 1 file changed, 2 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index 61fd3e0a4ba6..853c4f97c7c9 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -1941,14 +1941,6 @@ static void sii8620_irq_edid(struct sii8620 *ctx)
 		ctx->mt_state = MT_STATE_DONE;
 }
 
-static void sii8620_scdt_high(struct sii8620 *ctx)
-{
-	sii8620_write_seq_static(ctx,
-		REG_INTR8_MASK, BIT_CEA_NEW_AVI | BIT_CEA_NEW_VSI,
-		REG_TPI_SC, BIT_TPI_SC_TPI_OUTPUT_MODE_0_HDMI,
-	);
-}
-
 static void sii8620_irq_scdt(struct sii8620 *ctx)
 {
 	u8 stat = sii8620_readb(ctx, REG_INTR5);
@@ -1956,53 +1948,13 @@ static void sii8620_irq_scdt(struct sii8620 *ctx)
 	if (stat & BIT_INTR_SCDT_CHANGE) {
 		u8 cstat = sii8620_readb(ctx, REG_TMDS_CSTAT_P3);
 
-		if (cstat & BIT_TMDS_CSTAT_P3_SCDT) {
-			if (ctx->sink_type == SINK_HDMI)
-				/* enable infoframe interrupt */
-				sii8620_scdt_high(ctx);
-			else
-				sii8620_start_video(ctx);
-		}
+		if (cstat & BIT_TMDS_CSTAT_P3_SCDT)
+			sii8620_start_video(ctx);
 	}
 
 	sii8620_write(ctx, REG_INTR5, stat);
 }
 
-static void sii8620_new_vsi(struct sii8620 *ctx)
-{
-	u8 vsif[11];
-
-	sii8620_write(ctx, REG_RX_HDMI_CTRL2,
-		      VAL_RX_HDMI_CTRL2_DEFVAL |
-		      BIT_RX_HDMI_CTRL2_VSI_MON_SEL_VSI);
-	sii8620_read_buf(ctx, REG_RX_HDMI_MON_PKT_HEADER1, vsif,
-			 ARRAY_SIZE(vsif));
-}
-
-static void sii8620_new_avi(struct sii8620 *ctx)
-{
-	sii8620_write(ctx, REG_RX_HDMI_CTRL2, VAL_RX_HDMI_CTRL2_DEFVAL);
-	sii8620_read_buf(ctx, REG_RX_HDMI_MON_PKT_HEADER1, ctx->avif,
-			 ARRAY_SIZE(ctx->avif));
-}
-
-static void sii8620_irq_infr(struct sii8620 *ctx)
-{
-	u8 stat = sii8620_readb(ctx, REG_INTR8)
-		& (BIT_CEA_NEW_VSI | BIT_CEA_NEW_AVI);
-
-	sii8620_write(ctx, REG_INTR8, stat);
-
-	if (stat & BIT_CEA_NEW_VSI)
-		sii8620_new_vsi(ctx);
-
-	if (stat & BIT_CEA_NEW_AVI)
-		sii8620_new_avi(ctx);
-
-	if (stat & (BIT_CEA_NEW_VSI | BIT_CEA_NEW_AVI))
-		sii8620_start_video(ctx);
-}
-
 static void sii8620_got_xdevcap(struct sii8620 *ctx, int ret)
 {
 	if (ret < 0)
@@ -2084,7 +2036,6 @@ static irqreturn_t sii8620_irq_thread(int irq, void *data)
 		{ BIT_FAST_INTR_STAT_EDID, sii8620_irq_edid },
 		{ BIT_FAST_INTR_STAT_DDC, sii8620_irq_ddc },
 		{ BIT_FAST_INTR_STAT_SCDT, sii8620_irq_scdt },
-		{ BIT_FAST_INTR_STAT_INFR, sii8620_irq_infr },
 	};
 	struct sii8620 *ctx = data;
 	u8 stats[LEN_FAST_INTR_STAT];

From 95e8522588c81fdef1b10777a874c4a1fe14bf88 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Wed, 29 Nov 2017 16:12:47 +0100
Subject: [PATCH 027/215] drm/bridge/sii8620: remove HSIC initialization

HSIC initialization was taken from the vendor code. HSIC in MHL circuit
is not connected, so it is not possible to test it. Tests prove that
without HSIC the device works well. Therefore it can be removed.

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1511968368-30884-1-git-send-email-m.purski@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 38 ----------------------------
 1 file changed, 38 deletions(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index 853c4f97c7c9..ff041bec0cf4 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -511,50 +511,12 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
 		 sink_str[ctx->sink_type], sink_name);
 }
 
-static void sii8620_hsic_init(struct sii8620 *ctx)
-{
-	if (!sii8620_is_mhl3(ctx))
-		return;
-
-	sii8620_write(ctx, REG_FCGC,
-		BIT_FCGC_HSIC_HOSTMODE | BIT_FCGC_HSIC_ENABLE);
-	sii8620_setbits(ctx, REG_HRXCTRL3,
-		BIT_HRXCTRL3_HRX_STAY_RESET | BIT_HRXCTRL3_STATUS_EN, ~0);
-	sii8620_setbits(ctx, REG_TTXNUMB, MSK_TTXNUMB_TTX_NUMBPS, 4);
-	sii8620_setbits(ctx, REG_TRXCTRL, BIT_TRXCTRL_TRX_FROM_SE_COC, ~0);
-	sii8620_setbits(ctx, REG_HTXCTRL, BIT_HTXCTRL_HTX_DRVCONN1, 0);
-	sii8620_setbits(ctx, REG_KEEPER, MSK_KEEPER_MODE, VAL_KEEPER_MODE_HOST);
-	sii8620_write_seq_static(ctx,
-		REG_TDMLLCTL, 0,
-		REG_UTSRST, BIT_UTSRST_HRX_SRST | BIT_UTSRST_HTX_SRST |
-			BIT_UTSRST_KEEPER_SRST | BIT_UTSRST_FC_SRST,
-		REG_UTSRST, BIT_UTSRST_HRX_SRST | BIT_UTSRST_HTX_SRST,
-		REG_HRXINTL, 0xff,
-		REG_HRXINTH, 0xff,
-		REG_TTXINTL, 0xff,
-		REG_TTXINTH, 0xff,
-		REG_TRXINTL, 0xff,
-		REG_TRXINTH, 0xff,
-		REG_HTXINTL, 0xff,
-		REG_HTXINTH, 0xff,
-		REG_FCINTR0, 0xff,
-		REG_FCINTR1, 0xff,
-		REG_FCINTR2, 0xff,
-		REG_FCINTR3, 0xff,
-		REG_FCINTR4, 0xff,
-		REG_FCINTR5, 0xff,
-		REG_FCINTR6, 0xff,
-		REG_FCINTR7, 0xff
-	);
-}
-
 static void sii8620_edid_read(struct sii8620 *ctx, int ret)
 {
 	if (ret < 0)
 		return;
 
 	sii8620_set_upstream_edid(ctx);
-	sii8620_hsic_init(ctx);
 	sii8620_enable_hpd(ctx);
 }
 

From c7d6d511eb56495f065600c90336da0f0fe1b174 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Tue, 23 Jan 2018 12:13:16 +0100
Subject: [PATCH 028/215] drm/bridge/sii8620: fix HDMI cable connection to
 dongle

MHL bridge is usually connected to TV via MHL dongle. Currently plugging
HDMI cable to dongle is handled improperly.

Fix it by splitting connecting of a dongle and a HDMI cable. The driver
should now handle unplugging a sink from a dongle and plugging a
different sink with new edid.

Tested on MHL1, MHL2 and MHL3 using various vendors' dongles both in
DVI and HDMI mode.

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1516705996-8928-1-git-send-email-m.purski@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 65 ++++++++++++++++++----------
 1 file changed, 42 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index ff041bec0cf4..4a3deeda065c 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -83,6 +83,9 @@ struct sii8620 {
 	u8 devcap[MHL_DCAP_SIZE];
 	u8 xdevcap[MHL_XDC_SIZE];
 	u8 avif[HDMI_INFOFRAME_SIZE(AVI)];
+	bool feature_complete;
+	bool devcap_read;
+	bool sink_detected;
 	struct edid *edid;
 	unsigned int gen2_write_burst:1;
 	enum sii8620_mt_state mt_state;
@@ -479,7 +482,7 @@ static void sii8620_update_array(u8 *dst, u8 *src, int count)
 	}
 }
 
-static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
+static void sii8620_identify_sink(struct sii8620 *ctx)
 {
 	static const char * const sink_str[] = {
 		[SINK_NONE] = "NONE",
@@ -490,7 +493,7 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
 	char sink_name[20];
 	struct device *dev = ctx->dev;
 
-	if (ret < 0)
+	if (!ctx->sink_detected || !ctx->devcap_read)
 		return;
 
 	sii8620_fetch_edid(ctx);
@@ -499,6 +502,7 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
 		sii8620_mhl_disconnected(ctx);
 		return;
 	}
+	sii8620_set_upstream_edid(ctx);
 
 	if (drm_detect_hdmi_monitor(ctx->edid))
 		ctx->sink_type = SINK_HDMI;
@@ -511,15 +515,6 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
 		 sink_str[ctx->sink_type], sink_name);
 }
 
-static void sii8620_edid_read(struct sii8620 *ctx, int ret)
-{
-	if (ret < 0)
-		return;
-
-	sii8620_set_upstream_edid(ctx);
-	sii8620_enable_hpd(ctx);
-}
-
 static void sii8620_mr_devcap(struct sii8620 *ctx)
 {
 	u8 dcap[MHL_DCAP_SIZE];
@@ -535,6 +530,8 @@ static void sii8620_mr_devcap(struct sii8620 *ctx)
 		 dcap[MHL_DCAP_ADOPTER_ID_H], dcap[MHL_DCAP_ADOPTER_ID_L],
 		 dcap[MHL_DCAP_DEVICE_ID_H], dcap[MHL_DCAP_DEVICE_ID_L]);
 	sii8620_update_array(ctx->devcap, dcap, MHL_DCAP_SIZE);
+	ctx->devcap_read = true;
+	sii8620_identify_sink(ctx);
 }
 
 static void sii8620_mr_xdevcap(struct sii8620 *ctx)
@@ -1506,6 +1503,16 @@ static void sii8620_set_mode(struct sii8620 *ctx, enum sii8620_mode mode)
 	);
 }
 
+static void sii8620_hpd_unplugged(struct sii8620 *ctx)
+{
+	sii8620_disable_hpd(ctx);
+	ctx->sink_type = SINK_NONE;
+	ctx->sink_detected = false;
+	ctx->feature_complete = false;
+	kfree(ctx->edid);
+	ctx->edid = NULL;
+}
+
 static void sii8620_disconnect(struct sii8620 *ctx)
 {
 	sii8620_disable_gen2_write_burst(ctx);
@@ -1533,7 +1540,7 @@ static void sii8620_disconnect(struct sii8620 *ctx)
 		REG_MHL_DP_CTL6, 0x2A,
 		REG_MHL_DP_CTL7, 0x03
 	);
-	sii8620_disable_hpd(ctx);
+	sii8620_hpd_unplugged(ctx);
 	sii8620_write_seq_static(ctx,
 		REG_M3_CTRL, VAL_M3_CTRL_MHL3_VALUE,
 		REG_MHL_COC_CTL1, 0x07,
@@ -1581,10 +1588,8 @@ static void sii8620_disconnect(struct sii8620 *ctx)
 	memset(ctx->xstat, 0, sizeof(ctx->xstat));
 	memset(ctx->devcap, 0, sizeof(ctx->devcap));
 	memset(ctx->xdevcap, 0, sizeof(ctx->xdevcap));
+	ctx->devcap_read = false;
 	ctx->cbus_status = 0;
-	ctx->sink_type = SINK_NONE;
-	kfree(ctx->edid);
-	ctx->edid = NULL;
 	sii8620_mt_cleanup(ctx);
 }
 
@@ -1675,9 +1680,6 @@ static void sii8620_status_changed_path(struct sii8620 *ctx)
 		sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
 				      MHL_DST_LM_CLK_MODE_NORMAL
 				      | MHL_DST_LM_PATH_ENABLED);
-		if (!sii8620_is_mhl3(ctx))
-			sii8620_mt_read_devcap(ctx, false);
-		sii8620_mt_set_cont(ctx, sii8620_sink_detected);
 	} else {
 		sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
 				      MHL_DST_LM_CLK_MODE_NORMAL);
@@ -1694,9 +1696,14 @@ static void sii8620_msc_mr_write_stat(struct sii8620 *ctx)
 	sii8620_update_array(ctx->stat, st, MHL_DST_SIZE);
 	sii8620_update_array(ctx->xstat, xst, MHL_XDS_SIZE);
 
-	if (ctx->stat[MHL_DST_CONNECTED_RDY] & MHL_DST_CONN_DCAP_RDY)
+	if (ctx->stat[MHL_DST_CONNECTED_RDY] & st[MHL_DST_CONNECTED_RDY] &
+	    MHL_DST_CONN_DCAP_RDY) {
 		sii8620_status_dcap_ready(ctx);
 
+		if (!sii8620_is_mhl3(ctx))
+			sii8620_mt_read_devcap(ctx, false);
+	}
+
 	if (st[MHL_DST_LINK_MODE] & MHL_DST_LM_PATH_ENABLED)
 		sii8620_status_changed_path(ctx);
 }
@@ -1780,8 +1787,11 @@ static void sii8620_msc_mr_set_int(struct sii8620 *ctx)
 	}
 	if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_REQ)
 		sii8620_send_features(ctx);
-	if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_COMPLETE)
-		sii8620_edid_read(ctx, 0);
+	if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_COMPLETE) {
+		ctx->feature_complete = true;
+		if (ctx->edid)
+			sii8620_enable_hpd(ctx);
+	}
 }
 
 static struct sii8620_mt_msg *sii8620_msc_msg_first(struct sii8620 *ctx)
@@ -1856,6 +1866,15 @@ static void sii8620_irq_msc(struct sii8620 *ctx)
 	if (stat & BIT_CBUS_MSC_MR_WRITE_STAT)
 		sii8620_msc_mr_write_stat(ctx);
 
+	if (stat & BIT_CBUS_HPD_CHG) {
+		if (ctx->cbus_status & BIT_CBUS_STATUS_CBUS_HPD) {
+			ctx->sink_detected = true;
+			sii8620_identify_sink(ctx);
+		} else {
+			sii8620_hpd_unplugged(ctx);
+		}
+	}
+
 	if (stat & BIT_CBUS_MSC_MR_SET_INT)
 		sii8620_msc_mr_set_int(ctx);
 
@@ -1967,11 +1986,11 @@ static void sii8620_irq_ddc(struct sii8620 *ctx)
 
 	if (stat & BIT_DDC_CMD_DONE) {
 		sii8620_write(ctx, REG_INTR3_MASK, 0);
-		if (sii8620_is_mhl3(ctx))
+		if (sii8620_is_mhl3(ctx) && !ctx->feature_complete)
 			sii8620_mt_set_int(ctx, MHL_INT_REG(RCHANGE),
 					   MHL_INT_RC_FEAT_REQ);
 		else
-			sii8620_edid_read(ctx, 0);
+			sii8620_enable_hpd(ctx);
 	}
 	sii8620_write(ctx, REG_INTR3, stat);
 }

From a09c591306881dfb04387c6ee7b7e2e4683fa531 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 13 Jun 2018 13:17:26 +0200
Subject: [PATCH 029/215] ACPI / LPSS: Avoid PM quirks on suspend and resume
 from S3

It is reported that commit a192aa923b66a (ACPI / LPSS: Consolidate
runtime PM and system sleep handling) introduced a system suspend
regression on some machines, but the only functional change made by
it was to cause the PM quirks in the LPSS to also be used during
system suspend and resume.  While that should always work for
suspend-to-idle, it turns out to be problematic for S3
(suspend-to-RAM).

To address that issue restore the previous S3 suspend and resume
behavior of the LPSS to avoid applying PM quirks then.

Fixes: a192aa923b66a (ACPI / LPSS: Consolidate runtime PM and system sleep handling)
Link: https://bugs.launchpad.net/bugs/1774950
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: 4.15+ <stable@vger.kernel.org> # 4.15+
---
 drivers/acpi/acpi_lpss.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index cb6ac5c65c2e..55e4577b504c 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -22,6 +22,7 @@
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
+#include <linux/suspend.h>
 #include <linux/delay.h>
 
 #include "internal.h"
@@ -944,9 +945,10 @@ static void lpss_iosf_exit_d3_state(void)
 	mutex_unlock(&lpss_iosf_mutex);
 }
 
-static int acpi_lpss_suspend(struct device *dev, bool wakeup)
+static int acpi_lpss_suspend(struct device *dev, bool runtime)
 {
 	struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev));
+	bool wakeup = runtime || device_may_wakeup(dev);
 	int ret;
 
 	if (pdata->dev_desc->flags & LPSS_SAVE_CTX)
@@ -959,13 +961,14 @@ static int acpi_lpss_suspend(struct device *dev, bool wakeup)
 	 * wrong status for devices being about to be powered off. See
 	 * lpss_iosf_enter_d3_state() for further information.
 	 */
-	if (lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
+	if ((runtime || !pm_suspend_via_firmware()) &&
+	    lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
 		lpss_iosf_enter_d3_state();
 
 	return ret;
 }
 
-static int acpi_lpss_resume(struct device *dev)
+static int acpi_lpss_resume(struct device *dev, bool runtime)
 {
 	struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev));
 	int ret;
@@ -974,7 +977,8 @@ static int acpi_lpss_resume(struct device *dev)
 	 * This call is kept first to be in symmetry with
 	 * acpi_lpss_runtime_suspend() one.
 	 */
-	if (lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
+	if ((runtime || !pm_resume_via_firmware()) &&
+	    lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
 		lpss_iosf_exit_d3_state();
 
 	ret = acpi_dev_resume(dev);
@@ -998,12 +1002,12 @@ static int acpi_lpss_suspend_late(struct device *dev)
 		return 0;
 
 	ret = pm_generic_suspend_late(dev);
-	return ret ? ret : acpi_lpss_suspend(dev, device_may_wakeup(dev));
+	return ret ? ret : acpi_lpss_suspend(dev, false);
 }
 
 static int acpi_lpss_resume_early(struct device *dev)
 {
-	int ret = acpi_lpss_resume(dev);
+	int ret = acpi_lpss_resume(dev, false);
 
 	return ret ? ret : pm_generic_resume_early(dev);
 }
@@ -1018,7 +1022,7 @@ static int acpi_lpss_runtime_suspend(struct device *dev)
 
 static int acpi_lpss_runtime_resume(struct device *dev)
 {
-	int ret = acpi_lpss_resume(dev);
+	int ret = acpi_lpss_resume(dev, true);
 
 	return ret ? ret : pm_generic_runtime_resume(dev);
 }

From 47e5abfb546a3ace23a77453dc2e9db92704c5ac Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 14 Jun 2018 10:01:52 +0200
Subject: [PATCH 030/215] PM / core: Fix supplier device runtime PM usage
 counter imbalance

If a device link is added via device_link_add() by the driver of the
link's consumer device, the supplier's runtime PM usage counter is
going to be dropped by the pm_runtime_put_suppliers() call in
driver_probe_device().  However, in that case it is not incremented
unless the supplier driver is already present and the link is not
stateless.  That leads to a runtime PM usage counter imbalance for
the supplier device in a few cases.

To prevent that from happening, bump up the supplier runtime
PM usage counter in device_link_add() for all links with the
DL_FLAG_PM_RUNTIME flag set that are added at the consumer probe
time.  Use pm_runtime_get_noresume() for that as the callers of
device_link_add() who want the supplier to be resumed by it are
expected to pass DL_FLAG_RPM_ACTIVE in flags to it anyway, but
additionally resume the supplier if the link is added during
consumer driver probe to retain the existing behavior for the
callers depending on it.

Fixes: 21d5c57b3726 (PM / runtime: Use device links)
Reported-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: 4.10+ <stable@vger.kernel.org> # 4.10+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/core.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 36622b52e419..df3e1a44707a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -236,6 +236,13 @@ struct device_link *device_link_add(struct device *consumer,
 			link->rpm_active = true;
 		}
 		pm_runtime_new_link(consumer);
+		/*
+		 * If the link is being added by the consumer driver at probe
+		 * time, balance the decrementation of the supplier's runtime PM
+		 * usage counter after consumer probe in driver_probe_device().
+		 */
+		if (consumer->links.status == DL_DEV_PROBING)
+			pm_runtime_get_noresume(supplier);
 	}
 	get_device(supplier);
 	link->supplier = supplier;
@@ -255,12 +262,12 @@ struct device_link *device_link_add(struct device *consumer,
 			switch (consumer->links.status) {
 			case DL_DEV_PROBING:
 				/*
-				 * Balance the decrementation of the supplier's
-				 * runtime PM usage counter after consumer probe
-				 * in driver_probe_device().
+				 * Some callers expect the link creation during
+				 * consumer driver probe to resume the supplier
+				 * even without DL_FLAG_RPM_ACTIVE.
 				 */
 				if (flags & DL_FLAG_PM_RUNTIME)
-					pm_runtime_get_sync(supplier);
+					pm_runtime_resume(supplier);
 
 				link->status = DL_STATE_CONSUMER_PROBE;
 				break;

From d5681f59ee3d4a2e60b9234e94c163cbbf559d0a Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Thu, 14 Jun 2018 09:39:17 -0400
Subject: [PATCH 031/215] NFS: Fix an rcu deadlock in
 nfs_delegation_find_inode()

I was able to reproduce this pretty regularily using xfstests
generic/013 on NFS v4.0.

Reported-by: Ross Zwisler <Ross.Zwisler@linux.intel.com>
Fixes: 6c342655022d (NFSv4: Return NFS4ERR_DELAY when a delegation recall fails due to igrab())
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/delegation.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index bbd0465535eb..f033f3a69a3b 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -883,8 +883,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
 	rcu_read_lock();
 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
 		res = nfs_delegation_find_inode_server(server, fhandle);
-		if (res != ERR_PTR(-ENOENT))
+		if (res != ERR_PTR(-ENOENT)) {
+			rcu_read_unlock();
 			return res;
+		}
 	}
 	rcu_read_unlock();
 	return ERR_PTR(-ENOENT);

From f70b359b3830b7a5b72c78136edc740839b67acd Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 1 Jun 2018 10:59:25 +0300
Subject: [PATCH 032/215] crypto: chtls - use after free in chtls_pt_recvmsg()

We call chtls_free_skb() but then we dereference it on the next lines.
Also "skb" can't be NULL, we just dereferenced it on the line before.

I have moved the free down a couple lines to fix this issue.

Fixes: 17a7d24aa89d ("crypto: chtls - generic handling of data and hdr")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/chelsio/chtls/chtls_io.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index 51fc6821cbbf..708e232e3cdf 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -1548,15 +1548,14 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			tp->urg_data = 0;
 
 		if ((avail + offset) >= skb->len) {
-			if (likely(skb))
-				chtls_free_skb(sk, skb);
-			buffers_freed++;
 			if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
 				tp->copied_seq += skb->len;
 				hws->rcvpld = skb->hdr_len;
 			} else {
 				tp->copied_seq += hws->rcvpld;
 			}
+			chtls_free_skb(sk, skb);
+			buffers_freed++;
 			hws->copied_seq = 0;
 			if (copied >= target &&
 			    !skb_peek(&sk->sk_receive_queue))

From 6e88f01206edab0e5bc105d8f35fac10f4ee14c5 Mon Sep 17 00:00:00 2001
From: Jia He <hejianet@gmail.com>
Date: Fri, 8 Jun 2018 15:41:44 +0800
Subject: [PATCH 033/215] crypto: arm64/aes-blk - fix and move
 skcipher_walk_done out of kernel_neon_begin, _end

In a arm64 server(QDF2400),I met a similar might-sleep warning as [1]:
[    7.019116] BUG: sleeping function called from invalid context at
./include/crypto/algapi.h:416
[    7.027863] in_atomic(): 1, irqs_disabled(): 0, pid: 410, name:
cryptomgr_test
[    7.035106] 1 lock held by cryptomgr_test/410:
[    7.039549]  #0:         (ptrval) (&drbg->drbg_mutex){+.+.}, at:
drbg_instantiate+0x34/0x398
[    7.048038] CPU: 9 PID: 410 Comm: cryptomgr_test Not tainted
4.17.0-rc6+ #27
[    7.068228]  dump_backtrace+0x0/0x1c0
[    7.071890]  show_stack+0x24/0x30
[    7.075208]  dump_stack+0xb0/0xec
[    7.078523]  ___might_sleep+0x160/0x238
[    7.082360]  skcipher_walk_done+0x118/0x2c8
[    7.086545]  ctr_encrypt+0x98/0x130
[    7.090035]  simd_skcipher_encrypt+0x68/0xc0
[    7.094304]  drbg_kcapi_sym_ctr+0xd4/0x1f8
[    7.098400]  drbg_ctr_update+0x98/0x330
[    7.102236]  drbg_seed+0x1b8/0x2f0
[    7.105637]  drbg_instantiate+0x2ac/0x398
[    7.109646]  drbg_kcapi_seed+0xbc/0x188
[    7.113482]  crypto_rng_reset+0x4c/0xb0
[    7.117319]  alg_test_drbg+0xec/0x330
[    7.120981]  alg_test.part.6+0x1c8/0x3c8
[    7.124903]  alg_test+0x58/0xa0
[    7.128044]  cryptomgr_test+0x50/0x58
[    7.131708]  kthread+0x134/0x138
[    7.134936]  ret_from_fork+0x10/0x1c

Seems there is a bug in Ard Biesheuvel's commit.
Fixes: 683381747270 ("crypto: arm64/aes-blk - move kernel mode neon
en/disable into loop")

[1] https://www.spinics.net/lists/linux-crypto/msg33103.html

Signed-off-by: jia.he@hxt-semitech.com
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org> # 4.17
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 253188fb8cb0..e3e50950a863 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -223,8 +223,8 @@ static int ctr_encrypt(struct skcipher_request *req)
 		kernel_neon_begin();
 		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 		kernel_neon_end();
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	if (walk.nbytes) {
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];

From f044a84e040b85cd609851ac88ae8b54b2cc0b75 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Fri, 8 Jun 2018 11:53:41 +0200
Subject: [PATCH 034/215] crypto: don't optimize keccakf()

keccakf() is the only function in kernel that uses __optimize() macro.
__optimize() breaks frame pointer unwinder as optimized code uses RBP,
and amusingly this always lead to degraded performance as gcc does not
inline across different optimizations levels, so keccakf() wasn't inlined
into its callers and keccakf_round() wasn't inlined into keccakf().

Drop __optimize() to resolve both problems.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Fixes: 83dee2ce1ae7 ("crypto: sha3-generic - rewrite KECCAK transform to help the compiler optimize")
Reported-by: syzbot+37035ccfa9a0a017ffcf@syzkaller.appspotmail.com
Reported-by: syzbot+e073e4740cfbb3ae200b@syzkaller.appspotmail.com
Cc: linux-crypto@vger.kernel.org
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sha3_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c
index 264ec12c0b9c..7f6735d9003f 100644
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -152,7 +152,7 @@ static SHA3_INLINE void keccakf_round(u64 st[25])
 	st[24] ^= bc[ 4];
 }
 
-static void __optimize("O3") keccakf(u64 st[25])
+static void keccakf(u64 st[25])
 {
 	int round;
 

From a81ae8095712d1513fe8d58527c92c439b43233e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ondrej=20Mosn=C3=A1=C4=8Dek?= <omosnace@redhat.com>
Date: Wed, 13 Jun 2018 16:44:17 +0200
Subject: [PATCH 035/215] crypto: morus640 - Fix out-of-bounds access

We must load the block from the temporary variable here, not directly
from the input.

Also add forgotten zeroing-out of the uninitialized part of the
temporary block (as is done correctly in morus1280.c).

Fixes: 396be41f16fd ("crypto: morus - Add generic MORUS AEAD implementations")
Reported-by: syzbot+1fafa9c4cf42df33f716@syzkaller.appspotmail.com
Reported-by: syzbot+d82643ba80bf6937cd44@syzkaller.appspotmail.com
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/morus640.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crypto/morus640.c b/crypto/morus640.c
index 9fbcde307daf..5eede3749e64 100644
--- a/crypto/morus640.c
+++ b/crypto/morus640.c
@@ -274,8 +274,9 @@ static void crypto_morus640_decrypt_chunk(struct morus640_state *state, u8 *dst,
 		union morus640_block_in tail;
 
 		memcpy(tail.bytes, src, size);
+		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
 
-		crypto_morus640_load_a(&m, src);
+		crypto_morus640_load_a(&m, tail.bytes);
 		crypto_morus640_core(state, &m);
 		crypto_morus640_store_a(tail.bytes, &m);
 		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);

From 837bf7cc3b7504385ae0e829c72e470dfc27cf6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20B=C3=BCsch?= <m@bues.ch>
Date: Thu, 14 Jun 2018 20:08:11 +0200
Subject: [PATCH 036/215] hwrng: core - Always drop the RNG in
 hwrng_unregister()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

enable_best_rng() is used in hwrng_unregister() to switch away from the
currently active RNG, if that is the one currently being removed.
However enable_best_rng() might fail, if the next RNG's init routine
fails. In that case enable_best_rng() will return an error code and
the currently active RNG will remain active.
After unregistering this might lead to crashes due to use-after-free.

Fix this by dropping the currently active RNG, if enable_best_rng()
failed. This will result in no RNG to be active, if the next-best
one failed to initialize.

This problem was introduced by 142a27f0a731ddcf467546960a5585970ca98e21

Fixes: 142a27f0a731 ("hwrng: core - Reset user selected rng by...")
Reported-by: Wirz <spam@lukas-wirz.de>
Tested-by: Wirz <spam@lukas-wirz.de>
Signed-off-by: Michael Büsch <m@bues.ch>
Cc: stable@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 91bb98c42a1c..aaf9e5afaad4 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -516,11 +516,18 @@ EXPORT_SYMBOL_GPL(hwrng_register);
 
 void hwrng_unregister(struct hwrng *rng)
 {
+	int err;
+
 	mutex_lock(&rng_mutex);
 
 	list_del(&rng->list);
-	if (current_rng == rng)
-		enable_best_rng();
+	if (current_rng == rng) {
+		err = enable_best_rng();
+		if (err) {
+			drop_current_rng();
+			cur_rng_set_by_user = 0;
+		}
+	}
 
 	if (list_empty(&rng_list)) {
 		mutex_unlock(&rng_mutex);

From 536e0019b7da4eb3badb4da5acbb70ae29e1b5ef Mon Sep 17 00:00:00 2001
From: Helge Eichelberg <kernelorg@elchenberg.name>
Date: Tue, 5 Jun 2018 19:38:32 +0200
Subject: [PATCH 037/215] hwmon: (dell-smm) Disable fan support for Dell XPS13
 9333
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Calling fan related SMM functions implemented by Dell BIOS firmware on Dell
XPS13 9333 freeze kernel for about 500ms. Until Dell fixes it we need to
disable fan support for Dell XPS13 9333.

Via "force" module param fan support can be enabled.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=195751
Signed-off-by: Helge Eichelberg <kernelorg@elchenberg.name>
Reviewed-by: Pali Rohár <pali.rohar@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/dell-smm-hwmon.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index bf3bb7e1adab..9d3ef879dc51 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -1074,6 +1074,13 @@ static struct dmi_system_id i8k_blacklist_fan_support_dmi_table[] __initdata = {
 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Vostro 3360"),
 		},
 	},
+	{
+		.ident = "Dell XPS13 9333",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "XPS13 9333"),
+		},
+	},
 	{ }
 };
 

From 91bb8f45f73f19a0150c233c0f11cdeb6d71d1e9 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 12 Jun 2018 15:19:35 -0700
Subject: [PATCH 038/215] hwmon: (nct6775) Fix loop limit

Commit cc66b3038254 ("hwmon: (nct6775) Rework temperature source and label
handling") changed a loop limit from "data->temp_label_num - 1" to "32",
as part of moving from a string array to a bit mask. This results in the
following error, reported by UBSAN.

UBSAN: Undefined behaviour in drivers/hwmon/nct6775.c:4179:27
shift exponent 32 is too large for 32-bit type 'long unsigned int'

Similar to the original loop, the limit has to be one less than the
number of bits.

Fixes: cc66b3038254 ("hwmon: (nct6775) Rework temperature source and label handling")
Reported-by: Paul Menzel <pmenzel+linux-hwmon@molgen.mpg.de>
Cc: Paul Menzel <pmenzel+linux-hwmon@molgen.mpg.de>
Tested-by: Paul Menzel <pmenzel+linux-hwmon@molgen.mpg.de>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/nct6775.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index 155d4d1d1585..f9d1349c3286 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -4175,7 +4175,7 @@ static int nct6775_probe(struct platform_device *pdev)
 	 * The temperature is already monitored if the respective bit in <mask>
 	 * is set.
 	 */
-	for (i = 0; i < 32; i++) {
+	for (i = 0; i < 31; i++) {
 		if (!(data->temp_mask & BIT(i + 1)))
 			continue;
 		if (!reg_temp_alternate[i])

From 9fcf2b3c1c0276650fea537c71b513d27d929b05 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sun, 17 Jun 2018 10:48:22 +0200
Subject: [PATCH 039/215] drm/atmel-hlcdc: check stride values in the first
 plane

The statement always evaluates to true since the struct fields
are arrays. This has shown up as a warning when compiling with
clang:
  warning: address of array 'desc->layout.xstride' will always
      evaluate to 'true' [-Wpointer-bool-conversion]

Check for values in the first plane instead.

Fixes: 1a396789f65a ("drm: add Atmel HLCDC Display Controller support")
Cc: <stable@vger.kernel.org>
Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180617084826.31885-1-stefan@agner.ch
---
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index e18800ed7cd1..7b8191eae68a 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -875,7 +875,7 @@ static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane,
 		drm_object_attach_property(&plane->base.base,
 					   props->alpha, 255);
 
-	if (desc->layout.xstride && desc->layout.pstride) {
+	if (desc->layout.xstride[0] && desc->layout.pstride[0]) {
 		int ret;
 
 		ret = drm_plane_create_rotation_property(&plane->base,

From fb7298e1669ee84aa76f3483c91f1de4814aac11 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 31 May 2018 11:39:42 +0000
Subject: [PATCH 040/215] pinctrl: mediatek: remove redundant return value
 check of platform_get_resource()

Remove unneeded error handling on the result of a call
to platform_get_resource() when the value is passed to
devm_ioremap_resource().

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index b3799695d8db..16ff56f93501 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -1000,11 +1000,6 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev)
 		return -ENOMEM;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "Unable to get eint resource\n");
-		return -ENODEV;
-	}
-
 	pctl->eint->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(pctl->eint->base))
 		return PTR_ERR(pctl->eint->base);

From bc3322bc166a2905bc91f774d7b22773dc7c063a Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Thu, 7 Jun 2018 13:51:33 -0300
Subject: [PATCH 041/215] pinctrl: devicetree: Fix pctldev pointer overwrite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit b89405b6102f ("pinctrl: devicetree: Fix dt_to_map_one_config
handling of hogs") causes the pinctrl hog pins to not get initialized
on i.MX platforms leaving them with the IOMUX settings untouched.

This causes several regressions on i.MX such as:

- OV5640 camera driver can not be probed anymore on imx6qdl-sabresd
because the camera clock pin is in a pinctrl_hog group and since
its pinctrl initialization is skipped, the camera clock is kept
in GPIO functionality instead of CLK_CKO function.

- Audio stopped working on imx6qdl-wandboard and imx53-qsb for
the same reason.

Richard Fitzgerald explains the problem:

"I see the bug. If the hog node isn't a 1st level child of the pinctrl
parent node it will go around the for(;;) loop again but on the first
pass I overwrite pctldev with the result of
get_pinctrl_dev_from_of_node() so it doesn't point to the pinctrl driver
any more."

Fix the issue by stashing the original pctldev so it doesn't
get overwritten.

Fixes:  b89405b6102f ("pinctrl: devicetree: Fix dt_to_map_one_config handling of hogs")
Cc: <stable@vger.kernel.org>
Reported-by: Mika Penttilä <mika.penttila@nextfour.com>
Reported-by: Steve Longerbeam <slongerbeam@gmail.com>
Suggested-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
Reviewed-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/devicetree.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c
index b601039d6c69..c4aa411f5935 100644
--- a/drivers/pinctrl/devicetree.c
+++ b/drivers/pinctrl/devicetree.c
@@ -101,10 +101,11 @@ struct pinctrl_dev *of_pinctrl_get(struct device_node *np)
 }
 
 static int dt_to_map_one_config(struct pinctrl *p,
-				struct pinctrl_dev *pctldev,
+				struct pinctrl_dev *hog_pctldev,
 				const char *statename,
 				struct device_node *np_config)
 {
+	struct pinctrl_dev *pctldev = NULL;
 	struct device_node *np_pctldev;
 	const struct pinctrl_ops *ops;
 	int ret;
@@ -123,8 +124,10 @@ static int dt_to_map_one_config(struct pinctrl *p,
 			return -EPROBE_DEFER;
 		}
 		/* If we're creating a hog we can use the passed pctldev */
-		if (pctldev && (np_pctldev == p->dev->of_node))
+		if (hog_pctldev && (np_pctldev == p->dev->of_node)) {
+			pctldev = hog_pctldev;
 			break;
+		}
 		pctldev = get_pinctrl_dev_from_of_node(np_pctldev);
 		if (pctldev)
 			break;

From 7f57871f39912978e95db920ddbbfb2304a4bfbf Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 6 Jun 2018 14:43:38 +0100
Subject: [PATCH 042/215] pinctrl: single: Add allocation failure checking of
 saved_vals

Currently saved_vals is being allocated and there is no check for
failed allocation (which is more likely than normal when using
GFP_ATOMIC).  Fix this by checking for a failed allocation and
propagating this error return down the the caller chain.

Detected by CoverityScan, CID#1469841 ("Dereference null return value")
Fixes: 88a1dbdec682 ("pinctrl: pinctrl-single: Add functions to save and restore pinctrl context")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Johan Hovold <johan@kernel.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-single.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index b3153c095199..e5647dac0818 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1590,8 +1590,11 @@ static int pcs_save_context(struct pcs_device *pcs)
 
 	mux_bytes = pcs->width / BITS_PER_BYTE;
 
-	if (!pcs->saved_vals)
+	if (!pcs->saved_vals) {
 		pcs->saved_vals = devm_kzalloc(pcs->dev, pcs->size, GFP_ATOMIC);
+		if (!pcs->saved_vals)
+			return -ENOMEM;
+	}
 
 	switch (pcs->width) {
 	case 64:
@@ -1651,8 +1654,13 @@ static int pinctrl_single_suspend(struct platform_device *pdev,
 	if (!pcs)
 		return -EINVAL;
 
-	if (pcs->flags & PCS_CONTEXT_LOSS_OFF)
-		pcs_save_context(pcs);
+	if (pcs->flags & PCS_CONTEXT_LOSS_OFF) {
+		int ret;
+
+		ret = pcs_save_context(pcs);
+		if (ret < 0)
+			return ret;
+	}
 
 	return pinctrl_force_sleep(pcs->pctl);
 }

From 71fd5d07b791b90587f695f048ad82e8d4c1c67e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 8 Jun 2018 12:05:47 +0200
Subject: [PATCH 043/215] pinctrl: actions: Fix uninitialized error in
 owl_pin_config_set()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With gcc 4.1.2:

    drivers/pinctrl/actions/pinctrl-owl.c: In function ‘owl_pin_config_set’:
    drivers/pinctrl/actions/pinctrl-owl.c:336: warning: ‘ret’ may be used uninitialized in this function

Indeed, if num_configs is zero, the uninitialized value will be returned
as an error code.

Fix this by preinitializing it to zero.

Fixes: 2242ddfbf4d699b5 ("pinctrl: actions: Add Actions S900 pinctrl driver")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/actions/pinctrl-owl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/actions/pinctrl-owl.c b/drivers/pinctrl/actions/pinctrl-owl.c
index 76243caa08c6..b5c880b50bb3 100644
--- a/drivers/pinctrl/actions/pinctrl-owl.c
+++ b/drivers/pinctrl/actions/pinctrl-owl.c
@@ -333,7 +333,7 @@ static int owl_pin_config_set(struct pinctrl_dev *pctrldev,
 	unsigned long flags;
 	unsigned int param;
 	u32 reg, bit, width, arg;
-	int ret, i;
+	int ret = 0, i;
 
 	info = &pctrl->soc->padinfo[pin];
 

From 5f591543a937310e48baf0ee23680be09cdedfb8 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 14 Jun 2018 16:55:49 +0800
Subject: [PATCH 044/215] pinctrl: mt7622: fix a kernel panic when pio don't
 work as EINT controller

The function, external interrupt controller, is made as an optional to
mt7622 pinctrl. But if we don't want pio behaves as an external interrupt
controller, it would lead to hw->eint not be created properly and then
will cause 'kernel NULL pointer' issue when gpiochip try to call .to_irq
or .set_config. To fix it, check hw->eint before accessing the member.

[    1.339494] Unable to handle kernel NULL pointer dereference at virtual
	       address 00000010
[    1.347857] Mem abort info:
[    1.350742]   ESR = 0x96000005
[    1.353905]   Exception class = DABT (current EL), IL = 32 bits
[    1.360024]   SET = 0, FnV = 0
[    1.363185]   EA = 0, S1PTW = 0
[    1.366431] Data abort info:
[    1.369405]   ISV = 0, ISS = 0x00000005
[    1.373363]   CM = 0, WnR = 0
[    1.376437] [0000000000000010] user address but active_mm is swapper
[    1.383005] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[    1.388748] Modules linked in:
[    1.391897] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1+ #344
[    1.398625] Hardware name: MediaTek MT7622 RFB1 board (DT)
[    1.404279] pstate: 80000005 (Nzcv daif -PAN -UAO)
[    1.409221] pc : mtk_eint_find_irq+0x8/0x24
[    1.413532] lr : mtk_gpio_to_irq+0x20/0x28
[    1.417749] sp : ffffff800801baf0
[    1.421161] x29: ffffff800801baf0 x28: ffffff8008792f40
[    1.426637] x27: ffffff800886b000 x26: ffffff8008615620
[    1.432113] x25: ffffffc00e4dbdc8 x24: ffffff80087b8000
[    1.437589] x23: ffffffc00325a000 x22: ffffffc00325a010
[    1.443066] x21: ffffffc0033dec18 x20: 00000000ffffffea
[    1.448542] x19: ffffffc00e4db800 x18: 0000000000000130
[    1.454018] x17: 000000000000000e x16: 0000000000000007
[    1.459494] x15: ffffff80085ee000 x14: 0000000000000001
[    1.464970] x13: 0000000000000001 x12: 0000000000000010
[    1.470446] x11: 0101010101010101 x10: 0000000000000880
[    1.475922] x9 : ffffff800801b990 x8 : ffffffc0030688e0
[    1.481399] x7 : ffffff80080c0660 x6 : ffffffc00e4dbbb0
[    1.486875] x5 : 0000000000000000 x4 : 0000000000000000
[    1.492351] x3 : ffffff80082a92f4 x2 : 00000000fffffffa
[    1.497826] x1 : 0000000000000051 x0 : 0000000000000000
[    1.503305] Process swapper/0 (pid: 1, stack limit = 0x0000000054e053bd)
[    1.510210] Call trace:
[    1.512727]  mtk_eint_find_irq+0x8/0x24
[    1.516677]  mtk_gpio_to_irq+0x20/0x28
[    1.520539]  gpiod_to_irq+0x48/0x60
[    1.524135]  mmc_gpiod_request_cd_irq+0x3c/0xc4
[    1.528804]  mmc_start_host+0x6c/0x8c
[    1.532575]  mmc_add_host+0x58/0x7c
[    1.536168]  msdc_drv_probe+0x4fc/0x67c
[    1.540121]  platform_drv_probe+0x58/0xa4
[    1.544251]  driver_probe_device+0x204/0x44c
[    1.548649]  __driver_attach+0x84/0xf8
[    1.552512]  bus_for_each_dev+0x68/0xa0
[    1.556461]  driver_attach+0x20/0x28
[    1.560142]  bus_add_driver+0xec/0x240
[    1.564002]  driver_register+0x98/0xe4
[    1.567863]  __platform_driver_register+0x48/0x50
[    1.572711]  mt_msdc_driver_init+0x18/0x20
[    1.576932]  do_one_initcall+0x98/0x130
[    1.580886]  kernel_init_freeable+0x13c/0x1d4
[    1.585375]  kernel_init+0x10/0xf8
[    1.588879]  ret_from_fork+0x10/0x18
[    1.592564] Code: a8c67bfd d65f03c0 a9bf7bfd 910003fd (f9400800)
[    1.598849] ---[ end trace 4bbcb7bc30e98492 ]---
[    1.603677] Kernel panic - not syncing: Attempted to kill init!
	       exitcode=0x0000000b
[    1.603677]

cc: Kevin Hilman <khilman@baylibre.com>
Cc: stable@vger.kernel.org
Fixes: e6dabd38d8e7 ("pinctrl: mediatek: add EINT support to MT7622 SoC")
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mediatek/pinctrl-mt7622.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/mediatek/pinctrl-mt7622.c b/drivers/pinctrl/mediatek/pinctrl-mt7622.c
index ad6da1184c9f..e3f1ab2290fc 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mt7622.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mt7622.c
@@ -1459,6 +1459,9 @@ static int mtk_gpio_to_irq(struct gpio_chip *chip, unsigned int offset)
 	struct mtk_pinctrl *hw = gpiochip_get_data(chip);
 	unsigned long eint_n;
 
+	if (!hw->eint)
+		return -ENOTSUPP;
+
 	eint_n = offset;
 
 	return mtk_eint_find_irq(hw->eint, eint_n);
@@ -1471,7 +1474,8 @@ static int mtk_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
 	unsigned long eint_n;
 	u32 debounce;
 
-	if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE)
+	if (!hw->eint ||
+	    pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE)
 		return -ENOTSUPP;
 
 	debounce = pinconf_to_config_argument(config);

From 58b3d02f066e5b1480d80bd308c545526eea3250 Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Date: Wed, 13 Jun 2018 10:16:47 +0200
Subject: [PATCH 045/215] Revert "drm/sun4i: Handle DRM_BUS_FLAG_PIXDATA_*EDGE"

This reverts commit 2c17a4368aad2b88b68e4390c819e226cf320f70.

The offending commit triggers a run-time fault when accessing the panel
element of the sun4i_tcon structure when no such panel is attached.

It was apparently assumed in said commit that a panel is always used with
the TCON. Although it is often the case, this is not always true.
For instance a bridge might be used instead of a panel.

This issue was discovered using an A13-OLinuXino, that uses the TCON
in RGB mode for a simple DAC-based VGA bridge.

Cc: stable@vger.kernel.org
Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180613081647.31183-1-paul.kocialkowski@bootlin.com
---
 drivers/gpu/drm/sun4i/sun4i_tcon.c | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index c3d92d537240..8045871335b5 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -17,7 +17,6 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_modes.h>
 #include <drm/drm_of.h>
-#include <drm/drm_panel.h>
 
 #include <uapi/drm/drm_mode.h>
 
@@ -350,9 +349,6 @@ static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon,
 static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
 				     const struct drm_display_mode *mode)
 {
-	struct drm_panel *panel = tcon->panel;
-	struct drm_connector *connector = panel->connector;
-	struct drm_display_info display_info = connector->display_info;
 	unsigned int bp, hsync, vsync;
 	u8 clk_delay;
 	u32 val = 0;
@@ -410,27 +406,6 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
 	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
 		val |= SUN4I_TCON0_IO_POL_VSYNC_POSITIVE;
 
-	/*
-	 * On A20 and similar SoCs, the only way to achieve Positive Edge
-	 * (Rising Edge), is setting dclk clock phase to 2/3(240°).
-	 * By default TCON works in Negative Edge(Falling Edge),
-	 * this is why phase is set to 0 in that case.
-	 * Unfortunately there's no way to logically invert dclk through
-	 * IO_POL register.
-	 * The only acceptable way to work, triple checked with scope,
-	 * is using clock phase set to 0° for Negative Edge and set to 240°
-	 * for Positive Edge.
-	 * On A33 and similar SoCs there would be a 90° phase option,
-	 * but it divides also dclk by 2.
-	 * Following code is a way to avoid quirks all around TCON
-	 * and DOTCLOCK drivers.
-	 */
-	if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE)
-		clk_set_phase(tcon->dclk, 240);
-
-	if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
-		clk_set_phase(tcon->dclk, 0);
-
 	regmap_update_bits(tcon->regs, SUN4I_TCON0_IO_POL_REG,
 			   SUN4I_TCON0_IO_POL_HSYNC_POSITIVE | SUN4I_TCON0_IO_POL_VSYNC_POSITIVE,
 			   val);

From 8195a655e5ce09550aff81b2573d9b015d520cb9 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 18 Jun 2018 14:17:16 +0300
Subject: [PATCH 046/215] ACPI / EC: Use ec_no_wakeup on Thinkpad X1 Carbon 6th

On this system EC interrupt triggers constantly kicking devices out of
low power states and thus blocking power management. The system also has
a PCIe root port hosting Alpine Ridge Thunderbolt controller and it
never gets a chance to go to D3cold because of this.

Since the power button works the same regardless if EC interrupt is
enabled or not during s2idle, add a quirk for this machine that sets
ec_no_wakeup=true preventing spurious wakeups.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index bb94cf0731fe..442a9e24f439 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -2037,6 +2037,17 @@ static inline void acpi_ec_query_exit(void)
 	}
 }
 
+static const struct dmi_system_id acpi_ec_no_wakeup[] = {
+	{
+		.ident = "Thinkpad X1 Carbon 6th",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "20KGS3JF01"),
+		},
+	},
+	{ },
+};
+
 int __init acpi_ec_init(void)
 {
 	int result;
@@ -2047,6 +2058,15 @@ int __init acpi_ec_init(void)
 	if (result)
 		return result;
 
+	/*
+	 * Disable EC wakeup on following systems to prevent periodic
+	 * wakeup from EC GPE.
+	 */
+	if (dmi_check_system(acpi_ec_no_wakeup)) {
+		ec_no_wakeup = true;
+		pr_debug("Disabling EC wakeup on suspend-to-idle\n");
+	}
+
 	/* Drivers must be started after acpi_ec_query_init() */
 	dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver);
 	/*

From 856e7c4b619af622d56b3b454f7bec32a170ac99 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Tue, 12 Jun 2018 16:46:03 -0600
Subject: [PATCH 047/215] selftests: pstore: return Kselftest Skip code for
 skipped tests

When pstore_post_reboot test gets skipped because of unmet dependencies
and/or unsupported configuration, it returns 0 which is treated as a pass
by the Kselftest framework. This leads to false positive result even when
the test could not be run.

Change it to return kselftest skip code when a test gets skipped to clearly
report that the test could not be run.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/testing/selftests/pstore/pstore_post_reboot_tests | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests
index 6ccb154cb4aa..22f8df1ad7d4 100755
--- a/tools/testing/selftests/pstore/pstore_post_reboot_tests
+++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests
@@ -7,13 +7,16 @@
 #
 # Released under the terms of the GPL v2.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./common_tests
 
 if [ -e $REBOOT_FLAG  ]; then
     rm $REBOOT_FLAG
 else
     prlog "pstore_crash_test has not been executed yet. we skip further tests."
-    exit 0
+    exit $ksft_skip
 fi
 
 prlog -n "Mounting pstore filesystem ... "

From 8781578087b8fb8829558bac96c3c24e5ba26f82 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Tue, 12 Jun 2018 17:40:31 -0600
Subject: [PATCH 048/215] selftests: static_keys: return Kselftest Skip code
 for skipped tests

When static_keys test is skipped because of unmet dependencies and/or
unsupported configuration, it exits with error which is treated as a fail
by the Kselftest framework. This leads to false negative result even when
the test could not be run.

Change it to return kselftest skip code when a test gets skipped to clearly
report that the test could not be run.

Added an explicit searches for test_static_key_base and test_static_keys
modules and return skip code if they aren't found to differentiate between
the failure to load the module condition and module not found condition.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 .../selftests/static_keys/test_static_keys.sh       | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
index 24cff498b31a..fc9f8cde7d42 100755
--- a/tools/testing/selftests/static_keys/test_static_keys.sh
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -2,6 +2,19 @@
 # SPDX-License-Identifier: GPL-2.0
 # Runs static keys kernel module tests
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_static_key_base; then
+	echo "static_key: module test_static_key_base is not found [SKIP]"
+	exit $ksft_skip
+fi
+
+if ! /sbin/modprobe -q -n test_static_keys; then
+	echo "static_key: module test_static_keys is not found [SKIP]"
+	exit $ksft_skip
+fi
+
 if /sbin/modprobe -q test_static_key_base; then
 	if /sbin/modprobe -q test_static_keys; then
 		echo "static_key: ok"

From c7db6ffb831fd36a03485a0d88b1e505378975ad Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Tue, 12 Jun 2018 18:11:37 -0600
Subject: [PATCH 049/215] selftests: sysctl: return Kselftest Skip code for
 skipped tests

When sysctl test is skipped because of unmet dependencies and/or
unsupported configuration, it exits with error which is treated as
a fail by the Kselftest framework. This leads to false negative result
even when the test could not be run.

Change it to return kselftest skip code when a test gets skipped to
clearly report that the test could not be run.

Changed return code to kselftest skip code in skip error legs that check
requirements and module probe test error leg.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/testing/selftests/sysctl/sysctl.sh | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index ec232c3cfcaa..584eb8ea780a 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -14,6 +14,9 @@
 
 # This performs a series tests against the proc sysctl interface.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 TEST_NAME="sysctl"
 TEST_DRIVER="test_${TEST_NAME}"
 TEST_DIR=$(dirname $0)
@@ -41,7 +44,7 @@ test_modprobe()
                echo "$0: $DIR not present" >&2
                echo "You must have the following enabled in your kernel:" >&2
                cat $TEST_DIR/config >&2
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -98,28 +101,30 @@ test_reqs()
 	uid=$(id -u)
 	if [ $uid -ne 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 
 	if ! which perl 2> /dev/null > /dev/null; then
 		echo "$0: You need perl installed"
-		exit 1
+		exit $ksft_skip
 	fi
 	if ! which getconf 2> /dev/null > /dev/null; then
 		echo "$0: You need getconf installed"
-		exit 1
+		exit $ksft_skip
 	fi
 	if ! which diff 2> /dev/null > /dev/null; then
 		echo "$0: You need diff installed"
-		exit 1
+		exit $ksft_skip
 	fi
 }
 
 function load_req_mod()
 {
-	trap "test_modprobe" EXIT
-
 	if [ ! -d $DIR ]; then
+		if ! modprobe -q -n $TEST_DRIVER; then
+			echo "$0: module $TEST_DRIVER not found [SKIP]"
+			exit $ksft_skip
+		fi
 		modprobe $TEST_DRIVER
 		if [ $? -ne 0 ]; then
 			exit
@@ -765,6 +770,7 @@ function parse_args()
 test_reqs
 allow_user_defaults
 check_production_sysctl_writes_strict
+test_modprobe
 load_req_mod
 
 trap "test_finish" EXIT

From d7d5311d4aa9611fe1a5a851e6f75733237a668a Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Wed, 13 Jun 2018 21:10:48 -0600
Subject: [PATCH 050/215] selftests: user: return Kselftest Skip code for
 skipped tests

When user test is skipped because of unmet dependencies and/or
unsupported configuration, it exits with error which is treated as
a fail by the Kselftest framework. This leads to false negative result
even when the test could not be run.

Change it to return kselftest skip code when a test gets skipped to
clearly report that the test could not be run. Add an explicit check
for module presence and return skip code if module isn't present.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/testing/selftests/user/test_user_copy.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh
index d60506fc77f8..f9b31a57439b 100755
--- a/tools/testing/selftests/user/test_user_copy.sh
+++ b/tools/testing/selftests/user/test_user_copy.sh
@@ -2,6 +2,13 @@
 # SPDX-License-Identifier: GPL-2.0
 # Runs copy_to/from_user infrastructure using test_user_copy kernel module
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_user_copy; then
+	echo "user: module test_user_copy is not found [SKIP]"
+	exit $ksft_skip
+fi
 if /sbin/modprobe -q test_user_copy; then
 	/sbin/modprobe -q -r test_user_copy
 	echo "user_copy: ok"

From 685814466bf8398192cf855415a0bb2cefc1930e Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Thu, 14 Jun 2018 16:56:13 -0600
Subject: [PATCH 051/215] selftests: zram: return Kselftest Skip code for
 skipped tests

When zram test is skipped because of unmet dependencies and/or
unsupported configuration, it exits with error which is treated as
a fail by the Kselftest framework. This leads to false negative result
even when the test could not be run.

Change it to return kselftest skip code when a test gets skipped to
clearly report that the test could not be run.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/testing/selftests/zram/zram.sh     | 5 ++++-
 tools/testing/selftests/zram/zram_lib.sh | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
index 754de7da426a..232e958ec454 100755
--- a/tools/testing/selftests/zram/zram.sh
+++ b/tools/testing/selftests/zram/zram.sh
@@ -2,6 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 TCID="zram.sh"
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./zram_lib.sh
 
 run_zram () {
@@ -24,5 +27,5 @@ elif [ -b /dev/zram0 ]; then
 else
 	echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
 	echo "$TCID : CONFIG_ZRAM is not set"
-	exit 1
+	exit $ksft_skip
 fi
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
index f6a9c73e7a44..9e73a4fb9b0a 100755
--- a/tools/testing/selftests/zram/zram_lib.sh
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -18,6 +18,9 @@ MODULE=0
 dev_makeswap=-1
 dev_mounted=-1
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 trap INT
 
 check_prereqs()
@@ -27,7 +30,7 @@ check_prereqs()
 
 	if [ $uid -ne 0 ]; then
 		echo $msg must be run as root >&2
-		exit 0
+		exit $ksft_skip
 	fi
 }
 

From a4d7537789724985cafbc9260a31ca4f2b7cf123 Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Wed, 13 Jun 2018 21:31:43 -0600
Subject: [PATCH 052/215] selftests: vm: return Kselftest Skip code for skipped
 tests

When vm test is skipped because of unmet dependencies and/or unsupported
configuration, it exits with error which is treated as a fail by the
Kselftest framework. This leads to false negative result even when the
test could not be run.

Change it to return kselftest skip code when a test gets skipped to
clearly report that the test could not be run.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Acked-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/testing/selftests/vm/compaction_test.c |  4 +++-
 tools/testing/selftests/vm/mlock2-tests.c    | 12 +++++++-----
 tools/testing/selftests/vm/run_vmtests       |  5 ++++-
 tools/testing/selftests/vm/userfaultfd.c     |  4 +++-
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index 1097f04e4d80..bcec71250873 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -16,6 +16,8 @@
 #include <unistd.h>
 #include <string.h>
 
+#include "../kselftest.h"
+
 #define MAP_SIZE 1048576
 
 struct map_list {
@@ -169,7 +171,7 @@ int main(int argc, char **argv)
 		printf("Either the sysctl compact_unevictable_allowed is not\n"
 		       "set to 1 or couldn't read the proc file.\n"
 		       "Skipping the test\n");
-		return 0;
+		return KSFT_SKIP;
 	}
 
 	lim.rlim_cur = RLIM_INFINITY;
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c
index 4997b9222cfa..637b6d0ac0d0 100644
--- a/tools/testing/selftests/vm/mlock2-tests.c
+++ b/tools/testing/selftests/vm/mlock2-tests.c
@@ -9,6 +9,8 @@
 #include <stdbool.h>
 #include "mlock2.h"
 
+#include "../kselftest.h"
+
 struct vm_boundaries {
 	unsigned long start;
 	unsigned long end;
@@ -303,7 +305,7 @@ static int test_mlock_lock()
 	if (mlock2_(map, 2 * page_size, 0)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("mlock2(0)");
 		goto unmap;
@@ -412,7 +414,7 @@ static int test_mlock_onfault()
 	if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("mlock2(MLOCK_ONFAULT)");
 		goto unmap;
@@ -425,7 +427,7 @@ static int test_mlock_onfault()
 	if (munlock(map, 2 * page_size)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("munlock()");
 		goto unmap;
@@ -457,7 +459,7 @@ static int test_lock_onfault_of_present()
 	if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("mlock2(MLOCK_ONFAULT)");
 		goto unmap;
@@ -583,7 +585,7 @@ static int test_vma_management(bool call_mlock)
 	if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
 		if (errno == ENOSYS) {
 			printf("Cannot call new mlock family, skipping test\n");
-			_exit(0);
+			_exit(KSFT_SKIP);
 		}
 		perror("mlock(ONFAULT)\n");
 		goto out;
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 22d564673830..88cbe5575f0c 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -2,6 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 #please run as root
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 mnt=./huge
 exitcode=0
 
@@ -36,7 +39,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
 		echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
 		if [ $? -ne 0 ]; then
 			echo "Please run this test as root"
-			exit 1
+			exit $ksft_skip
 		fi
 		while read name size unit; do
 			if [ "$name" = "HugePages_Free:" ]; then
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index de2f9ec8a87f..7b8171e3128a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -69,6 +69,8 @@
 #include <setjmp.h>
 #include <stdbool.h>
 
+#include "../kselftest.h"
+
 #ifdef __NR_userfaultfd
 
 static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
@@ -1322,7 +1324,7 @@ int main(int argc, char **argv)
 int main(void)
 {
 	printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
-	return 0;
+	return KSFT_SKIP;
 }
 
 #endif /* __NR_userfaultfd */

From d6a3e55131fcb1e5ca1753f4b6f297a177b2fc91 Mon Sep 17 00:00:00 2001
From: Fathi Boudra <fathi.boudra@linaro.org>
Date: Thu, 14 Jun 2018 11:57:08 +0200
Subject: [PATCH 053/215] selftests: sync: add config fragment for testing sync
 framework

Unless the software synchronization objects (CONFIG_SW_SYNC) is enabled,
the sync test will be skipped:

TAP version 13
1..0 # Skipped: Sync framework not supported by kernel

Add a config fragment file to be able to run "make kselftest-merge" to
enable relevant configuration required in order to run the sync test.

Signed-off-by: Fathi Boudra <fathi.boudra@linaro.org>
Link: https://lkml.org/lkml/2017/5/5/14
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/testing/selftests/sync/config | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 tools/testing/selftests/sync/config

diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
new file mode 100644
index 000000000000..1ab7e8130db2
--- /dev/null
+++ b/tools/testing/selftests/sync/config
@@ -0,0 +1,4 @@
+CONFIG_STAGING=y
+CONFIG_ANDROID=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y

From 3c62c91a3635d6c4ef23e00b4fc84fa77bbf99cc Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Tue, 12 Jun 2018 16:50:37 -0600
Subject: [PATCH 054/215] selftests: sparc64: Fix to do nothing on non-sparc64

sparc64 test fails with the following errors on non-sparc64 systems. Fix
the Makefile to do nothing on non-sparc64 systems to suppress the errors:

make run_tests
adi-test.c: Assembler messages:
adi-test.c:302: Error: no such instruction: `rd %tick,%r13'
adi-test.c:304: Error: no such instruction: `rd %tick,%rbp'
adi-test.c:190: Error: no such instruction: `rd %tick,%rbp'
adi-test.c:192: Error: no such instruction: `rd %tick,%rdx'
adi-test.c:273: Error: no such instruction: `rd %tick,%rbx'
adi-test.c:276: Error: no such instruction: `rd %tick,%rdx'
adi-test.c:217: Error: no such instruction: `rd %tick,%rbp'
adi-test.c:220: Error: no such instruction: `rd %tick,%rdx'
adi-test.c:79: Error: no such instruction: `rd %tick,%rax'
adi-test.c:79: Error: no such instruction: `rd %tick,%rax'
adi-test.c:79: Error: no such instruction: `rd %tick,%rax'
adi-test.c:79: Error: no such instruction: `rd %tick,%rax'
adi-test.c:246: Error: no such instruction: `rd %tick,%rbp'
adi-test.c:248: Error: no such instruction: `rd %tick,%rdx'
adi-test.c:79: Error: no such instruction: `rd %tick,%rax'
adi-test.c:79: Error: no such instruction: `rd %tick,%rax'
adi-test.c:79: Error: no such instruction: `rd %tick,%rax'
<builtin>: recipe for target 'adi-test' failed
make[1]: *** [adi-test] Error 1
adi: [FAIL]
./drivers_test.sh: 24: ./drivers_test.sh: ./adi-test: not found
../lib.mk:73: recipe for target 'run_tests' failed
make: *** [run_tests] Error 1

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Reviewed-by: Tom Hromatka <tom.hromatka@oracle.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/testing/selftests/sparc64/Makefile | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile
index 2082eeffd779..442f0ca45441 100644
--- a/tools/testing/selftests/sparc64/Makefile
+++ b/tools/testing/selftests/sparc64/Makefile
@@ -1,7 +1,17 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/)
+
+ifneq ($(ARCH),sparc64)
+nothing:
+.PHONY: all clean run_tests install
+.SILENT:
+else
+
 SUBDIRS := drivers
 
 TEST_PROGS := run.sh
 
+
 .PHONY: all clean
 
 include ../lib.mk
@@ -44,3 +54,4 @@ override define CLEAN
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
 	done
 endef
+endif

From 953c9d28d2c9bca15a17b11a022fbc657cc5631a Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Tue, 12 Jun 2018 17:12:24 -0600
Subject: [PATCH 055/215] selftests: sparc64: delete RUN_TESTS and EMIT_TESTS
 overrides

Delete RUN_TESTS and EMIT_TESTS overrides and use common defines in
lib.mk. Common defines work just fine and there is no need to define
custom overrides.

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Reviewed-by: Tom Hromatka <tom.hromatka@oracle.com>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/testing/selftests/sparc64/Makefile | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile
index 442f0ca45441..76b2206932c3 100644
--- a/tools/testing/selftests/sparc64/Makefile
+++ b/tools/testing/selftests/sparc64/Makefile
@@ -28,10 +28,6 @@ all:
 		fi \
 	done
 
-override define RUN_TESTS
-	@cd $(OUTPUT); ./run.sh
-endef
-
 override define INSTALL_RULE
 	mkdir -p $(INSTALL_PATH)
 	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -43,10 +39,6 @@ override define INSTALL_RULE
 	done;
 endef
 
-override define EMIT_TESTS
-	echo "./run.sh"
-endef
-
 override define CLEAN
 	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\

From eb83d5f7d07ed913d62ca4ad1e14fb6ca4937bab Mon Sep 17 00:00:00 2001
From: "Shuah Khan (Samsung OSG)" <shuah@kernel.org>
Date: Wed, 13 Jun 2018 16:20:52 -0600
Subject: [PATCH 056/215] selftests: sparc64: Add missing SPDX License
 Identifiers

Add missing SPDX License Identifiers to Makefile(s).

Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
Reviewed-by: Tom Hromatka <tom.hromatka@oracle.com>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/testing/selftests/sparc64/Makefile         | 1 +
 tools/testing/selftests/sparc64/drivers/Makefile | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile
index 76b2206932c3..a19531dba4dc 100644
--- a/tools/testing/selftests/sparc64/Makefile
+++ b/tools/testing/selftests/sparc64/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/)
 
diff --git a/tools/testing/selftests/sparc64/drivers/Makefile b/tools/testing/selftests/sparc64/drivers/Makefile
index 6264f40bbdbc..deb0df415565 100644
--- a/tools/testing/selftests/sparc64/drivers/Makefile
+++ b/tools/testing/selftests/sparc64/drivers/Makefile
@@ -1,4 +1,4 @@
-
+# SPDX-License-Identifier: GPL-2.0
 INCLUDEDIR := -I.
 CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
 

From 326345f995a83e326fa2e01d54bfa9a6a307bd4d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 26 May 2018 19:12:51 +0200
Subject: [PATCH 057/215] MIPS: pb44: Fix i2c-gpio GPIO descriptor table

I used bad names in my clumsiness when rewriting many board
files to use GPIO descriptors instead of platform data. A few
had the platform_device ID set to -1 which would indeed give
the device name "i2c-gpio".

But several had it set to >=0 which gives the names
"i2c-gpio.0", "i2c-gpio.1" ...

Fix the one affected board in the MIPS tree. Sorry.

Fixes: b2e63555592f ("i2c: gpio: Convert to use descriptors")
Reported-by: Simon Guinot <simon.guinot@sequanux.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Paul Burton <paul.burton@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Wolfram Sang <wsa@the-dreams.de>
Cc: Simon Guinot <simon.guinot@sequanux.org>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 4.15+
Patchwork: https://patchwork.linux-mips.org/patch/19387/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/ath79/mach-pb44.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/ath79/mach-pb44.c b/arch/mips/ath79/mach-pb44.c
index 6b2c6f3baefa..75fb96ca61db 100644
--- a/arch/mips/ath79/mach-pb44.c
+++ b/arch/mips/ath79/mach-pb44.c
@@ -34,7 +34,7 @@
 #define PB44_KEYS_DEBOUNCE_INTERVAL	(3 * PB44_KEYS_POLL_INTERVAL)
 
 static struct gpiod_lookup_table pb44_i2c_gpiod_table = {
-	.dev_id = "i2c-gpio",
+	.dev_id = "i2c-gpio.0",
 	.table = {
 		GPIO_LOOKUP_IDX("ath79-gpio", PB44_GPIO_I2C_SDA,
 				NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),

From 2a027b47dba6b77ab8c8e47b589ae9bbc5ac6175 Mon Sep 17 00:00:00 2001
From: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Date: Sun, 3 Jun 2018 23:02:01 +0900
Subject: [PATCH 058/215] MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe
 erratum
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The erratum and workaround are described by BCM5300X-ES300-RDS.pdf as
below.

  R10: PCIe Transactions Periodically Fail

    Description: The BCM5300X PCIe does not maintain transaction ordering.
                 This may cause PCIe transaction failure.
    Fix Comment: Add a dummy PCIe configuration read after a PCIe
                 configuration write to ensure PCIe configuration access
                 ordering. Set ES bit of CP0 configu7 register to enable
                 sync function so that the sync instruction is functional.
    Resolution:  hndpci.c: extpci_write_config()
                 hndmips.c: si_mips_init()
                 mipsinc.h CONF7_ES

This is fixed by the CFE MIPS bcmsi chipset driver also for BCM47XX.
Also the dummy PCIe configuration read is already implemented in the
Linux BCMA driver.

Enable ExternalSync in Config7 when CONFIG_BCMA_DRIVER_PCI_HOSTMODE=y
too so that the sync instruction is externalised.

Signed-off-by: Tokunori Ikegami <ikegami@allied-telesis.co.jp>
Reviewed-by: Paul Burton <paul.burton@mips.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: Rafał Miłecki <zajec5@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/19461/
Signed-off-by: James Hogan <jhogan@kernel.org>
---
 arch/mips/bcm47xx/setup.c        | 6 ++++++
 arch/mips/include/asm/mipsregs.h | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index 6054d49e608e..8c9cbf13d32a 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -212,6 +212,12 @@ static int __init bcm47xx_cpu_fixes(void)
 		 */
 		if (bcm47xx_bus.bcma.bus.chipinfo.id == BCMA_CHIP_ID_BCM4706)
 			cpu_wait = NULL;
+
+		/*
+		 * BCM47XX Erratum "R10: PCIe Transactions Periodically Fail"
+		 * Enable ExternalSync for sync instruction to take effect
+		 */
+		set_c0_config7(MIPS_CONF7_ES);
 		break;
 #endif
 	}
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index ae461d91cd1f..0bc270806ec5 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -681,6 +681,8 @@
 #define MIPS_CONF7_WII		(_ULCAST_(1) << 31)
 
 #define MIPS_CONF7_RPS		(_ULCAST_(1) << 2)
+/* ExternalSync */
+#define MIPS_CONF7_ES		(_ULCAST_(1) << 8)
 
 #define MIPS_CONF7_IAR		(_ULCAST_(1) << 10)
 #define MIPS_CONF7_AR		(_ULCAST_(1) << 16)
@@ -2765,6 +2767,7 @@ __BUILD_SET_C0(status)
 __BUILD_SET_C0(cause)
 __BUILD_SET_C0(config)
 __BUILD_SET_C0(config5)
+__BUILD_SET_C0(config7)
 __BUILD_SET_C0(intcontrol)
 __BUILD_SET_C0(intctl)
 __BUILD_SET_C0(srsmap)

From 1264f8325e9b8c004f36f1ae7bacd2a46a7ed771 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 18 Jun 2018 18:06:13 +1000
Subject: [PATCH 059/215] drm/nouveau/kms/nv50-: cursors always use core
 channel vram ctxdma

Ctxdmas for cursors from all heads are setup in the core channel, and due
to us tracking allocated handles per-window, we were failing with -EEXIST
on multiple-head setups trying to allocate duplicate handles.

The cursor code is hardcoded to use the core channel vram ctxdma already,
so just skip ctxdma allocation for cursor fbs to fix the issue.

Fixes: 5bca1621c07 ("drm/nouveau/kms/nv50-: move fb ctxdma tracking into windows")
Reported-by: Adam Borowski <kilobyte@angband.pl>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/dispnv50/curs507a.c |  2 +-
 drivers/gpu/drm/nouveau/dispnv50/wndw.c     | 13 ++++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
index 291c08117ab6..397143b639c6 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
@@ -132,7 +132,7 @@ curs507a_new_(const struct nv50_wimm_func *func, struct nouveau_drm *drm,
 
 	nvif_object_map(&wndw->wimm.base.user, NULL, 0);
 	wndw->immd = func;
-	wndw->ctxdma.parent = &disp->core->chan.base.user;
+	wndw->ctxdma.parent = NULL;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 224963b533a6..c5a9bc1af5af 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -444,14 +444,17 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
 	if (ret)
 		return ret;
 
-	ctxdma = nv50_wndw_ctxdma_new(wndw, fb);
-	if (IS_ERR(ctxdma)) {
-		nouveau_bo_unpin(fb->nvbo);
-		return PTR_ERR(ctxdma);
+	if (wndw->ctxdma.parent) {
+		ctxdma = nv50_wndw_ctxdma_new(wndw, fb);
+		if (IS_ERR(ctxdma)) {
+			nouveau_bo_unpin(fb->nvbo);
+			return PTR_ERR(ctxdma);
+		}
+
+		asyw->image.handle[0] = ctxdma->object.handle;
 	}
 
 	asyw->state.fence = reservation_object_get_excl_rcu(fb->nvbo->bo.resv);
-	asyw->image.handle[0] = ctxdma->object.handle;
 	asyw->image.offset[0] = fb->nvbo->bo.offset;
 
 	if (wndw->func->prepare) {

From 0fe2795516b9e1c59b58b02bdf8658698117ec4e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 18 Jun 2018 16:07:59 +0200
Subject: [PATCH 060/215] posix-timers: Fix nanosleep_copyout() for
 CONFIG_COMPAT_32BIT_TIME

Commit b5793b0d92c9 added support for building the nanosleep compat system
call on 32-bit architectures, but missed one change in nanosleep_copyout(),
which would trigger a BUG() as soon as any architecture is switched over to
use it.

Use the proper config symbol to enable the code path.

Fixes: Commit b5793b0d92c9 ("posix-timers: Make compat syscalls depend on CONFIG_COMPAT_32BIT_TIME")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: y2038@lists.linaro.org
Cc: Anna-Maria Gleixner <anna-maria@linutronix.de>
Cc: Deepa Dinamani <deepa.kernel@gmail.com>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Link: https://lkml.kernel.org/r/20180618140811.2998503-1-arnd@arndb.de
---
 kernel/time/hrtimer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 055a4a728c00..3e93c54bd3a1 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1659,7 +1659,7 @@ EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
 {
 	switch(restart->nanosleep.type) {
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 	case TT_COMPAT:
 		if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp))
 			return -EFAULT;

From ee3dbcf963c17775417d2fdf74e60f0c1b563dda Mon Sep 17 00:00:00 2001
From: Ilia Lin <ilia.lin@gmail.com>
Date: Sun, 17 Jun 2018 21:58:42 +0200
Subject: [PATCH 061/215] cpufreq: kryo: Fix possible error code dereference

In event of error returned by the nvmem_cell_read() non-pointer value
may be dereferenced. Fix this with error handling.
Additionally free the allocated speedbin buffer, as per the API.

Fixes: 9ce36edd1a52 (cpufreq: Add Kryo CPU scaling driver)
Signed-off-by: Ilia Lin <ilia.lin@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/qcom-cpufreq-kryo.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c
index d049fe4b80c4..74b9b93d511b 100644
--- a/drivers/cpufreq/qcom-cpufreq-kryo.c
+++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
@@ -115,6 +115,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
 
 	speedbin = nvmem_cell_read(speedbin_nvmem, &len);
 	nvmem_cell_put(speedbin_nvmem);
+	if (IS_ERR(speedbin))
+		return PTR_ERR(speedbin);
 
 	switch (msm8996_version) {
 	case MSM8996_V3:
@@ -127,6 +129,7 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
 		BUG();
 		break;
 	}
+	kfree(speedbin);
 
 	for_each_possible_cpu(cpu) {
 		cpu_dev = get_cpu_device(cpu);

From 5ad7346b4ae2d59cfee106ec8e40ee2561476d47 Mon Sep 17 00:00:00 2001
From: Ilia Lin <ilia.lin@gmail.com>
Date: Sun, 17 Jun 2018 22:01:46 +0200
Subject: [PATCH 062/215] cpufreq: kryo: Add module remove and exit

Add device remove and module exit code to make the driver
functioning as a loadable module.

Fixes: ac28927659be (cpufreq: kryo: allow building as a loadable module)
Signed-off-by: Ilia Lin <ilia.lin@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/qcom-cpufreq-kryo.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c
index 74b9b93d511b..01bddacf5c3b 100644
--- a/drivers/cpufreq/qcom-cpufreq-kryo.c
+++ b/drivers/cpufreq/qcom-cpufreq-kryo.c
@@ -42,6 +42,8 @@ enum _msm8996_version {
 	NUM_OF_MSM8996_VERSIONS,
 };
 
+struct platform_device *cpufreq_dt_pdev, *kryo_cpufreq_pdev;
+
 static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
 {
 	size_t len;
@@ -74,7 +76,6 @@ static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
 static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
 {
 	struct opp_table *opp_tables[NR_CPUS] = {0};
-	struct platform_device *cpufreq_dt_pdev;
 	enum _msm8996_version msm8996_version;
 	struct nvmem_cell *speedbin_nvmem;
 	struct device_node *np;
@@ -165,8 +166,15 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
 	return ret;
 }
 
+static int qcom_cpufreq_kryo_remove(struct platform_device *pdev)
+{
+	platform_device_unregister(cpufreq_dt_pdev);
+	return 0;
+}
+
 static struct platform_driver qcom_cpufreq_kryo_driver = {
 	.probe = qcom_cpufreq_kryo_probe,
+	.remove = qcom_cpufreq_kryo_remove,
 	.driver = {
 		.name = "qcom-cpufreq-kryo",
 	},
@@ -201,8 +209,9 @@ static int __init qcom_cpufreq_kryo_init(void)
 	if (unlikely(ret < 0))
 		return ret;
 
-	ret = PTR_ERR_OR_ZERO(platform_device_register_simple(
-		"qcom-cpufreq-kryo", -1, NULL, 0));
+	kryo_cpufreq_pdev = platform_device_register_simple(
+		"qcom-cpufreq-kryo", -1, NULL, 0);
+	ret = PTR_ERR_OR_ZERO(kryo_cpufreq_pdev);
 	if (0 == ret)
 		return 0;
 
@@ -211,5 +220,12 @@ static int __init qcom_cpufreq_kryo_init(void)
 }
 module_init(qcom_cpufreq_kryo_init);
 
+static void __init qcom_cpufreq_kryo_exit(void)
+{
+	platform_device_unregister(kryo_cpufreq_pdev);
+	platform_driver_unregister(&qcom_cpufreq_kryo_driver);
+}
+module_exit(qcom_cpufreq_kryo_exit);
+
 MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver");
 MODULE_LICENSE("GPL v2");

From ff7c9917143b3a6cf2fa61212a32d67cf259bf9c Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 18 Jun 2018 12:47:45 -0700
Subject: [PATCH 063/215] cpufreq: intel_pstate: Fix scaling max/min limits
 with Turbo 3.0

When scaling max/min settings are changed, internally they are converted
to a ratio using the max turbo 1 core turbo frequency. This works fine
when 1 core max is same irrespective of the core. But under Turbo 3.0,
this will not be the case. For example:
Core 0: max turbo pstate: 43 (4.3GHz)
Core 1: max turbo pstate: 45 (4.5GHz)
In this case 1 core turbo ratio will be maximum of all, so it will be
45 (4.5GHz). Suppose scaling max is set to 4GHz (ratio 40) for all cores
,then on core one it will be
 = max_state * policy->max / max_freq;
 = 43 * (4000000/4500000) = 38 (3.8GHz)
 = 38
which is 200MHz less than the desired.
On core2, it will be correctly set to ratio 40 (4GHz). Same holds true
for scaling min frequency limit. So this requires usage of correct turbo
max frequency for core one, which in this case is 4.3GHz. So we need to
adjust per CPU cpu->pstate.turbo_freq using the maximum HWP ratio of that
core.

This change uses the HWP capability of a core to adjust max turbo
frequency. But since Broadwell HWP doesn't use ratios in the HWP
capabilities, we have to use legacy max 1 core turbo ratio. This is not
a problem as the HWP capabilities don't differ among cores in Broadwell.
We need to check for non Broadwell CPU model for applying this change,
though.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: 4.6+ <stable@vger.kernel.org> # 4.6+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1de5ec8d5ea3..ece120da3353 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -294,6 +294,7 @@ struct pstate_funcs {
 static struct pstate_funcs pstate_funcs __read_mostly;
 
 static int hwp_active __read_mostly;
+static int hwp_mode_bdw __read_mostly;
 static bool per_cpu_limits __read_mostly;
 static bool hwp_boost __read_mostly;
 
@@ -1413,7 +1414,15 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 	cpu->pstate.scaling = pstate_funcs.get_scaling();
 	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
-	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+
+	if (hwp_active && !hwp_mode_bdw) {
+		unsigned int phy_max, current_max;
+
+		intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
+		cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
+	} else {
+		cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	}
 
 	if (pstate_funcs.get_aperf_mperf_shift)
 		cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
@@ -2467,28 +2476,36 @@ static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
 static inline void intel_pstate_request_control_from_smm(void) {}
 #endif /* CONFIG_ACPI */
 
+#define INTEL_PSTATE_HWP_BROADWELL	0x01
+
+#define ICPU_HWP(model, hwp_mode) \
+	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode }
+
 static const struct x86_cpu_id hwp_support_ids[] __initconst = {
-	{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
+	ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
+	ICPU_HWP(INTEL_FAM6_BROADWELL_XEON_D, INTEL_PSTATE_HWP_BROADWELL),
+	ICPU_HWP(X86_MODEL_ANY, 0),
 	{}
 };
 
 static int __init intel_pstate_init(void)
 {
+	const struct x86_cpu_id *id;
 	int rc;
 
 	if (no_load)
 		return -ENODEV;
 
-	if (x86_match_cpu(hwp_support_ids)) {
+	id = x86_match_cpu(hwp_support_ids);
+	if (id) {
 		copy_cpu_funcs(&core_funcs);
 		if (!no_hwp) {
 			hwp_active++;
+			hwp_mode_bdw = id->driver_data;
 			intel_pstate.attr = hwp_cpufreq_attrs;
 			goto hwp_cpu_matched;
 		}
 	} else {
-		const struct x86_cpu_id *id;
-
 		id = x86_match_cpu(intel_pstate_cpu_ids);
 		if (!id)
 			return -ENODEV;

From c5c2a97b3ac7d1ec19e7cff9e38caca6afefc3de Mon Sep 17 00:00:00 2001
From: Waldemar Rymarkiewicz <waldemar.rymarkiewicz@gmail.com>
Date: Thu, 14 Jun 2018 15:56:08 +0200
Subject: [PATCH 064/215] PM / OPP: Update voltage in case freq == old_freq

This commit fixes a rare but possible case when the clk rate is updated
without update of the regulator voltage.

At boot up, CPUfreq checks if the system is running at the right freq. This
is a sanity check in case a bootloader set clk rate that is outside of freq
table present with cpufreq core. In such cases system can be unstable so
better to change it to a freq that is preset in freq-table.

The CPUfreq takes next freq that is >= policy->cur and this is our
target_freq that needs to be set now.

dev_pm_opp_set_rate(dev, target_freq) checks the target_freq and the
old_freq (a current rate). If these are equal it returns early. If not,
it searches for OPP (old_opp) that fits best to old_freq (not listed in
the table) and updates old_freq (!).

Here, we can end up with old_freq = old_opp.rate = target_freq, which
is not handled in _generic_set_opp_regulator(). It's supposed to update
voltage only when freq > old_freq  || freq > old_freq.

if (freq > old_freq) {
		ret = _set_opp_voltage(dev, reg, new_supply);
[...]
if (freq < old_freq) {
		ret = _set_opp_voltage(dev, reg, new_supply);
		if (ret)

It results in, no voltage update while clk rate is updated.

Example:
freq-table = {
	1000MHz   1.15V
	 666MHZ   1.10V
	 333MHz   1.05V
}
boot-up-freq        = 800MHz   # not listed in freq-table
freq = target_freq  = 1GHz
old_freq            = 800Mhz
old_opp = _find_freq_ceil(opp_table, &old_freq);  #(old_freq is modified!)
old_freq            = 1GHz

Fixes: 6a0712f6f199 ("PM / OPP: Add dev_pm_opp_set_rate()")
Cc: 4.6+ <stable@vger.kernel.org> # v4.6+
Signed-off-by: Waldemar Rymarkiewicz <waldemar.rymarkiewicz@gmail.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index ab2f3fead6b1..31ff03dbeb83 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -598,7 +598,7 @@ static int _generic_set_opp_regulator(const struct opp_table *opp_table,
 	}
 
 	/* Scaling up? Scale voltage before frequency */
-	if (freq > old_freq) {
+	if (freq >= old_freq) {
 		ret = _set_opp_voltage(dev, reg, new_supply);
 		if (ret)
 			goto restore_voltage;

From 69a8405999aa1c489de4b8d349468f0c2b83f093 Mon Sep 17 00:00:00 2001
From: Michael Jeanson <mjeanson@efficios.com>
Date: Thu, 14 Jun 2018 11:27:42 -0400
Subject: [PATCH 065/215] powerpc/e500mc: Set assembler machine type to e500mc

In binutils 2.26 a new opcode for the "wait" instruction was added for the
POWER9 and has precedence over the one specific to the e500mc. Commit
ebf714ff3756 ("powerpc/e500mc: Add support for the wait instruction in
e500_idle") uses this instruction specifically on the e500mc to work around
an erratum.

This results in an invalid instruction in idle_e500 when we build for the
e500mc on bintutils >= 2.26 with the default assembler machine type.

Since multiplatform between e500 and non-e500 is not supported, set the
assembler machine type globaly when CONFIG_PPC_E500MC=y.

Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
CC: Paul Mackerras <paulus@samba.org>
CC: Michael Ellerman <mpe@ellerman.id.au>
CC: Kumar Gala <galak@kernel.crashing.org>
CC: Vakul Garg <vakul.garg@nxp.com>
CC: Scott Wood <swood@redhat.com>
CC: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: linuxppc-dev@lists.ozlabs.org
CC: linux-kernel@vger.kernel.org
CC: stable@vger.kernel.org
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index bd06a3ccda31..2ea575cb3401 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -244,6 +244,7 @@ cpu-as-$(CONFIG_4xx)		+= -Wa,-m405
 cpu-as-$(CONFIG_ALTIVEC)	+= $(call as-option,-Wa$(comma)-maltivec)
 cpu-as-$(CONFIG_E200)		+= -Wa,-me200
 cpu-as-$(CONFIG_PPC_BOOK3S_64)	+= -Wa,-mpower4
+cpu-as-$(CONFIG_PPC_E500MC)	+= $(call as-option,-Wa$(comma)-me500mc)
 
 KBUILD_AFLAGS += $(cpu-as-y)
 KBUILD_CFLAGS += $(cpu-as-y)

From 02390f66bd2362df114a0a0770d80ec33061f6d1 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 15 Jun 2018 11:38:37 +1000
Subject: [PATCH 066/215] powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush
 miss problem with THP

The patch 99baac21e4 ("mm: fix MADV_[FREE|DONTNEED] TLB flush miss
problem") added a force flush mode to the mmu_gather flush, which
unconditionally flushes the entire address range being invalidated
(even if actual ptes only covered a smaller range), to solve a problem
with concurrent threads invalidating the same PTEs causing them to
miss TLBs that need flushing.

This does not work with powerpc that invalidates mmu_gather batches
according to page size. Have powerpc flush all possible page sizes in
the range if it encounters this concurrency condition.

Patch 4647706ebe ("mm: always flush VMA ranges affected by
zap_page_range") does add a TLB flush for all page sizes on powerpc for
the zap_page_range case, but that is to be removed and replaced with
the mmu_gather flush to avoid redundant flushing. It is also thought to
not cover other obscure race conditions:

https://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com

Hash does not have a problem because it invalidates TLBs inside the
page table locks.

Reported-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/tlb-radix.c | 96 +++++++++++++++++++++++++++++--------
 1 file changed, 75 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 67a6e86d3e7e..a734e486664d 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -689,22 +689,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
 
-void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-		     unsigned long end)
+static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+					unsigned long start, unsigned long end,
+					bool flush_all_sizes)
 
 {
-	struct mm_struct *mm = vma->vm_mm;
 	unsigned long pid;
 	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
 	unsigned long page_size = 1UL << page_shift;
 	unsigned long nr_pages = (end - start) >> page_shift;
 	bool local, full;
 
-#ifdef CONFIG_HUGETLB_PAGE
-	if (is_vm_hugetlb_page(vma))
-		return radix__flush_hugetlb_tlb_range(vma, start, end);
-#endif
-
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		return;
@@ -738,37 +733,64 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 				_tlbie_pid(pid, RIC_FLUSH_TLB);
 		}
 	} else {
-		bool hflush = false;
+		bool hflush = flush_all_sizes;
+		bool gflush = flush_all_sizes;
 		unsigned long hstart, hend;
+		unsigned long gstart, gend;
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
-		hend = end >> HPAGE_PMD_SHIFT;
-		if (hstart < hend) {
-			hstart <<= HPAGE_PMD_SHIFT;
-			hend <<= HPAGE_PMD_SHIFT;
+		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 			hflush = true;
+
+		if (hflush) {
+			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
+			hend = end & PMD_MASK;
+			if (hstart == hend)
+				hflush = false;
+		}
+
+		if (gflush) {
+			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
+			gend = end & PUD_MASK;
+			if (gstart == gend)
+				gflush = false;
 		}
-#endif
 
 		asm volatile("ptesync": : :"memory");
 		if (local) {
 			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
 				__tlbiel_va_range(hstart, hend, pid,
-						HPAGE_PMD_SIZE, MMU_PAGE_2M);
+						PMD_SIZE, MMU_PAGE_2M);
+			if (gflush)
+				__tlbiel_va_range(gstart, gend, pid,
+						PUD_SIZE, MMU_PAGE_1G);
 			asm volatile("ptesync": : :"memory");
 		} else {
 			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
 				__tlbie_va_range(hstart, hend, pid,
-						HPAGE_PMD_SIZE, MMU_PAGE_2M);
+						PMD_SIZE, MMU_PAGE_2M);
+			if (gflush)
+				__tlbie_va_range(gstart, gend, pid,
+						PUD_SIZE, MMU_PAGE_1G);
 			fixup_tlbie();
 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
 		}
 	}
 	preempt_enable();
 }
+
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	if (is_vm_hugetlb_page(vma))
+		return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
+}
 EXPORT_SYMBOL(radix__flush_tlb_range);
 
 static int radix_get_mmu_psize(int page_size)
@@ -837,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	int psize = 0;
 	struct mm_struct *mm = tlb->mm;
 	int page_size = tlb->page_size;
+	unsigned long start = tlb->start;
+	unsigned long end = tlb->end;
 
 	/*
 	 * if page size is not something we understand, do a full mm flush
@@ -847,15 +871,45 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	 */
 	if (tlb->fullmm) {
 		__flush_all_mm(mm, true);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+	} else if (mm_tlb_flush_nested(mm)) {
+		/*
+		 * If there is a concurrent invalidation that is clearing ptes,
+		 * then it's possible this invalidation will miss one of those
+		 * cleared ptes and miss flushing the TLB. If this invalidate
+		 * returns before the other one flushes TLBs, that can result
+		 * in it returning while there are still valid TLBs inside the
+		 * range to be invalidated.
+		 *
+		 * See mm/memory.c:tlb_finish_mmu() for more details.
+		 *
+		 * The solution to this is ensure the entire range is always
+		 * flushed here. The problem for powerpc is that the flushes
+		 * are page size specific, so this "forced flush" would not
+		 * do the right thing if there are a mix of page sizes in
+		 * the range to be invalidated. So use __flush_tlb_range
+		 * which invalidates all possible page sizes in the range.
+		 *
+		 * PWC flush probably is not be required because the core code
+		 * shouldn't free page tables in this path, but accounting
+		 * for the possibility makes us a bit more robust.
+		 *
+		 * need_flush_all is an uncommon case because page table
+		 * teardown should be done with exclusive locks held (but
+		 * after locks are dropped another invalidate could come
+		 * in), it could be optimized further if necessary.
+		 */
+		if (!tlb->need_flush_all)
+			__radix__flush_tlb_range(mm, start, end, true);
+		else
+			radix__flush_all_mm(mm);
+#endif
 	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
 		if (!tlb->need_flush_all)
 			radix__flush_tlb_mm(mm);
 		else
 			radix__flush_all_mm(mm);
 	} else {
-		unsigned long start = tlb->start;
-		unsigned long end = tlb->end;
-
 		if (!tlb->need_flush_all)
 			radix__flush_tlb_range_psize(mm, start, end, psize);
 		else

From 749a0278c2177b2d16da5d8b135ba7f940bb4199 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 13 Jun 2018 23:23:56 +1000
Subject: [PATCH 067/215] powerpc/64s: Fix DT CPU features Power9 DD2.1 logic

In the device tree CPU features quirk code we want to set
CPU_FTR_POWER9_DD2_1 on all Power9s that aren't DD2.0 or earlier. But
we got the logic wrong and instead set it on all CPUs that aren't
Power9 DD2.0 or earlier, ie. including Power8.

Fix it by making sure we're on a Power9. This isn't a bug in practice
because the only code that checks the feature is Power9 only to begin
with. But we'll backport it anyway to avoid confusion.

Fixes: 9e9626ed3a4a ("powerpc/64s: Fix POWER9 DD2.2 and above in DT CPU features")
Cc: stable@vger.kernel.org # v4.17+
Reported-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/dt_cpu_ftrs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 4be1c0de9406..96dd3d871986 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -711,7 +711,8 @@ static __init void cpufeatures_cpu_quirks(void)
 		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
 		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
-	} else /* DD2.1 and up have DD2_1 */
+	} else if ((version & 0xffff0000) == 0x004e0000)
+		/* DD2.1 and up have DD2_1 */
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
 
 	if ((version & 0xffff0000) == 0x004e0000) {

From 8c1aef6a682f87a059f10ab606cc1e2cdd663d5a Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 19 May 2018 14:35:52 +1000
Subject: [PATCH 068/215] powerpc/64: hard disable irqs in panic_smp_self_stop

Similarly to commit 855bfe0de1 ("powerpc: hard disable irqs in
smp_send_stop loop"), irqs should be hard disabled by
panic_smp_self_stop.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup_64.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 7a7ce8ad455e..225bc5f91049 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -387,6 +387,14 @@ void early_setup_secondary(void)
 
 #endif /* CONFIG_SMP */
 
+void panic_smp_self_stop(void)
+{
+	hard_irq_disable();
+	spin_begin();
+	while (1)
+		spin_cpu_relax();
+}
+
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
 static bool use_spinloop(void)
 {

From de6e5d38417e6cdb005843db420a2974993d36ff Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 19 May 2018 14:35:53 +1000
Subject: [PATCH 069/215] powerpc: smp_send_stop do not offline stopped CPUs

Marking CPUs stopped by smp_send_stop as offline can cause warnings
due to cross-CPU wakeups. This trace was noticed on a busy system
running a sysrq+c crash test, after the injected crash:

WARNING: CPU: 51 PID: 1546 at kernel/sched/core.c:1179 set_task_cpu+0x22c/0x240
CPU: 51 PID: 1546 Comm: kworker/u352:1 Tainted: G      D
Workqueue: mlx5e mlx5e_update_stats_work [mlx5_core]
[...]
NIP [c00000000017c21c] set_task_cpu+0x22c/0x240
LR [c00000000017d580] try_to_wake_up+0x230/0x720
Call Trace:
[c000000001017700] runqueues+0x0/0xb00 (unreliable)
[c00000000017d580] try_to_wake_up+0x230/0x720
[c00000000015a214] insert_work+0x104/0x140
[c00000000015adb0] __queue_work+0x230/0x690
[c000003fc5007910] [c00000000015b26c] queue_work_on+0x5c/0x90
[c0080000135fc8f8] mlx5_cmd_exec+0x538/0xcb0 [mlx5_core]
[c008000013608fd0] mlx5_core_access_reg+0x140/0x1d0 [mlx5_core]
[c00800001362777c] mlx5e_update_pport_counters.constprop.59+0x6c/0x90 [mlx5_core]
[c008000013628868] mlx5e_update_ndo_stats+0x28/0x90 [mlx5_core]
[c008000013625558] mlx5e_update_stats_work+0x68/0xb0 [mlx5_core]
[c00000000015bcec] process_one_work+0x1bc/0x5f0
[c00000000015ecac] worker_thread+0xac/0x6b0
[c000000000168338] kthread+0x168/0x1b0
[c00000000000b628] ret_from_kernel_thread+0x5c/0xb4

This happens because firstly the CPU is not really offline in the
usual sense, processes and interrupts have not been migrated away.
Secondly smp_send_stop does not happen atomically on all CPUs, so
one CPU can have marked itself offline, while another CPU is still
running processes or interrupts which can affect the first CPU.

Fix this by just not marking the CPU as offline. It's more like
frozen in time, so offline does not really reflect its state properly
anyway. There should be nothing in the crash/panic path that walks
online CPUs and synchronously waits for them, so this change should
not introduce new hangs.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/smp.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5eadfffabe35..4794d6b4f4d2 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -600,9 +600,6 @@ static void nmi_stop_this_cpu(struct pt_regs *regs)
 	nmi_ipi_busy_count--;
 	nmi_ipi_unlock();
 
-	/* Remove this CPU */
-	set_cpu_online(smp_processor_id(), false);
-
 	spin_begin();
 	while (1)
 		spin_cpu_relax();
@@ -617,9 +614,6 @@ void smp_send_stop(void)
 
 static void stop_this_cpu(void *dummy)
 {
-	/* Remove this CPU */
-	set_cpu_online(smp_processor_id(), false);
-
 	hard_irq_disable();
 	spin_begin();
 	while (1)

From 855b6232dda2b6941ecd22979893e8a1d25642db Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 19 May 2018 14:35:54 +1000
Subject: [PATCH 070/215] powerpc/64: hard disable irqs on the panic()ing CPU

Similar to previous patches, hard disable interrupts when a CPU is
in panic. This reduces the chance the watchdog has to interfere with
the panic, and avoids any other type of masked interrupt being
executed when crashing which minimises the length of the crash path.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup-common.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 62b1a40d8957..40b44bb53a4e 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -700,12 +700,19 @@ EXPORT_SYMBOL(check_legacy_ioport);
 static int ppc_panic_event(struct notifier_block *this,
                              unsigned long event, void *ptr)
 {
+	/*
+	 * panic does a local_irq_disable, but we really
+	 * want interrupts to be hard disabled.
+	 */
+	hard_irq_disable();
+
 	/*
 	 * If firmware-assisted dump has been registered then trigger
 	 * firmware-assisted dump and let firmware handle everything else.
 	 */
 	crash_fadump(NULL, ptr);
-	ppc_md.panic(ptr);  /* May not return */
+	if (ppc_md.panic)
+		ppc_md.panic(ptr);  /* May not return */
 	return NOTIFY_DONE;
 }
 
@@ -716,7 +723,8 @@ static struct notifier_block ppc_panic_block = {
 
 void __init setup_panic(void)
 {
-	if (!ppc_md.panic)
+	/* PPC64 always does a hard irq disable in its panic handler */
+	if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
 		return;
 	atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
 }

From 1fe83888a2b776c204cb06629700adfb8e9cc123 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Fri, 8 Jun 2018 10:40:38 +0200
Subject: [PATCH 071/215] xen: share start flags between PV and PVH
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use a global variable to store the start flags for both PV and PVH.
This allows the xen_initial_domain macro to work properly on PVH.

Note that ARM is also switched to use the new variable.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/arm/xen/enlighten.c     | 7 ++++---
 arch/x86/xen/enlighten.c     | 7 +++++++
 arch/x86/xen/enlighten_pv.c  | 1 +
 arch/x86/xen/enlighten_pvh.c | 1 +
 include/xen/xen.h            | 6 +++++-
 5 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 8073625371f5..07060e5b5864 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -59,6 +59,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 
 static __read_mostly unsigned int xen_events_irq;
 
+uint32_t xen_start_flags;
+EXPORT_SYMBOL(xen_start_flags);
+
 int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
 			       unsigned long addr,
 			       xen_pfn_t *gfn, int nr,
@@ -293,9 +296,7 @@ void __init xen_early_init(void)
 	xen_setup_features();
 
 	if (xen_feature(XENFEAT_dom0))
-		xen_start_info->flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
-	else
-		xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED);
+		xen_start_flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
 
 	if (!console_set_on_cmdline && !xen_initial_domain())
 		add_preferred_console("hvc", 0, NULL);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c9081c6671f0..3b5318505c69 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -64,6 +64,13 @@ struct shared_info xen_dummy_shared_info;
 __read_mostly int xen_have_vector_callback;
 EXPORT_SYMBOL_GPL(xen_have_vector_callback);
 
+/*
+ * NB: needs to live in .data because it's used by xen_prepare_pvh which runs
+ * before clearing the bss.
+ */
+uint32_t xen_start_flags __attribute__((section(".data"))) = 0;
+EXPORT_SYMBOL(xen_start_flags);
+
 /*
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 357969a3697c..8d4e2e1ae60b 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1203,6 +1203,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
 		return;
 
 	xen_domain_type = XEN_PV_DOMAIN;
+	xen_start_flags = xen_start_info->flags;
 
 	xen_setup_features();
 
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index aa1c6a6831a9..c85d1a88f476 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -97,6 +97,7 @@ void __init xen_prepare_pvh(void)
 	}
 
 	xen_pvh = 1;
+	xen_start_flags = pvh_start_info.flags;
 
 	msr = cpuid_ebx(xen_cpuid_base() + 2);
 	pfn = __pa(hypercall_page);
diff --git a/include/xen/xen.h b/include/xen/xen.h
index 9d4340c907d1..1e1d9bd0bd37 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -25,12 +25,16 @@ extern bool xen_pvh;
 #define xen_hvm_domain()	(xen_domain_type == XEN_HVM_DOMAIN)
 #define xen_pvh_domain()	(xen_pvh)
 
+#include <linux/types.h>
+
+extern uint32_t xen_start_flags;
+
 #ifdef CONFIG_XEN_DOM0
 #include <xen/interface/xen.h>
 #include <asm/xen/hypervisor.h>
 
 #define xen_initial_domain()	(xen_domain() && \
-				 xen_start_info && xen_start_info->flags & SIF_INITDOMAIN)
+				 (xen_start_flags & SIF_INITDOMAIN))
 #else  /* !CONFIG_XEN_DOM0 */
 #define xen_initial_domain()	(0)
 #endif	/* CONFIG_XEN_DOM0 */

From 84c029a73327cef571eaa61c7d6e67e8031b52ec Mon Sep 17 00:00:00 2001
From: Zhouyang Jia <jiazhouyang09@gmail.com>
Date: Fri, 15 Jun 2018 07:34:52 +0800
Subject: [PATCH 072/215] xen: add error handling for xenbus_printf

When xenbus_printf fails, the lack of error-handling code may
cause unexpected results.

This patch adds error-handling code after calling xenbus_printf.

Signed-off-by: Zhouyang Jia <jiazhouyang09@gmail.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/manage.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 8835065029d3..c93d8ef8df34 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -289,8 +289,15 @@ static void sysrq_handler(struct xenbus_watch *watch, const char *path,
 		return;
 	}
 
-	if (sysrq_key != '\0')
-		xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+	if (sysrq_key != '\0') {
+		err = xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+		if (err) {
+			pr_err("%s: Error %d writing sysrq in control/sysrq\n",
+			       __func__, err);
+			xenbus_transaction_end(xbt, 1);
+			return;
+		}
+	}
 
 	err = xenbus_transaction_end(xbt, 0);
 	if (err == -EAGAIN)
@@ -342,7 +349,12 @@ static int setup_shutdown_watcher(void)
 			continue;
 		snprintf(node, FEATURE_PATH_SIZE, "feature-%s",
 			 shutdown_handlers[idx].command);
-		xenbus_printf(XBT_NIL, "control", node, "%u", 1);
+		err = xenbus_printf(XBT_NIL, "control", node, "%u", 1);
+		if (err) {
+			pr_err("%s: Error %d writing %s\n", __func__,
+				err, node);
+			return err;
+		}
 	}
 
 	return 0;

From 6e3cc2a6e259f49f5817d1561109832eff90f8e4 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Fri, 1 Jun 2018 14:41:24 +0300
Subject: [PATCH 073/215] xen/grant-table: Export gnttab_{alloc|free}_pages as
 GPL

Only gnttab_{alloc|free}_pages are exported as EXPORT_SYMBOL
while all the rest are exported as EXPORT_SYMBOL_GPL, thus
effectively making it not possible for non-GPL driver modules
to use grant table module. Export gnttab_{alloc|free}_pages as
EXPORT_SYMBOL_GPL so all the exports are aligned.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/grant-table.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 27be107d6480..ba36ff3e4903 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -799,7 +799,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages)
 
 	return 0;
 }
-EXPORT_SYMBOL(gnttab_alloc_pages);
+EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
 
 /**
  * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
@@ -820,7 +820,7 @@ void gnttab_free_pages(int nr_pages, struct page **pages)
 	}
 	free_xenballooned_pages(nr_pages, pages);
 }
-EXPORT_SYMBOL(gnttab_free_pages);
+EXPORT_SYMBOL_GPL(gnttab_free_pages);
 
 /* Handling of paged out grant targets (GNTST_eagain) */
 #define MAX_DELAY 256

From 7ba33e1c9d1e03f442b161c701d1f811ea13c75e Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun, 10 Jun 2018 20:43:09 +0100
Subject: [PATCH 074/215] drm/i915: Apply batch location restrictions before
 pinning

We special case the position of the batch within the GTT to prevent
negative self-relocation deltas from underflowing. However, that
restriction is being applied after a trial pin of the batch in its
current position. Thus we are not rejecting an invalid location if the
batch has been used before, leading to an assertion if we happen to need
to rearrange the entire payload. In the worst case, this may cause a GPU
hang on gen7 or perhaps missing state.

References: https://bugs.freedesktop.org/show_bug.cgi?id=105720
Fixes: 2889caa92321 ("drm/i915: Eliminate lots of iterations over the execobjects array")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Martin Peres <martin.peres@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180610194325.13467-2-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
(cherry picked from commit 746c8f143afad7aaa66c484485fc39888d437a3f)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 49 ++++++++++++----------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f627a8c47c58..22df17c8ca9b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -489,7 +489,9 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb,
+	   unsigned int i, unsigned batch_idx,
+	   struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
@@ -522,6 +524,24 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 	eb->flags[i] = entry->flags;
 	vma->exec_flags = &eb->flags[i];
 
+	/*
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
+	 */
+	if (i == batch_idx) {
+		if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
+			eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
+		if (eb->reloc_cache.has_fence)
+			eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
+
+		eb->batch = vma;
+	}
+
 	err = 0;
 	if (eb_pin_vma(eb, entry, vma)) {
 		if (entry->offset != vma->node.start) {
@@ -716,7 +736,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 	struct drm_i915_gem_object *obj;
-	unsigned int i;
+	unsigned int i, batch;
 	int err;
 
 	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
@@ -728,6 +748,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
+	batch = eb_batch_index(eb);
+
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
 		struct i915_lut_handle *lut;
@@ -770,33 +792,16 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		lut->handle = handle;
 
 add_vma:
-		err = eb_add_vma(eb, i, vma);
+		err = eb_add_vma(eb, i, batch, vma);
 		if (unlikely(err))
 			goto err_vma;
 
 		GEM_BUG_ON(vma != eb->vma[i]);
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
 	}
 
-	/* take note of the batch buffer before we might reorder the lists */
-	i = eb_batch_index(eb);
-	eb->batch = eb->vma[i];
-	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
-
-	/*
-	 * SNA is doing fancy tricks with compressing batch buffers, which leads
-	 * to negative relocation deltas. Usually that works out ok since the
-	 * relocate address is still positive, except when the batch is placed
-	 * very low in the GTT. Ensure this doesn't happen.
-	 *
-	 * Note that actual hangs have only been observed on gen7, but for
-	 * paranoia do it everywhere.
-	 */
-	if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
-		eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
-	if (eb->reloc_cache.has_fence)
-		eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
-
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 

From a5bfcdf0e16b33c1690ded31f863466136480ddc Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 11 Jun 2018 16:33:32 +0100
Subject: [PATCH 075/215] drm/i915/execlists: Avoid putting the error pointer

On allocation error, do not jump to the unwind handler that tries to
free the error pointer.

Reported-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: a89d1f921c15 ("drm/i915: Split i915_gem_timeline into individual timelines")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180611153332.14824-1-chris@chris-wilson.co.uk
(cherry picked from commit 467d35789e5a4f47428b65ef711b30fdabbb0fd4)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 15434cad5430..f3968580e5e2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2641,10 +2641,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 	context_size += LRC_HEADER_PAGES * PAGE_SIZE;
 
 	ctx_obj = i915_gem_object_create(ctx->i915, context_size);
-	if (IS_ERR(ctx_obj)) {
-		ret = PTR_ERR(ctx_obj);
-		goto error_deref_obj;
-	}
+	if (IS_ERR(ctx_obj))
+		return PTR_ERR(ctx_obj);
 
 	vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
 	if (IS_ERR(vma)) {

From 541ab84d2b6ea79021d5df0b54d81600334fa2a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 24 May 2018 15:54:03 +0300
Subject: [PATCH 076/215] drm/i915: Allow DBLSCAN user modes with eDP/LVDS/DSI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When encountering a connector with the scaling mode property both
intel and modesetting ddxs sometimes add tons of DBLSCAN modes
to the output's mode list. The idea presumably being that since the
output will be going through the panel fitter anyway we can pretend
to use any kind of mode.

Sadly that means we can't reject user modes with the DBLSCAN flag
until we know whether we're going to be using the panel's native
mode or the user mode directly. Doing otherwise means X clients using
xf86vidmode/xrandr will get a protocol error (and often self
terminate as a result) when the kernel refuses to use the requested
mode with the DBLSCAN flag.

To undo the regression we'll move the DBLSCAN checks into the
connector->mode_valid() and encoder->compute_config() hooks.

Cc: stable@vger.kernel.org
Cc: Vito Caputo <vcaputo@pengaru.com>
Reported-by: Vito Caputo <vcaputo@pengaru.com>
Fixes: e995ca0b8139 ("drm/i915: Provide a device level .mode_valid() hook")
References: https://lkml.org/lkml/2018/5/21/715
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180524125403.23445-1-ville.syrjala@linux.intel.com
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106804
Tested-by: Arkadiusz Miskiewicz <arekm@maven.pl>
(cherry picked from commit e4dd27aadd205417a2e9ea9902b698a0252ec3a0)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_crt.c     | 20 ++++++++++++++++++++
 drivers/gpu/drm/i915/intel_display.c | 16 +++++++++++++---
 drivers/gpu/drm/i915/intel_dp.c      |  6 ++++++
 drivers/gpu/drm/i915/intel_dp_mst.c  |  6 ++++++
 drivers/gpu/drm/i915/intel_dsi.c     |  6 ++++++
 drivers/gpu/drm/i915/intel_dvo.c     |  6 ++++++
 drivers/gpu/drm/i915/intel_hdmi.c    |  6 ++++++
 drivers/gpu/drm/i915/intel_lvds.c    |  5 +++++
 drivers/gpu/drm/i915/intel_sdvo.c    |  6 ++++++
 drivers/gpu/drm/i915/intel_tv.c      | 12 ++++++++++--
 10 files changed, 84 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index de0e22322c76..072b326d5ee0 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -304,6 +304,9 @@ intel_crt_mode_valid(struct drm_connector *connector,
 	int max_dotclk = dev_priv->max_dotclk_freq;
 	int max_clock;
 
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
 	if (mode->clock < 25000)
 		return MODE_CLOCK_LOW;
 
@@ -337,6 +340,12 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder,
 				     struct intel_crtc_state *pipe_config,
 				     struct drm_connector_state *conn_state)
 {
+	struct drm_display_mode *adjusted_mode =
+		&pipe_config->base.adjusted_mode;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
 	return true;
 }
 
@@ -344,6 +353,12 @@ static bool pch_crt_compute_config(struct intel_encoder *encoder,
 				   struct intel_crtc_state *pipe_config,
 				   struct drm_connector_state *conn_state)
 {
+	struct drm_display_mode *adjusted_mode =
+		&pipe_config->base.adjusted_mode;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
 	pipe_config->has_pch_encoder = true;
 
 	return true;
@@ -354,6 +369,11 @@ static bool hsw_crt_compute_config(struct intel_encoder *encoder,
 				   struct drm_connector_state *conn_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+	struct drm_display_mode *adjusted_mode =
+		&pipe_config->base.adjusted_mode;
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
 
 	pipe_config->has_pch_encoder = true;
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index dee3a8e659f1..2cc6faa1daa8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14469,12 +14469,22 @@ static enum drm_mode_status
 intel_mode_valid(struct drm_device *dev,
 		 const struct drm_display_mode *mode)
 {
+	/*
+	 * Can't reject DBLSCAN here because Xorg ddxen can add piles
+	 * of DBLSCAN modes to the output's mode list when they detect
+	 * the scaling mode property on the connector. And they don't
+	 * ask the kernel to validate those modes in any way until
+	 * modeset time at which point the client gets a protocol error.
+	 * So in order to not upset those clients we silently ignore the
+	 * DBLSCAN flag on such connectors. For other connectors we will
+	 * reject modes with the DBLSCAN flag in encoder->compute_config().
+	 * And we always reject DBLSCAN modes in connector->mode_valid()
+	 * as we never want such modes on the connector's mode list.
+	 */
+
 	if (mode->vscan > 1)
 		return MODE_NO_VSCAN;
 
-	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
-		return MODE_NO_DBLESCAN;
-
 	if (mode->flags & DRM_MODE_FLAG_HSKEW)
 		return MODE_H_ILLEGAL;
 
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 8320f0e8e3be..6cce43a6eaad 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -420,6 +420,9 @@ intel_dp_mode_valid(struct drm_connector *connector,
 	int max_rate, mode_rate, max_lanes, max_link_clock;
 	int max_dotclk;
 
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
 	max_dotclk = intel_dp_downstream_max_dotclock(intel_dp);
 
 	if (intel_dp_is_edp(intel_dp) && fixed_mode) {
@@ -1862,6 +1865,9 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 						conn_state->scaling_mode);
 	}
 
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
 	if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
 	    adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
 		return false;
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 9e6956c08688..5890500a3a8b 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -48,6 +48,9 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc,
 					   DP_DPCD_QUIRK_LIMITED_M_N);
 
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
 	pipe_config->has_pch_encoder = false;
 	bpp = 24;
 	if (intel_dp->compliance.test_data.bpc) {
@@ -366,6 +369,9 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
 	if (!intel_dp)
 		return MODE_ERROR;
 
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
 	max_link_clock = intel_dp_max_link_rate(intel_dp);
 	max_lanes = intel_dp_max_lane_count(intel_dp);
 
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index cf39ca90d887..f349b3920199 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -326,6 +326,9 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder,
 						conn_state->scaling_mode);
 	}
 
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
 	/* DSI uses short packets for sync events, so clear mode flags for DSI */
 	adjusted_mode->flags = 0;
 
@@ -1266,6 +1269,9 @@ intel_dsi_mode_valid(struct drm_connector *connector,
 
 	DRM_DEBUG_KMS("\n");
 
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
 	if (fixed_mode) {
 		if (mode->hdisplay > fixed_mode->hdisplay)
 			return MODE_PANEL;
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index a70d767313aa..61d908e0df0e 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -219,6 +219,9 @@ intel_dvo_mode_valid(struct drm_connector *connector,
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 	int target_clock = mode->clock;
 
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
 	/* XXX: Validate clock range */
 
 	if (fixed_mode) {
@@ -254,6 +257,9 @@ static bool intel_dvo_compute_config(struct intel_encoder *encoder,
 	if (fixed_mode)
 		intel_fixed_panel_mode(fixed_mode, adjusted_mode);
 
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index ee929f31f7db..d8cb53ef4351 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1557,6 +1557,9 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
 	bool force_dvi =
 		READ_ONCE(to_intel_digital_connector_state(connector->state)->force_audio) == HDMI_AUDIO_OFF_DVI;
 
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
 	clock = mode->clock;
 
 	if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
@@ -1677,6 +1680,9 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 	int desired_bpp;
 	bool force_dvi = intel_conn_state->force_audio == HDMI_AUDIO_OFF_DVI;
 
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
 	pipe_config->has_hdmi_sink = !force_dvi && intel_hdmi->has_hdmi_sink;
 
 	if (pipe_config->has_hdmi_sink)
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index d278f24ba6ae..48f618dc9abb 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -380,6 +380,8 @@ intel_lvds_mode_valid(struct drm_connector *connector,
 	struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
 	int max_pixclk = to_i915(connector->dev)->max_dotclk_freq;
 
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
 	if (mode->hdisplay > fixed_mode->hdisplay)
 		return MODE_PANEL;
 	if (mode->vdisplay > fixed_mode->vdisplay)
@@ -429,6 +431,9 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder,
 	intel_fixed_panel_mode(intel_connector->panel.fixed_mode,
 			       adjusted_mode);
 
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
 	if (HAS_PCH_SPLIT(dev_priv)) {
 		pipe_config->has_pch_encoder = true;
 
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 25005023c243..26975df4e593 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -1160,6 +1160,9 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder,
 							   adjusted_mode);
 	}
 
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
 	/*
 	 * Make the CRTC code factor in the SDVO pixel multiplier.  The
 	 * SDVO device will factor out the multiplier during mode_set.
@@ -1621,6 +1624,9 @@ intel_sdvo_mode_valid(struct drm_connector *connector,
 	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
 	if (intel_sdvo->pixel_clock_min > mode->clock)
 		return MODE_CLOCK_LOW;
 
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 885fc3809f7f..b55b5c157e38 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -850,6 +850,9 @@ intel_tv_mode_valid(struct drm_connector *connector,
 	const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
 	int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return MODE_NO_DBLESCAN;
+
 	if (mode->clock > max_dotclk)
 		return MODE_CLOCK_HIGH;
 
@@ -877,16 +880,21 @@ intel_tv_compute_config(struct intel_encoder *encoder,
 			struct drm_connector_state *conn_state)
 {
 	const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
+	struct drm_display_mode *adjusted_mode =
+		&pipe_config->base.adjusted_mode;
 
 	if (!tv_mode)
 		return false;
 
-	pipe_config->base.adjusted_mode.crtc_clock = tv_mode->clock;
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		return false;
+
+	adjusted_mode->crtc_clock = tv_mode->clock;
 	DRM_DEBUG_KMS("forcing bpc to 8 for TV\n");
 	pipe_config->pipe_bpp = 8*3;
 
 	/* TV has it's own notion of sync and other mode flags, so clear them. */
-	pipe_config->base.adjusted_mode.flags = 0;
+	adjusted_mode->flags = 0;
 
 	/*
 	 * FIXME: We don't check whether the input mode is actually what we want

From 4dc055c9cc8b3dac966b54d3cd5cf463a988299b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 11 Jun 2018 23:02:55 +0300
Subject: [PATCH 077/215] drm/i915: Fix PIPESTAT irq ack on i965/g4x
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On i965/g4x IIR is edge triggered. So in order for IIR to notice that
there is still a pending interrupt we have to force and edge in ISR.
For the ISR/IIR pipe event bits we can do that by temporarily
clearing all the PIPESTAT enable bits when we ack the status bits.
This will force the ISR pipe event bit low, and it can then go back
high when we restore the PIPESTAT enable bits.

This avoids the following race:
1. stat = read(PIPESTAT)
2. an enabled PIPESTAT status bit goes high
3. write(PIPESTAT, enable|stat);
4. write(IIR, PIPE_EVENT)

The end result is IIR==0 and ISR!=0. This can lead to nasty
vblank wait/flip_done timeouts if another interrupt source
doesn't trick us into looking at the PIPESTAT status bits despite
the IIR PIPE_EVENT bit being low.

Before i965 IIR was level triggered so this problem can't actually
happen there. And curiously VLV/CHV went back to the level triggered
scheme as well. But for simplicity we'll use the same i965/g4x
compatible code for all platforms.

Cc: stable@vger.kernel.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106033
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105225
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106030
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180611200258.27121-1-ville.syrjala@linux.intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit 132c27c97cb958f637dc05adc35a61b47779bcd8)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_irq.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f9bc3aaa90d0..4a02747ac658 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1893,9 +1893,17 @@ static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv,
 
 		/*
 		 * Clear the PIPE*STAT regs before the IIR
+		 *
+		 * Toggle the enable bits to make sure we get an
+		 * edge in the ISR pipe event bit if we don't clear
+		 * all the enabled status bits. Otherwise the edge
+		 * triggered IIR on i965/g4x wouldn't notice that
+		 * an interrupt is still pending.
 		 */
-		if (pipe_stats[pipe])
-			I915_WRITE(reg, enable_mask | pipe_stats[pipe]);
+		if (pipe_stats[pipe]) {
+			I915_WRITE(reg, pipe_stats[pipe]);
+			I915_WRITE(reg, enable_mask);
+		}
 	}
 	spin_unlock(&dev_priv->irq_lock);
 }

From 1e34f1d36804be1a446212a33ca5397bf0e5acdd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 13 Jun 2018 19:05:52 +0300
Subject: [PATCH 078/215] drm/i915: Disallow interlaced modes on g4x DP outputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Looks like interlaced DP output doesn't work on g4x either. Not all
that surprising considering we already established that interlaced
DP output is busted on VLV/CHV.

Cc: stable@vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180613160553.11664-1-ville.syrjala@linux.intel.com
(cherry picked from commit 929168c5f3df5d9ea0ef426c33e971157d045eab)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 6cce43a6eaad..2bdfe4b2d6dc 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1868,7 +1868,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return false;
 
-	if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+	if (HAS_GMCH_DISPLAY(dev_priv) &&
 	    adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
 		return false;
 
@@ -6343,7 +6343,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 	drm_connector_init(dev, connector, &intel_dp_connector_funcs, type);
 	drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs);
 
-	if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
+	if (!HAS_GMCH_DISPLAY(dev_priv))
 		connector->interlace_allowed = true;
 	connector->doublescan_allowed = 0;
 

From 4dccc4d517481282e84335c7acbfd7a1481004b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 13 Jun 2018 19:05:53 +0300
Subject: [PATCH 079/215] drm/i915: Turn off g4x DP port in .post_disable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While Bspec doesn't list a specific sequence for turning off the DP port
on g4x we are getting an underrun if the port is disabled in the
.disable() hook. Looks like the pipe stops when the port stops, and by
that time the plane disable may not have completed yet. Also the plane(s)
seem to end up in some wonky state when this happens as they also signal
another underrun immediately after we turn them back on during the next
enable sequence.

We could add a vblank wait in .disable() to avoid wedging the planes,
but I assume we're still tripping up the pipe in some way. So it seems
better to me to just follow the ILK+ sequence and turn off the DP port
in .post_disable() instead. This sequence doesn't seem to suffer from
this problem. Could be it was always the intended sequence for DP and
the gen4 bspec was just never updated to include it.

Originally we used the bad sequence even on ilk+, but I changed that
in commit 08aff3fe26ae ("drm/i915: Move DP port disable to post_disable
for pch platforms") as it was causing issues on those platforms as well.
I left out g4x then only because I didn't have the hardware to test it.
Now that I do it's fairly clear that the ilk+ sequence is also the
right choice for g4x.

v2: Fix whitespace fail (Jani)
    Mention the ilk+ commit (Jani)

Cc: stable@vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180613160553.11664-2-ville.syrjala@linux.intel.com
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
(cherry picked from commit 51a9f6dfc00d35f927ecfaf6f0ae8ebaba39b3fe)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_dp.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 2bdfe4b2d6dc..16faea30114a 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -2788,16 +2788,6 @@ static void intel_disable_dp(struct intel_encoder *encoder,
 static void g4x_disable_dp(struct intel_encoder *encoder,
 			   const struct intel_crtc_state *old_crtc_state,
 			   const struct drm_connector_state *old_conn_state)
-{
-	intel_disable_dp(encoder, old_crtc_state, old_conn_state);
-
-	/* disable the port before the pipe on g4x */
-	intel_dp_link_down(encoder, old_crtc_state);
-}
-
-static void ilk_disable_dp(struct intel_encoder *encoder,
-			   const struct intel_crtc_state *old_crtc_state,
-			   const struct drm_connector_state *old_conn_state)
 {
 	intel_disable_dp(encoder, old_crtc_state, old_conn_state);
 }
@@ -2813,13 +2803,19 @@ static void vlv_disable_dp(struct intel_encoder *encoder,
 	intel_disable_dp(encoder, old_crtc_state, old_conn_state);
 }
 
-static void ilk_post_disable_dp(struct intel_encoder *encoder,
+static void g4x_post_disable_dp(struct intel_encoder *encoder,
 				const struct intel_crtc_state *old_crtc_state,
 				const struct drm_connector_state *old_conn_state)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
 	enum port port = encoder->port;
 
+	/*
+	 * Bspec does not list a specific disable sequence for g4x DP.
+	 * Follow the ilk+ sequence (disable pipe before the port) for
+	 * g4x DP as it does not suffer from underruns like the normal
+	 * g4x modeset sequence (disable pipe after the port).
+	 */
 	intel_dp_link_down(encoder, old_crtc_state);
 
 	/* Only ilk+ has port A */
@@ -6442,15 +6438,11 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
 		intel_encoder->enable = vlv_enable_dp;
 		intel_encoder->disable = vlv_disable_dp;
 		intel_encoder->post_disable = vlv_post_disable_dp;
-	} else if (INTEL_GEN(dev_priv) >= 5) {
-		intel_encoder->pre_enable = g4x_pre_enable_dp;
-		intel_encoder->enable = g4x_enable_dp;
-		intel_encoder->disable = ilk_disable_dp;
-		intel_encoder->post_disable = ilk_post_disable_dp;
 	} else {
 		intel_encoder->pre_enable = g4x_pre_enable_dp;
 		intel_encoder->enable = g4x_enable_dp;
 		intel_encoder->disable = g4x_disable_dp;
+		intel_encoder->post_disable = g4x_post_disable_dp;
 	}
 
 	intel_dig_port->dp.output_reg = output_reg;

From bc64e05408cafe3668e7460834935ea3f1764f31 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Fri, 15 Jun 2018 13:44:29 +0300
Subject: [PATCH 080/215] drm/i915: Fix context ban and hang accounting for
 client

If client is smart or lucky enough to create a new context
after each hang, our context banning mechanism will never
catch up, and as a result of that it will be saved from
client banning. This can result in a never ending streak of
gpu hangs caused by bad or malicious client, preventing
access from other legit gpu clients.

Fix this by always incrementing per client ban score if
it hangs in short successions regardless of context ban
scoring. The exception are non bannable contexts. They remain
detached from client ban scoring mechanism.

v2: xchg timestamp, tidyup (Chris)
v3: comment, bannable & banned together (Chris)

Fixes: b083a0870c79 ("drm/i915: Add per client max context ban limit")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20180615104429.31477-1-mika.kuoppala@linux.intel.com
(cherry picked from commit 14921f3cef85b0167a9145e5f29b9dfc3b2a84dc)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         | 21 ++++++---
 drivers/gpu/drm/i915/i915_gem.c         | 57 +++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
 3 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 34c125e2d90c..7014a96546f4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -340,14 +340,21 @@ struct drm_i915_file_private {
 
 	unsigned int bsd_engine;
 
-/* Client can have a maximum of 3 contexts banned before
- * it is denied of creating new contexts. As one context
- * ban needs 4 consecutive hangs, and more if there is
- * progress in between, this is a last resort stop gap measure
- * to limit the badly behaving clients access to gpu.
+/*
+ * Every context ban increments per client ban score. Also
+ * hangs in short succession increments ban score. If ban threshold
+ * is reached, client is considered banned and submitting more work
+ * will fail. This is a stop gap measure to limit the badly behaving
+ * clients access to gpu. Note that unbannable contexts never increment
+ * the client ban score.
  */
-#define I915_MAX_CLIENT_CONTEXT_BANS 3
-	atomic_t context_bans;
+#define I915_CLIENT_SCORE_HANG_FAST	1
+#define   I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ)
+#define I915_CLIENT_SCORE_CONTEXT_BAN   3
+#define I915_CLIENT_SCORE_BANNED	9
+	/** ban_score: Accumulated score of all ctx bans and fast hangs. */
+	atomic_t ban_score;
+	unsigned long hang_timestamp;
 };
 
 /* Interface history:
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3704f4c0c2c9..d44ad7bc1e94 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2933,32 +2933,54 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
+					const struct i915_gem_context *ctx)
+{
+	unsigned int score;
+	unsigned long prev_hang;
+
+	if (i915_gem_context_is_banned(ctx))
+		score = I915_CLIENT_SCORE_CONTEXT_BAN;
+	else
+		score = 0;
+
+	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
+	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
+		score += I915_CLIENT_SCORE_HANG_FAST;
+
+	if (score) {
+		atomic_add(score, &file_priv->ban_score);
+
+		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
+				 ctx->name, score,
+				 atomic_read(&file_priv->ban_score));
+	}
+}
+
 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
 {
-	bool banned;
+	unsigned int score;
+	bool banned, bannable;
 
 	atomic_inc(&ctx->guilty_count);
 
-	banned = false;
-	if (i915_gem_context_is_bannable(ctx)) {
-		unsigned int score;
+	bannable = i915_gem_context_is_bannable(ctx);
+	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
+	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
 
-		score = atomic_add_return(CONTEXT_SCORE_GUILTY,
-					  &ctx->ban_score);
-		banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
+	DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s\n",
+			 ctx->name, atomic_read(&ctx->guilty_count),
+			 score, yesno(banned && bannable));
 
-		DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
-				 ctx->name, score, yesno(banned));
-	}
-	if (!banned)
+	/* Cool contexts don't accumulate client ban score */
+	if (!bannable)
 		return;
 
-	i915_gem_context_set_banned(ctx);
-	if (!IS_ERR_OR_NULL(ctx->file_priv)) {
-		atomic_inc(&ctx->file_priv->context_bans);
-		DRM_DEBUG_DRIVER("client %s has had %d context banned\n",
-				 ctx->name, atomic_read(&ctx->file_priv->context_bans));
-	}
+	if (banned)
+		i915_gem_context_set_banned(ctx);
+
+	if (!IS_ERR_OR_NULL(ctx->file_priv))
+		i915_gem_client_mark_guilty(ctx->file_priv, ctx);
 }
 
 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
@@ -5736,6 +5758,7 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
 	INIT_LIST_HEAD(&file_priv->mm.request_list);
 
 	file_priv->bsd_engine = -1;
+	file_priv->hang_timestamp = jiffies;
 
 	ret = i915_gem_context_open(i915, file);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 33f8a4b3c981..060335d3d9e0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -652,7 +652,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
-	return atomic_read(&file_priv->context_bans) > I915_MAX_CLIENT_CONTEXT_BANS;
+	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
 }
 
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,

From 7a3727f385dc64773db1c144f6b15c1e9d4735bb Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 15 Jun 2018 20:06:05 +0100
Subject: [PATCH 081/215] drm/i915: Enable provoking vertex fix on Gen9
 systems.

The SF and clipper units mishandle the provoking vertex in some cases,
which can cause misrendering with shaders that use flat shaded inputs.

There are chicken bits in 3D_CHICKEN3 (for SF) and FF_SLICE_CHICKEN
(for the clipper) that work around the issue.  These registers are
unfortunately not part of the logical context (even the power context),
and so we must reload them every time we start executing in a context.

Bugzilla: https://bugs.freedesktop.org/103047
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20180615190605.16238-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: stable@vger.kernel.org
(cherry picked from commit b77422f80337d363eed60c8c48db9cb6e33085c9)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h  |  5 +++++
 drivers/gpu/drm/i915/intel_lrc.c | 12 +++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f11bb213ec07..7720569f2024 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2425,12 +2425,17 @@ enum i915_power_well_id {
 #define _3D_CHICKEN	_MMIO(0x2084)
 #define  _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB	(1 << 10)
 #define _3D_CHICKEN2	_MMIO(0x208c)
+
+#define FF_SLICE_CHICKEN	_MMIO(0x2088)
+#define  FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX	(1 << 1)
+
 /* Disables pipelining of read flushes past the SF-WIZ interface.
  * Required on all Ironlake steppings according to the B-Spec, but the
  * particular danger of not doing so is not specified.
  */
 # define _3D_CHICKEN2_WM_READ_PIPELINED			(1 << 14)
 #define _3D_CHICKEN3	_MMIO(0x2090)
+#define  _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX		(1 << 12)
 #define  _3D_CHICKEN_SF_DISABLE_OBJEND_CULL		(1 << 10)
 #define  _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE	(1 << 5)
 #define  _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL		(1 << 5)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f3968580e5e2..7c4c8fb1dae4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1545,11 +1545,21 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
+	*batch++ = MI_LOAD_REGISTER_IMM(3);
+
 	/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
-	*batch++ = MI_LOAD_REGISTER_IMM(1);
 	*batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2);
 	*batch++ = _MASKED_BIT_DISABLE(
 			GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE);
+
+	/* BSpec: 11391 */
+	*batch++ = i915_mmio_reg_offset(FF_SLICE_CHICKEN);
+	*batch++ = _MASKED_BIT_ENABLE(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX);
+
+	/* BSpec: 11299 */
+	*batch++ = i915_mmio_reg_offset(_3D_CHICKEN3);
+	*batch++ = _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX);
+
 	*batch++ = MI_NOOP;
 
 	/* WaClearSlmSpaceAtContextSwitch:kbl */

From 2dbf8dffbf35fd8f611083b9d9fe74fdccf912a3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 15 Jun 2018 15:58:45 -0400
Subject: [PATCH 082/215] pNFS: Always free the session slot on error in
 nfs4_layoutget_handle_exception

Right now, we can call nfs_commit_inode() while holding the session slot,
which could lead to NFSv4 deadlocks. Ensure we only keep the slot if
the server returned a layout that we have to process.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ed45090e4df6..2c8c2696415e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8650,6 +8650,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 
 	dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
 
+	nfs4_sequence_free_slot(&lgp->res.seq_res);
+
 	switch (nfs4err) {
 	case 0:
 		goto out;
@@ -8714,7 +8716,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 		goto out;
 	}
 
-	nfs4_sequence_free_slot(&lgp->res.seq_res);
 	err = nfs4_handle_exception(server, nfs4err, exception);
 	if (!status) {
 		if (exception->retry)
@@ -8786,20 +8787,22 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
 	if (IS_ERR(task))
 		return ERR_CAST(task);
 	status = rpc_wait_for_completion_task(task);
-	if (status == 0) {
+	if (status != 0)
+		goto out;
+
+	/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
+	if (task->tk_status < 0 || lgp->res.layoutp->len == 0) {
 		status = nfs4_layoutget_handle_exception(task, lgp, &exception);
 		*timeout = exception.timeout;
-	}
-
+	} else
+		lseg = pnfs_layout_process(lgp);
+out:
 	trace_nfs4_layoutget(lgp->args.ctx,
 			&lgp->args.range,
 			&lgp->res.range,
 			&lgp->res.stateid,
 			status);
 
-	/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
-	if (status == 0 && lgp->res.layoutp->len)
-		lseg = pnfs_layout_process(lgp);
 	rpc_put_task(task);
 	dprintk("<-- %s status=%d\n", __func__, status);
 	if (status)

From c8bf70735382401a161d9c818e8ea89500812d0c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 15 Jun 2018 16:31:02 -0400
Subject: [PATCH 083/215] pNFS: Don't send layoutreturn if the layout is
 already invalid

If the layout was invalidated due to a reboot, then don't try to send
a layoutreturn for it.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c | 16 ++++++++++++++++
 fs/nfs/pnfs.h     |  5 +++++
 2 files changed, 21 insertions(+)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2c8c2696415e..6dd146885da9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3294,6 +3294,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	struct nfs4_closedata *calldata = data;
 	struct nfs4_state *state = calldata->state;
 	struct inode *inode = calldata->inode;
+	struct pnfs_layout_hdr *lo;
 	bool is_rdonly, is_wronly, is_rdwr;
 	int call_close = 0;
 
@@ -3337,6 +3338,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		goto out_wait;
 	}
 
+	lo = calldata->arg.lr_args ? calldata->arg.lr_args->layout : NULL;
+	if (lo && !pnfs_layout_is_valid(lo)) {
+		calldata->arg.lr_args = NULL;
+		calldata->res.lr_res = NULL;
+	}
+
 	if (calldata->arg.fmode == 0)
 		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
 
@@ -5972,12 +5979,19 @@ static void nfs4_delegreturn_release(void *calldata)
 static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs4_delegreturndata *d_data;
+	struct pnfs_layout_hdr *lo;
 
 	d_data = (struct nfs4_delegreturndata *)data;
 
 	if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
 		return;
 
+	lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL;
+	if (lo && !pnfs_layout_is_valid(lo)) {
+		d_data->args.lr_args = NULL;
+		d_data->res.lr_res = NULL;
+	}
+
 	nfs4_setup_sequence(d_data->res.server->nfs_client,
 			&d_data->args.seq_args,
 			&d_data->res.seq_res,
@@ -8820,6 +8834,8 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
 			&lrp->args.seq_args,
 			&lrp->res.seq_res,
 			task);
+	if (!pnfs_layout_is_valid(lrp->args.layout))
+		rpc_exit(task, 0);
 }
 
 static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index a8f5e6b16749..3fe81424337d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -801,6 +801,11 @@ static inline void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
 {
 }
 
+static inline bool pnfs_layout_is_valid(const struct pnfs_layout_hdr *lo)
+{
+	return false;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #if IS_ENABLED(CONFIG_NFS_V4_2)

From 0dae72d581dfe795aedaf5523c1faeb18958b1a7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 18 Jun 2018 15:55:43 -0400
Subject: [PATCH 084/215] sunrpc: Prevent duplicate XID allocation

Krzysztof Kozlowski <krzk@kernel.org> reports that a heavy NFSv4
WRITE workload against a slow NFS server causes his Raspberry Pi
clients to stall. Krzysztof bisected it to commit 37ac86c3a76c
("SUNRPC: Initialize rpc_rqst outside of xprt->reserve_lock") .

I was able to reproduce similar behavior and it appears that rarely
the RPC client layer is re-allocating an XID for an RPC that it has
already partially sent. This results in the client ignoring the
subsequent reply, which carries the original XID.

For various reasons, checking !req->rq_xmit_bytes_sent in
xprt_prepare_transmit is not a 100% reliable mechanism for
determining when a fresh XID is needed.

Trond's preference is to allocate the XID at the time each rpc_rqst
slot is initialized.

This patch should also address a gcc 4.1.2 complaint reported by
Geert Uytterhoeven <geert@linux-m68k.org>.

Reported-by: Krzysztof Kozlowski <krzk@kernel.org>
Fixes: 37ac86c3a76c ("SUNRPC: Initialize rpc_rqst outside of ... ")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprt.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3c85af058227..3fabf9f6a0f9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -987,8 +987,6 @@ bool xprt_prepare_transmit(struct rpc_task *task)
 		task->tk_status = -EAGAIN;
 		goto out_unlock;
 	}
-	if (!bc_prealloc(req) && !req->rq_xmit_bytes_sent)
-		req->rq_xid = xprt_alloc_xid(xprt);
 	ret = true;
 out_unlock:
 	spin_unlock_bh(&xprt->transport_lock);
@@ -1298,7 +1296,12 @@ void xprt_retry_reserve(struct rpc_task *task)
 
 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
 {
-	return (__force __be32)xprt->xid++;
+	__be32 xid;
+
+	spin_lock(&xprt->reserve_lock);
+	xid = (__force __be32)xprt->xid++;
+	spin_unlock(&xprt->reserve_lock);
+	return xid;
 }
 
 static inline void xprt_init_xid(struct rpc_xprt *xprt)
@@ -1316,6 +1319,7 @@ void xprt_request_init(struct rpc_task *task)
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
 	req->rq_buffer  = NULL;
+	req->rq_xid	= xprt_alloc_xid(xprt);
 	req->rq_connect_cookie = xprt->connect_cookie - 1;
 	req->rq_bytes_sent = 0;
 	req->rq_snd_buf.len = 0;

From 93efbd39870474cc536b9caf4a6efeb03b0bc56f Mon Sep 17 00:00:00 2001
From: Zhouyang Jia <jiazhouyang09@gmail.com>
Date: Sat, 16 Jun 2018 01:05:01 +0800
Subject: [PATCH 085/215] scsi: xen-scsifront: add error handling for
 xenbus_printf

When xenbus_printf fails, the lack of error-handling code may
cause unexpected results.

This patch adds error-handling code after calling xenbus_printf.

Signed-off-by: Zhouyang Jia <jiazhouyang09@gmail.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/scsi/xen-scsifront.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index 36f59a1be7e9..61389bdc7926 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -654,10 +654,17 @@ static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
 static int scsifront_sdev_configure(struct scsi_device *sdev)
 {
 	struct vscsifrnt_info *info = shost_priv(sdev->host);
+	int err;
 
-	if (info && current == info->curr)
-		xenbus_printf(XBT_NIL, info->dev->nodename,
+	if (info && current == info->curr) {
+		err = xenbus_printf(XBT_NIL, info->dev->nodename,
 			      info->dev_state_path, "%d", XenbusStateConnected);
+		if (err) {
+			xenbus_dev_error(info->dev, err,
+				"%s: writing dev_state_path", __func__);
+			return err;
+		}
+	}
 
 	return 0;
 }
@@ -665,10 +672,15 @@ static int scsifront_sdev_configure(struct scsi_device *sdev)
 static void scsifront_sdev_destroy(struct scsi_device *sdev)
 {
 	struct vscsifrnt_info *info = shost_priv(sdev->host);
+	int err;
 
-	if (info && current == info->curr)
-		xenbus_printf(XBT_NIL, info->dev->nodename,
+	if (info && current == info->curr) {
+		err = xenbus_printf(XBT_NIL, info->dev->nodename,
 			      info->dev_state_path, "%d", XenbusStateClosed);
+		if (err)
+			xenbus_dev_error(info->dev, err,
+				"%s: writing dev_state_path", __func__);
+	}
 }
 
 static struct scsi_host_template scsifront_sht = {
@@ -1003,9 +1015,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
 
 			if (scsi_add_device(info->host, chn, tgt, lun)) {
 				dev_err(&dev->dev, "scsi_add_device\n");
-				xenbus_printf(XBT_NIL, dev->nodename,
+				err = xenbus_printf(XBT_NIL, dev->nodename,
 					      info->dev_state_path,
 					      "%d", XenbusStateClosed);
+				if (err)
+					xenbus_dev_error(dev, err,
+						"%s: writing dev_state_path", __func__);
 			}
 			break;
 		case VSCSIFRONT_OP_DEL_LUN:
@@ -1019,10 +1034,14 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
 			}
 			break;
 		case VSCSIFRONT_OP_READD_LUN:
-			if (device_state == XenbusStateConnected)
-				xenbus_printf(XBT_NIL, dev->nodename,
+			if (device_state == XenbusStateConnected) {
+				err = xenbus_printf(XBT_NIL, dev->nodename,
 					      info->dev_state_path,
 					      "%d", XenbusStateConnected);
+				if (err)
+					xenbus_dev_error(dev, err,
+						"%s: writing dev_state_path", __func__);
+			}
 			break;
 		default:
 			break;

From 7c63ca24c878e0051c91904b72174029320ef4bd Mon Sep 17 00:00:00 2001
From: Zhouyang Jia <jiazhouyang09@gmail.com>
Date: Sat, 16 Jun 2018 08:14:37 +0800
Subject: [PATCH 086/215] xen/scsiback: add error handling for xenbus_printf

When xenbus_printf fails, the lack of error-handling code may
cause unexpected results.

This patch adds error-handling code after calling xenbus_printf.

Signed-off-by: Zhouyang Jia <jiazhouyang09@gmail.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xen-scsiback.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 7bc88fd43cfc..e2f3e8b0fba9 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -1012,6 +1012,7 @@ static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
 {
 	struct v2p_entry *entry;
 	unsigned long flags;
+	int err;
 
 	if (try) {
 		spin_lock_irqsave(&info->v2p_lock, flags);
@@ -1027,8 +1028,11 @@ static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
 			scsiback_del_translation_entry(info, vir);
 		}
 	} else if (!try) {
-		xenbus_printf(XBT_NIL, info->dev->nodename, state,
+		err = xenbus_printf(XBT_NIL, info->dev->nodename, state,
 			      "%d", XenbusStateClosed);
+		if (err)
+			xenbus_dev_error(info->dev, err,
+				"%s: writing %s", __func__, state);
 	}
 }
 
@@ -1067,8 +1071,11 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op,
 	snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", ent);
 	val = xenbus_read(XBT_NIL, dev->nodename, str, NULL);
 	if (IS_ERR(val)) {
-		xenbus_printf(XBT_NIL, dev->nodename, state,
+		err = xenbus_printf(XBT_NIL, dev->nodename, state,
 			      "%d", XenbusStateClosed);
+		if (err)
+			xenbus_dev_error(info->dev, err,
+				"%s: writing %s", __func__, state);
 		return;
 	}
 	strlcpy(phy, val, VSCSI_NAMELEN);
@@ -1079,8 +1086,11 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op,
 	err = xenbus_scanf(XBT_NIL, dev->nodename, str, "%u:%u:%u:%u",
 			   &vir.hst, &vir.chn, &vir.tgt, &vir.lun);
 	if (XENBUS_EXIST_ERR(err)) {
-		xenbus_printf(XBT_NIL, dev->nodename, state,
+		err = xenbus_printf(XBT_NIL, dev->nodename, state,
 			      "%d", XenbusStateClosed);
+		if (err)
+			xenbus_dev_error(info->dev, err,
+				"%s: writing %s", __func__, state);
 		return;
 	}
 

From e08ecba17b72aeb01859601bc242a5bc48620109 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 19 Jun 2018 21:51:55 +1000
Subject: [PATCH 087/215] powerpc/64s: Fix build failures with CONFIG_NMI_IPI=n

I broke the build when CONFIG_NMI_IPI=n with my recent commit to add
arch_trigger_cpumask_backtrace(), eg:

  stacktrace.c:(.text+0x1b0): undefined reference to `.smp_send_safe_nmi_ipi'

We should rework the CONFIG symbols here in future to avoid these
double barrelled ifdefs but for now they fix the build.

Fixes: 5cc05910f26e ("powerpc/64s: Wire up arch_trigger_cpumask_backtrace()")
Reported-by: Christophe LEROY <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nmi.h   | 2 +-
 arch/powerpc/kernel/stacktrace.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index 0f571e0ebca1..bd9ba8defd72 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -8,7 +8,7 @@ extern void arch_touch_nmi_watchdog(void);
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
 
-#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_STACKTRACE)
+#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE)
 extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
 					   bool exclude_self);
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 07e97f289c52..e2c50b55138f 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -196,7 +196,7 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
 #endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
 
-#ifdef CONFIG_PPC_BOOK3S_64
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
 static void handle_backtrace_ipi(struct pt_regs *regs)
 {
 	nmi_cpu_backtrace(regs);
@@ -242,4 +242,4 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 {
 	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
 }
-#endif /* CONFIG_PPC64 */
+#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */

From dd65a941f6ba473a5cb9d013d57fa43b48450a04 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Tue, 12 Jun 2018 13:08:40 +0200
Subject: [PATCH 088/215] arm64: dma-mapping: clear buffers allocated with
 FORCE_CONTIGUOUS flag

dma_alloc_*() buffers might be exposed to userspace via mmap() call, so
they should be cleared on allocation. In case of IOMMU-based dma-mapping
implementation such buffer clearing was missing in the code path for
DMA_ATTR_FORCE_CONTIGUOUS flag handling, because dma_alloc_from_contiguous()
doesn't honor __GFP_ZERO flag. This patch fixes this issue. For more
information on clearing buffers allocated by dma_alloc_* functions,
see commit 6829e274a623 ("arm64: dma-mapping: always clear allocated
buffers").

Fixes: 44176bb38fa4 ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/dma-mapping.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 49e217ac7e1e..61e93f0b5482 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -583,13 +583,14 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 						    size >> PAGE_SHIFT);
 			return NULL;
 		}
-		if (!coherent)
-			__dma_flush_area(page_to_virt(page), iosize);
-
 		addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
 						   prot,
 						   __builtin_return_address(0));
-		if (!addr) {
+		if (addr) {
+			memset(addr, 0, size);
+			if (!coherent)
+				__dma_flush_area(page_to_virt(page), iosize);
+		} else {
 			iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
 			dma_release_from_contiguous(dev, page,
 						    size >> PAGE_SHIFT);

From b154886f7892499d0d3054026e19dfb9a731df61 Mon Sep 17 00:00:00 2001
From: Zhizhou Zhang <zhizhouzhang@asrmicro.com>
Date: Tue, 12 Jun 2018 17:07:37 +0800
Subject: [PATCH 089/215] arm64: make secondary_start_kernel() notrace

We can't call function trace hook before setup percpu offset.
When entering secondary_start_kernel(), percpu offset has not
been initialized.  So this lead hotplug malfunction.
Here is the flow to reproduce this bug:

echo 0 > /sys/devices/system/cpu/cpu1/online
echo function > /sys/kernel/debug/tracing/current_tracer
echo 1 > /sys/kernel/debug/tracing/tracing_on
echo 1 > /sys/devices/system/cpu/cpu1/online

Acked-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Zhizhou Zhang <zhizhouzhang@asrmicro.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f3e2e3aec0b0..2faa9863d2e5 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -179,7 +179,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
  * This is the secondary CPU boot entry.  We're using this CPUs
  * idle thread stack, but a set of temporary page tables.
  */
-asmlinkage void secondary_start_kernel(void)
+asmlinkage notrace void secondary_start_kernel(void)
 {
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	struct mm_struct *mm = &init_mm;

From 42f86b44a4d356edba626171dfe0be061fc695af Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 18 Jun 2018 19:07:24 -0400
Subject: [PATCH 090/215] pNFS/flexfiles: Don't tie up all the rpciod threads
 in resends

We do not want to have rpciod threads perform recursive calls into the
RPC layer since that can deadlock. In particular, having to wait for
a layoutget can be nasty... We want rather to defer scheduling those
retries until we're in the rpc_release() callback, since that is
called from the nfsiod workqueue.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 11 ++++++++---
 include/linux/nfs_xdr.h                |  2 ++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 3ae038d9c292..336b4d560e2c 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1243,17 +1243,18 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 					   hdr->ds_clp, hdr->lseg,
 					   hdr->pgio_mirror_idx);
 
+	clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
+	clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
 	switch (err) {
 	case -NFS4ERR_RESET_TO_PNFS:
 		if (ff_layout_choose_best_ds_for_read(hdr->lseg,
 					hdr->pgio_mirror_idx + 1,
 					&hdr->pgio_mirror_idx))
 			goto out_eagain;
-		ff_layout_read_record_layoutstats_done(task, hdr);
-		pnfs_read_resend_pnfs(hdr);
+		set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
 		return task->tk_status;
 	case -NFS4ERR_RESET_TO_MDS:
-		ff_layout_reset_read(hdr);
+		set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
 		return task->tk_status;
 	case -EAGAIN:
 		goto out_eagain;
@@ -1403,6 +1404,10 @@ static void ff_layout_read_release(void *data)
 	struct nfs_pgio_header *hdr = data;
 
 	ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
+	if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+		pnfs_read_resend_pnfs(hdr);
+	else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+		ff_layout_reset_read(hdr);
 	pnfs_generic_rw_release(data);
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9dee3c23895d..712eed156d09 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1438,6 +1438,8 @@ enum {
 	NFS_IOHDR_EOF,
 	NFS_IOHDR_REDO,
 	NFS_IOHDR_STAT,
+	NFS_IOHDR_RESEND_PNFS,
+	NFS_IOHDR_RESEND_MDS,
 };
 
 struct nfs_io_completion;

From 7b0df92ac12148098391bf53f3494af17812f264 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 18 Jun 2018 19:23:50 -0400
Subject: [PATCH 091/215] pNFS/flexfiles: Process writeback resends from nfsiod
 context as well

Although the writeback resends are more robust than the reads, since they
are not immediately rescheduled by the same thread, we are better off
processing them in the same place as the reads.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 336b4d560e2c..1386b774ec95 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1428,12 +1428,14 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
 					   hdr->ds_clp, hdr->lseg,
 					   hdr->pgio_mirror_idx);
 
+	clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
+	clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
 	switch (err) {
 	case -NFS4ERR_RESET_TO_PNFS:
-		ff_layout_reset_write(hdr, true);
+		set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
 		return task->tk_status;
 	case -NFS4ERR_RESET_TO_MDS:
-		ff_layout_reset_write(hdr, false);
+		set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
 		return task->tk_status;
 	case -EAGAIN:
 		return -EAGAIN;
@@ -1580,6 +1582,10 @@ static void ff_layout_write_release(void *data)
 	struct nfs_pgio_header *hdr = data;
 
 	ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
+	if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+		ff_layout_reset_write(hdr, true);
+	else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+		ff_layout_reset_write(hdr, false);
 	pnfs_generic_rw_release(data);
 }
 

From 0cc61e64e21cfc24fa0d938fd148aba4a595163b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Jun 2018 18:40:14 +0200
Subject: [PATCH 092/215] block: fix timeout changes for legacy request drivers

blk_mq_complete_request can only be called for blk-mq drivers, but when
removing the BLK_EH_HANDLED return value, two legacy request timeout
methods incorrectly got switched to call blk_mq_complete_request.
Call __blk_complete_request instead to reinstance the previous behavior.
For that __blk_complete_request needs to be exported.

Fixes: 1fc2b62e ("scsi_transport_fc: complete requests from ->timeout")
Fixes: 0df0bb08 ("null_blk: complete requests from ->timeout")
Reported-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-softirq.c              | 1 +
 drivers/block/null_blk.c         | 2 +-
 drivers/scsi/scsi_transport_fc.c | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 01e2b353a2b9..15c1f5e12eb8 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -144,6 +144,7 @@ void __blk_complete_request(struct request *req)
 
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL(__blk_complete_request);
 
 /**
  * blk_complete_request - end I/O on a request
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 2bdadd7f1454..3d8bdbe9bd35 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -1365,7 +1365,7 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
 static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
 {
 	pr_info("null: rq %p timed out\n", rq);
-	blk_mq_complete_request(rq);
+	__blk_complete_request(rq);
 	return BLK_EH_DONE;
 }
 
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 1da3d71e9f61..13948102ca29 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3592,7 +3592,7 @@ fc_bsg_job_timeout(struct request *req)
 
 	/* the blk_end_sync_io() doesn't check the error */
 	if (inflight)
-		blk_mq_complete_request(req);
+		__blk_complete_request(req);
 	return BLK_EH_DONE;
 }
 

From 91c822c33066b7c4f8cc47d7532f47e3bb89979b Mon Sep 17 00:00:00 2001
From: Rajan Vaja <rajan.vaja@gmail.com>
Date: Mon, 18 Jun 2018 13:01:02 +0530
Subject: [PATCH 093/215] drm/amd/pp: Fix uninitialized variable

Initialize variable to 0 before performing logical OR operation.

Reviewed-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Rajan Vaja <rajan.vaja@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
index dbe4b1f66784..22364875a943 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
@@ -1090,7 +1090,7 @@ static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr)
 static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
 {
 	struct amdgpu_device *adev = hwmgr->adev;
-	int result;
+	int result = 0;
 	uint32_t num_se = 0;
 	uint32_t count, data;
 

From 6fa39bc1e01dab8b4f54b23e95a181a2ed5a2d38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Fri, 8 Jun 2018 12:58:15 +0200
Subject: [PATCH 094/215] drm/amdgpu: Use kvmalloc_array for allocating VRAM
 manager nodes array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It can be quite big, and there's no need for it to be physically
contiguous. This is less likely to fail under memory pressure (has
actually happened while running piglit).

Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 9aca653bec07..9c47e860e5e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -135,7 +135,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
 	}
 
-	nodes = kcalloc(num_nodes, sizeof(*nodes), GFP_KERNEL);
+	nodes = kvmalloc_array(num_nodes, sizeof(*nodes),
+			       GFP_KERNEL | __GFP_ZERO);
 	if (!nodes)
 		return -ENOMEM;
 
@@ -190,7 +191,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		drm_mm_remove_node(&nodes[i]);
 	spin_unlock(&mgr->lock);
 
-	kfree(nodes);
+	kvfree(nodes);
 	return r == -ENOSPC ? 0 : r;
 }
 
@@ -229,7 +230,7 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
 	atomic64_sub(usage, &mgr->usage);
 	atomic64_sub(vis_usage, &mgr->vis_usage);
 
-	kfree(mem->mm_node);
+	kvfree(mem->mm_node);
 	mem->mm_node = NULL;
 }
 

From d9fda248046ac035f18a6e663f2f9245b4bf9470 Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Tue, 8 May 2018 11:33:42 -0400
Subject: [PATCH 095/215] drm/amdgpu: Don't default to DC support for Kaveri
 and older

We've had a number of users report failures to detect and light up
display with DC with LVDS and VGA. These connector types are not
currently supported with DC. I'd like to add support but unfortunately
don't have a system with LVDS or VGA available.

In order not to cause regressions we should probably fallback to the
non-DC driver for ASICs that support VGA and LVDS.

These ASICs are:
 * Bonaire
 * Kabini
 * Kaveri
 * Mullins

ASIC support can always be force enabled with amdgpu.dc=1

v2: Keep Hawaii on DC
v3: Added Mullins to the list

Cc: stable@vger.kernel.org
Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3317d1536f4f..6e5284e6c028 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2158,10 +2158,18 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
 	case CHIP_BONAIRE:
-	case CHIP_HAWAII:
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
 	case CHIP_MULLINS:
+		/*
+		 * We have systems in the wild with these ASICs that require
+		 * LVDS and VGA support which is not supported with DC.
+		 *
+		 * Fallback to the non-DC driver here by default so as not to
+		 * cause regressions.
+		 */
+		return amdgpu_dc > 0;
+	case CHIP_HAWAII:
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 	case CHIP_POLARIS10:

From 9c24c10a2c1e1bb478b6bb70612d9e885aee044f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Tue, 19 Jun 2018 10:26:40 -0700
Subject: [PATCH 096/215] Revert "block: Add warning for bi_next not NULL in
 bio_endio()"

Commit 0ba99ca4838b ("block: Add warning for bi_next not NULL in
bio_endio()") breaks the dm driver. end_clone_bio() detects whether
or not a bio is the last bio associated with a request by checking
the .bi_next field. Commit 0ba99ca4838b clears that field before
end_clone_bio() has had a chance to inspect that field. Hence revert
commit 0ba99ca4838b.

This patch avoids that KASAN reports the following complaint when
running the srp-test software (srp-test/run_tests -c -d -r 10 -t 02-mq):

==================================================================
BUG: KASAN: use-after-free in bio_advance+0x11b/0x1d0
Read of size 4 at addr ffff8801300e06d0 by task ksoftirqd/0/9

CPU: 0 PID: 9 Comm: ksoftirqd/0 Not tainted 4.18.0-rc1-dbg+ #1
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0xa4/0xf5
 print_address_description+0x6f/0x270
 kasan_report+0x241/0x360
 __asan_load4+0x78/0x80
 bio_advance+0x11b/0x1d0
 blk_update_request+0xa7/0x5b0
 scsi_end_request+0x56/0x320 [scsi_mod]
 scsi_io_completion+0x7d6/0xb20 [scsi_mod]
 scsi_finish_command+0x1c0/0x280 [scsi_mod]
 scsi_softirq_done+0x19a/0x230 [scsi_mod]
 blk_mq_complete_request+0x160/0x240
 scsi_mq_done+0x50/0x1a0 [scsi_mod]
 srp_recv_done+0x515/0x1330 [ib_srp]
 __ib_process_cq+0xa0/0xf0 [ib_core]
 ib_poll_handler+0x38/0xa0 [ib_core]
 irq_poll_softirq+0xe8/0x1f0
 __do_softirq+0x128/0x60d
 run_ksoftirqd+0x3f/0x60
 smpboot_thread_fn+0x352/0x460
 kthread+0x1c1/0x1e0
 ret_from_fork+0x24/0x30

Allocated by task 1918:
 save_stack+0x43/0xd0
 kasan_kmalloc+0xad/0xe0
 kasan_slab_alloc+0x11/0x20
 kmem_cache_alloc+0xfe/0x350
 mempool_alloc_slab+0x15/0x20
 mempool_alloc+0xfb/0x270
 bio_alloc_bioset+0x244/0x350
 submit_bh_wbc+0x9c/0x2f0
 __block_write_full_page+0x299/0x5a0
 block_write_full_page+0x16b/0x180
 blkdev_writepage+0x18/0x20
 __writepage+0x42/0x80
 write_cache_pages+0x376/0x8a0
 generic_writepages+0xbe/0x110
 blkdev_writepages+0xe/0x10
 do_writepages+0x9b/0x180
 __filemap_fdatawrite_range+0x178/0x1c0
 file_write_and_wait_range+0x59/0xc0
 blkdev_fsync+0x46/0x80
 vfs_fsync_range+0x66/0x100
 do_fsync+0x3d/0x70
 __x64_sys_fsync+0x21/0x30
 do_syscall_64+0x77/0x230
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 9:
 save_stack+0x43/0xd0
 __kasan_slab_free+0x137/0x190
 kasan_slab_free+0xe/0x10
 kmem_cache_free+0xd3/0x380
 mempool_free_slab+0x17/0x20
 mempool_free+0x63/0x160
 bio_free+0x81/0xa0
 bio_put+0x59/0x60
 end_bio_bh_io_sync+0x5d/0x70
 bio_endio+0x1a7/0x360
 blk_update_request+0xd0/0x5b0
 end_clone_bio+0xa3/0xd0 [dm_mod]
 bio_endio+0x1a7/0x360
 blk_update_request+0xd0/0x5b0
 scsi_end_request+0x56/0x320 [scsi_mod]
 scsi_io_completion+0x7d6/0xb20 [scsi_mod]
 scsi_finish_command+0x1c0/0x280 [scsi_mod]
 scsi_softirq_done+0x19a/0x230 [scsi_mod]
 blk_mq_complete_request+0x160/0x240
 scsi_mq_done+0x50/0x1a0 [scsi_mod]
 srp_recv_done+0x515/0x1330 [ib_srp]
 __ib_process_cq+0xa0/0xf0 [ib_core]
 ib_poll_handler+0x38/0xa0 [ib_core]
 irq_poll_softirq+0xe8/0x1f0
 __do_softirq+0x128/0x60d

The buggy address belongs to the object at ffff8801300e0640
 which belongs to the cache bio-0 of size 200
The buggy address is located 144 bytes inside of
 200-byte region [ffff8801300e0640, ffff8801300e0708)
The buggy address belongs to the page:
page:ffffea0004c03800 count:1 mapcount:0 mapping:ffff88015a563a00 index:0x0 compound_mapcount: 0
flags: 0x8000000000008100(slab|head)
raw: 8000000000008100 dead000000000100 dead000000000200 ffff88015a563a00
raw: 0000000000000000 0000000000330033 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff8801300e0580: fb fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc
 ffff8801300e0600: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
>ffff8801300e0680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                                 ^
 ffff8801300e0700: fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff8801300e0780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
==================================================================

Cc: Kent Overstreet <kent.overstreet@gmail.com>
Fixes: 0ba99ca4838b ("block: Add warning for bi_next not NULL in bio_endio()")
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c      | 3 ---
 block/blk-core.c | 8 +-------
 2 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index db9a40e9a136..f7e3d88bd0b6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1807,9 +1807,6 @@ void bio_endio(struct bio *bio)
 	if (!bio_integrity_endio(bio))
 		return;
 
-	if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
-		bio->bi_next = NULL;
-
 	/*
 	 * Need to have a real endio function for chained bios, otherwise
 	 * various corner cases will break (like stacking block devices that
diff --git a/block/blk-core.c b/block/blk-core.c
index cf0ee764b908..afd2596ea3d3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -273,10 +273,6 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 	bio_advance(bio, nbytes);
 
 	/* don't actually finish bio if it's part of flush sequence */
-	/*
-	 * XXX this code looks suspicious - it's not consistent with advancing
-	 * req->bio in caller
-	 */
 	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
 		bio_endio(bio);
 }
@@ -3081,10 +3077,8 @@ bool blk_update_request(struct request *req, blk_status_t error,
 		struct bio *bio = req->bio;
 		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-		if (bio_bytes == bio->bi_iter.bi_size) {
+		if (bio_bytes == bio->bi_iter.bi_size)
 			req->bio = bio->bi_next;
-			bio->bi_next = NULL;
-		}
 
 		/* Completion has already been traced */
 		bio_clear_flag(bio, BIO_TRACE_COMPLETION);

From 5c53d19b76dccbaf340b11acb837d40c0789049d Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Mon, 18 Jun 2018 13:46:16 -0400
Subject: [PATCH 097/215] drm/amdgpu:All UVD instances share one idle_work
 handle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All UVD instanses have only one dpm control, so it is better
to share one idle_work handle.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Tested-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 14 +++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index bcf68f80bbf0..3ff08e326838 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -130,7 +130,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	unsigned version_major, version_minor, family_id;
 	int i, j, r;
 
-	INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);
+	INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
 
 	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
@@ -314,12 +314,12 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 	void *ptr;
 	int i, j;
 
+	cancel_delayed_work_sync(&adev->uvd.idle_work);
+
 	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
 		if (adev->uvd.inst[j].vcpu_bo == NULL)
 			continue;
 
-		cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work);
-
 		/* only valid for physical mode */
 		if (adev->asic_type < CHIP_POLARIS10) {
 			for (i = 0; i < adev->uvd.max_handles; ++i)
@@ -1145,7 +1145,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 {
 	struct amdgpu_device *adev =
-		container_of(work, struct amdgpu_device, uvd.inst->idle_work.work);
+		container_of(work, struct amdgpu_device, uvd.idle_work.work);
 	unsigned fences = 0, i, j;
 
 	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
@@ -1167,7 +1167,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 							       AMD_CG_STATE_GATE);
 		}
 	} else {
-		schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
+		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
 	}
 }
 
@@ -1179,7 +1179,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 	if (amdgpu_sriov_vf(adev))
 		return;
 
-	set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
+	set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
 	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, true);
@@ -1196,7 +1196,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
 {
 	if (!amdgpu_sriov_vf(ring->adev))
-		schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
+		schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index b1579fba134c..8b23a1b00c76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -44,7 +44,6 @@ struct amdgpu_uvd_inst {
 	void			*saved_bo;
 	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES];
 	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES];
-	struct delayed_work	idle_work;
 	struct amdgpu_ring	ring;
 	struct amdgpu_ring	ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
 	struct amdgpu_irq_src	irq;
@@ -62,6 +61,7 @@ struct amdgpu_uvd {
 	bool			address_64_bit;
 	bool			use_ctx_buf;
 	struct amdgpu_uvd_inst		inst[AMDGPU_MAX_UVD_INSTANCES];
+	struct delayed_work	idle_work;
 };
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev);

From 34d6d59986abb1d2cb5415a49b6c50f51ba1d2e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Fri, 15 Jun 2018 11:06:56 +0200
Subject: [PATCH 098/215] drm/amdgpu: Update pin_size values before unpinning
 BO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

At least in theory, ttm_bo_validate may move the BO, in which case the
pin_size accounting would be inconsistent with when the BO was pinned.

Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5e4e1bd90383..026140f08ee9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -790,15 +790,6 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 	bo->pin_count--;
 	if (bo->pin_count)
 		return 0;
-	for (i = 0; i < bo->placement.num_placement; i++) {
-		bo->placements[i].lpfn = 0;
-		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
-	}
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-	if (unlikely(r)) {
-		dev_err(adev->dev, "%p validate failed for unpin\n", bo);
-		goto error;
-	}
 
 	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
 		adev->vram_pin_size -= amdgpu_bo_size(bo);
@@ -808,7 +799,14 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 		adev->gart_pin_size -= amdgpu_bo_size(bo);
 	}
 
-error:
+	for (i = 0; i < bo->placement.num_placement; i++) {
+		bo->placements[i].lpfn = 0;
+		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
+	}
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (unlikely(r))
+		dev_err(adev->dev, "%p validate failed for unpin\n", bo);
+
 	return r;
 }
 

From 5e9244ff585239630f15f8ad8e676bc91a94ca9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Tue, 12 Jun 2018 12:07:33 +0200
Subject: [PATCH 099/215] drm/amdgpu: Refactor
 amdgpu_vram_mgr_bo_invisible_size helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Preparation for the following fix, no functional change intended.

Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   |  6 ++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h      |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 16 ++++++++++++++++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 026140f08ee9..3526efa8960e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -762,8 +762,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
 	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
 		adev->vram_pin_size += amdgpu_bo_size(bo);
-		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
-			adev->invisible_pin_size += amdgpu_bo_size(bo);
+		adev->invisible_pin_size += amdgpu_vram_mgr_bo_invisible_size(bo);
 	} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		adev->gart_pin_size += amdgpu_bo_size(bo);
 	}
@@ -793,8 +792,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 
 	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
 		adev->vram_pin_size -= amdgpu_bo_size(bo);
-		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
-			adev->invisible_pin_size -= amdgpu_bo_size(bo);
+		adev->invisible_pin_size -= amdgpu_vram_mgr_bo_invisible_size(bo);
 	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
 		adev->gart_pin_size -= amdgpu_bo_size(bo);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index e969c879d87e..e5da4654b630 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -73,6 +73,7 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem);
 uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
 int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
 
+u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 9c47e860e5e6..ae0049c6c52c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -96,6 +96,22 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
 		adev->gmc.visible_vram_size : end) - start;
 }
 
+/**
+ * amdgpu_vram_mgr_bo_invisible_size - CPU invisible BO size
+ *
+ * @bo: &amdgpu_bo buffer object (must be in VRAM)
+ *
+ * Returns:
+ * How much of the given &amdgpu_bo buffer object lies in CPU invisible VRAM.
+ */
+u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo)
+{
+	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+		return amdgpu_bo_size(bo);
+
+	return 0;
+}
+
 /**
  * amdgpu_vram_mgr_new - allocate new ranges
  *

From 7303b39e46b2f523334591f05fd9566cf929eb26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Thu, 14 Jun 2018 13:02:07 +0200
Subject: [PATCH 100/215] drm/amdgpu: Make amdgpu_vram_mgr_bo_invisible_size
 always accurate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even BOs with AMDGPU_GEM_CREATE_NO_CPU_ACCESS may end up at least
partially in CPU visible VRAM, in particular when all VRAM is visible.

v2:
* Don't take VRAM mgr spinlock, not needed (Christian König)
* Make loop logic simpler and clearer.

Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index ae0049c6c52c..b6333f92ba45 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -106,10 +106,26 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
  */
 u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo)
 {
-	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	struct ttm_mem_reg *mem = &bo->tbo.mem;
+	struct drm_mm_node *nodes = mem->mm_node;
+	unsigned pages = mem->num_pages;
+	u64 usage = 0;
+
+	if (adev->gmc.visible_vram_size == adev->gmc.real_vram_size)
+		return 0;
+
+	if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
 		return amdgpu_bo_size(bo);
 
-	return 0;
+	while (nodes && pages) {
+		usage += nodes->size << PAGE_SHIFT;
+		usage -= amdgpu_vram_mgr_vis_size(adev, nodes);
+		pages -= nodes->size;
+		++nodes;
+	}
+
+	return usage;
 }
 
 /**

From 6fb8656646f996d1eef42e6d56203c4915cb9e08 Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <mschiffer@universe-factory.net>
Date: Sat, 24 Mar 2018 17:57:49 +0100
Subject: [PATCH 101/215] mips: ftrace: fix static function graph tracing

ftrace_graph_caller was never run after calling ftrace_trace_function,
breaking the function graph tracer. Fix this, bringing it in line with the
x86 implementation.

While we're at it, also streamline the control flow of _mcount a bit to
reduce the number of branches.

This issue was reported before:
https://www.linux-mips.org/archives/linux-mips/2014-11/msg00295.html

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
Tested-by: Matt Redfearn <matt.redfearn@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/18929/
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: stable@vger.kernel.org # v3.17+
---
 arch/mips/kernel/mcount.S | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S
index f2ee7e1e3342..cff52b283e03 100644
--- a/arch/mips/kernel/mcount.S
+++ b/arch/mips/kernel/mcount.S
@@ -119,10 +119,20 @@ NESTED(_mcount, PT_SIZE, ra)
 EXPORT_SYMBOL(_mcount)
 	PTR_LA	t1, ftrace_stub
 	PTR_L	t2, ftrace_trace_function /* Prepare t2 for (1) */
-	bne	t1, t2, static_trace
+	beq	t1, t2, fgraph_trace
 	 nop
 
+	MCOUNT_SAVE_REGS
+
+	move	a0, ra		/* arg1: self return address */
+	jalr	t2		/* (1) call *ftrace_trace_function */
+	 move	a1, AT		/* arg2: parent's return address */
+
+	MCOUNT_RESTORE_REGS
+
+fgraph_trace:
 #ifdef	CONFIG_FUNCTION_GRAPH_TRACER
+	PTR_LA	t1, ftrace_stub
 	PTR_L	t3, ftrace_graph_return
 	bne	t1, t3, ftrace_graph_caller
 	 nop
@@ -131,24 +141,11 @@ EXPORT_SYMBOL(_mcount)
 	bne	t1, t3, ftrace_graph_caller
 	 nop
 #endif
-	b	ftrace_stub
-#ifdef CONFIG_32BIT
-	 addiu sp, sp, 8
-#else
-	 nop
-#endif
 
-static_trace:
-	MCOUNT_SAVE_REGS
-
-	move	a0, ra		/* arg1: self return address */
-	jalr	t2		/* (1) call *ftrace_trace_function */
-	 move	a1, AT		/* arg2: parent's return address */
-
-	MCOUNT_RESTORE_REGS
 #ifdef CONFIG_32BIT
 	addiu sp, sp, 8
 #endif
+
 	.globl ftrace_stub
 ftrace_stub:
 	RETURN_BACK

From 758380b8155f69b4e2f77f27562f8a7a466749d6 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Tue, 12 Jun 2018 19:38:08 +1000
Subject: [PATCH 102/215] powerpc/64s/radix: Fix radix_kvm_prefetch_workaround
 paca access of not possible CPU

If possible CPUs are limited (e.g., by kexec), then the kvm prefetch
workaround function can access the paca pointer for a !possible CPU.

Fixes: d2e60075a3d44 ("powerpc/64: Use array of paca pointers and allocate pacas individually")
Cc: stable@kernel.org
Reported-by: Pridhiviraj Paidipeddi <ppaidipe@linux.vnet.ibm.com>
Tested-by: Pridhiviraj Paidipeddi <ppaidipe@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/tlb-radix.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index a734e486664d..1135b43a597c 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -1097,6 +1097,8 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
 			if (sib == cpu)
 				continue;
+			if (!cpu_possible(sib))
+				continue;
 			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
 				flush = true;
 		}

From fadd03c615922d8521a2e76d4ba2335891cb2790 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Thu, 14 Jun 2018 16:01:52 +0530
Subject: [PATCH 103/215] powerpc/mm/hash/4k: Free hugetlb page table caches
 correctly.

With 4k page size for hugetlb we allocate hugepage directories from its on slab
cache. With patch 0c4d26802 ("powerpc/book3s64/mm: Simplify the rcu callback for page table free")
we missed to free these allocated hugepd tables.

Update pgtable_free to handle hugetlb hugepd directory table.

Fixes: 0c4d268029bf ("powerpc/book3s64/mm: Simplify the rcu callback for page table free")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
[mpe: Add CONFIG_HUGETLB_PAGE guard to fix build break]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/32/pgalloc.h  |  1 +
 .../include/asm/book3s/64/pgtable-4k.h        | 21 +++++++++++++++++++
 .../include/asm/book3s/64/pgtable-64k.h       |  9 ++++++++
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  5 +++++
 arch/powerpc/include/asm/nohash/32/pgalloc.h  |  1 +
 arch/powerpc/include/asm/nohash/64/pgalloc.h  |  1 +
 arch/powerpc/mm/hugetlbpage.c                 |  3 ++-
 arch/powerpc/mm/pgtable-book3s64.c            | 12 +++++++++++
 8 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 6a6673907e45..e4633803fe43 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -108,6 +108,7 @@ static inline void pgtable_free(void *table, unsigned index_size)
 }
 
 #define check_pgt_cache()	do { } while (0)
+#define get_hugepd_cache_index(x)  (x)
 
 #ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
index af5f2baac80f..a069dfcac9a9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -49,6 +49,27 @@ static inline int hugepd_ok(hugepd_t hpd)
 }
 #define is_hugepd(hpd)		(hugepd_ok(hpd))
 
+/*
+ * 16M and 16G huge page directory tables are allocated from slab cache
+ *
+ */
+#define H_16M_CACHE_INDEX (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE - 24)
+#define H_16G_CACHE_INDEX                                                      \
+	(PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + H_PUD_INDEX_SIZE - 34)
+
+static inline int get_hugepd_cache_index(int index)
+{
+	switch (index) {
+	case H_16M_CACHE_INDEX:
+		return HTLB_16M_INDEX;
+	case H_16G_CACHE_INDEX:
+		return HTLB_16G_INDEX;
+	default:
+		BUG();
+	}
+	/* should not reach */
+}
+
 #else /* !CONFIG_HUGETLB_PAGE */
 static inline int pmd_huge(pmd_t pmd) { return 0; }
 static inline int pud_huge(pud_t pud) { return 0; }
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index fb4b3ba52339..d7ee249d6890 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -45,8 +45,17 @@ static inline int hugepd_ok(hugepd_t hpd)
 {
 	return 0;
 }
+
 #define is_hugepd(pdep)			0
 
+/*
+ * This should never get called
+ */
+static inline int get_hugepd_cache_index(int index)
+{
+	BUG();
+}
+
 #else /* !CONFIG_HUGETLB_PAGE */
 static inline int pmd_huge(pmd_t pmd) { return 0; }
 static inline int pud_huge(pud_t pud) { return 0; }
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 63cee159022b..42aafba7a308 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -287,6 +287,11 @@ enum pgtable_index {
 	PMD_INDEX,
 	PUD_INDEX,
 	PGD_INDEX,
+	/*
+	 * Below are used with 4k page size and hugetlb
+	 */
+	HTLB_16M_INDEX,
+	HTLB_16G_INDEX,
 };
 
 extern unsigned long __vmalloc_start;
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 1707781d2f20..9de40eb614da 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -109,6 +109,7 @@ static inline void pgtable_free(void *table, unsigned index_size)
 }
 
 #define check_pgt_cache()	do { } while (0)
+#define get_hugepd_cache_index(x)	(x)
 
 #ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 0e693f322cb2..e2d62d033708 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -141,6 +141,7 @@ static inline void pgtable_free(void *table, int shift)
 	}
 }
 
+#define get_hugepd_cache_index(x)	(x)
 #ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 {
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7c5f479c5c00..8a9a49c13865 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -337,7 +337,8 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
 	if (shift >= pdshift)
 		hugepd_free(tlb, hugepte);
 	else
-		pgtable_free_tlb(tlb, hugepte, pdshift - shift);
+		pgtable_free_tlb(tlb, hugepte,
+				 get_hugepd_cache_index(pdshift - shift));
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index c1f4ca45c93a..4afbfbb64bfd 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -409,6 +409,18 @@ static inline void pgtable_free(void *table, int index)
 	case PUD_INDEX:
 		kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
 		break;
+#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
+		/* 16M hugepd directory at pud level */
+	case HTLB_16M_INDEX:
+		BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
+		kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
+		break;
+		/* 16G hugepd directory at the pgd level */
+	case HTLB_16G_INDEX:
+		BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
+		kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
+		break;
+#endif
 		/* We don't free pgd table via RCU callback */
 	default:
 		BUG();

From 684fb246578b9e81fc7b4ca5c71eae22edb650b2 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 19 Jun 2018 10:47:50 -0500
Subject: [PATCH 104/215] objtool: Add machine_real_restart() to the noreturn
 list

machine_real_restart() is annotated as '__noreturn", so add it to the
objtool noreturn list.  This fixes the following warning with clang and
CONFIG_CC_OPTIMIZE_FOR_SIZE=y:

  arch/x86/kernel/reboot.o: warning: objtool: native_machine_emergency_restart() falls through to next function machine_power_off()

Reported-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Link: https://lkml.kernel.org/r/791712792aa4431bdd55bf1beb33a169ddf3b4a2.1529423255.git.jpoimboe@redhat.com
---
 tools/objtool/check.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 38047c6aa575..f4a25bd1871f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -164,6 +164,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func,
 		"lbug_with_loc",
 		"fortify_panic",
 		"usercopy_abort",
+		"machine_real_restart",
 	};
 
 	if (func->bind == STB_WEAK)

From 18f3e95b90b28318ef35910d21c39908de672331 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Tue, 12 Jun 2018 17:54:42 +0800
Subject: [PATCH 105/215] MIPS: io: Add barrier after register read in inX()

While a barrier is present in the outX() functions before the register
write, a similar barrier is missing in the inX() functions after the
register read. This could allow memory accesses following inX() to
observe stale data.

This patch is very similar to commit a1cc7034e33d12dc1 ("MIPS: io: Add
barrier after register read in readX()"). Because war_io_reorder_wmb()
is both used by writeX() and outX(), if readX() need a barrier then so
does inX().

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
Patchwork: https://patchwork.linux-mips.org/patch/19516/
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: James Hogan <james.hogan@mips.com>
Cc: linux-mips@linux-mips.org
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: Huacai Chen <chenhuacai@gmail.com>
---
 arch/mips/include/asm/io.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index a7d0b836f2f7..cea8ad864b3f 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -414,6 +414,8 @@ static inline type pfx##in##bwlq##p(unsigned long port)			\
 	__val = *__addr;						\
 	slow;								\
 									\
+	/* prevent prefetching of coherent DMA data prematurely */	\
+	rmb();								\
 	return pfx##ioswab##bwlq(__addr, __val);			\
 }
 

From 9ea141ad54716d48e79d0093052c12ed67debf09 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 14 Jun 2018 10:13:53 -0700
Subject: [PATCH 106/215] MIPS: Add support for restartable sequences

Implement support for restartable sequences on MIPS, which requires 3
simple things:

  - Call rseq_handle_notify_resume() on return to userspace if
    TIF_NOTIFY_RESUME is set.

  - Call rseq_signal_deliver() to fixup the pre-signal stack frame when
    a signal is delivered whilst executing a restartable sequence
    critical section.

  - Select CONFIG_HAVE_RSEQ.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reviewed-by: James Hogan <jhogan@kernel.org>
Patchwork: https://patchwork.linux-mips.org/patch/19523/
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/Kconfig         | 1 +
 arch/mips/kernel/signal.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 3f9deec70b92..08c10c518f83 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -65,6 +65,7 @@ config MIPS
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RSEQ
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 9e224469c788..00f2535d2226 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -801,6 +801,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 		regs->regs[0] = 0;		/* Don't deal with this again.	*/
 	}
 
+	rseq_signal_deliver(regs);
+
 	if (sig_uses_siginfo(&ksig->ka, abi))
 		ret = abi->setup_rt_frame(vdso + abi->vdso->off_rt_sigreturn,
 					  ksig, regs, oldset);
@@ -868,6 +870,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
+		rseq_handle_notify_resume(regs);
 	}
 
 	user_enter();

From 9bcf53598dfe1bd8caaf8e03738d3cc51d45904e Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 14 Jun 2018 10:20:54 -0700
Subject: [PATCH 107/215] MIPS: Add syscall detection for restartable sequences

Syscalls are not allowed inside restartable sequences, so add a call to
rseq_syscall() at the very beginning of the system call exit path when
CONFIG_DEBUG_RSEQ=y. This will help us to detect whether there is a
syscall issued erroneously inside a restartable sequence.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reviewed-by: James Hogan <jhogan@kernel.org>
Patchwork: https://patchwork.linux-mips.org/patch/19522/
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/kernel/entry.S | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 38a302919e6b..d7de8adcfcc8 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -79,6 +79,10 @@ FEXPORT(ret_from_fork)
 	jal	schedule_tail		# a0 = struct task_struct *prev
 
 FEXPORT(syscall_exit)
+#ifdef CONFIG_DEBUG_RSEQ
+	move	a0, sp
+	jal	rseq_syscall
+#endif
 	local_irq_disable		# make sure need_resched and
 					# signals dont change between
 					# sampling and return
@@ -141,6 +145,10 @@ work_notifysig:				# deal with pending signals and
 	j	resume_userspace_check
 
 FEXPORT(syscall_exit_partial)
+#ifdef CONFIG_DEBUG_RSEQ
+	move	a0, sp
+	jal	rseq_syscall
+#endif
 	local_irq_disable		# make sure need_resched doesn't
 					# change between and return
 	LONG_L	a2, TI_FLAGS($28)	# current->work

From e426b3754a2cb8bb45b71283fdac0cfc6d247db7 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 14 Jun 2018 10:22:44 -0700
Subject: [PATCH 108/215] MIPS: Wire up the restartable sequences (rseq)
 syscall

Wire up the restartable sequences (rseq) syscall for MIPS. This was
introduced by commit d7822b1e24f2 ("rseq: Introduce restartable
sequences system call") & MIPS now supports the prerequisites.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reviewed-by: James Hogan <jhogan@kernel.org>
Patchwork: https://patchwork.linux-mips.org/patch/19525/
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---
 arch/mips/include/uapi/asm/unistd.h | 15 +++++++++------
 arch/mips/kernel/scall32-o32.S      |  1 +
 arch/mips/kernel/scall64-64.S       |  1 +
 arch/mips/kernel/scall64-n32.S      |  1 +
 arch/mips/kernel/scall64-o32.S      |  1 +
 5 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index bb05e9916a5f..170bf0b5b250 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -388,17 +388,18 @@
 #define __NR_pkey_alloc			(__NR_Linux + 364)
 #define __NR_pkey_free			(__NR_Linux + 365)
 #define __NR_statx			(__NR_Linux + 366)
+#define __NR_rseq			(__NR_Linux + 367)
 
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		366
+#define __NR_Linux_syscalls		367
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		366
+#define __NR_O32_Linux_syscalls		367
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -733,16 +734,17 @@
 #define __NR_pkey_alloc			(__NR_Linux + 324)
 #define __NR_pkey_free			(__NR_Linux + 325)
 #define __NR_statx			(__NR_Linux + 326)
+#define __NR_rseq			(__NR_Linux + 327)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		326
+#define __NR_Linux_syscalls		327
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		326
+#define __NR_64_Linux_syscalls		327
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1081,15 +1083,16 @@
 #define __NR_pkey_alloc			(__NR_Linux + 328)
 #define __NR_pkey_free			(__NR_Linux + 329)
 #define __NR_statx			(__NR_Linux + 330)
+#define __NR_rseq			(__NR_Linux + 331)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		330
+#define __NR_Linux_syscalls		331
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		330
+#define __NR_N32_Linux_syscalls		331
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index a9a7d78803cd..842ff1612893 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -590,3 +590,4 @@ EXPORT(sys_call_table)
 	PTR	sys_pkey_alloc
 	PTR	sys_pkey_free			/* 4365 */
 	PTR	sys_statx
+	PTR	sys_rseq
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 65d5aeeb9bdb..558830d1e5ba 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -439,4 +439,5 @@ EXPORT(sys_call_table)
 	PTR	sys_pkey_alloc
 	PTR	sys_pkey_free			/* 5325 */
 	PTR	sys_statx
+	PTR	sys_rseq
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index cbf190ef9e8a..293f0b0119f3 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -434,4 +434,5 @@ EXPORT(sysn32_call_table)
 	PTR	sys_pkey_alloc
 	PTR	sys_pkey_free
 	PTR	sys_statx			/* 6330 */
+	PTR	sys_rseq
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 9ebe3e2403b1..f13a08de8078 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -583,4 +583,5 @@ EXPORT(sys32_call_table)
 	PTR	sys_pkey_alloc
 	PTR	sys_pkey_free			/* 4365 */
 	PTR	sys_statx
+	PTR	sys_rseq
 	.size	sys32_call_table,.-sys32_call_table

From 744f4be542d705a39dac9810350e96f37474eda3 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 14 Jun 2018 11:06:22 -0700
Subject: [PATCH 109/215] rseq/selftests: Implement MIPS support

Implement support for both MIPS32 & MIPS64 in the rseq selftests, in
order to sanity check the recently enabled rseq syscall.

The tests all pass on a MIPS Boston development board running either a
MIPS32r2 interAptiv CPU & a MIPS64r6 I6500 CPU, both of which were
configured with 2 cores each of which have 2 hardware threads (VP(E)s) -
ie. 4 CPUs.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reviewed-by: James Hogan <jhogan@kernel.org>
Patchwork: https://patchwork.linux-mips.org/patch/19524/
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---
 tools/testing/selftests/rseq/param_test.c |  24 +
 tools/testing/selftests/rseq/rseq-mips.h  | 725 ++++++++++++++++++++++
 tools/testing/selftests/rseq/rseq.h       |   2 +
 3 files changed, 751 insertions(+)
 create mode 100644 tools/testing/selftests/rseq/rseq-mips.h

diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 6a9f602a8718..615252331813 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -137,6 +137,30 @@ unsigned int yield_mod_cnt, nr_abort;
 	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
 	"bne 222b\n\t" \
 	"333:\n\t"
+
+#elif defined(__mips__)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"$5"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+	"beqz " INJECT_ASM_REG ", 333f\n\t" \
+	"222:\n\t" \
+	"addiu " INJECT_ASM_REG ", -1\n\t" \
+	"bnez " INJECT_ASM_REG ", 222b\n\t" \
+	"333:\n\t"
+
 #else
 #error unsupported target
 #endif
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
new file mode 100644
index 000000000000..7f48ecf46994
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Author: Paul Burton <paul.burton@mips.com>
+ * (C) Copyright 2018 MIPS Tech LLC
+ *
+ * Based on rseq-arm.h:
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG	0x53053053
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("sync" ::: "memory")
+#define rseq_smp_rmb()	rseq_smp_mb()
+#define rseq_smp_wmb()	rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_smp_mb();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_mb();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#if _MIPS_SZLONG == 64
+# define LONG			".dword"
+# define LONG_LA		"dla"
+# define LONG_L			"ld"
+# define LONG_S			"sd"
+# define LONG_ADDI		"daddiu"
+# define U32_U64_PAD(x)		x
+#elif _MIPS_SZLONG == 32
+# define LONG			".word"
+# define LONG_LA		"la"
+# define LONG_L			"lw"
+# define LONG_S			"sw"
+# define LONG_ADDI		"addiu"
+# ifdef __BIG_ENDIAN
+#  define U32_U64_PAD(x)	"0x0, " x
+# else
+#  define U32_U64_PAD(x)	x ", 0x0"
+# endif
+#else
+# error unsupported _MIPS_SZLONG
+#endif
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags,	start_ip, \
+				post_commit_offset, abort_ip) \
+		".pushsection __rseq_table, \"aw\"\n\t" \
+		".balign 32\n\t" \
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+		RSEQ_INJECT_ASM(1) \
+		LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \
+		LONG_S  " $4, %[" __rseq_str(rseq_cs) "]\n\t" \
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+		RSEQ_INJECT_ASM(2) \
+		"lw  $4, %[" __rseq_str(current_cpu_id) "]\n\t" \
+		"bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+				abort_label, version, flags, \
+				start_ip, post_commit_offset, abort_ip) \
+		".balign 32\n\t" \
+		__rseq_str(table_label) ":\n\t" \
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+		".word " __rseq_str(RSEQ_SIG) "\n\t" \
+		__rseq_str(label) ":\n\t" \
+		teardown \
+		"b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+			      start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+				abort_label, 0x0, 0x0, start_ip, \
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+		__rseq_str(label) ":\n\t" \
+		teardown \
+		"b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+#define rseq_workaround_gcc_asm_size_guess()	__asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"beq $4, %[expectnot], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"beq $4, %[expectnot], %l[error2]\n\t"
+#endif
+		LONG_S " $4, %[load]\n\t"
+		LONG_ADDI " $4, %[voffp]\n\t"
+		LONG_L " $4, 0($4)\n\t"
+		/* final store */
+		LONG_S " $4, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"Ir" (voffp),
+		  [load]		"m" (*load)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		LONG_L " $4, %[v]\n\t"
+		LONG_ADDI " $4, %[count]\n\t"
+		/* final store */
+		LONG_S " $4, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [count]		"Ir" (count)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+#endif
+		/* try store */
+		LONG_S " %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+#endif
+		/* try store */
+		LONG_S " %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		"sync\n\t"	/* full sync provides store-release */
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		LONG_L " $4, %[v2]\n\t"
+		"bne $4, %[expect2], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+		LONG_L " $4, %[v2]\n\t"
+		"bne $4, %[expect2], %l[error3]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uintptr_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		LONG_S " %[src], %[rseq_scratch0]\n\t"
+		LONG_S "  %[dst], %[rseq_scratch1]\n\t"
+		LONG_S " %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 7f\n\t"
+#endif
+		/* try memcpy */
+		"beqz %[len], 333f\n\t" \
+		"222:\n\t" \
+		"lb   $4, 0(%[src])\n\t" \
+		"sb   $4, 0(%[dst])\n\t" \
+		LONG_ADDI " %[src], 1\n\t" \
+		LONG_ADDI " %[dst], 1\n\t" \
+		LONG_ADDI " %[len], -1\n\t" \
+		"bnez %[len], 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		LONG_L " %[len], %[rseq_scratch2]\n\t"
+		LONG_L " %[dst], %[rseq_scratch1]\n\t"
+		LONG_L " %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      LONG_L " %[len], %[rseq_scratch2]\n\t"
+				      LONG_L " %[dst], %[rseq_scratch1]\n\t"
+				      LONG_L " %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	uintptr_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		LONG_S " %[src], %[rseq_scratch0]\n\t"
+		LONG_S " %[dst], %[rseq_scratch1]\n\t"
+		LONG_S " %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 7f\n\t"
+#endif
+		/* try memcpy */
+		"beqz %[len], 333f\n\t" \
+		"222:\n\t" \
+		"lb   $4, 0(%[src])\n\t" \
+		"sb   $4, 0(%[dst])\n\t" \
+		LONG_ADDI " %[src], 1\n\t" \
+		LONG_ADDI " %[dst], 1\n\t" \
+		LONG_ADDI " %[len], -1\n\t" \
+		"bnez %[len], 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		"sync\n\t"	/* full sync provides store-release */
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		LONG_L " %[len], %[rseq_scratch2]\n\t"
+		LONG_L " %[dst], %[rseq_scratch1]\n\t"
+		LONG_L " %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      LONG_L " %[len], %[rseq_scratch2]\n\t"
+				      LONG_L " %[dst], %[rseq_scratch1]\n\t"
+				      LONG_L " %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 0a808575cbc4..a4684112676c 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -73,6 +73,8 @@ extern __thread volatile struct rseq __rseq_abi;
 #include <rseq-arm.h>
 #elif defined(__PPC__)
 #include <rseq-ppc.h>
+#elif defined(__mips__)
+#include <rseq-mips.h>
 #else
 #error unsupported target
 #endif

From 4337aac1e1c97cfda56fbec4077fbc0e37b867c0 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 14 Jun 2018 17:24:07 -0700
Subject: [PATCH 110/215] MIPS: Wire up io_pgetevents syscall

Wire up the io_pgetevents syscall that was introduced by commit
7a074e96dee6 ("aio: implement io_pgetevents").

Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/19593/
Cc: James Hogan <jhogan@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
---
 arch/mips/include/uapi/asm/unistd.h | 15 +++++++++------
 arch/mips/kernel/scall32-o32.S      |  1 +
 arch/mips/kernel/scall64-64.S       |  1 +
 arch/mips/kernel/scall64-n32.S      |  1 +
 arch/mips/kernel/scall64-o32.S      |  1 +
 5 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index 170bf0b5b250..f25dd1d83fb7 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -389,17 +389,18 @@
 #define __NR_pkey_free			(__NR_Linux + 365)
 #define __NR_statx			(__NR_Linux + 366)
 #define __NR_rseq			(__NR_Linux + 367)
+#define __NR_io_pgetevents		(__NR_Linux + 368)
 
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		367
+#define __NR_Linux_syscalls		368
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		367
+#define __NR_O32_Linux_syscalls		368
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -735,16 +736,17 @@
 #define __NR_pkey_free			(__NR_Linux + 325)
 #define __NR_statx			(__NR_Linux + 326)
 #define __NR_rseq			(__NR_Linux + 327)
+#define __NR_io_pgetevents		(__NR_Linux + 328)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		327
+#define __NR_Linux_syscalls		328
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		327
+#define __NR_64_Linux_syscalls		328
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1084,15 +1086,16 @@
 #define __NR_pkey_free			(__NR_Linux + 329)
 #define __NR_statx			(__NR_Linux + 330)
 #define __NR_rseq			(__NR_Linux + 331)
+#define __NR_io_pgetevents		(__NR_Linux + 332)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		331
+#define __NR_Linux_syscalls		332
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		331
+#define __NR_N32_Linux_syscalls		332
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 842ff1612893..91d3c8c46097 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -591,3 +591,4 @@ EXPORT(sys_call_table)
 	PTR	sys_pkey_free			/* 4365 */
 	PTR	sys_statx
 	PTR	sys_rseq
+	PTR	sys_io_pgetevents
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 558830d1e5ba..358d9599983d 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -440,4 +440,5 @@ EXPORT(sys_call_table)
 	PTR	sys_pkey_free			/* 5325 */
 	PTR	sys_statx
 	PTR	sys_rseq
+	PTR	sys_io_pgetevents
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 293f0b0119f3..c65eaacc1abf 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -435,4 +435,5 @@ EXPORT(sysn32_call_table)
 	PTR	sys_pkey_free
 	PTR	sys_statx			/* 6330 */
 	PTR	sys_rseq
+	PTR	compat_sys_io_pgetevents
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index f13a08de8078..73913f072e39 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -584,4 +584,5 @@ EXPORT(sys32_call_table)
 	PTR	sys_pkey_free			/* 4365 */
 	PTR	sys_statx
 	PTR	sys_rseq
+	PTR	compat_sys_io_pgetevents
 	.size	sys32_call_table,.-sys32_call_table

From a507a3065c09d69677c502905e597750da3a9815 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Tue, 19 Jun 2018 10:02:01 -0700
Subject: [PATCH 111/215] ACPI / processor: Finish making
 acpi_processor_ppc_has_changed() void

Commit bca5f557dcea "ACPI / processor: Make acpi_processor_ppc_has_changed()
void" changed one of the declarations of acpi_processor_ppc_has_changed()
to return void, but the !CPU_FREQ version still returns int. Let's return
void to be consistent.

Fixes: bca5f557dcea "ACPI / processor: Make acpi_processor_ppc_has_changed() void"
Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/processor.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 40a916efd7c0..1194a4c78d55 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -309,7 +309,7 @@ static inline void acpi_processor_ppc_exit(void)
 {
 	return;
 }
-static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr,
+static inline void acpi_processor_ppc_has_changed(struct acpi_processor *pr,
 								int event_flag)
 {
 	static unsigned int printout = 1;
@@ -320,7 +320,6 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr,
 		       "Consider compiling CPUfreq support into your kernel.\n");
 		printout = 0;
 	}
-	return 0;
 }
 static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
 {

From 9560ba306df3e46b4b1037d101e2e4ca68610f55 Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Thu, 7 Jun 2018 18:37:59 -0700
Subject: [PATCH 112/215] quota: reclaim least recently used dquots

The dquots in the free_dquots list are not reclaimed in LRU way.
put_dquot_last() puts entries to the tail and dqcache_shrink_scan()
frees from the tail. Free unreferenced dquots in LRU order because it
seems more reasonable than freeing most recently used.

Signed-off-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d88231e3b2be..241b00f835b9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -716,7 +716,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 	unsigned long freed = 0;
 
 	spin_lock(&dq_list_lock);
-	head = free_dquots.prev;
+	head = free_dquots.next;
 	while (head != &free_dquots && sc->nr_to_scan) {
 		dquot = list_entry(head, struct dquot, dq_free);
 		remove_dquot_hash(dquot);
@@ -725,7 +725,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 		do_destroy_dquot(dquot);
 		sc->nr_to_scan--;
 		freed++;
-		head = free_dquots.prev;
+		head = free_dquots.next;
 	}
 	spin_unlock(&dq_list_lock);
 	return freed;

From 1822193b5d4fd5d9800e6a7ed141375b8e8e68eb Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 11 Jun 2018 12:14:45 +0200
Subject: [PATCH 113/215] quota: Cleanup list iteration in
 dqcache_shrink_scan()

Use list_first_entry() and list_empty() instead of opencoded variants.

Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 241b00f835b9..fc20e06c56ba 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -711,21 +711,18 @@ EXPORT_SYMBOL(dquot_quota_sync);
 static unsigned long
 dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
-	struct list_head *head;
 	struct dquot *dquot;
 	unsigned long freed = 0;
 
 	spin_lock(&dq_list_lock);
-	head = free_dquots.next;
-	while (head != &free_dquots && sc->nr_to_scan) {
-		dquot = list_entry(head, struct dquot, dq_free);
+	while (!list_empty(&free_dquots) && sc->nr_to_scan) {
+		dquot = list_first_entry(&free_dquots, struct dquot, dq_free);
 		remove_dquot_hash(dquot);
 		remove_free_dquot(dquot);
 		remove_inuse(dquot);
 		do_destroy_dquot(dquot);
 		sc->nr_to_scan--;
 		freed++;
-		head = free_dquots.next;
 	}
 	spin_unlock(&dq_list_lock);
 	return freed;

From 27e6ed54a30a00d6520ddb4518214df8ff99daf1 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Fri, 8 Jun 2018 10:53:40 +0800
Subject: [PATCH 114/215] ext2: add warning when specifying nocheck option

The option nocheck(nocheck/check=none) is useless but considering
backwards compatibility it's better to print warning for a while
before completely remove from the code.

This patch add proper warning message for option 'nocheck' and
remove unnecessary comment/function declaration which is used for
removed option 'check'.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/ext2.h  | 2 --
 fs/ext2/super.c | 6 +++---
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index cc40802ddfa8..00e759f05161 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -748,7 +748,6 @@ extern void ext2_free_blocks (struct inode *, unsigned long,
 			      unsigned long);
 extern unsigned long ext2_count_free_blocks (struct super_block *);
 extern unsigned long ext2_count_dirs (struct super_block *);
-extern void ext2_check_blocks_bitmap (struct super_block *);
 extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
 						    unsigned int block_group,
 						    struct buffer_head ** bh);
@@ -771,7 +770,6 @@ extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page
 extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *);
 extern void ext2_free_inode (struct inode *);
 extern unsigned long ext2_count_free_inodes (struct super_block *);
-extern void ext2_check_inodes_bitmap (struct super_block *);
 extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
 
 /* inode.c */
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 25ab1274090f..8ff53f8da3bc 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -557,6 +557,9 @@ static int parse_options(char *options, struct super_block *sb,
 			set_opt (opts->s_mount_opt, NO_UID32);
 			break;
 		case Opt_nocheck:
+			ext2_msg(sb, KERN_WARNING,
+				"Option nocheck/check=none is deprecated and"
+				" will be removed in June 2020.");
 			clear_opt (opts->s_mount_opt, CHECK);
 			break;
 		case Opt_debug:
@@ -1335,9 +1338,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 	new_opts.s_resgid = sbi->s_resgid;
 	spin_unlock(&sbi->s_lock);
 
-	/*
-	 * Allow the "check" option to be passed as a remount option.
-	 */
 	if (!parse_options(data, sb, &new_opts))
 		return -EINVAL;
 

From fa65653e575fbd958bdf5fb9c4a71a324e39510d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 13 Jun 2018 12:09:22 +0200
Subject: [PATCH 115/215] udf: Detect incorrect directory size

Detect when a directory entry is (possibly partially) beyond directory
size and return EIO in that case since it means the filesystem is
corrupted. Otherwise directory operations can further corrupt the
directory and possibly also oops the kernel.

CC: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
CC: stable@vger.kernel.org
Reported-and-tested-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/directory.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 0a98a2369738..3835f983cc99 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -152,6 +152,9 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
 			       sizeof(struct fileIdentDesc));
 		}
 	}
+	/* Got last entry outside of dir size - fs is corrupted! */
+	if (*nf_pos > dir->i_size)
+		return NULL;
 	return fi;
 }
 

From f2e83347119acc0412941c5a23d895624c9300e2 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 13 Jun 2018 17:30:14 +0200
Subject: [PATCH 116/215] udf: Provide function for calculating dir entry
 length

Provide function for calculating directory entry length and use to
reduce code duplication.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/directory.c |  5 +----
 fs/udf/namei.c     | 14 +++-----------
 fs/udf/udfdecl.h   |  6 ++++++
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 3835f983cc99..d9523013096f 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -141,10 +141,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
 			       fibh->ebh->b_data,
 			       sizeof(struct fileIdentDesc) + fibh->soffset);
 
-			fi_len = (sizeof(struct fileIdentDesc) +
-				  cfi->lengthFileIdent +
-				  le16_to_cpu(cfi->lengthOfImpUse) + 3) & ~3;
-
+			fi_len = udf_dir_entry_len(cfi);
 			*nf_pos += fi_len - (fibh->eoffset - fibh->soffset);
 			fibh->eoffset = fibh->soffset + fi_len;
 		} else {
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index c586026508db..06f37ddd2997 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -351,8 +351,6 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 	loff_t f_pos;
 	loff_t size = udf_ext0_offset(dir) + dir->i_size;
 	int nfidlen;
-	uint8_t lfi;
-	uint16_t liu;
 	udf_pblk_t block;
 	struct kernel_lb_addr eloc;
 	uint32_t elen = 0;
@@ -383,7 +381,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 		namelen = 0;
 	}
 
-	nfidlen = (sizeof(struct fileIdentDesc) + namelen + 3) & ~3;
+	nfidlen = ALIGN(sizeof(struct fileIdentDesc) + namelen, UDF_NAME_PAD);
 
 	f_pos = udf_ext0_offset(dir);
 
@@ -424,12 +422,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
 			goto out_err;
 		}
 
-		liu = le16_to_cpu(cfi->lengthOfImpUse);
-		lfi = cfi->lengthFileIdent;
-
 		if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) {
-			if (((sizeof(struct fileIdentDesc) +
-					liu + lfi + 3) & ~3) == nfidlen) {
+			if (udf_dir_entry_len(cfi) == nfidlen) {
 				cfi->descTag.tagSerialNum = cpu_to_le16(1);
 				cfi->fileVersionNum = cpu_to_le16(1);
 				cfi->fileCharacteristics = 0;
@@ -1201,9 +1195,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (dir_fi) {
 		dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location);
-		udf_update_tag((char *)dir_fi,
-				(sizeof(struct fileIdentDesc) +
-				le16_to_cpu(dir_fi->lengthOfImpUse) + 3) & ~3);
+		udf_update_tag((char *)dir_fi, udf_dir_entry_len(dir_fi));
 		if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
 			mark_inode_dirty(old_inode);
 		else
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index bae311b59400..ed890bc4416d 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -132,6 +132,12 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
 extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
 			struct fileIdentDesc *, struct udf_fileident_bh *,
 			uint8_t *, uint8_t *);
+static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi)
+{
+	return ALIGN(sizeof(struct fileIdentDesc) +
+		le16_to_cpu(cfi->lengthOfImpUse) + cfi->lengthFileIdent,
+		UDF_NAME_PAD);
+}
 
 /* file.c */
 extern long udf_ioctl(struct file *, unsigned int, unsigned long);

From 6c1e4d06a3808dc67dbce2d631f4c12574567dd5 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 13 Jun 2018 18:04:24 +0200
Subject: [PATCH 117/215] udf: Drop unused arguments of udf_delete_aext()

udf_delete_aext() uses its last two arguments only as local variables.
Drop them.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/balloc.c  | 5 ++---
 fs/udf/inode.c   | 8 ++++----
 fs/udf/udfdecl.h | 3 +--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 1b961b1d9699..fcda0fc97b90 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -533,8 +533,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
 			udf_write_aext(table, &epos, &eloc,
 					(etype << 30) | elen, 1);
 		} else
-			udf_delete_aext(table, epos, eloc,
-					(etype << 30) | elen);
+			udf_delete_aext(table, epos);
 	} else {
 		alloc_count = 0;
 	}
@@ -630,7 +629,7 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb,
 	if (goal_elen)
 		udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1);
 	else
-		udf_delete_aext(table, goal_epos, goal_eloc, goal_elen);
+		udf_delete_aext(table, goal_epos);
 	brelse(goal_epos.bh);
 
 	udf_add_free_space(sb, partition, -1);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7f39d17352c9..9915a58fbabd 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1147,8 +1147,7 @@ static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr
 
 	if (startnum > endnum) {
 		for (i = 0; i < (startnum - endnum); i++)
-			udf_delete_aext(inode, *epos, laarr[i].extLocation,
-					laarr[i].extLength);
+			udf_delete_aext(inode, *epos);
 	} else if (startnum < endnum) {
 		for (i = 0; i < (endnum - startnum); i++) {
 			udf_insert_aext(inode, *epos, laarr[i].extLocation,
@@ -2176,14 +2175,15 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
 	return (nelen >> 30);
 }
 
-int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
-		       struct kernel_lb_addr eloc, uint32_t elen)
+int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
 {
 	struct extent_position oepos;
 	int adsize;
 	int8_t etype;
 	struct allocExtDesc *aed;
 	struct udf_inode_info *iinfo;
+	struct kernel_lb_addr eloc;
+	uint32_t elen;
 
 	if (epos.bh) {
 		get_bh(epos.bh);
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index ed890bc4416d..84c47dde4d26 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -173,8 +173,7 @@ extern int udf_add_aext(struct inode *, struct extent_position *,
 			struct kernel_lb_addr *, uint32_t, int);
 extern void udf_write_aext(struct inode *, struct extent_position *,
 			   struct kernel_lb_addr *, uint32_t, int);
-extern int8_t udf_delete_aext(struct inode *, struct extent_position,
-			      struct kernel_lb_addr, uint32_t);
+extern int8_t udf_delete_aext(struct inode *, struct extent_position);
 extern int8_t udf_next_aext(struct inode *, struct extent_position *,
 			    struct kernel_lb_addr *, uint32_t *, int);
 extern int8_t udf_current_aext(struct inode *, struct extent_position *,

From 03eeafdd9ab06a770d42c2b264d50dff7e2f4eee Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Thu, 24 May 2018 09:26:38 -0400
Subject: [PATCH 118/215] locking/rwsem: Fix up_read_non_owner() warning with
 DEBUG_RWSEMS

It was found that the use of up_read_non_owner() in NFS was causing
the following warning when DEBUG_RWSEMS was configured.

  DEBUG_LOCKS_WARN_ON(sem->owner != ((struct task_struct *)(1UL << 0)))

Looking into the rwsem.c file, it was discovered that the corresponding
down_read_non_owner() function was not setting the owner field properly.
This is fixed now, and the warning should be gone.

Fixes: 5149cbac4235 ("locking/rwsem: Add DEBUG_RWSEMS to look for lock/unlock mismatches")
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Gavin Schenk <g.schenk@eckelmann.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linux-nfs@vger.kernel.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1527168398-4291-1-git-send-email-longman@redhat.com
---
 kernel/locking/rwsem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index bc1e507be9ff..776308d2fa9e 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -181,6 +181,7 @@ void down_read_non_owner(struct rw_semaphore *sem)
 	might_sleep();
 
 	__down_read(sem);
+	rwsem_set_reader_owned(sem);
 }
 
 EXPORT_SYMBOL(down_read_non_owner);

From 3ae6295ccb7cf6d344908209701badbbbb503e40 Mon Sep 17 00:00:00 2001
From: Siarhei Liakh <Siarhei.Liakh@concurrent-rt.com>
Date: Thu, 14 Jun 2018 19:36:07 +0000
Subject: [PATCH 119/215] x86: Call fixup_exception() before notify_die() in
 math_error()

fpu__drop() has an explicit fwait which under some conditions can trigger a
fixable FPU exception while in kernel. Thus, we should attempt to fixup the
exception first, and only call notify_die() if the fixup failed just like
in do_general_protection(). The original call sequence incorrectly triggers
KDB entry on debug kernels under particular FPU-intensive workloads.

Andy noted, that this makes the whole conditional irq enable thing even
more inconsistent, but fixing that it outside the scope of this.

Signed-off-by: Siarhei Liakh <siarhei.liakh@concurrent-rt.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Borislav  Petkov" <bpetkov@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/DM5PR11MB201156F1CAB2592B07C79A03B17D0@DM5PR11MB2011.namprd11.prod.outlook.com
---
 arch/x86/kernel/traps.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 03f3d7695dac..162a31d80ad5 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -834,16 +834,18 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" :
 						"simd exception";
 
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP)
-		return;
 	cond_local_irq_enable(regs);
 
 	if (!user_mode(regs)) {
-		if (!fixup_exception(regs, trapnr)) {
-			task->thread.error_code = error_code;
-			task->thread.trap_nr = trapnr;
+		if (fixup_exception(regs, trapnr))
+			return;
+
+		task->thread.error_code = error_code;
+		task->thread.trap_nr = trapnr;
+
+		if (notify_die(DIE_TRAP, str, regs, error_code,
+					trapnr, SIGFPE) != NOTIFY_STOP)
 			die(str, regs, error_code);
-		}
 		return;
 	}
 

From 3d0641015bf73aaa1cb54c936674959e7805070f Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 19 Jun 2018 15:34:09 +0300
Subject: [PATCH 120/215] nvme-rdma: fix possible double free condition when
 failing to create a controller

Failures after nvme_init_ctrl will defer resource cleanups to .free_ctrl
when the reference is released, hence we should not free the controller
queues for these failures.

Fix that by moving controller queues allocation before controller
initialization and correctly freeing them for failures before
initialization and skip them for failures after initialization.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index c9424da0d23e..bcb0e5d6343d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -888,9 +888,9 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
 	list_del(&ctrl->list);
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	kfree(ctrl->queues);
 	nvmf_free_options(nctrl->opts);
 free_ctrl:
+	kfree(ctrl->queues);
 	kfree(ctrl);
 }
 
@@ -1932,11 +1932,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 		goto out_free_ctrl;
 	}
 
-	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
-				0 /* no quirks, we're perfect! */);
-	if (ret)
-		goto out_free_ctrl;
-
 	INIT_DELAYED_WORK(&ctrl->reconnect_work,
 			nvme_rdma_reconnect_ctrl_work);
 	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
@@ -1950,14 +1945,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
 				GFP_KERNEL);
 	if (!ctrl->queues)
-		goto out_uninit_ctrl;
+		goto out_free_ctrl;
+
+	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
+				0 /* no quirks, we're perfect! */);
+	if (ret)
+		goto out_kfree_queues;
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
 	WARN_ON_ONCE(!changed);
 
 	ret = nvme_rdma_configure_admin_queue(ctrl, true);
 	if (ret)
-		goto out_kfree_queues;
+		goto out_uninit_ctrl;
 
 	/* sanity check icdoff */
 	if (ctrl->ctrl.icdoff) {
@@ -2014,14 +2014,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 
 out_remove_admin_queue:
 	nvme_rdma_destroy_admin_queue(ctrl, true);
-out_kfree_queues:
-	kfree(ctrl->queues);
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&ctrl->ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);
 	if (ret > 0)
 		ret = -EIO;
 	return ERR_PTR(ret);
+out_kfree_queues:
+	kfree(ctrl->queues);
 out_free_ctrl:
 	kfree(ctrl);
 	return ERR_PTR(ret);

From 94e42213cc1ae41c57819539c0130f8dfc69d718 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 19 Jun 2018 15:34:10 +0300
Subject: [PATCH 121/215] nvme-rdma: fix possible free of a non-allocated async
 event buffer

If nvme_rdma_configure_admin_queue fails before we allocated
the async event buffer, we will falsly free it because
nvme_rdma_free_queue is freeing it. Fix it by allocating the buffer right
after nvme_rdma_alloc_queue and free it right before nvme_rdma_queue_free
to maintain orderly reverse cleanup sequence.

Reported-by: Israel Rukshin <israelr@mellanox.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index bcb0e5d6343d..f9affb71ac85 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -560,12 +560,6 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
 	if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
 		return;
 
-	if (nvme_rdma_queue_idx(queue) == 0) {
-		nvme_rdma_free_qe(queue->device->dev,
-			&queue->ctrl->async_event_sqe,
-			sizeof(struct nvme_command), DMA_TO_DEVICE);
-	}
-
 	nvme_rdma_destroy_queue_ib(queue);
 	rdma_destroy_id(queue->cm_id);
 }
@@ -739,6 +733,8 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
 		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
 	}
+	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+		sizeof(struct nvme_command), DMA_TO_DEVICE);
 	nvme_rdma_free_queue(&ctrl->queues[0]);
 }
 
@@ -755,11 +751,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 
 	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
+	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+			sizeof(struct nvme_command), DMA_TO_DEVICE);
+	if (error)
+		goto out_free_queue;
+
 	if (new) {
 		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
 		if (IS_ERR(ctrl->ctrl.admin_tagset)) {
 			error = PTR_ERR(ctrl->ctrl.admin_tagset);
-			goto out_free_queue;
+			goto out_free_async_qe;
 		}
 
 		ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
@@ -795,12 +796,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	if (error)
 		goto out_stop_queue;
 
-	error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
-			&ctrl->async_event_sqe, sizeof(struct nvme_command),
-			DMA_TO_DEVICE);
-	if (error)
-		goto out_stop_queue;
-
 	return 0;
 
 out_stop_queue:
@@ -811,6 +806,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 out_free_tagset:
 	if (new)
 		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+out_free_async_qe:
+	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+		sizeof(struct nvme_command), DMA_TO_DEVICE);
 out_free_queue:
 	nvme_rdma_free_queue(&ctrl->queues[0]);
 	return error;

From c947657b15379505a9bba36a02005882b66abe57 Mon Sep 17 00:00:00 2001
From: Israel Rukshin <israelr@mellanox.com>
Date: Tue, 19 Jun 2018 15:34:11 +0300
Subject: [PATCH 122/215] nvme-rdma: Fix command completion race at error
 recovery

The race is between completing the request at error recovery work and
rdma completions.  If we cancel the request before getting the good
rdma completion we get a NULL deref of the request MR at
nvme_rdma_process_nvme_rsp().

When Canceling the request we return its mr to the mr pool (set mr to
NULL) and also unmap its data.  Canceling the requests while the rdma
queues are active is not safe.  Because rdma queues are active and we
get good rdma completions that can use the mr pointer which may be NULL.
Completing the request too soon may lead also to performing DMA to/from
user buffers which might have been already unmapped.

The commit fixes the race by draining the QP before starting the abort
commands mechanism.

Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f9affb71ac85..2815749f4dfb 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -728,7 +728,6 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
-	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
 		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
@@ -817,7 +816,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
 		bool remove)
 {
-	nvme_rdma_stop_io_queues(ctrl);
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.connect_q);
 		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
@@ -947,6 +945,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	return;
 
 destroy_admin:
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	nvme_rdma_destroy_admin_queue(ctrl, false);
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
@@ -963,12 +962,14 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
+		nvme_rdma_stop_io_queues(ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
 		nvme_rdma_destroy_io_queues(ctrl, false);
 	}
 
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
 	nvme_rdma_destroy_admin_queue(ctrl, false);
@@ -1734,6 +1735,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
+		nvme_rdma_stop_io_queues(ctrl);
 		blk_mq_tagset_busy_iter(&ctrl->tag_set,
 					nvme_cancel_request, &ctrl->ctrl);
 		nvme_rdma_destroy_io_queues(ctrl, shutdown);
@@ -1745,6 +1747,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 		nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@ -2011,6 +2014,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	return &ctrl->ctrl;
 
 out_remove_admin_queue:
+	nvme_rdma_stop_queue(&ctrl->queues[0]);
 	nvme_rdma_destroy_admin_queue(ctrl, true);
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&ctrl->ctrl);

From 5e77d61cbc7e766778037127dab69e6410a8fc48 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 19 Jun 2018 15:34:13 +0300
Subject: [PATCH 123/215] nvme-rdma: don't override opts->queue_size

That is user argument, and theoretically controller limits can change
over time (over reconnects/resets).  Instead, use the sqsize controller
attribute to check queue depth boundaries and use it to the tagset
allocation.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2815749f4dfb..9544625c0b7d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -692,7 +692,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		set = &ctrl->tag_set;
 		memset(set, 0, sizeof(*set));
 		set->ops = &nvme_rdma_mq_ops;
-		set->queue_depth = nctrl->opts->queue_size;
+		set->queue_depth = nctrl->sqsize + 1;
 		set->reserved_tags = 1; /* fabric connect */
 		set->numa_node = NUMA_NO_NODE;
 		set->flags = BLK_MQ_F_SHOULD_MERGE;
@@ -1975,20 +1975,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 		goto out_remove_admin_queue;
 	}
 
-	if (opts->queue_size > ctrl->ctrl.maxcmd) {
-		/* warn if maxcmd is lower than queue_size */
-		dev_warn(ctrl->ctrl.device,
-			"queue_size %zu > ctrl maxcmd %u, clamping down\n",
-			opts->queue_size, ctrl->ctrl.maxcmd);
-		opts->queue_size = ctrl->ctrl.maxcmd;
-	}
-
+	/* only warn if argument is too large here, will clamp later */
 	if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
-		/* warn if sqsize is lower than queue_size */
 		dev_warn(ctrl->ctrl.device,
 			"queue_size %zu > ctrl sqsize %u, clamping down\n",
 			opts->queue_size, ctrl->ctrl.sqsize + 1);
-		opts->queue_size = ctrl->ctrl.sqsize + 1;
+	}
+
+	/* warn if maxcmd is lower than sqsize+1 */
+	if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
+		dev_warn(ctrl->ctrl.device,
+			"sqsize %u > ctrl maxcmd %u, clamping down\n",
+			ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
+		ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
 	}
 
 	if (opts->nr_io_queues) {

From d68a90e148f5a82aa67654c5012071e31c0e4baa Mon Sep 17 00:00:00 2001
From: Max Gurtuvoy <maxg@mellanox.com>
Date: Tue, 19 Jun 2018 15:45:33 +0300
Subject: [PATCH 124/215] nvmet: reset keep alive timer in controller enable

Controllers that are not yet enabled should not really enforce keep alive
timeouts, but we still want to track a timeout and cleanup in case a host
died before it enabled the controller.  Hence, simply reset the keep
alive timer when the controller is enabled.

Suggested-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a03da764ecae..74d4b785d2da 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -686,6 +686,14 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 	}
 
 	ctrl->csts = NVME_CSTS_RDY;
+
+	/*
+	 * Controllers that are not yet enabled should not really enforce the
+	 * keep alive timeout, but we still want to track a timeout and cleanup
+	 * in case a host died before it enabled the controller.  Hence, simply
+	 * reset the keep alive timer when the controller is enabled.
+	 */
+	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)

From a1e79188628580465ac6d7a93a313336ee3364f1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 20 Jun 2018 13:45:05 +0300
Subject: [PATCH 125/215] blk-mq-debugfs: Off by one in blk_mq_rq_state_name()

If rq_state == ARRAY_SIZE() then we read one element beyond the end of
the blk_mq_rq_state_name_array[] array.

Fixes: ec6dcf63c55c ("blk-mq-debugfs: Show more request state information")
Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index ffa622366922..1c4532e92938 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -356,7 +356,7 @@ static const char *const blk_mq_rq_state_name_array[] = {
 
 static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
 {
-	if (WARN_ON_ONCE((unsigned int)rq_state >
+	if (WARN_ON_ONCE((unsigned int)rq_state >=
 			 ARRAY_SIZE(blk_mq_rq_state_name_array)))
 		return "(?)";
 	return blk_mq_rq_state_name_array[rq_state];

From bdd5ae3aa51939bb1fd26cd9fe7af07ca8c60397 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 6 Jun 2018 17:18:36 -0400
Subject: [PATCH 126/215] tools/power turbostat: fix show/hide issues resulting
 from mis-merge

The --show and --hide options failed on "Node", which was listed as "Node%".
The --show and --hide options were generally fouled-up do due to come
content merges that scrambled the list of column name indexes.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 106 +++++++++++++-------------
 1 file changed, 54 insertions(+), 52 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d6cff3070ebd..f09a272941a1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -381,19 +381,23 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 }
 
 /*
- * Each string in this array is compared in --show and --hide cmdline.
- * Thus, strings that are proper sub-sets must follow their more specific peers.
+ * This list matches the column headers, except
+ * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
+ * 2. Core and CPU are moved to the end, we can't have strings that contain them
+ *    matching on them for --show and --hide.
  */
 struct msr_counter bic[] = {
 	{ 0x0, "usec" },
 	{ 0x0, "Time_Of_Day_Seconds" },
 	{ 0x0, "Package" },
+	{ 0x0, "Node" },
 	{ 0x0, "Avg_MHz" },
+	{ 0x0, "Busy%" },
 	{ 0x0, "Bzy_MHz" },
 	{ 0x0, "TSC_MHz" },
 	{ 0x0, "IRQ" },
 	{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
-	{ 0x0, "Busy%" },
+	{ 0x0, "sysfs" },
 	{ 0x0, "CPU%c1" },
 	{ 0x0, "CPU%c3" },
 	{ 0x0, "CPU%c6" },
@@ -424,15 +428,13 @@ struct msr_counter bic[] = {
 	{ 0x0, "Cor_J" },
 	{ 0x0, "GFX_J" },
 	{ 0x0, "RAM_J" },
-	{ 0x0, "Core" },
-	{ 0x0, "CPU" },
 	{ 0x0, "Mod%c6" },
-	{ 0x0, "sysfs" },
 	{ 0x0, "Totl%C0" },
 	{ 0x0, "Any%C0" },
 	{ 0x0, "GFX%C0" },
 	{ 0x0, "CPUGFX%" },
-	{ 0x0, "Node%" },
+	{ 0x0, "Core" },
+	{ 0x0, "CPU" },
 };
 
 
@@ -441,51 +443,51 @@ struct msr_counter bic[] = {
 #define	BIC_USEC	(1ULL << 0)
 #define	BIC_TOD		(1ULL << 1)
 #define	BIC_Package	(1ULL << 2)
-#define	BIC_Avg_MHz	(1ULL << 3)
-#define	BIC_Bzy_MHz	(1ULL << 4)
-#define	BIC_TSC_MHz	(1ULL << 5)
-#define	BIC_IRQ		(1ULL << 6)
-#define	BIC_SMI		(1ULL << 7)
-#define	BIC_Busy	(1ULL << 8)
-#define	BIC_CPU_c1	(1ULL << 9)
-#define	BIC_CPU_c3	(1ULL << 10)
-#define	BIC_CPU_c6	(1ULL << 11)
-#define	BIC_CPU_c7	(1ULL << 12)
-#define	BIC_ThreadC	(1ULL << 13)
-#define	BIC_CoreTmp	(1ULL << 14)
-#define	BIC_CoreCnt	(1ULL << 15)
-#define	BIC_PkgTmp	(1ULL << 16)
-#define	BIC_GFX_rc6	(1ULL << 17)
-#define	BIC_GFXMHz	(1ULL << 18)
-#define	BIC_Pkgpc2	(1ULL << 19)
-#define	BIC_Pkgpc3	(1ULL << 20)
-#define	BIC_Pkgpc6	(1ULL << 21)
-#define	BIC_Pkgpc7	(1ULL << 22)
-#define	BIC_Pkgpc8	(1ULL << 23)
-#define	BIC_Pkgpc9	(1ULL << 24)
-#define	BIC_Pkgpc10	(1ULL << 25)
-#define BIC_CPU_LPI	(1ULL << 26)
-#define BIC_SYS_LPI	(1ULL << 27)
-#define	BIC_PkgWatt	(1ULL << 26)
-#define	BIC_CorWatt	(1ULL << 27)
-#define	BIC_GFXWatt	(1ULL << 28)
-#define	BIC_PkgCnt	(1ULL << 29)
-#define	BIC_RAMWatt	(1ULL << 30)
-#define	BIC_PKG__	(1ULL << 31)
-#define	BIC_RAM__	(1ULL << 32)
-#define	BIC_Pkg_J	(1ULL << 33)
-#define	BIC_Cor_J	(1ULL << 34)
-#define	BIC_GFX_J	(1ULL << 35)
-#define	BIC_RAM_J	(1ULL << 36)
-#define	BIC_Core	(1ULL << 37)
-#define	BIC_CPU		(1ULL << 38)
-#define	BIC_Mod_c6	(1ULL << 39)
-#define	BIC_sysfs	(1ULL << 40)
-#define	BIC_Totl_c0	(1ULL << 41)
-#define	BIC_Any_c0	(1ULL << 42)
-#define	BIC_GFX_c0	(1ULL << 43)
-#define	BIC_CPUGFX	(1ULL << 44)
-#define	BIC_Node	(1ULL << 45)
+#define	BIC_Node	(1ULL << 3)
+#define	BIC_Avg_MHz	(1ULL << 4)
+#define	BIC_Busy	(1ULL << 5)
+#define	BIC_Bzy_MHz	(1ULL << 6)
+#define	BIC_TSC_MHz	(1ULL << 7)
+#define	BIC_IRQ		(1ULL << 8)
+#define	BIC_SMI		(1ULL << 9)
+#define	BIC_sysfs	(1ULL << 10)
+#define	BIC_CPU_c1	(1ULL << 11)
+#define	BIC_CPU_c3	(1ULL << 12)
+#define	BIC_CPU_c6	(1ULL << 13)
+#define	BIC_CPU_c7	(1ULL << 14)
+#define	BIC_ThreadC	(1ULL << 15)
+#define	BIC_CoreTmp	(1ULL << 16)
+#define	BIC_CoreCnt	(1ULL << 17)
+#define	BIC_PkgTmp	(1ULL << 18)
+#define	BIC_GFX_rc6	(1ULL << 19)
+#define	BIC_GFXMHz	(1ULL << 20)
+#define	BIC_Pkgpc2	(1ULL << 21)
+#define	BIC_Pkgpc3	(1ULL << 22)
+#define	BIC_Pkgpc6	(1ULL << 23)
+#define	BIC_Pkgpc7	(1ULL << 24)
+#define	BIC_Pkgpc8	(1ULL << 25)
+#define	BIC_Pkgpc9	(1ULL << 26)
+#define	BIC_Pkgpc10	(1ULL << 27)
+#define BIC_CPU_LPI	(1ULL << 28)
+#define BIC_SYS_LPI	(1ULL << 29)
+#define	BIC_PkgWatt	(1ULL << 30)
+#define	BIC_CorWatt	(1ULL << 31)
+#define	BIC_GFXWatt	(1ULL << 32)
+#define	BIC_PkgCnt	(1ULL << 33)
+#define	BIC_RAMWatt	(1ULL << 34)
+#define	BIC_PKG__	(1ULL << 35)
+#define	BIC_RAM__	(1ULL << 36)
+#define	BIC_Pkg_J	(1ULL << 37)
+#define	BIC_Cor_J	(1ULL << 38)
+#define	BIC_GFX_J	(1ULL << 39)
+#define	BIC_RAM_J	(1ULL << 40)
+#define	BIC_Mod_c6	(1ULL << 41)
+#define	BIC_Totl_c0	(1ULL << 42)
+#define	BIC_Any_c0	(1ULL << 43)
+#define	BIC_GFX_c0	(1ULL << 44)
+#define	BIC_CPUGFX	(1ULL << 45)
+#define	BIC_Core	(1ULL << 46)
+#define	BIC_CPU		(1ULL << 47)
 
 #define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD)
 

From d9d226ffadbf4a8e60f5b8fc866aaa5028c7e479 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 6 Jun 2018 15:47:36 -0400
Subject: [PATCH 127/215] tools/power turbostat: decode cpuid.1.HT

eg. the "HT" here:
CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM HT TM

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index f09a272941a1..b0f294bf89c3 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -4394,7 +4394,7 @@ void process_cpuid()
 	if (!quiet) {
 		fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
 			max_level, family, model, stepping, family, model, stepping);
-		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
+		fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
 			ecx & (1 << 0) ? "SSE3" : "-",
 			ecx & (1 << 3) ? "MONITOR" : "-",
 			ecx & (1 << 6) ? "SMX" : "-",
@@ -4403,6 +4403,7 @@ void process_cpuid()
 			edx & (1 << 4) ? "TSC" : "-",
 			edx & (1 << 5) ? "MSR" : "-",
 			edx & (1 << 22) ? "ACPI-TM" : "-",
+			edx & (1 << 28) ? "HT" : "-",
 			edx & (1 << 29) ? "TM" : "-");
 	}
 

From 4c2122d42116ebaa1665ad0fbef2c558fdc0e3c6 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 6 Jun 2018 17:44:48 -0400
Subject: [PATCH 128/215] tools/power turbostat: add optional APIC X2APIC
 columns

Add APIC and X2APIC columns to the topology section.

They are disabled-by-default -- enable like so:
--debug
or
--enable APIC,X2APIC

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 |  2 +-
 tools/power/x86/turbostat/turbostat.c | 75 ++++++++++++++++++++++++---
 2 files changed, 68 insertions(+), 9 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index ca9ef7017624..d39e4ff7d0bf 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -56,7 +56,7 @@ name as necessary to disambiguate it from others is necessary.  Note that option
 .PP
 \fB--hide column\fP do not show the specified built-in columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--hide sysfs" to hide the sysfs statistics columns as a group.
 .PP
-\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default.  Currently the only built-in counters disabled by default are "usec" and "Time_Of_Day_Seconds".
+\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default.  Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC".
 The column name "all" can be used to enable all disabled-by-default built-in counters.
 .PP
 \fB--show column\fP show only the specified built-in columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--show sysfs" to show the sysfs statistics columns as a group.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index b0f294bf89c3..3bc2c9d94739 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -109,6 +109,7 @@ unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;		/* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;		/* IA32_HWP_REQUEST_PKG */
 unsigned int has_misc_feature_control;
+unsigned int first_counter_read = 1;
 
 #define RAPL_PKG		(1 << 0)
 					/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -170,6 +171,8 @@ struct thread_data {
 	unsigned long long  irq_count;
 	unsigned int smi_count;
 	unsigned int cpu_id;
+	unsigned int apic_id;
+	unsigned int x2apic_id;
 	unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE	0x2
 #define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4
@@ -435,10 +438,10 @@ struct msr_counter bic[] = {
 	{ 0x0, "CPUGFX%" },
 	{ 0x0, "Core" },
 	{ 0x0, "CPU" },
+	{ 0x0, "APIC" },
+	{ 0x0, "X2APIC" },
 };
 
-
-
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
 #define	BIC_USEC	(1ULL << 0)
 #define	BIC_TOD		(1ULL << 1)
@@ -488,11 +491,13 @@ struct msr_counter bic[] = {
 #define	BIC_CPUGFX	(1ULL << 45)
 #define	BIC_Core	(1ULL << 46)
 #define	BIC_CPU		(1ULL << 47)
+#define	BIC_APIC	(1ULL << 48)
+#define	BIC_X2APIC	(1ULL << 49)
 
-#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD)
+#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
 
 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs;
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
 
 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
@@ -603,6 +608,10 @@ void print_header(char *delim)
 		outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_CPU))
 		outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_APIC))
+		outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
+	if (DO_BIC(BIC_X2APIC))
+		outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Avg_MHz))
 		outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
 	if (DO_BIC(BIC_Busy))
@@ -882,6 +891,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 		if (DO_BIC(BIC_CPU))
 			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+		if (DO_BIC(BIC_APIC))
+			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+		if (DO_BIC(BIC_X2APIC))
+			outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
 	} else {
 		if (DO_BIC(BIC_Package)) {
 			if (p)
@@ -906,6 +919,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 		}
 		if (DO_BIC(BIC_CPU))
 			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
+		if (DO_BIC(BIC_APIC))
+			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
+		if (DO_BIC(BIC_X2APIC))
+			outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
 	}
 
 	if (DO_BIC(BIC_Avg_MHz))
@@ -1233,6 +1250,12 @@ delta_thread(struct thread_data *new, struct thread_data *old,
 	int i;
 	struct msr_counter *mp;
 
+	/* we run cpuid just the 1st time, copy the results */
+	if (DO_BIC(BIC_APIC))
+		new->apic_id = old->apic_id;
+	if (DO_BIC(BIC_X2APIC))
+		new->x2apic_id = old->x2apic_id;
+
 	/*
 	 * the timestamps from start of measurement interval are in "old"
 	 * the timestamp from end of measurement interval are in "new"
@@ -1395,6 +1418,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
 	int i;
 	struct msr_counter *mp;
 
+	/* copy un-changing apic_id's */
+	if (DO_BIC(BIC_APIC))
+		average.threads.apic_id = t->apic_id;
+	if (DO_BIC(BIC_X2APIC))
+		average.threads.x2apic_id = t->x2apic_id;
+
 	/* remember first tv_begin */
 	if (average.threads.tv_begin.tv_sec == 0)
 		average.threads.tv_begin = t->tv_begin;
@@ -1621,6 +1650,34 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 	return 0;
 }
 
+void get_apic_id(struct thread_data *t)
+{
+	unsigned int eax, ebx, ecx, edx, max_level;
+
+	eax = ebx = ecx = edx = 0;
+
+	if (!genuine_intel)
+		return;
+
+	__cpuid(0, max_level, ebx, ecx, edx);
+
+	__cpuid(1, eax, ebx, ecx, edx);
+	t->apic_id = (ebx >> 24) & 0xf;
+
+	if (max_level < 0xb)
+		return;
+
+	if (!DO_BIC(BIC_X2APIC))
+		return;
+
+	ecx = 0;
+	__cpuid(0xb, eax, ebx, ecx, edx);
+	t->x2apic_id = edx;
+
+	if (debug && (t->apic_id != t->x2apic_id))
+		fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
+}
+
 /*
  * get_counters(...)
  * migrate to cpu
@@ -1634,7 +1691,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	struct msr_counter *mp;
 	int i;
 
-
 	gettimeofday(&t->tv_begin, (struct timezone *)NULL);
 
 	if (cpu_migrate(cpu)) {
@@ -1642,6 +1698,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return -1;
 	}
 
+	if (first_counter_read)
+		get_apic_id(t);
 retry:
 	t->tsc = rdtsc();	/* we are running on local CPU of interest */
 
@@ -2881,6 +2939,7 @@ void do_sleep(void)
 	}
 }
 
+
 void turbostat_loop()
 {
 	int retval;
@@ -2894,6 +2953,7 @@ void turbostat_loop()
 
 	snapshot_proc_sysfs_files();
 	retval = for_all_cpus(get_counters, EVEN_COUNTERS);
+	first_counter_read = 0;
 	if (retval < -1) {
 		exit(retval);
 	} else if (retval == -1) {
@@ -4655,7 +4715,6 @@ void process_cpuid()
 	return;
 }
 
-
 /*
  * in /dev/cpu/ return success for names that are numbers
  * ie. filter out ".", "..", "microcode".
@@ -4949,6 +5008,7 @@ int fork_it(char **argv)
 
 	snapshot_proc_sysfs_files();
 	status = for_all_cpus(get_counters, EVEN_COUNTERS);
+	first_counter_read = 0;
 	if (status)
 		exit(status);
 	/* clear affinity side-effect of get_counters() */
@@ -5384,7 +5444,7 @@ void cmdline(int argc, char **argv)
 			break;
 		case 'e':
 			/* --enable specified counter */
-			bic_enabled |= bic_lookup(optarg, SHOW_LIST);
+			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
 			break;
 		case 'd':
 			debug++;
@@ -5468,7 +5528,6 @@ void cmdline(int argc, char **argv)
 int main(int argc, char **argv)
 {
 	outf = stderr;
-
 	cmdline(argc, argv);
 
 	if (!quiet)

From 42dd45209201edb222de5f9eadc1c8f93700ef28 Mon Sep 17 00:00:00 2001
From: Nathan Ciobanu <nathan.d.ciobanu@linux.intel.com>
Date: Fri, 8 Jun 2018 15:15:12 -0700
Subject: [PATCH 129/215] tools/power turbostat: fix segfault on 'no node'
 machines

Running turbostat on machines that don't expose nodes
in sysfs (no /sys/bus/node) causes a segfault or a -nan
value diesplayed in the log. This is caused by
physical_node_id being reported as -1 and logical_node_id
being calculated as a negative number resulting in the new
GET_THREAD/GET_CORE returning an incorrect address.

Signed-off-by: Nathan Ciobanu <nathan.d.ciobanu@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 3bc2c9d94739..97cc00a9c763 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2492,6 +2492,12 @@ void set_node_data(void)
 		if (pni[pkg].count > topo.nodes_per_pkg)
 			topo.nodes_per_pkg = pni[0].count;
 
+	/* Fake 1 node per pkg for machines that don't
+	 * expose nodes and thus avoid -nan results
+	 */
+	if (topo.nodes_per_pkg == 0)
+		topo.nodes_per_pkg = 1;
+
 	for (cpu = 0; cpu < topo.num_cpus; cpu++) {
 		pkg = cpus[cpu].physical_package_id;
 		node = cpus[cpu].physical_node_id;
@@ -4904,6 +4910,13 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
 	struct core_data *c;
 	struct pkg_data *p;
 
+
+	/* Workaround for systems where physical_node_id==-1
+	 * and logical_node_id==(-1 - topo.num_cpus)
+	 */
+	if (node_id < 0)
+		node_id = 0;
+
 	t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
 	c = GET_CORE(core_base, core_id, node_id, pkg_id);
 	p = GET_PKG(pkg_base, pkg_id);

From 2ee19bdea1bbc04a06606b5c9681a07d005ecbaf Mon Sep 17 00:00:00 2001
From: Nathan Ciobanu <nathan.d.ciobanu@linux.intel.com>
Date: Wed, 13 Jun 2018 19:51:32 -0700
Subject: [PATCH 130/215] tools/power turbostat: alphabetize the help output

Sort the command line arguments output of help() in
alphabetical order in line with other linux tools.

Signed-off-by: Nathan Ciobanu <nathan.d.ciobanu@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 97cc00a9c763..d33b655299ba 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -528,12 +528,12 @@ void help(void)
 	"		eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
 	"--cpu	cpu-set	limit output to summary plus cpu-set:\n"
 	"		{core | package | j,k,l..m,n-p }\n"
-	"--quiet	skip decoding system configuration header\n"
 	"--interval sec.subsec	Override default 5-second measurement interval\n"
-	"--help		print this help message\n"
 	"--list		list column headers only\n"
 	"--num_iterations num   number of the measurement iterations\n"
 	"--out file	create or truncate \"file\" for all output\n"
+	"--quiet	skip decoding system configuration header\n"
+	"--help		print this help message\n"
 	"--version	print version information\n"
 	"\n"
 	"For more help, run \"man turbostat\"\n");

From cc4816503f835c7cea184776fe8ae5bb3f505083 Mon Sep 17 00:00:00 2001
From: Nathan Ciobanu <nathan.d.ciobanu@linux.intel.com>
Date: Wed, 13 Jun 2018 19:51:33 -0700
Subject: [PATCH 131/215] tools/power turbostat: add single character tokens to
 help

Improve the help() output by adding the single character
tokens (e.g -a).

Signed-off-by: Nathan Ciobanu <nathan.d.ciobanu@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d33b655299ba..2dcc05f3ee6f 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -524,17 +524,20 @@ void help(void)
 	"when COMMAND completes.\n"
 	"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
 	"to print statistics, until interrupted.\n"
-	"--add		add a counter\n"
-	"		eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
-	"--cpu	cpu-set	limit output to summary plus cpu-set:\n"
-	"		{core | package | j,k,l..m,n-p }\n"
-	"--interval sec.subsec	Override default 5-second measurement interval\n"
-	"--list		list column headers only\n"
-	"--num_iterations num   number of the measurement iterations\n"
-	"--out file	create or truncate \"file\" for all output\n"
-	"--quiet	skip decoding system configuration header\n"
-	"--help		print this help message\n"
-	"--version	print version information\n"
+	"  -a, --add	add a counter\n"
+	"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
+	"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
+	"		  {core | package | j,k,l..m,n-p }\n"
+	"  -i, --interval sec.subsec\n"
+	"		Override default 5-second measurement interval\n"
+	"  -l, --list	list column headers only\n"
+	"  -n, --num_iterations num\n"
+	"		number of the measurement iterations\n"
+	"  -o, --out file\n"
+	"		create or truncate \"file\" for all output\n"
+	"  -q, --quiet	skip decoding system configuration header\n"
+	"  -h, --help	print this help message\n"
+	"  -v, --version	print version information\n"
 	"\n"
 	"For more help, run \"man turbostat\"\n");
 }

From 9ce80578d5f5279545c272563851d059a2359f3e Mon Sep 17 00:00:00 2001
From: Nathan Ciobanu <nathan.d.ciobanu@linux.intel.com>
Date: Wed, 13 Jun 2018 19:51:34 -0700
Subject: [PATCH 132/215] tools/power turbostat: add the missing command line
 switches

Document the missing command line tokens in the help() function.

Signed-off-by: Nathan Ciobanu <nathan.d.ciobanu@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 2dcc05f3ee6f..108c3bf2a67c 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -528,14 +528,28 @@ void help(void)
 	"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
 	"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
 	"		  {core | package | j,k,l..m,n-p }\n"
+	"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
+	"  -D, --Dump	displays the raw counter values\n"
+	"  -e, --enable	[all | column]\n"
+	"		shows all or the specified disabled column\n"
+	"  -H, --hide [column|column,column,...]\n"
+	"		hide the specified column(s)\n"
 	"  -i, --interval sec.subsec\n"
 	"		Override default 5-second measurement interval\n"
+	"  -J, --Joules	displays energy in Joules instead of Watts\n"
 	"  -l, --list	list column headers only\n"
 	"  -n, --num_iterations num\n"
 	"		number of the measurement iterations\n"
 	"  -o, --out file\n"
 	"		create or truncate \"file\" for all output\n"
 	"  -q, --quiet	skip decoding system configuration header\n"
+	"  -s, --show [column|column,column,...]\n"
+	"		show only the specified column(s)\n"
+	"  -S, --Summary\n"
+	"		limits output to 1-line system summary per interval\n"
+	"  -T, --TCC temperature\n"
+	"		sets the Thermal Control Circuit temperature in\n"
+	"		  degrees Celsius\n"
 	"  -h, --help	print this help message\n"
 	"  -v, --version	print version information\n"
 	"\n"

From 73780cd816e071b0fc0f74e204a9cb30fdb291c5 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 20 Jun 2018 13:55:29 -0400
Subject: [PATCH 133/215] tools/power turbostat: version 18.06.20

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 108c3bf2a67c..4d14bbbf9b63 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -5102,7 +5102,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-	fprintf(outf, "turbostat version 18.06.01"
+	fprintf(outf, "turbostat version 18.06.20"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 

From ce042c183bcb94eb2919e8036473a1fc203420f9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 20 Jun 2018 13:41:51 +0300
Subject: [PATCH 134/215] block: sed-opal: Fix a couple off by one bugs

resp->num is the number of tokens in resp->tok[].  It gets set in
response_parse().  So if n == resp->num then we're reading beyond the
end of the data.

Fixes: 455a7b238cd6 ("block: Add Sed-opal library")
Reviewed-by: Scott Bauer <scott.bauer@intel.com>
Tested-by: Scott Bauer <scott.bauer@intel.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/sed-opal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/sed-opal.c b/block/sed-opal.c
index 945f4b8610e0..e0de4dd448b3 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -877,7 +877,7 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
 		return 0;
 	}
 
-	if (n > resp->num) {
+	if (n >= resp->num) {
 		pr_debug("Response has %d tokens. Can't access %d\n",
 			 resp->num, n);
 		return 0;
@@ -916,7 +916,7 @@ static u64 response_get_u64(const struct parsed_resp *resp, int n)
 		return 0;
 	}
 
-	if (n > resp->num) {
+	if (n >= resp->num) {
 		pr_debug("Response has %d tokens. Can't access %d\n",
 			 resp->num, n);
 		return 0;

From 7a0f9d1eb51ff25d119b48fe7cc6aa0433cd6621 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 20 Jun 2018 10:42:07 +0200
Subject: [PATCH 135/215] Documentation: intel_pstate: Fix typo

Fix a typo in the intel_pstate admin-guide documentation.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/intel_pstate.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index ab2fe0eda1d7..8b9164990956 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -410,7 +410,7 @@ argument is passed to the kernel in the command line.
 	That only is supported in some configurations, though (for example, if
 	the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
 	the operation mode of the driver cannot be changed), and if it is not
-	supported in the current configuration, writes to this attribute with
+	supported in the current configuration, writes to this attribute will
 	fail with an appropriate error.
 
 Interpretation of Policy Attributes

From 08ba91ee6e2c1c08d3f0648f978cbb5dbf3491d8 Mon Sep 17 00:00:00 2001
From: Doron Roberts-Kedes <doronrk@fb.com>
Date: Fri, 15 Jun 2018 14:05:32 -0700
Subject: [PATCH 136/215] nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag.

If NBD_DISCONNECT_ON_CLOSE is set on a device, then the driver will
issue a disconnect from nbd_release if the device has no remaining
bdev->bd_openers.

Fix ret val so reconfigure with only setting the flag succeeds.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Doron Roberts-Kedes <doronrk@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c      | 42 ++++++++++++++++++++++++++++++++--------
 include/uapi/linux/nbd.h |  3 +++
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 3b7083b8ecbb..74a05561b620 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -76,6 +76,7 @@ struct link_dead_args {
 #define NBD_HAS_CONFIG_REF		4
 #define NBD_BOUND			5
 #define NBD_DESTROY_ON_DISCONNECT	6
+#define NBD_DISCONNECT_ON_CLOSE 	7
 
 struct nbd_config {
 	u32 flags;
@@ -138,6 +139,7 @@ static void nbd_config_put(struct nbd_device *nbd);
 static void nbd_connect_reply(struct genl_info *info, int index);
 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
 static void nbd_dead_link_work(struct work_struct *work);
+static void nbd_disconnect_and_put(struct nbd_device *nbd);
 
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
@@ -1305,6 +1307,12 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
 static void nbd_release(struct gendisk *disk, fmode_t mode)
 {
 	struct nbd_device *nbd = disk->private_data;
+	struct block_device *bdev = bdget_disk(disk, 0);
+
+	if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+			bdev->bd_openers == 0)
+		nbd_disconnect_and_put(nbd);
+
 	nbd_config_put(nbd);
 	nbd_put(nbd);
 }
@@ -1705,6 +1713,10 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 				&config->runtime_flags);
 			put_dev = true;
 		}
+		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+			set_bit(NBD_DISCONNECT_ON_CLOSE,
+				&config->runtime_flags);
+		}
 	}
 
 	if (info->attrs[NBD_ATTR_SOCKETS]) {
@@ -1749,6 +1761,17 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 	return ret;
 }
 
+static void nbd_disconnect_and_put(struct nbd_device *nbd)
+{
+	mutex_lock(&nbd->config_lock);
+	nbd_disconnect(nbd);
+	nbd_clear_sock(nbd);
+	mutex_unlock(&nbd->config_lock);
+	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+			       &nbd->config->runtime_flags))
+		nbd_config_put(nbd);
+}
+
 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nbd_device *nbd;
@@ -1781,13 +1804,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 		nbd_put(nbd);
 		return 0;
 	}
-	mutex_lock(&nbd->config_lock);
-	nbd_disconnect(nbd);
-	nbd_clear_sock(nbd);
-	mutex_unlock(&nbd->config_lock);
-	if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
-			       &nbd->config->runtime_flags))
-		nbd_config_put(nbd);
+	nbd_disconnect_and_put(nbd);
 	nbd_config_put(nbd);
 	nbd_put(nbd);
 	return 0;
@@ -1798,7 +1815,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 	struct nbd_device *nbd = NULL;
 	struct nbd_config *config;
 	int index;
-	int ret = -EINVAL;
+	int ret = 0;
 	bool put_dev = false;
 
 	if (!netlink_capable(skb, CAP_SYS_ADMIN))
@@ -1838,6 +1855,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 	    !nbd->task_recv) {
 		dev_err(nbd_to_dev(nbd),
 			"not configured, cannot reconfigure\n");
+		ret = -EINVAL;
 		goto out;
 	}
 
@@ -1862,6 +1880,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 					       &config->runtime_flags))
 				refcount_inc(&nbd->refs);
 		}
+
+		if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+			set_bit(NBD_DISCONNECT_ON_CLOSE,
+					&config->runtime_flags);
+		} else {
+			clear_bit(NBD_DISCONNECT_ON_CLOSE,
+					&config->runtime_flags);
+		}
 	}
 
 	if (info->attrs[NBD_ATTR_SOCKETS]) {
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index 85a3fb65e40a..20d6cc91435d 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -53,6 +53,9 @@ enum {
 /* These are client behavior specific flags. */
 #define NBD_CFLAG_DESTROY_ON_DISCONNECT	(1 << 0) /* delete the nbd device on
 						    disconnect. */
+#define NBD_CFLAG_DISCONNECT_ON_CLOSE (1 << 1) /* disconnect the nbd device on
+						*  close by last opener.
+						*/
 
 /* userspace doesn't need the nbd_device structure */
 

From 02d62a8bc48e92171c46540722e2d52ce77d87af Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Wed, 20 Jun 2018 07:44:12 -0700
Subject: [PATCH 137/215] nvme-fc: release io queues to allow fast fail

Rather than leaving io queues quiesced after tearing down an association,
restart them. This allows ios to be replayed, with fastfail ios terminating
and non-fastfail getting into loops of retry.

This follows rdma's lead.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Sagi Grimberg <sagi@grimber.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index b528a2f5826c..41d45a1b5c62 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2790,6 +2790,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	/* re-enable the admin_q so anything new can fast fail */
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
+	/* resume the io queues so that things will fast fail */
+	nvme_start_queues(&ctrl->ctrl);
+
 	nvme_fc_ctlr_inactive_on_rport(ctrl);
 }
 
@@ -2804,9 +2807,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
 	 * waiting for io to terminate
 	 */
 	nvme_fc_delete_association(ctrl);
-
-	/* resume the io queues so that things will fast fail */
-	nvme_start_queues(nctrl);
 }
 
 static void

From ba56bc3a0786992755e6804fbcbdc60ef6cfc24c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 1 Jun 2018 17:06:28 +0200
Subject: [PATCH 138/215] KVM: arm/arm64: Drop resource size check for GICV
 window

When booting a 64 KB pages kernel on a ACPI GICv3 system that
implements support for v2 emulation, the following warning is
produced

  GICV size 0x2000 not a multiple of page size 0x10000

and support for v2 emulation is disabled, preventing GICv2 VMs
from being able to run on such hosts.

The reason is that vgic_v3_probe() performs a sanity check on the
size of the window (it should be a multiple of the page size),
while the ACPI MADT parsing code hardcodes the size of the window
to 8 KB. This makes sense, considering that ACPI does not bother
to describe the size in the first place, under the assumption that
platforms implementing ACPI will follow the architecture and not
put anything else in the same 64 KB window.

So let's just drop the sanity check altogether, and assume that
the window is at least 64 KB in size.

Fixes: 909777324588 ("KVM: arm/arm64: vgic-new: vgic_init: implement kvm_vgic_hyp_init")
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/vgic/vgic-v3.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index ff7dc890941a..cdce653e3c47 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -617,11 +617,6 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
 		pr_warn("GICV physical address 0x%llx not page aligned\n",
 			(unsigned long long)info->vcpu.start);
 		kvm_vgic_global_state.vcpu_base = 0;
-	} else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
-		pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
-			(unsigned long long)resource_size(&info->vcpu),
-			PAGE_SIZE);
-		kvm_vgic_global_state.vcpu_base = 0;
 	} else {
 		kvm_vgic_global_state.vcpu_base = info->vcpu.start;
 		kvm_vgic_global_state.can_emulate_gicv2 = true;

From 6ebdf4db8fa564a150f46d32178af0873eb5abbb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 15 Jun 2018 16:47:23 +0100
Subject: [PATCH 139/215] arm64: Introduce sysreg_clear_set()

Currently we have a couple of helpers to manipulate bits in particular
sysregs:

 * config_sctlr_el1(u32 clear, u32 set)

 * change_cpacr(u64 val, u64 mask)

The parameters of these differ in naming convention, order, and size,
which is unfortunate. They also differ slightly in behaviour, as
change_cpacr() skips the sysreg write if the bits are unchanged, which
is a useful optimization when sysreg writes are expensive.

Before we gain yet another sysreg manipulation function, let's
unify these with a common helper, providing a consistent order for
clear/set operands, and the write skipping behaviour from
change_cpacr(). Code will be migrated to the new helper in subsequent
patches.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Dave Martin <dave.martin@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/sysreg.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6171178075dc..a8f84812c6e8 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -728,6 +728,17 @@ asm(
 	asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val));	\
 } while (0)
 
+/*
+ * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the
+ * set mask are set. Other bits are left as-is.
+ */
+#define sysreg_clear_set(sysreg, clear, set) do {			\
+	u64 __scs_val = read_sysreg(sysreg);				\
+	u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set);		\
+	if (__scs_new != __scs_val)					\
+		write_sysreg(__scs_new, sysreg);			\
+} while (0)
+
 static inline void config_sctlr_el1(u32 clear, u32 set)
 {
 	u32 val;

From b045e4d0f392cbdab2674b0aa78c8d2b187e4e27 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Fri, 15 Jun 2018 16:47:24 +0100
Subject: [PATCH 140/215] KVM: arm64: Don't mask softirq with IRQs disabled in
 vcpu_put()

Commit e6b673b ("KVM: arm64: Optimise FPSIMD handling to reduce
guest/host thrashing") introduces a specific helper
kvm_arch_vcpu_put_fp() for saving the vcpu FPSIMD state during
vcpu_put().

This function uses local_bh_disable()/_enable() to protect the
FPSIMD context manipulation from interruption by softirqs.

This approach is not correct, because vcpu_put() can be invoked
either from the KVM host vcpu thread (when exiting the vcpu run
loop), or via a preempt notifier.  In the former case, only
preemption is disabled.  In the latter case, the function is called
from inside __schedule(), which means that IRQs are disabled.

Use of local_bh_disable()/_enable() with IRQs disabled is considerd
an error, resulting in lockdep splats while running VMs if lockdep
is enabled.

This patch disables IRQs instead of attempting to disable softirqs,
avoiding the problem of calling local_bh_enable() with IRQs
disabled in the __schedule() path.  This creates an additional
interrupt blackout during vcpu run loop exit, but this is the rare
case and the blackout latency is still less than that of
__schedule().

Fixes: e6b673b741ea ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing")
Reported-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/fpsimd.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index dc6ecfa5a2d2..f9d09318b8db 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -5,7 +5,7 @@
  * Copyright 2018 Arm Limited
  * Author: Dave Martin <Dave.Martin@arm.com>
  */
-#include <linux/bottom_half.h>
+#include <linux/irqflags.h>
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 #include <linux/kvm_host.h>
@@ -92,7 +92,9 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
  */
 void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 {
-	local_bh_disable();
+	unsigned long flags;
+
+	local_irq_save(flags);
 
 	update_thread_flag(TIF_SVE,
 			   vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
@@ -106,5 +108,5 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 		fpsimd_bind_task_to_cpu();
 	}
 
-	local_bh_enable();
+	local_irq_restore(flags);
 }

From b3eb56b629d1095dde56fa37f4d7bcd5f783c8b2 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Fri, 15 Jun 2018 16:47:25 +0100
Subject: [PATCH 141/215] KVM: arm64/sve: Fix SVE trap restoration for
 non-current tasks

Commit e6b673b ("KVM: arm64: Optimise FPSIMD handling to reduce
guest/host thrashing") attempts to restore the configuration of
userspace SVE trapping via a call to fpsimd_bind_task_to_cpu(), but
the logic for determining when to do this is not correct.

The patch makes the errnoenous assumption that the only task that
may try to enter userspace with the currently loaded FPSIMD/SVE
register content is current.  This may not be the case however:  if
some other user task T is scheduled on the CPU during the execution
of the KVM run loop, and the vcpu does not try to use the registers
in the meantime, then T's state may be left there intact.  If T
happens to be the next task to enter userspace on this CPU then the
hooks for reloading the register state and configuring traps will
be skipped.

(Also, current never has SVE state at this point anyway and should
always have the trap enabled, as a side-effect of the ioctl()
syscall needed to reach the KVM run loop in the first place.)

This patch instead restores the state of the EL0 trap from the
state observed at the most recent vcpu_load(), ensuring that the
trap is set correctly for the loaded context (if any).

Fixes: e6b673b741ea ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing")
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/kvm_host.h |  1 +
 arch/arm64/kvm/fpsimd.c           | 24 ++++++++++++++++++++----
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index fda9a8ca48be..fe8777b12f86 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -306,6 +306,7 @@ struct kvm_vcpu_arch {
 #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
 #define KVM_ARM64_FP_HOST		(1 << 2) /* host FP regs loaded */
 #define KVM_ARM64_HOST_SVE_IN_USE	(1 << 3) /* backup for host TIF_SVE */
+#define KVM_ARM64_HOST_SVE_ENABLED	(1 << 4) /* SVE enabled for EL0 */
 
 #define vcpu_gp_regs(v)		(&(v)->arch.ctxt.gp_regs)
 
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index f9d09318b8db..98d19d1afa50 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -12,6 +12,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_host.h>
 #include <asm/kvm_mmu.h>
+#include <asm/sysreg.h>
 
 /*
  * Called on entry to KVM_RUN unless this vcpu previously ran at least
@@ -61,10 +62,16 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 {
 	BUG_ON(!current->mm);
 
-	vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
+	vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+			      KVM_ARM64_HOST_SVE_IN_USE |
+			      KVM_ARM64_HOST_SVE_ENABLED);
 	vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+
 	if (test_thread_flag(TIF_SVE))
 		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
+
+	if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
+		vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
 }
 
 /*
@@ -103,9 +110,18 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 		/* Clean guest FP state to memory and invalidate cpu view */
 		fpsimd_save();
 		fpsimd_flush_cpu_state();
-	} else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		/* Ensure user trap controls are correctly restored */
-		fpsimd_bind_task_to_cpu();
+	} else if (system_supports_sve()) {
+		/*
+		 * The FPSIMD/SVE state in the CPU has not been touched, and we
+		 * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
+		 * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
+		 * for EL0.  To avoid spurious traps, restore the trap state
+		 * seen by kvm_arch_vcpu_load_fp():
+		 */
+		if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
+			sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
+		else
+			sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
 	}
 
 	local_irq_restore(flags);

From 2955bcc8c309bb8f2c773db4798649aa802a491f Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Fri, 15 Jun 2018 16:47:26 +0100
Subject: [PATCH 142/215] KVM: arm64: Avoid mistaken attempts to save SVE state
 for vcpus

Commit e6b673b ("KVM: arm64: Optimise FPSIMD handling to reduce
guest/host thrashing") uses fpsimd_save() to save the FPSIMD state
for a vcpu when scheduling the vcpu out.  However, currently
current's value of TIF_SVE is restored before calling fpsimd_save()
which means that fpsimd_save() may erroneously attempt to save SVE
state from the vcpu.  This enables current's vector state to be
polluted with guest data.  current->thread.sve_state may be
unallocated or not large enough, so this can also trigger a NULL
dereference or buffer overrun.

Instead of this, TIF_SVE should be configured properly for the
guest when calling fpsimd_save() with the vcpu context loaded.

This patch ensures this by delaying restoration of current's
TIF_SVE until after the call to fpsimd_save().

Fixes: e6b673b741ea ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing")
Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/fpsimd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 98d19d1afa50..aac7808ce216 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -103,9 +103,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 
 	local_irq_save(flags);
 
-	update_thread_flag(TIF_SVE,
-			   vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
-
 	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
 		/* Clean guest FP state to memory and invalidate cpu view */
 		fpsimd_save();
@@ -124,5 +121,8 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 			sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
 	}
 
+	update_thread_flag(TIF_SVE,
+			   vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+
 	local_irq_restore(flags);
 }

From e8b92efa629dac0e70ea4145c5e70616de5f89c8 Mon Sep 17 00:00:00 2001
From: Maciej Purski <m.purski@samsung.com>
Date: Tue, 23 Jan 2018 12:17:19 +0100
Subject: [PATCH 143/215] drm/bridge/sii8620: fix display of packed pixel modes
 in MHL2

Currently packed pixel modes in MHL2 can't be displayed. The device
automatically recognizes output format, so setting format other than
RGB causes failure. Fix it by writing proper values to registers.

Tested on MHL1 and MHL2 using various vendors' dongles both in
DVI and HDMI mode.

Signed-off-by: Maciej Purski <m.purski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1516706239-9104-1-git-send-email-m.purski@samsung.com
---
 drivers/gpu/drm/bridge/sil-sii8620.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index 4a3deeda065c..250effa0e6b8 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -1017,20 +1017,11 @@ static void sii8620_stop_video(struct sii8620 *ctx)
 
 static void sii8620_set_format(struct sii8620 *ctx)
 {
-	u8 out_fmt;
-
 	if (sii8620_is_mhl3(ctx)) {
 		sii8620_setbits(ctx, REG_M3_P0CTRL,
 				BIT_M3_P0CTRL_MHL3_P0_PIXEL_MODE_PACKED,
 				ctx->use_packed_pixel ? ~0 : 0);
 	} else {
-		if (ctx->use_packed_pixel)
-			sii8620_write_seq_static(ctx,
-				REG_VID_MODE, BIT_VID_MODE_M1080P,
-				REG_MHL_TOP_CTL, BIT_MHL_TOP_CTL_MHL_PP_SEL | 1,
-				REG_MHLTX_CTL6, 0x60
-			);
-		else
 			sii8620_write_seq_static(ctx,
 				REG_VID_MODE, 0,
 				REG_MHL_TOP_CTL, 1,
@@ -1038,15 +1029,9 @@ static void sii8620_set_format(struct sii8620 *ctx)
 			);
 	}
 
-	if (ctx->use_packed_pixel)
-		out_fmt = VAL_TPI_FORMAT(YCBCR422, FULL) |
-			BIT_TPI_OUTPUT_CSCMODE709;
-	else
-		out_fmt = VAL_TPI_FORMAT(RGB, FULL);
-
 	sii8620_write_seq(ctx,
 		REG_TPI_INPUT, VAL_TPI_FORMAT(RGB, FULL),
-		REG_TPI_OUTPUT, out_fmt,
+		REG_TPI_OUTPUT, VAL_TPI_FORMAT(RGB, FULL),
 	);
 }
 

From 74899d92e66663dc7671a8017b3146dcd4735f3b Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 21 Jun 2018 10:43:31 +0200
Subject: [PATCH 144/215] x86/xen: Add call of
 speculative_store_bypass_ht_init() to PV paths

Commit:

  1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD")

... added speculative_store_bypass_ht_init() to the per-CPU initialization sequence.

speculative_store_bypass_ht_init() needs to be called on each CPU for
PV guests, too.

Reported-by: Brian Woods <brian.woods@amd.com>
Tested-by: Brian Woods <brian.woods@amd.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Cc: <stable@vger.kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: boris.ostrovsky@oracle.com
Cc: xen-devel@lists.xenproject.org
Fixes: 1f50ddb4f4189243c05926b842dc1a0332195f31 ("x86/speculation: Handle HT correctly on AMD")
Link: https://lore.kernel.org/lkml/20180621084331.21228-1-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/xen/smp_pv.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 2e20ae2fa2d6..e3b18ad49889 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -32,6 +32,7 @@
 #include <xen/interface/vcpu.h>
 #include <xen/interface/xenpmu.h>
 
+#include <asm/spec-ctrl.h>
 #include <asm/xen/interface.h>
 #include <asm/xen/hypercall.h>
 
@@ -70,6 +71,8 @@ static void cpu_bringup(void)
 	cpu_data(cpu).x86_max_cores = 1;
 	set_cpu_sibling_map(cpu);
 
+	speculative_store_bypass_ht_init();
+
 	xen_setup_cpu_clockevents();
 
 	notify_cpu_starting(cpu);
@@ -250,6 +253,8 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
 	}
 	set_cpu_sibling_map(0);
 
+	speculative_store_bypass_ht_init();
+
 	xen_pmu_init(0);
 
 	if (xen_smp_intr_init(0) || xen_smp_intr_init_pv(0))

From e7c9996bb3dedb5553e6b34e6dbbed210a72f3e1 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 19 Jun 2018 09:32:28 -0400
Subject: [PATCH 145/215] rseq/selftests: Make run_param_test.sh executable

The executable bit of the run_param_test.sh script got lost in
the merge.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Lameter <cl@linux.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Paul E . McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Link: https://lore.kernel.org/lkml/20180619133230.4087-2-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/rseq/run_param_test.sh | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100644 => 100755 tools/testing/selftests/rseq/run_param_test.sh

diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
old mode 100644
new mode 100755

From 0ea73d5e286193be4dec70d04021d6005b5b1771 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 19 Jun 2018 09:32:29 -0400
Subject: [PATCH 146/215] rseq/selftests/arm: Align 'struct rseq_cs' on 32
 bytes

uapi/linux/rseq.h aligns 'struct rseq_cs' on 32 bytes. Satisfy this
alignment requirement in its definition within the rseq-arm.h
inline assembly as well.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Hunter <ahh@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Lameter <cl@linux.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Paul E . McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Link: https://lore.kernel.org/lkml/20180619133230.4087-3-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/rseq/rseq-arm.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 3b055f9aeaab..3cea19877227 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -57,6 +57,7 @@ do {									\
 #define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown,		\
 				abort_label, version, flags,		\
 				start_ip, post_commit_offset, abort_ip)	\
+		".balign 32\n\t"					\
 		__rseq_str(table_label) ":\n\t"				\
 		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
 		".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \

From 9a789fcfe8605417f7a1a970355f5efa4fe88c64 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Tue, 19 Jun 2018 09:32:30 -0400
Subject: [PATCH 147/215] rseq/cleanup: Do not abort rseq c.s. in child on
 fork()

Considering that we explicitly forbid system calls in rseq critical
sections, it is not valid to issue a fork or clone system call within a
rseq critical section, so rseq_fork() is not required to restart an
active rseq c.s. in the child process.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Lameter <cl@linux.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Paul E . McKenney <paulmck@linux.vnet.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-api@vger.kernel.org
Cc: linux-kselftest@vger.kernel.org
Link: https://lore.kernel.org/lkml/20180619133230.4087-4-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 87bf02d93a27..c1882643d455 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1831,9 +1831,7 @@ static inline void rseq_migrate(struct task_struct *t)
 
 /*
  * If parent process has a registered restartable sequences area, the
- * child inherits. Only applies when forking a process, not a thread. In
- * case a parent fork() in the middle of a restartable sequence, set the
- * resume notifier to force the child to retry.
+ * child inherits. Only applies when forking a process, not a thread.
  */
 static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
 {
@@ -1847,7 +1845,6 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
 		t->rseq_len = current->rseq_len;
 		t->rseq_sig = current->rseq_sig;
 		t->rseq_event_mask = current->rseq_event_mask;
-		rseq_preempt(t);
 	}
 }
 

From 47a91b7232fa25abc7e0b7fc1c69ae4f81061594 Mon Sep 17 00:00:00 2001
From: Jia He <hejianet@gmail.com>
Date: Mon, 21 May 2018 11:05:30 +0800
Subject: [PATCH 148/215] KVM: arm/arm64: add WARN_ON if size is not PAGE_SIZE
 aligned in unmap_stage2_range

There is a panic in armv8a server(QDF2400) under memory pressure tests
(start 20 guests and run memhog in the host).

---------------------------------begin--------------------------------
[35380.800950] BUG: Bad page state in process qemu-kvm  pfn:dd0b6
[35380.805825] page:ffff7fe003742d80 count:-4871 mapcount:-2126053375
mapping:          (null) index:0x0
[35380.815024] flags: 0x1fffc00000000000()
[35380.818845] raw: 1fffc00000000000 0000000000000000 0000000000000000
ffffecf981470000
[35380.826569] raw: dead000000000100 dead000000000200 ffff8017c001c000
0000000000000000
[35380.805825] page:ffff7fe003742d80 count:-4871 mapcount:-2126053375
mapping:          (null) index:0x0
[35380.815024] flags: 0x1fffc00000000000()
[35380.818845] raw: 1fffc00000000000 0000000000000000 0000000000000000
ffffecf981470000
[35380.826569] raw: dead000000000100 dead000000000200 ffff8017c001c000
0000000000000000
[35380.834294] page dumped because: nonzero _refcount
[...]
--------------------------------end--------------------------------------

The root cause might be what was fixed at [1]. But from the KVM points of
view, it would be better if the issue was caught earlier.

If the size is not PAGE_SIZE aligned, unmap_stage2_range might unmap the
wrong(more or less) page range. Hence it caused the "BUG: Bad page
state"

Let's WARN in that case, so that the issue is obvious.

[1] https://lkml.org/lkml/2018/5/3/1042

Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: jia.he@hxt-semitech.com
[maz: tidied up commit message]
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/arm/mmu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 8d90de213ce9..1d90d79706bd 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -297,6 +297,8 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	phys_addr_t next;
 
 	assert_spin_locked(&kvm->mmu_lock);
+	WARN_ON(size & ~PAGE_MASK);
+
 	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
 		/*

From bdab125c9301a6ac538911ba68f665dfd075ec81 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 19 Jun 2018 18:43:41 +0900
Subject: [PATCH 149/215] Revert "kexec/purgatory: Add clean-up for purgatory
 directory"

Reverts the following commit:

  b0108f9e93d0 ("kexec: purgatory: add clean-up for purgatory directory")

... which incorrectly stated that the kexec-purgatory.c and purgatory.ro files
were not removed after 'make mrproper'.

In fact, they are.  You can confirm it after reverting it.

  $ make mrproper
  $ touch arch/x86/purgatory/kexec-purgatory.c
  $ touch arch/x86/purgatory/purgatory.ro
  $ make mrproper
    CLEAN   arch/x86/purgatory
  $ ls arch/x86/purgatory/
  entry64.S  Makefile  purgatory.c  setup-x86_64.S  stack.S  string.c

This is obvious from the build system point of view.

arch/x86/Makefile adds 'arch/x86' to core-y.
Hence 'make clean' descends like this:

  arch/x86/Kbuild
    -> arch/x86/purgatory/Makefile

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <michal.lkml@markovi.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/1529401422-28838-2-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 60135cbd905c..d6f404ae3d93 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -327,7 +327,6 @@ archclean:
 	$(Q)rm -rf $(objtree)/arch/x86_64
 	$(Q)$(MAKE) $(clean)=$(boot)
 	$(Q)$(MAKE) $(clean)=arch/x86/tools
-	$(Q)$(MAKE) $(clean)=arch/x86/purgatory
 
 define archhelp
   echo  '* bzImage      - Compressed kernel image (arch/x86/boot/bzImage)'

From d6605b6bbee88b74150b14f5e83a6067f5e323d2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 19 Jun 2018 18:43:42 +0900
Subject: [PATCH 150/215] x86/build: Remove unnecessary preparation for
 purgatory

kexec-purgatory.c is properly generated when Kbuild descend into
the arch/x86/purgatory/.

Thus the 'archprepare' target is redundant.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michal Marek <michal.lkml@markovi.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/1529401422-28838-3-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Makefile | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d6f404ae3d93..4fafba5df891 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -258,11 +258,6 @@ archscripts: scripts_basic
 archheaders:
 	$(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all
 
-archprepare:
-ifeq ($(CONFIG_KEXEC_FILE),y)
-	$(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
-endif
-
 ###
 # Kernel objects
 

From 6cb2b08ff92460290979de4be91363e5d1b6cec1 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Mon, 18 Jun 2018 09:59:54 +0200
Subject: [PATCH 151/215] x86/pti: Don't report XenPV as vulnerable

Xen PV domain kernel is not by design affected by meltdown as it's
enforcing split CR3 itself. Let's not report such systems as "Vulnerable"
in sysfs (we're also already forcing PTI to off in X86_HYPER_XEN_PV cases);
the security of the system ultimately depends on presence of mitigation in
the Hypervisor, which can't be easily detected from DomU; let's report
that.

Reported-and-tested-by: Mike Latimer <mlatimer@suse.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Juergen Gross <jgross@suse.com>
Cc: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1806180959080.6203@cbobk.fhfr.pm
[ Merge the user-visible string into a single line. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/bugs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index cd0fda1fff6d..404df26b7de8 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -27,6 +27,7 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 #include <asm/intel-family.h>
+#include <asm/hypervisor.h>
 
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
@@ -664,6 +665,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 		if (boot_cpu_has(X86_FEATURE_PTI))
 			return sprintf(buf, "Mitigation: PTI\n");
 
+		if (hypervisor_is_type(X86_HYPER_XEN_PV))
+			return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n");
+
 		break;
 
 	case X86_BUG_SPECTRE_V1:

From eab6870fee877258122a042bfd99ee7908c40280 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 7 Jun 2018 09:13:48 -0700
Subject: [PATCH 152/215] x86/spectre_v1: Disable compiler optimizations over
 array_index_mask_nospec()

Mark Rutland noticed that GCC optimization passes have the potential to elide
necessary invocations of the array_index_mask_nospec() instruction sequence,
so mark the asm() volatile.

Mark explains:

"The volatile will inhibit *some* cases where the compiler could lift the
 array_index_nospec() call out of a branch, e.g. where there are multiple
 invocations of array_index_nospec() with the same arguments:

        if (idx < foo) {
                idx1 = array_idx_nospec(idx, foo)
                do_something(idx1);
        }

        < some other code >

        if (idx < foo) {
                idx2 = array_idx_nospec(idx, foo);
                do_something_else(idx2);
        }

 ... since the compiler can determine that the two invocations yield the same
 result, and reuse the first result (likely the same register as idx was in
 originally) for the second branch, effectively re-writing the above as:

        if (idx < foo) {
                idx = array_idx_nospec(idx, foo);
                do_something(idx);
        }

        < some other code >

        if (idx < foo) {
                do_something_else(idx);
        }

 ... if we don't take the first branch, then speculatively take the second, we
 lose the nospec protection.

 There's more info on volatile asm in the GCC docs:

   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile
 "

Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@vger.kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: babdde2698d4 ("x86: Implement array_index_mask_nospec")
Link: https://lkml.kernel.org/lkml/152838798950.14521.4893346294059739135.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/barrier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 042b5e892ed1..14de0432d288 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -38,7 +38,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 {
 	unsigned long mask;
 
-	asm ("cmp %1,%2; sbb %0,%0;"
+	asm volatile ("cmp %1,%2; sbb %0,%0;"
 			:"=r" (mask)
 			:"g"(size),"r" (index)
 			:"cc");

From f642fb5864a6e3645edce6f85ffe7b44d5e9b990 Mon Sep 17 00:00:00 2001
From: "mike.travis@hpe.com" <mike.travis@hpe.com>
Date: Thu, 24 May 2018 15:17:12 -0500
Subject: [PATCH 153/215] x86/platform/UV: Add adjustable set memory block size
 function

Add a new function to "adjust" the current fixed UV memory block size
of 2GB so it can be changed to a different physical boundary.  This is
out of necessity so arch dependent code can accommodate specific BIOS
requirements which can align these new PMEM modules at less than the
default boundaries.

A "set order" type of function was used to insure that the memory block
size will be a power of two value without requiring a validity check.
64GB was chosen as the upper limit for memory block size values to
accommodate upcoming 4PB systems which have 6 more bits of physical
address space (46 becoming 52).

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Reviewed-by: Andrew Banman <andrew.banman@hpe.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russ Anderson <russ.anderson@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dan.j.williams@intel.com
Cc: jgross@suse.com
Cc: kirill.shutemov@linux.intel.com
Cc: mhocko@suse.com
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/lkml/20180524201711.609546602@stormcage.americas.sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/init_64.c  | 20 ++++++++++++++++----
 include/linux/memory.h |  1 +
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0a400606dea0..20d8bf5fbceb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1350,16 +1350,28 @@ int kern_addr_valid(unsigned long addr)
 /* Amount of ram needed to start using large blocks */
 #define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30)
 
+/* Adjustable memory block size */
+static unsigned long set_memory_block_size;
+int __init set_memory_block_size_order(unsigned int order)
+{
+	unsigned long size = 1UL << order;
+
+	if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE)
+		return -EINVAL;
+
+	set_memory_block_size = size;
+	return 0;
+}
+
 static unsigned long probe_memory_block_size(void)
 {
 	unsigned long boot_mem_end = max_pfn << PAGE_SHIFT;
 	unsigned long bz;
 
-	/* If this is UV system, always set 2G block size */
-	if (is_uv_system()) {
-		bz = MAX_BLOCK_SIZE;
+	/* If memory block size has been set, then use it */
+	bz = set_memory_block_size;
+	if (bz)
 		goto done;
-	}
 
 	/* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */
 	if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) {
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 31ca3e28b0eb..a6ddefc60517 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -38,6 +38,7 @@ struct memory_block {
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
 unsigned long memory_block_size_bytes(void);
+int set_memory_block_size_order(unsigned int order);
 
 /* These states are exposed to userspace as text strings in sysfs */
 #define	MEM_ONLINE		(1<<0) /* exposed to userspace */

From bbbd2b51a2aa0d76b3676271e216cf3647773397 Mon Sep 17 00:00:00 2001
From: "mike.travis@hpe.com" <mike.travis@hpe.com>
Date: Thu, 24 May 2018 15:17:13 -0500
Subject: [PATCH 154/215] x86/platform/UV: Use new set memory block size
 function

Add a call to the new function to "adjust" the current fixed UV memory
block size of 2GB so it can be changed to a different physical boundary.
This accommodates changes in the Intel BIOS, and therefore UV BIOS,
which now can align boundaries different than the previous UV standard
of 2GB.  It also flags any UV Global Address boundaries from BIOS that
cause a change in the mem block size (boundary).

The current boundary of 2GB has been used on UV since the first system
release in 2009 with Linux 2.6 and has worked fine.  But the new NVDIMM
persistent memory modules (PMEM), along with the Intel BIOS changes to
support these modules caused the memory block size boundary to be set
to a lower limit.  Intel only guarantees that this minimum boundary at
64MB though the current Linux limit is 128MB.

Note that the default remains 2GB if no changes occur.

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Reviewed-by: Andrew Banman <andrew.banman@hpe.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russ Anderson <russ.anderson@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dan.j.williams@intel.com
Cc: jgross@suse.com
Cc: kirill.shutemov@linux.intel.com
Cc: mhocko@suse.com
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/lkml/20180524201711.732785782@stormcage.americas.sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 49 ++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index efaf2d4f9c3c..2270a777d647 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -26,6 +26,7 @@
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
 #include <linux/reboot.h>
+#include <linux/memory.h>
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
@@ -392,6 +393,40 @@ extern int uv_hub_info_version(void)
 }
 EXPORT_SYMBOL(uv_hub_info_version);
 
+/* Default UV memory block size is 2GB */
+static unsigned long mem_block_size = (2UL << 30);
+
+static __init int adj_blksize(u32 lgre)
+{
+	unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;
+	unsigned long size;
+
+	for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1)
+		if (IS_ALIGNED(base, size))
+			break;
+
+	if (size >= mem_block_size)
+		return 0;
+
+	mem_block_size = size;
+	return 1;
+}
+
+static __init void set_block_size(void)
+{
+	unsigned int order = ffs(mem_block_size);
+
+	if (order) {
+		/* adjust for ffs return of 1..64 */
+		set_memory_block_size_order(order - 1);
+		pr_info("UV: mem_block_size set to 0x%lx\n", mem_block_size);
+	} else {
+		/* bad or zero value, default to 1UL << 31 (2GB) */
+		pr_err("UV: mem_block_size error with 0x%lx\n", mem_block_size);
+		set_memory_block_size_order(31);
+	}
+}
+
 /* Build GAM range lookup table: */
 static __init void build_uv_gr_table(void)
 {
@@ -1180,23 +1215,30 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
 					<< UV_GAM_RANGE_SHFT);
 		int order = 0;
 		char suffix[] = " KMGTPE";
+		int flag = ' ';
 
 		while (size > 9999 && order < sizeof(suffix)) {
 			size /= 1024;
 			order++;
 		}
 
+		/* adjust max block size to current range start */
+		if (gre->type == 1 || gre->type == 2)
+			if (adj_blksize(lgre))
+				flag = '*';
+
 		if (!index) {
 			pr_info("UV: GAM Range Table...\n");
-			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
+			pr_info("UV:  # %20s %14s %6s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
 		}
-		pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d   %04x  %02x %02x\n",
+		pr_info("UV: %2d: 0x%014lx-0x%014lx%c %5lu%c %3d   %04x  %02x %02x\n",
 			index++,
 			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
 			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
-			size, suffix[order],
+			flag, size, suffix[order],
 			gre->type, gre->nasid, gre->sockid, gre->pnode);
 
+		/* update to next range start */
 		lgre = gre->limit;
 		if (sock_min > gre->sockid)
 			sock_min = gre->sockid;
@@ -1427,6 +1469,7 @@ static void __init uv_system_init_hub(void)
 
 	build_socket_tables();
 	build_uv_gr_table();
+	set_block_size();
 	uv_init_hub_info(&hub_info);
 	uv_possible_blades = num_possible_nodes();
 	if (!_node_to_pnode)

From d7609f4210cb716c11abfe2bfb5997191095d00b Mon Sep 17 00:00:00 2001
From: "mike.travis@hpe.com" <mike.travis@hpe.com>
Date: Thu, 24 May 2018 15:17:14 -0500
Subject: [PATCH 155/215] x86/platform/UV: Add kernel parameter to set memory
 block size

Add a kernel parameter that allows setting UV memory block size.  This
is to provide an adjustment for new forms of PMEM and other DIMM memory
that might require alignment restrictions other than scanning the global
address table for the required minimum alignment.  The value set will be
further adjusted by both the GAM range table scan as well as restrictions
imposed by set_memory_block_size_order().

Signed-off-by: Mike Travis <mike.travis@hpe.com>
Reviewed-by: Andrew Banman <andrew.banman@hpe.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dimitri Sivanich <dimitri.sivanich@hpe.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russ Anderson <russ.anderson@hpe.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dan.j.williams@intel.com
Cc: jgross@suse.com
Cc: kirill.shutemov@linux.intel.com
Cc: mhocko@suse.com
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/lkml/20180524201711.854849120@stormcage.americas.sgi.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 2270a777d647..d492752f79e1 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -396,6 +396,17 @@ EXPORT_SYMBOL(uv_hub_info_version);
 /* Default UV memory block size is 2GB */
 static unsigned long mem_block_size = (2UL << 30);
 
+/* Kernel parameter to specify UV mem block size */
+static int parse_mem_block_size(char *ptr)
+{
+	unsigned long size = memparse(ptr, NULL);
+
+	/* Size will be rounded down by set_block_size() below */
+	mem_block_size = size;
+	return 0;
+}
+early_param("uv_memblksize", parse_mem_block_size);
+
 static __init int adj_blksize(u32 lgre)
 {
 	unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;

From 9f9cafc14016f23f982d3ce18f9057923bd3037a Mon Sep 17 00:00:00 2001
From: Jianchao Wang <jianchao.w.wang@oracle.com>
Date: Wed, 20 Jun 2018 13:42:22 +0800
Subject: [PATCH 156/215] nvme-pci: move nvme_kill_queues to
 nvme_remove_dead_ctrl

There is race between nvme_remove and nvme_reset_work that can
lead to io hang.

nvme_remove                    nvme_reset_work
                               -> nvme_remove_dead_ctrl
                                 -> nvme_dev_disable
                                   -> quiesce request_queue
                                 -> queue remove_work
-> cancel_work_sync reset_work
-> nvme_remove_namespaces
  -> splice ctrl->namespaces
                               nvme_remove_dead_ctrl_work
                               -> nvme_kill_queues
  -> nvme_ns_remove               do nothing
    -> blk_cleanup_queue
      -> blk_freeze_queue

Finally, the request_queue is quiesced state when wait freeze,
we will get io hang here. To fix it, move the nvme_kill_queues
from nvme_remove_dead_ctrl_work to nvme_remove_dead_ctrl.

Suggested-by: Keith Busch <keith.busch@linux.intel.com>
Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fc33804662e7..73a97fcea364 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2289,6 +2289,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
 
 	nvme_get_ctrl(&dev->ctrl);
 	nvme_dev_disable(dev, false);
+	nvme_kill_queues(&dev->ctrl);
 	if (!queue_work(nvme_wq, &dev->remove_work))
 		nvme_put_ctrl(&dev->ctrl);
 }
@@ -2405,7 +2406,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-	nvme_kill_queues(&dev->ctrl);
 	if (pci_get_drvdata(pdev))
 		device_release_driver(&pdev->dev);
 	nvme_put_ctrl(&dev->ctrl);

From 90718e32e1dcc2479acfa208ccfc6442850b594c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 18 May 2018 18:27:39 +0200
Subject: [PATCH 157/215] uprobes/x86: Remove incorrect WARN_ON() in
 uprobe_init_insn()

insn_get_length() has the side-effect of processing the entire instruction
but only if it was decoded successfully, otherwise insn_complete() can fail
and in this case we need to just return an error without warning.

Reported-by: syzbot+30d675e3ca03c1c351e7@syzkaller.appspotmail.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: syzkaller-bugs@googlegroups.com
Link: https://lkml.kernel.org/lkml/20180518162739.GA5559@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/uprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 58d8d800875d..deb576b23b7c 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -293,7 +293,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
 	insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64);
 	/* has the side-effect of processing the entire instruction */
 	insn_get_length(insn);
-	if (WARN_ON_ONCE(!insn_complete(insn)))
+	if (!insn_complete(insn))
 		return -ENOEXEC;
 
 	if (is_prefix_bad(insn))

From 8730662d7b2582f65dd6c59ab1e0b7fa461c79b0 Mon Sep 17 00:00:00 2001
From: Wei Wang <wvw@google.com>
Date: Tue, 24 Apr 2018 14:22:38 -0700
Subject: [PATCH 158/215] kernel.h: Fix a typo in comment

Signed-off-by: Wei Wang <wvw@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Crt Mori <cmo@melexis.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: gregkh@linuxfoundation.org
Cc: wei.vince.wang@gmail.com
Link: https://lkml.kernel.org/lkml/20180424212241.16013-1-wvw@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d23123238534..941dc0a5a877 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -666,7 +666,7 @@ do {									\
  * your code. (Extra memory is used for special buffers that are
  * allocated when trace_printk() is used.)
  *
- * A little optization trick is done here. If there's only one
+ * A little optimization trick is done here. If there's only one
  * argument, there's no need to scan the string for printf formats.
  * The trace_puts() will suffice. But how can we take advantage of
  * using trace_puts() when trace_printk() has only one argument?

From 7ddfd3e0df29106c728dda2a6bd6591ee43a4e3c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Sun, 17 Jun 2018 10:16:21 +0100
Subject: [PATCH 159/215] KVM: Enforce error in ioctl for compat tasks when
 !KVM_COMPAT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The current behaviour of the compat ioctls is a bit odd.
We provide a compat_ioctl method when KVM_COMPAT is set, and NULL
otherwise. But NULL means that the normal, non-compat ioctl should
be used directly for compat tasks, and there is no way to actually
prevent a compat task from issueing KVM ioctls.

This patch changes this behaviour, by always registering a compat_ioctl
method, even if KVM_COMPAT is not selected. In that case, the callback
will always return -EINVAL.

Fixes: de8e5d744051568c8aad ("KVM: Disable compat ioctl for s390")
Reported-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/kvm_main.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ada21f47f22b..8b47507faab5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -116,6 +116,11 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 #ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
 				  unsigned long arg);
+#define KVM_COMPAT(c)	.compat_ioctl	= (c)
+#else
+static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
+				unsigned long arg) { return -EINVAL; }
+#define KVM_COMPAT(c)	.compat_ioctl	= kvm_no_compat_ioctl
 #endif
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
@@ -2396,11 +2401,9 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 static struct file_operations kvm_vcpu_fops = {
 	.release        = kvm_vcpu_release,
 	.unlocked_ioctl = kvm_vcpu_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-	.compat_ioctl   = kvm_vcpu_compat_ioctl,
-#endif
 	.mmap           = kvm_vcpu_mmap,
 	.llseek		= noop_llseek,
+	KVM_COMPAT(kvm_vcpu_compat_ioctl),
 };
 
 /*
@@ -2824,10 +2827,8 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
 
 static const struct file_operations kvm_device_fops = {
 	.unlocked_ioctl = kvm_device_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-	.compat_ioctl = kvm_device_ioctl,
-#endif
 	.release = kvm_device_release,
+	KVM_COMPAT(kvm_device_ioctl),
 };
 
 struct kvm_device *kvm_device_from_filp(struct file *filp)
@@ -3165,10 +3166,8 @@ static long kvm_vm_compat_ioctl(struct file *filp,
 static struct file_operations kvm_vm_fops = {
 	.release        = kvm_vm_release,
 	.unlocked_ioctl = kvm_vm_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-	.compat_ioctl   = kvm_vm_compat_ioctl,
-#endif
 	.llseek		= noop_llseek,
+	KVM_COMPAT(kvm_vm_compat_ioctl),
 };
 
 static int kvm_dev_ioctl_create_vm(unsigned long type)
@@ -3259,8 +3258,8 @@ static long kvm_dev_ioctl(struct file *filp,
 
 static struct file_operations kvm_chardev_ops = {
 	.unlocked_ioctl = kvm_dev_ioctl,
-	.compat_ioctl   = kvm_dev_ioctl,
 	.llseek		= noop_llseek,
+	KVM_COMPAT(kvm_dev_ioctl),
 };
 
 static struct miscdevice kvm_dev = {

From 37b65db85f9b2fc98267eee4a18d7506492e6e8c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Sun, 17 Jun 2018 10:22:57 +0100
Subject: [PATCH 160/215] KVM: arm64: Prevent KVM_COMPAT from being selected

There is very little point in trying to support the 32bit KVM/arm API
on arm64, and this was never an anticipated use case.

Let's make it clear by not selecting KVM_COMPAT.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 virt/kvm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 72143cfaf6ec..ea434ddc8499 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -47,7 +47,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
 
 config KVM_COMPAT
        def_bool y
-       depends on KVM && COMPAT && !S390
+       depends on KVM && COMPAT && !(S390 || ARM64)
 
 config HAVE_KVM_IRQ_BYPASS
        bool

From fcc784be837714a9173b372ff9fb9b514590dad9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 4 Apr 2018 14:06:30 -0400
Subject: [PATCH 161/215] locking/lockdep: Do not record IRQ state within
 lockdep code

While debugging where things were going wrong with mapping
enabling/disabling interrupts with the lockdep state and actual real
enabling and disabling interrupts, I had to silent the IRQ
disabling/enabling in debug_check_no_locks_freed() because it was
always showing up as it was called before the splat was.

Use raw_local_irq_save/restore() for not only debug_check_no_locks_freed()
but for all internal lockdep functions, as they hide useful information
about where interrupts were used incorrectly last.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/lkml/20180404140630.3f4f4c7a@gandalf.local.home
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/lockdep.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index edcac5de7ebc..5fa4d3138bf1 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1265,11 +1265,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
 	this.parent = NULL;
 	this.class = class;
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	arch_spin_lock(&lockdep_lock);
 	ret = __lockdep_count_forward_deps(&this);
 	arch_spin_unlock(&lockdep_lock);
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 
 	return ret;
 }
@@ -1292,11 +1292,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
 	this.parent = NULL;
 	this.class = class;
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	arch_spin_lock(&lockdep_lock);
 	ret = __lockdep_count_backward_deps(&this);
 	arch_spin_unlock(&lockdep_lock);
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 
 	return ret;
 }
@@ -4411,7 +4411,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
 	if (unlikely(!debug_locks))
 		return;
 
-	local_irq_save(flags);
+	raw_local_irq_save(flags);
 	for (i = 0; i < curr->lockdep_depth; i++) {
 		hlock = curr->held_locks + i;
 
@@ -4422,7 +4422,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
 		print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock);
 		break;
 	}
-	local_irq_restore(flags);
+	raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
 

From 943e942e6266f22babee5efeb00f8f672fbff5bd Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 21 Jun 2018 09:49:37 -0600
Subject: [PATCH 162/215] nvme-pci: limit max IO size and segments to avoid
 high order allocations

nvme requires an sg table allocation for each request. If the request
is large, then the allocation can become quite large. For instance,
with our default software settings of 1280KB IO size, we'll need
10248 bytes of sg table. That turns into a 2nd order allocation,
which we can't always guarantee. If we fail the allocation, blk-mq
will retry it later. But there's no guarantee that we'll EVER be
able to allocate that much contigious memory.

Limit the IO size such that we never need more than a single page
of memory. That's a lot faster and more reliable. Then back that
allocation with a mempool, so that we know we'll always be able
to succeed the allocation at some point.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c |  1 +
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/pci.c  | 42 +++++++++++++++++++++++++++++++++++-----
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 21710a7460c8..46df030b2c3f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1808,6 +1808,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 		u32 max_segments =
 			(ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
 
+		max_segments = min_not_zero(max_segments, ctrl->max_segments);
 		blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
 		blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
 	}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 231807cbc849..0c4a33df3b2f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -170,6 +170,7 @@ struct nvme_ctrl {
 	u64 cap;
 	u32 page_size;
 	u32 max_hw_sectors;
+	u32 max_segments;
 	u16 oncs;
 	u16 oacs;
 	u16 nssa;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 73a97fcea364..ba943f211687 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -38,6 +38,13 @@
 
 #define SGES_PER_PAGE	(PAGE_SIZE / sizeof(struct nvme_sgl_desc))
 
+/*
+ * These can be higher, but we need to ensure that any command doesn't
+ * require an sg allocation that needs more than a page of data.
+ */
+#define NVME_MAX_KB_SZ	4096
+#define NVME_MAX_SEGS	127
+
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
@@ -100,6 +107,8 @@ struct nvme_dev {
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
 
+	mempool_t *iod_mempool;
+
 	/* shadow doorbell buffer support: */
 	u32 *dbbuf_dbs;
 	dma_addr_t dbbuf_dbs_dma_addr;
@@ -477,10 +486,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	iod->use_sgl = nvme_pci_use_sgls(dev, rq);
 
 	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
-		size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
-				iod->use_sgl);
-
-		iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
+		iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
 		if (!iod->sg)
 			return BLK_STS_RESOURCE;
 	} else {
@@ -526,7 +532,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 	}
 
 	if (iod->sg != iod->inline_sg)
-		kfree(iod->sg);
+		mempool_free(iod->sg, dev->iod_mempool);
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
@@ -2280,6 +2286,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 		blk_put_queue(dev->ctrl.admin_q);
 	kfree(dev->queues);
 	free_opal_dev(dev->ctrl.opal_dev);
+	mempool_destroy(dev->iod_mempool);
 	kfree(dev);
 }
 
@@ -2334,6 +2341,13 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
+	/*
+	 * Limit the max command size to prevent iod->sg allocations going
+	 * over a single page.
+	 */
+	dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+	dev->ctrl.max_segments = NVME_MAX_SEGS;
+
 	result = nvme_init_identify(&dev->ctrl);
 	if (result)
 		goto out;
@@ -2509,6 +2523,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	int node, result = -ENOMEM;
 	struct nvme_dev *dev;
 	unsigned long quirks = id->driver_data;
+	size_t alloc_size;
 
 	node = dev_to_node(&pdev->dev);
 	if (node == NUMA_NO_NODE)
@@ -2546,6 +2561,23 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto release_pools;
 
+	/*
+	 * Double check that our mempool alloc size will cover the biggest
+	 * command we support.
+	 */
+	alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
+						NVME_MAX_SEGS, true);
+	WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+
+	dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
+						mempool_kfree,
+						(void *) alloc_size,
+						GFP_KERNEL, node);
+	if (!dev->iod_mempool) {
+		result = -ENOMEM;
+		goto release_pools;
+	}
+
 	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
 	nvme_get_ctrl(&dev->ctrl);

From 70303420b5721c38998cf987e6b7d30cc62d4ff1 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 21 Jun 2018 13:20:53 -0400
Subject: [PATCH 163/215] tracing: Check for no filter when processing event
 filters

The syzkaller detected a out-of-bounds issue with the events filter code,
specifically here:

	prog[N].pred = NULL;					/* #13 */
	prog[N].target = 1;		/* TRUE */
	prog[N+1].pred = NULL;
	prog[N+1].target = 0;		/* FALSE */
->	prog[N-1].target = N;
	prog[N-1].when_to_branch = false;

As that's the first reference to a "N-1" index, it appears that the code got
here with N = 0, which means the filter parser found no filter to parse
(which shouldn't ever happen, but apparently it did).

Add a new error to the parsing code that will check to make sure that N is
not zero before going into this part of the code. If N = 0, then -EINVAL is
returned, and a error message is added to the filter.

Cc: stable@vger.kernel.org
Fixes: 80765597bc587 ("tracing: Rewrite filter logic to be simpler and faster")
Reported-by: air icy <icytxw@gmail.com>
bugzilla url: https://bugzilla.kernel.org/show_bug.cgi?id=200019
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_filter.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index e1c818dbc0d7..0dceb77d1d42 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -78,7 +78,8 @@ static const char * ops[] = { OPS };
 	C(TOO_MANY_PREDS,	"Too many terms in predicate expression"), \
 	C(INVALID_FILTER,	"Meaningless filter expression"),	\
 	C(IP_FIELD_ONLY,	"Only 'ip' field is supported for function trace"), \
-	C(INVALID_VALUE,	"Invalid value (did you forget quotes)?"),
+	C(INVALID_VALUE,	"Invalid value (did you forget quotes)?"), \
+	C(NO_FILTER,		"No filter found"),
 
 #undef C
 #define C(a, b)		FILT_ERR_##a
@@ -550,6 +551,13 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
 		goto out_free;
 	}
 
+	if (!N) {
+		/* No program? */
+		ret = -EINVAL;
+		parse_error(pe, FILT_ERR_NO_FILTER, ptr - str);
+		goto out_free;
+	}
+
 	prog[N].pred = NULL;					/* #13 */
 	prog[N].target = 1;		/* TRUE */
 	prog[N+1].pred = NULL;

From 1a63dcd8765bc8680481dc2f9acf6ef13cee6d27 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Thu, 7 Jun 2018 13:11:43 -0700
Subject: [PATCH 164/215] softirq: Reorder trace_softirqs_on to prevent lockdep
 splat

I'm able to reproduce a lockdep splat with config options:
CONFIG_PROVE_LOCKING=y,
CONFIG_DEBUG_LOCK_ALLOC=y and
CONFIG_PREEMPTIRQ_EVENTS=y

$ echo 1 > /d/tracing/events/preemptirq/preempt_enable/enable

[   26.112609] DEBUG_LOCKS_WARN_ON(current->softirqs_enabled)
[   26.112636] WARNING: CPU: 0 PID: 118 at kernel/locking/lockdep.c:3854
[...]
[   26.144229] Call Trace:
[   26.144926]  <IRQ>
[   26.145506]  lock_acquire+0x55/0x1b0
[   26.146499]  ? __do_softirq+0x46f/0x4d9
[   26.147571]  ? __do_softirq+0x46f/0x4d9
[   26.148646]  trace_preempt_on+0x8f/0x240
[   26.149744]  ? trace_preempt_on+0x4d/0x240
[   26.150862]  ? __do_softirq+0x46f/0x4d9
[   26.151930]  preempt_count_sub+0x18a/0x1a0
[   26.152985]  __do_softirq+0x46f/0x4d9
[   26.153937]  irq_exit+0x68/0xe0
[   26.154755]  smp_apic_timer_interrupt+0x271/0x280
[   26.156056]  apic_timer_interrupt+0xf/0x20
[   26.157105]  </IRQ>

The issue was this:

preempt_count = 1 << SOFTIRQ_SHIFT

	__local_bh_enable(cnt = 1 << SOFTIRQ_SHIFT) {
		if (softirq_count() == (cnt && SOFTIRQ_MASK)) {
			trace_softirqs_on() {
				current->softirqs_enabled = 1;
			}
		}
		preempt_count_sub(cnt) {
			trace_preempt_on() {
				tracepoint() {
					rcu_read_lock_sched() {
						// jumps into lockdep

Where preempt_count still has softirqs disabled, but
current->softirqs_enabled is true, and we get a splat.

Link: http://lkml.kernel.org/r/20180607201143.247775-1-joel@joelfernandes.org

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Tom Zanussi <tom.zanussi@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Glexiner <tglx@linutronix.de>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Todd Kjos <tkjos@google.com>
Cc: Erick Reyes <erickreyes@google.com>
Cc: Julia Cartwright <julia@ni.com>
Cc: Byungchul Park <byungchul.park@lge.com>
Cc: stable@vger.kernel.org
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Fixes: d59158162e032 ("tracing: Add support for preempt and irq enable/disable events")
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/softirq.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index de2f57fddc04..900dcfee542c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -139,9 +139,13 @@ static void __local_bh_enable(unsigned int cnt)
 {
 	lockdep_assert_irqs_disabled();
 
+	if (preempt_count() == cnt)
+		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
+
 	if (softirq_count() == (cnt & SOFTIRQ_MASK))
 		trace_softirqs_on(_RET_IP_);
-	preempt_count_sub(cnt);
+
+	__preempt_count_sub(cnt);
 }
 
 /*

From 08ae88f8104f486fd4103854119169f3e55dbc4c Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 9 Feb 2018 11:53:16 -0600
Subject: [PATCH 165/215] tracing: Use swap macro in update_max_tr

Make use of the swap macro and remove unnecessary variable _buf_.
This makes the code easier to read and maintain. Also, reduces the
stack usage.

This code was detected with the help of Coccinelle.

Link: http://lkml.kernel.org/r/20180209175316.GA18720@embeddedgus

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c9336e98ac59..a0079b4c7a49 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1360,8 +1360,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 void
 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-	struct ring_buffer *buf;
-
 	if (tr->stop_count)
 		return;
 
@@ -1375,9 +1373,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 	arch_spin_lock(&tr->max_lock);
 
-	buf = tr->trace_buffer.buffer;
-	tr->trace_buffer.buffer = tr->max_buffer.buffer;
-	tr->max_buffer.buffer = buf;
+	swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
 
 	__update_max_tr(tr, tsk, cpu);
 	arch_spin_unlock(&tr->max_lock);

From 064f35a952246c60e956717dfc5782c48f174e74 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Thu, 14 Jun 2018 15:48:59 -0700
Subject: [PATCH 166/215] tracing: Fix some errors in histogram documentation

Fix typos, inconsistencies in using quotes, incorrect section number,
etc. in the trace histogram documentation.

Link: http://lkml.kernel.org/r/20180614224859.55864-1-joel@joelfernandes.org

Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Tom Zanussi <tom.zanussi@linux.intel.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 Documentation/trace/histogram.txt | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt
index e73bcf9cb5f3..7ffea6aa22e3 100644
--- a/Documentation/trace/histogram.txt
+++ b/Documentation/trace/histogram.txt
@@ -1729,35 +1729,35 @@ If a variable isn't a key variable or prefixed with 'vals=', the
 associated event field will be saved in a variable but won't be summed
 as a value:
 
-  # echo 'hist:keys=next_pid:ts1=common_timestamp ... >> event/trigger
+  # echo 'hist:keys=next_pid:ts1=common_timestamp ...' >> event/trigger
 
 Multiple variables can be assigned at the same time.  The below would
 result in both ts0 and b being created as variables, with both
 common_timestamp and field1 additionally being summed as values:
 
-  # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ... >> \
+  # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ...' >> \
 	event/trigger
 
 Note that variable assignments can appear either preceding or
 following their use.  The command below behaves identically to the
 command above:
 
-  # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ... >> \
+  # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ...' >> \
 	event/trigger
 
 Any number of variables not bound to a 'vals=' prefix can also be
 assigned by simply separating them with colons.  Below is the same
 thing but without the values being summed in the histogram:
 
-  # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ... >> event/trigger
+  # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ...' >> event/trigger
 
 Variables set as above can be referenced and used in expressions on
 another event.
 
 For example, here's how a latency can be calculated:
 
-  # echo 'hist:keys=pid,prio:ts0=common_timestamp ... >> event1/trigger
-  # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ... >> event2/trigger
+  # echo 'hist:keys=pid,prio:ts0=common_timestamp ...' >> event1/trigger
+  # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ...' >> event2/trigger
 
 In the first line above, the event's timetamp is saved into the
 variable ts0.  In the next line, ts0 is subtracted from the second
@@ -1766,7 +1766,7 @@ yet another variable, 'wakeup_lat'.  The hist trigger below in turn
 makes use of the wakeup_lat variable to compute a combined latency
 using the same key and variable from yet another event:
 
-  # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ... >> event3/trigger
+  # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger
 
 2.2.2 Synthetic Events
 ----------------------
@@ -1807,10 +1807,11 @@ the command that defined it with a '!':
 At this point, there isn't yet an actual 'wakeup_latency' event
 instantiated in the event subsytem - for this to happen, a 'hist
 trigger action' needs to be instantiated and bound to actual fields
-and variables defined on other events (see Section 6.3.3 below).
+and variables defined on other events (see Section 2.2.3 below on
+how that is done using hist trigger 'onmatch' action). Once that is
+done, the 'wakeup_latency' synthetic event instance is created.
 
-Once that is done, an event instance is created, and a histogram can
-be defined using it:
+A histogram can now be defined for the new synthetic event:
 
   # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
         /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
@@ -1960,7 +1961,7 @@ hist trigger specification.
     back to that pid, the timestamp difference is calculated.  If the
     resulting latency, stored in wakeup_lat, exceeds the current
     maximum latency, the values specified in the save() fields are
-    recoreded:
+    recorded:
 
     # echo 'hist:keys=pid:ts0=common_timestamp.usecs \
             if comm=="cyclictest"' >> \

From ed7d40bc67b8353c677b38c6cdddcdc310c0f452 Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Fri, 8 Jun 2018 14:47:46 -0700
Subject: [PATCH 167/215] tracing: Fix SKIP_STACK_VALIDATION=1 build due to bad
 merge with -mrecord-mcount

Non gcc-5 builds with CONFIG_STACK_VALIDATION=y and
SKIP_STACK_VALIDATION=1 fail.
Example output:
  /bin/sh: init/.tmp_main.o: Permission denied

commit 96f60dfa5819 ("trace: Use -mcount-record for dynamic ftrace"),
added a mismatched endif.  This causes cmd_objtool to get mistakenly
set.

Relocate endif to balance the newly added -record-mcount check.

Link: http://lkml.kernel.org/r/20180608214746.136554-1-gthelen@google.com

Fixes: 96f60dfa5819 ("trace: Use -mcount-record for dynamic ftrace")
Acked-by: Andi Kleen <ak@linux.intel.com>
Tested-by: David Rientjes <rientjes@google.com>
Signed-off-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 scripts/Makefile.build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 34d9e9ce97c2..e7889f486ca1 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -239,6 +239,7 @@ cmd_record_mcount =						\
 	     "$(CC_FLAGS_FTRACE)" ]; then			\
 		$(sub_cmd_record_mcount)			\
 	fi;
+endif # -record-mcount
 endif # CONFIG_FTRACE_MCOUNT_RECORD
 
 ifdef CONFIG_STACK_VALIDATION
@@ -263,7 +264,6 @@ ifneq ($(RETPOLINE_CFLAGS),)
   objtool_args += --retpoline
 endif
 endif
-endif
 
 
 ifdef CONFIG_MODVERSIONS

From 6cc65be4f6f2a7186af8f3e09900787c7912dad2 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 21 Jun 2018 20:35:26 -0400
Subject: [PATCH 168/215] locking/qspinlock: Fix build for anonymous union in
 older GCC compilers

One of my tests compiles the kernel with gcc 4.5.3, and I hit the
following build error:

  include/linux/semaphore.h: In function 'sema_init':
  include/linux/semaphore.h:35:17: error: unknown field 'val' specified in initializer
  include/linux/semaphore.h:35:17: warning: missing braces around initializer
  include/linux/semaphore.h:35:17: warning: (near initialization for '(anonymous).raw_lock.<anonymous>.val')

I bisected it down to:

 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'")

... which makes qspinlock have an anonymous union, which makes initializing it special
for older compilers. By adding strategic brackets, it makes the build
happy again.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Waiman Long <longman@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Fixes: 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'")
Link: http://lkml.kernel.org/r/20180621203526.172ab5c4@vmware.local.home
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/qspinlock_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index 0763f065b975..d10f1e7d6ba8 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -63,7 +63,7 @@ typedef struct qspinlock {
 /*
  * Initializier
  */
-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ .val = ATOMIC_INIT(0) }
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ { .val = ATOMIC_INIT(0) } }
 
 /*
  * Bitfields in the atomic value:

From c51b3c639e01f20559531eef3c5919feae23c55a Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 18 Jun 2018 09:36:39 +0200
Subject: [PATCH 169/215] xen: add new hypercall buffer mapping device

For passing arbitrary data from user land to the Xen hypervisor the
Xen tools today are using mlock()ed buffers. Unfortunately the kernel
might change access rights of such buffers for brief periods of time
e.g. for page migration or compaction, leading to access faults in the
hypervisor, as the hypervisor can't use the locks of the kernel.

In order to solve this problem add a new device node to the Xen privcmd
driver to easily allocate hypercall buffers via mmap(). The memory is
allocated in the kernel and just mapped into user space. Marked as
VM_IO the user mapping will not be subject to page migration et al.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/Makefile      |   2 +-
 drivers/xen/privcmd-buf.c | 210 ++++++++++++++++++++++++++++++++++++++
 drivers/xen/privcmd.c     |   9 ++
 drivers/xen/privcmd.h     |   3 +
 4 files changed, 223 insertions(+), 1 deletion(-)
 create mode 100644 drivers/xen/privcmd-buf.c

diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 451e833f5931..48b154276179 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -41,4 +41,4 @@ obj-$(CONFIG_XEN_PVCALLS_FRONTEND)	+= pvcalls-front.o
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
-xen-privcmd-y				:= privcmd.o
+xen-privcmd-y				:= privcmd.o privcmd-buf.o
diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c
new file mode 100644
index 000000000000..df1ed37c3269
--- /dev/null
+++ b/drivers/xen/privcmd-buf.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/******************************************************************************
+ * privcmd-buf.c
+ *
+ * Mmap of hypercall buffers.
+ *
+ * Copyright (c) 2018 Juergen Gross
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "privcmd.h"
+
+MODULE_LICENSE("GPL");
+
+static unsigned int limit = 64;
+module_param(limit, uint, 0644);
+MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by "
+			"the privcmd-buf device per open file");
+
+struct privcmd_buf_private {
+	struct mutex lock;
+	struct list_head list;
+	unsigned int allocated;
+};
+
+struct privcmd_buf_vma_private {
+	struct privcmd_buf_private *file_priv;
+	struct list_head list;
+	unsigned int users;
+	unsigned int n_pages;
+	struct page *pages[];
+};
+
+static int privcmd_buf_open(struct inode *ino, struct file *file)
+{
+	struct privcmd_buf_private *file_priv;
+
+	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
+	if (!file_priv)
+		return -ENOMEM;
+
+	mutex_init(&file_priv->lock);
+	INIT_LIST_HEAD(&file_priv->list);
+
+	file->private_data = file_priv;
+
+	return 0;
+}
+
+static void privcmd_buf_vmapriv_free(struct privcmd_buf_vma_private *vma_priv)
+{
+	unsigned int i;
+
+	vma_priv->file_priv->allocated -= vma_priv->n_pages;
+
+	list_del(&vma_priv->list);
+
+	for (i = 0; i < vma_priv->n_pages; i++)
+		if (vma_priv->pages[i])
+			__free_page(vma_priv->pages[i]);
+
+	kfree(vma_priv);
+}
+
+static int privcmd_buf_release(struct inode *ino, struct file *file)
+{
+	struct privcmd_buf_private *file_priv = file->private_data;
+	struct privcmd_buf_vma_private *vma_priv;
+
+	mutex_lock(&file_priv->lock);
+
+	while (!list_empty(&file_priv->list)) {
+		vma_priv = list_first_entry(&file_priv->list,
+					    struct privcmd_buf_vma_private,
+					    list);
+		privcmd_buf_vmapriv_free(vma_priv);
+	}
+
+	mutex_unlock(&file_priv->lock);
+
+	kfree(file_priv);
+
+	return 0;
+}
+
+static void privcmd_buf_vma_open(struct vm_area_struct *vma)
+{
+	struct privcmd_buf_vma_private *vma_priv = vma->vm_private_data;
+
+	if (!vma_priv)
+		return;
+
+	mutex_lock(&vma_priv->file_priv->lock);
+	vma_priv->users++;
+	mutex_unlock(&vma_priv->file_priv->lock);
+}
+
+static void privcmd_buf_vma_close(struct vm_area_struct *vma)
+{
+	struct privcmd_buf_vma_private *vma_priv = vma->vm_private_data;
+	struct privcmd_buf_private *file_priv;
+
+	if (!vma_priv)
+		return;
+
+	file_priv = vma_priv->file_priv;
+
+	mutex_lock(&file_priv->lock);
+
+	vma_priv->users--;
+	if (!vma_priv->users)
+		privcmd_buf_vmapriv_free(vma_priv);
+
+	mutex_unlock(&file_priv->lock);
+}
+
+static vm_fault_t privcmd_buf_vma_fault(struct vm_fault *vmf)
+{
+	pr_debug("fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
+		 vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end,
+		 vmf->pgoff, (void *)vmf->address);
+
+	return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct privcmd_buf_vm_ops = {
+	.open = privcmd_buf_vma_open,
+	.close = privcmd_buf_vma_close,
+	.fault = privcmd_buf_vma_fault,
+};
+
+static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct privcmd_buf_private *file_priv = file->private_data;
+	struct privcmd_buf_vma_private *vma_priv;
+	unsigned long count = vma_pages(vma);
+	unsigned int i;
+	int ret = 0;
+
+	if (!(vma->vm_flags & VM_SHARED) || count > limit ||
+	    file_priv->allocated + count > limit)
+		return -EINVAL;
+
+	vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
+			   GFP_KERNEL);
+	if (!vma_priv)
+		return -ENOMEM;
+
+	vma_priv->n_pages = count;
+	count = 0;
+	for (i = 0; i < vma_priv->n_pages; i++) {
+		vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!vma_priv->pages[i])
+			break;
+		count++;
+	}
+
+	mutex_lock(&file_priv->lock);
+
+	file_priv->allocated += count;
+
+	vma_priv->file_priv = file_priv;
+	vma_priv->users = 1;
+
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND;
+	vma->vm_ops = &privcmd_buf_vm_ops;
+	vma->vm_private_data = vma_priv;
+
+	list_add(&vma_priv->list, &file_priv->list);
+
+	if (vma_priv->n_pages != count)
+		ret = -ENOMEM;
+	else
+		for (i = 0; i < vma_priv->n_pages; i++) {
+			ret = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
+					     vma_priv->pages[i]);
+			if (ret)
+				break;
+		}
+
+	if (ret)
+		privcmd_buf_vmapriv_free(vma_priv);
+
+	mutex_unlock(&file_priv->lock);
+
+	return ret;
+}
+
+const struct file_operations xen_privcmdbuf_fops = {
+	.owner = THIS_MODULE,
+	.open = privcmd_buf_open,
+	.release = privcmd_buf_release,
+	.mmap = privcmd_buf_mmap,
+};
+EXPORT_SYMBOL_GPL(xen_privcmdbuf_fops);
+
+struct miscdevice xen_privcmdbuf_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "xen/hypercall",
+	.fops = &xen_privcmdbuf_fops,
+};
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 8ae0349d9f0a..7e6e682104dc 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -1007,12 +1007,21 @@ static int __init privcmd_init(void)
 		pr_err("Could not register Xen privcmd device\n");
 		return err;
 	}
+
+	err = misc_register(&xen_privcmdbuf_dev);
+	if (err != 0) {
+		pr_err("Could not register Xen hypercall-buf device\n");
+		misc_deregister(&privcmd_dev);
+		return err;
+	}
+
 	return 0;
 }
 
 static void __exit privcmd_exit(void)
 {
 	misc_deregister(&privcmd_dev);
+	misc_deregister(&xen_privcmdbuf_dev);
 }
 
 module_init(privcmd_init);
diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
index 14facaeed36f..0dd9f8f67ee3 100644
--- a/drivers/xen/privcmd.h
+++ b/drivers/xen/privcmd.h
@@ -1,3 +1,6 @@
 #include <linux/fs.h>
 
 extern const struct file_operations xen_privcmd_fops;
+extern const struct file_operations xen_privcmdbuf_fops;
+
+extern struct miscdevice xen_privcmdbuf_dev;

From eef04c7b3786ff0c9cb1019278b6c6c2ea0ad4ff Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Thu, 21 Jun 2018 13:29:44 -0400
Subject: [PATCH 170/215] xen: Remove unnecessary BUG_ON from
 __unbind_from_irq()

Commit 910f8befdf5b ("xen/pirq: fix error path cleanup when binding
MSIs") fixed a couple of errors in error cleanup path of
xen_bind_pirq_msi_to_irq(). This cleanup allowed a call to
__unbind_from_irq() with an unbound irq, which would result in
triggering the BUG_ON there.

Since there is really no reason for the BUG_ON (xen_free_irq() can
operate on unbound irqs) we can remove it.

Reported-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: stable@vger.kernel.org
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/events/events_base.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 762378f1811c..08e4af04d6f2 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -628,8 +628,6 @@ static void __unbind_from_irq(unsigned int irq)
 		xen_irq_info_cleanup(info);
 	}
 
-	BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
-
 	xen_free_irq(irq);
 }
 

From 52e1cf2d19c2e62e6a81b8de3f7320d033917dd5 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 22 Jun 2018 08:42:22 +0200
Subject: [PATCH 171/215] efi/libstub/tpm: Initialize efi_physical_addr_t vars
 to zero for mixed mode

Commit:

  79832f0b5f71 ("efi/libstub/tpm: Initialize pointer variables to zero for mixed mode")

fixes a problem with the tpm code on mixed mode (64-bit kernel on 32-bit UEFI),
where 64-bit pointer variables are not fully initialized by the 32-bit EFI code.

A similar problem applies to the efi_physical_addr_t variables which
are written by the ->get_event_log() EFI call. Even though efi_physical_addr_t
is 64-bit everywhere, it seems that some 32-bit UEFI implementations only
fill in the lower 32 bits when passed a pointer to an efi_physical_addr_t
to fill.

This commit initializes these to 0 to, to ensure the upper 32 bits are
0 in mixed mode. This fixes recent kernels sometimes hanging during
early boot on mixed mode UEFI systems.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org> # v4.16+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20180622064222.11633-2-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/libstub/tpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c
index caa37a6dd9d4..a90b0b8fc69a 100644
--- a/drivers/firmware/efi/libstub/tpm.c
+++ b/drivers/firmware/efi/libstub/tpm.c
@@ -64,7 +64,7 @@ static void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg)
 	efi_guid_t tcg2_guid = EFI_TCG2_PROTOCOL_GUID;
 	efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID;
 	efi_status_t status;
-	efi_physical_addr_t log_location, log_last_entry;
+	efi_physical_addr_t log_location = 0, log_last_entry = 0;
 	struct linux_efi_tpm_eventlog *log_tbl = NULL;
 	unsigned long first_entry_addr, last_entry_addr;
 	size_t log_size, last_entry_size;

From 57d6a7938a8fc6cee8420b40ca244220b41721f5 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Thu, 8 Mar 2018 21:28:56 +0100
Subject: [PATCH 172/215] perf/core: Move the inline keyword at the beginning
 of the function declaration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building perf with W=1 the following warning triggers:

  CC      kernel/events/ring_buffer.o
  kernel/events/ring_buffer.c:105:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration]
   static bool __always_inline
   ^~~~~~
  ...

Move the inline keyword to the beginning of the function declaration.

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: trival@kernel.org
Link: http://lkml.kernel.org/r/20180308202856.9378-1-malat@debian.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/ring_buffer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 1d8ca9ea9979..d11f60cbe8ca 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -103,7 +103,7 @@ static void perf_output_put_handle(struct perf_output_handle *handle)
 	preempt_enable();
 }
 
-static bool __always_inline
+static __always_inline bool
 ring_buffer_has_space(unsigned long head, unsigned long tail,
 		      unsigned long data_size, unsigned int size,
 		      bool backward)
@@ -114,7 +114,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail,
 		return CIRC_SPACE(tail, head, data_size) >= size;
 }
 
-static int __always_inline
+static __always_inline int
 __perf_output_begin(struct perf_output_handle *handle,
 		    struct perf_event *event, unsigned int size,
 		    bool backward)
@@ -414,7 +414,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 }
 EXPORT_SYMBOL_GPL(perf_aux_output_begin);
 
-static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb)
+static __always_inline bool rb_need_aux_wakeup(struct ring_buffer *rb)
 {
 	if (rb->aux_overwrite)
 		return false;

From 72a8edc2d9134c2895eac2fec5eecf8230a05c96 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 22 Jun 2018 10:52:48 +0100
Subject: [PATCH 173/215] genirq/debugfs: Add missing
 IRQCHIP_SUPPORTS_LEVEL_MSI debug

Debug is missing the IRQCHIP_SUPPORTS_LEVEL_MSI debug entry, making debugfs
slightly less useful.

Take this opportunity to also add a missing comment in the definition of
IRQCHIP_SUPPORTS_LEVEL_MSI.

Fixes: 6988e0e0d283 ("genirq/msi: Limit level-triggered MSI to platform devices")
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Yang Yingliang <yangyingliang@huawei.com>
Cc: Sumit Garg <sumit.garg@linaro.org>
Link: https://lkml.kernel.org/r/20180622095254.5906-2-marc.zyngier@arm.com
---
 include/linux/irq.h  | 1 +
 kernel/irq/debugfs.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 4bd2f34947f4..201de12a9957 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -503,6 +503,7 @@ struct irq_chip {
  * IRQCHIP_SKIP_SET_WAKE:	Skip chip.irq_set_wake(), for this irq chip
  * IRQCHIP_ONESHOT_SAFE:	One shot does not require mask/unmask
  * IRQCHIP_EOI_THREADED:	Chip requires eoi() on unmask in threaded mode
+ * IRQCHIP_SUPPORTS_LEVEL_MSI	Chip can provide two doorbells for Level MSIs
  */
 enum {
 	IRQCHIP_SET_TYPE_MASKED		= (1 <<  0),
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 4dadeb3d6666..6f636136cccc 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -55,6 +55,7 @@ static const struct irq_bit_descr irqchip_flags[] = {
 	BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE),
 	BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE),
 	BIT_MASK_DESCR(IRQCHIP_EOI_THREADED),
+	BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
 };
 
 static void

From 893fbfff976cd069f2e60c3b186dbe3f85504db2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 22 Jun 2018 10:52:49 +0100
Subject: [PATCH 174/215] irqchip/ls-scfg-msi: Fix MSI affinity handling

The ls-scfs-msi driver is not dealing with the effective affinity
as it should. Let's fix that, and make it clear that the effective
affinity is restricted to a single CPU. Also prevent the driver from
messing with the internals of the affinity setting infrastructure.

Reported-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Yang Yingliang <yangyingliang@huawei.com>
Cc: Sumit Garg <sumit.garg@linaro.org>
Link: https://lkml.kernel.org/r/20180622095254.5906-3-marc.zyngier@arm.com
---
 drivers/irqchip/irq-ls-scfg-msi.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c
index 1ec3bfe56693..c671b3212010 100644
--- a/drivers/irqchip/irq-ls-scfg-msi.c
+++ b/drivers/irqchip/irq-ls-scfg-msi.c
@@ -93,8 +93,12 @@ static void ls_scfg_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
 	msg->address_lo = lower_32_bits(msi_data->msiir_addr);
 	msg->data = data->hwirq;
 
-	if (msi_affinity_flag)
-		msg->data |= cpumask_first(data->common->affinity);
+	if (msi_affinity_flag) {
+		const struct cpumask *mask;
+
+		mask = irq_data_get_effective_affinity_mask(data);
+		msg->data |= cpumask_first(mask);
+	}
 
 	iommu_dma_map_msi_msg(data->irq, msg);
 }
@@ -121,7 +125,7 @@ static int ls_scfg_msi_set_affinity(struct irq_data *irq_data,
 		return -EINVAL;
 	}
 
-	cpumask_copy(irq_data->common->affinity, mask);
+	irq_data_update_effective_affinity(irq_data, cpumask_of(cpu));
 
 	return IRQ_SET_MASK_OK;
 }

From cbaf45a6be497c272e80500e4fd9bccdf20d5050 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 22 Jun 2018 10:52:50 +0100
Subject: [PATCH 175/215] irqchip/gic-v2m: Fix SPI release on error path

On failing to allocate the required SPIs, the actual number of interrupts
should be freed and not its log2 value.

Fixes: de337ee30142 ("irqchip/gic-v2m: Add PCI Multi-MSI support")
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Yang Yingliang <yangyingliang@huawei.com>
Cc: Sumit Garg <sumit.garg@linaro.org>
Link: https://lkml.kernel.org/r/20180622095254.5906-4-marc.zyngier@arm.com
---
 drivers/irqchip/irq-gic-v2m.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 0f52d44b3f69..f5fe0100f9ff 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -199,7 +199,7 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 
 fail:
 	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
-	gicv2m_unalloc_msi(v2m, hwirq, get_count_order(nr_irqs));
+	gicv2m_unalloc_msi(v2m, hwirq, nr_irqs);
 	return err;
 }
 

From c1797b11a09c8323c92b074fd48b89a936c991d0 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 22 Jun 2018 10:52:51 +0100
Subject: [PATCH 176/215] irqchip/gic-v3-its: Don't bind LPI to unavailable
 NUMA node

On a NUMA system, if an ITS is local to an offline node, the ITS driver may
pick an offline CPU to bind the LPI.  In this case, pick an online CPU (and
the first one will do).

But on some systems, binding an LPI to non-local node CPU may cause
deadlock (see Cavium erratum 23144).  In this case, just fail the activate
and return an error code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Sumit Garg <sumit.garg@linaro.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180622095254.5906-5-marc.zyngier@arm.com
---
 drivers/irqchip/irq-gic-v3-its.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 5377d7e2afba..cae53937feeb 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2310,7 +2310,14 @@ static int its_irq_domain_activate(struct irq_domain *domain,
 		cpu_mask = cpumask_of_node(its_dev->its->numa_node);
 
 	/* Bind the LPI to the first possible CPU */
-	cpu = cpumask_first(cpu_mask);
+	cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
+	if (cpu >= nr_cpu_ids) {
+		if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
+			return -EINVAL;
+
+		cpu = cpumask_first(cpu_online_mask);
+	}
+
 	its_dev->event_map.col_map[event] = cpu;
 	irq_data_update_effective_affinity(d, cpumask_of(cpu));
 

From 83559b47cdc4d396fc1187a13b527d01b55e0fe6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 22 Jun 2018 10:52:52 +0100
Subject: [PATCH 177/215] irqchip/gic-v3-its: Only emit SYNC if targetting a
 valid collection

It is possible, under obscure circumstances, to convince the ITS driver to
emit a SYNC operation that targets a collection that is not bound to any
redistributor (and the target_address field is zero) because the
corresponding CPU has not been seen yet (the system has been booted with
max_cpus="something small").

If the ITS is using the linear CPU number as the target, this is not a big
deal, as we just end-up issuing a SYNC to CPU0. But if the ITS requires the
physical address of the redistributor (with GITS_TYPER.PTA==1), we end-up
asking the ITS to write to the physical address zero, which is not exactly
a good idea (there has been report of the ITS locking up). This should of
course never happen, but hey, this is SW...

In order to avoid the above disaster, let's track which collections have
been actually initialized, and let's not generate a SYNC if the collection
hasn't been properly bound to a redistributor.  Take this opportunity to
spit our a warning, in the hope that someone may report the issue if it
arrises again.

Reported-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Sumit Garg <sumit.garg@linaro.org>
Link: https://lkml.kernel.org/r/20180622095254.5906-6-marc.zyngier@arm.com
---
 drivers/irqchip/irq-gic-v3-its.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index cae53937feeb..fcfc96f8e0de 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -182,6 +182,14 @@ static struct its_collection *dev_event_to_col(struct its_device *its_dev,
 	return its->collections + its_dev->event_map.col_map[event];
 }
 
+static struct its_collection *valid_col(struct its_collection *col)
+{
+	if (WARN_ON_ONCE(col->target_address & GENMASK_ULL(0, 15)))
+		return NULL;
+
+	return col;
+}
+
 /*
  * ITS command descriptors - parameters to be encoded in a command
  * block.
@@ -439,7 +447,7 @@ static struct its_collection *its_build_mapti_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return col;
+	return valid_col(col);
 }
 
 static struct its_collection *its_build_movi_cmd(struct its_node *its,
@@ -458,7 +466,7 @@ static struct its_collection *its_build_movi_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return col;
+	return valid_col(col);
 }
 
 static struct its_collection *its_build_discard_cmd(struct its_node *its,
@@ -476,7 +484,7 @@ static struct its_collection *its_build_discard_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return col;
+	return valid_col(col);
 }
 
 static struct its_collection *its_build_inv_cmd(struct its_node *its,
@@ -494,7 +502,7 @@ static struct its_collection *its_build_inv_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return col;
+	return valid_col(col);
 }
 
 static struct its_collection *its_build_int_cmd(struct its_node *its,
@@ -512,7 +520,7 @@ static struct its_collection *its_build_int_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return col;
+	return valid_col(col);
 }
 
 static struct its_collection *its_build_clear_cmd(struct its_node *its,
@@ -530,7 +538,7 @@ static struct its_collection *its_build_clear_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return col;
+	return valid_col(col);
 }
 
 static struct its_collection *its_build_invall_cmd(struct its_node *its,
@@ -1824,11 +1832,16 @@ static int its_alloc_tables(struct its_node *its)
 
 static int its_alloc_collections(struct its_node *its)
 {
+	int i;
+
 	its->collections = kcalloc(nr_cpu_ids, sizeof(*its->collections),
 				   GFP_KERNEL);
 	if (!its->collections)
 		return -ENOMEM;
 
+	for (i = 0; i < nr_cpu_ids; i++)
+		its->collections[i].target_address = ~0ULL;
+
 	return 0;
 }
 

From 205e065d91d72e6afad112ea84f0ca60b30bf5ab Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 22 Jun 2018 10:52:53 +0100
Subject: [PATCH 178/215] irqchip/gic-v3-its: Only emit VSYNC if targetting a
 valid collection

Similarily to the SYNC operation, it must be verified that the VPE
targetted by a VLPI is backed by a valid collection in the GIC driver data
structures.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Yang Yingliang <yangyingliang@huawei.com>
Cc: Sumit Garg <sumit.garg@linaro.org>
Link: https://lkml.kernel.org/r/20180622095254.5906-7-marc.zyngier@arm.com
---
 drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index fcfc96f8e0de..0269ffb93f6e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -190,6 +190,14 @@ static struct its_collection *valid_col(struct its_collection *col)
 	return col;
 }
 
+static struct its_vpe *valid_vpe(struct its_node *its, struct its_vpe *vpe)
+{
+	if (valid_col(its->collections + vpe->col_idx))
+		return vpe;
+
+	return NULL;
+}
+
 /*
  * ITS command descriptors - parameters to be encoded in a command
  * block.
@@ -562,7 +570,7 @@ static struct its_vpe *its_build_vinvall_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return desc->its_vinvall_cmd.vpe;
+	return valid_vpe(its, desc->its_vinvall_cmd.vpe);
 }
 
 static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
@@ -584,7 +592,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return desc->its_vmapp_cmd.vpe;
+	return valid_vpe(its, desc->its_vmapp_cmd.vpe);
 }
 
 static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
@@ -607,7 +615,7 @@ static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return desc->its_vmapti_cmd.vpe;
+	return valid_vpe(its, desc->its_vmapti_cmd.vpe);
 }
 
 static struct its_vpe *its_build_vmovi_cmd(struct its_node *its,
@@ -630,7 +638,7 @@ static struct its_vpe *its_build_vmovi_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return desc->its_vmovi_cmd.vpe;
+	return valid_vpe(its, desc->its_vmovi_cmd.vpe);
 }
 
 static struct its_vpe *its_build_vmovp_cmd(struct its_node *its,
@@ -648,7 +656,7 @@ static struct its_vpe *its_build_vmovp_cmd(struct its_node *its,
 
 	its_fixup_cmd(cmd);
 
-	return desc->its_vmovp_cmd.vpe;
+	return valid_vpe(its, desc->its_vmovp_cmd.vpe);
 }
 
 static u64 its_cmd_ptr_to_offset(struct its_node *its,

From 82f499c8811149069ec958b72a86643a7a289b25 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 22 Jun 2018 10:52:54 +0100
Subject: [PATCH 179/215] irqchip/gic-v3-its: Fix reprogramming of
 redistributors on CPU hotplug

Enabling LPIs was made a lot stricter recently, by checking that they are
disabled before enabling them. By doing so, the CPU hotplug case was missed
altogether, which leaves LPIs enabled on hotplug off (expecting the CPU to
eventually come back), and won't write a different value anyway on hotplug
on.

So skip that check if that particular case is detected

Fixes: 6eb486b66a30 ("irqchip/gic-v3: Ensure GICR_CTLR.EnableLPI=0 is observed before enabling")
Reported-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sumit Garg <sumit.garg@linaro.org>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lkml.kernel.org/r/20180622095254.5906-8-marc.zyngier@arm.com
---
 drivers/irqchip/irq-gic-v3-its.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 0269ffb93f6e..d7842d312d3e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3427,6 +3427,16 @@ static int redist_disable_lpis(void)
 	u64 timeout = USEC_PER_SEC;
 	u64 val;
 
+	/*
+	 * If coming via a CPU hotplug event, we don't need to disable
+	 * LPIs before trying to re-enable them. They are already
+	 * configured and all is well in the world. Detect this case
+	 * by checking the allocation of the pending table for the
+	 * current CPU.
+	 */
+	if (gic_data_rdist()->pend_page)
+		return 0;
+
 	if (!gic_rdists_supports_plpis()) {
 		pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
 		return -ENXIO;

From bed9df97b39e73a4607189f2c4b9fb89cc3f7f59 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 22 Jun 2018 19:35:33 +0800
Subject: [PATCH 180/215] irqdesc: Delete irq_desc_get_msi_desc()

Function irq_desc_get_msi_desc() is not referenced in the kernel (and does
not seem to have been referenced since e39758e0ea76, 3 years ago), so
delete it.

Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: <marc.zyngier@arm.com>
Cc: <will.deacon@arm.com>
Cc: <kstewart@linuxfoundation.org>
Cc: <julien.thierry@arm.com>
Cc: <andrew@lunn.ch>
Cc: <trivial@kernel.org>
Link: https://lkml.kernel.org/r/1529667333-92959-1-git-send-email-john.garry@huawei.com
---
 include/linux/irqdesc.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 25b33b664537..dd1e40ddac7d 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -145,11 +145,6 @@ static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
 	return desc->irq_common_data.handler_data;
 }
 
-static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
-{
-	return desc->irq_common_data.msi_desc;
-}
-
 /*
  * Architectures call this to let the generic IRQ layer
  * handle an interrupt.

From 1f74c8a64798e2c488f86efc97e308b85fb7d7aa Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Fri, 22 Jun 2018 11:54:28 +0200
Subject: [PATCH 181/215] x86/mce: Do not overwrite MCi_STATUS in
 mce_no_way_out()

mce_no_way_out() does a quick check during #MC to see whether some of
the MCEs logged would require the kernel to panic immediately. And it
passes a struct mce where MCi_STATUS gets written.

However, after having saved a valid status value, the next iteration
of the loop which goes over the MCA banks on the CPU, overwrites the
valid status value because we're using struct mce as storage instead of
a temporary variable.

Which leads to MCE records with an empty status value:

  mce: [Hardware Error]: CPU 0: Machine Check Exception: 6 Bank 0: 0000000000000000
  mce: [Hardware Error]: RIP 10:<ffffffffbd42fbd7> {trigger_mce+0x7/0x10}

In order to prevent the loss of the status register value, return
immediately when severity is a panic one so that we can panic
immediately with the first fatal MCE logged. This is also the intention
of this function and not to noodle over the banks while a fatal MCE is
already logged.

Tony: read the rest of the MCA bank to populate the struct mce fully.

Suggested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20180622095428.626-8-bp@alien8.de
---
 arch/x86/kernel/cpu/mcheck/mce.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index cd76380af79f..7e6f51a9d917 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -772,23 +772,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
 static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			  struct pt_regs *regs)
 {
-	int i, ret = 0;
 	char *tmp;
+	int i;
 
 	for (i = 0; i < mca_cfg.banks; i++) {
 		m->status = mce_rdmsrl(msr_ops.status(i));
-		if (m->status & MCI_STATUS_VAL) {
-			__set_bit(i, validp);
-			if (quirk_no_way_out)
-				quirk_no_way_out(i, m, regs);
-		}
+		if (!(m->status & MCI_STATUS_VAL))
+			continue;
+
+		__set_bit(i, validp);
+		if (quirk_no_way_out)
+			quirk_no_way_out(i, m, regs);
 
 		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+			mce_read_aux(m, i);
 			*msg = tmp;
-			ret = 1;
+			return 1;
 		}
 	}
-	return ret;
+	return 0;
 }
 
 /*

From 40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 22 Jun 2018 11:54:23 +0200
Subject: [PATCH 182/215] x86/mce: Fix incorrect "Machine check from unknown
 source" message

Some injection testing resulted in the following console log:

  mce: [Hardware Error]: CPU 22: Machine Check Exception: f Bank 1: bd80000000100134
  mce: [Hardware Error]: RIP 10:<ffffffffc05292dd> {pmem_do_bvec+0x11d/0x330 [nd_pmem]}
  mce: [Hardware Error]: TSC c51a63035d52 ADDR 3234bc4000 MISC 88
  mce: [Hardware Error]: PROCESSOR 0:50654 TIME 1526502199 SOCKET 0 APIC 38 microcode 2000043
  mce: [Hardware Error]: Run the above through 'mcelog --ascii'
  Kernel panic - not syncing: Machine check from unknown source

This confused everybody because the first line quite clearly shows
that we found a logged error in "Bank 1", while the last line says
"unknown source".

The problem is that the Linux code doesn't do the right thing
for a local machine check that results in a fatal error.

It turns out that we know very early in the handler whether the
machine check is fatal. The call to mce_no_way_out() has checked
all the banks for the CPU that took the local machine check. If
it says we must crash, we can do so right away with the right
messages.

We do scan all the banks again. This means that we might initially
not see a problem, but during the second scan find something fatal.
If this happens we print a slightly different message (so I can
see if it actually every happens).

[ bp: Remove unneeded severity assignment. ]

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: stable@vger.kernel.org # 4.2
Link: http://lkml.kernel.org/r/52e049a497e86fd0b71c529651def8871c804df0.1527283897.git.tony.luck@intel.com
---
 arch/x86/kernel/cpu/mcheck/mce.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7e6f51a9d917..e93670d736a6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1207,13 +1207,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		lmce = m.mcgstatus & MCG_STATUS_LMCES;
 
 	/*
+	 * Local machine check may already know that we have to panic.
+	 * Broadcast machine check begins rendezvous in mce_start()
 	 * Go through all banks in exclusion of the other CPUs. This way we
 	 * don't report duplicated events on shared banks because the first one
-	 * to see it will clear it. If this is a Local MCE, then no need to
-	 * perform rendezvous.
+	 * to see it will clear it.
 	 */
-	if (!lmce)
+	if (lmce) {
+		if (no_way_out)
+			mce_panic("Fatal local machine check", &m, msg);
+	} else {
 		order = mce_start(&no_way_out);
+	}
 
 	for (i = 0; i < cfg->banks; i++) {
 		__clear_bit(i, toclear);
@@ -1289,12 +1294,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			no_way_out = worst >= MCE_PANIC_SEVERITY;
 	} else {
 		/*
-		 * Local MCE skipped calling mce_reign()
-		 * If we found a fatal error, we need to panic here.
+		 * If there was a fatal machine check we should have
+		 * already called mce_panic earlier in this function.
+		 * Since we re-read the banks, we might have found
+		 * something new. Check again to see if we found a
+		 * fatal error. We call "mce_severity()" again to
+		 * make sure we have the right "msg".
 		 */
-		 if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-			mce_panic("Machine check from unknown source",
-				NULL, NULL);
+		if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
+			mce_severity(&m, cfg->tolerant, &msg, true);
+			mce_panic("Local fatal machine check!", &m, msg);
+		}
 	}
 
 	/*

From 0218c766263e70795c5eaa17d75ed54bca350950 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Fri, 22 Jun 2018 13:51:26 +0200
Subject: [PATCH 183/215] x86/microcode/intel: Fix memleak in
 save_microcode_patch()

Free useless ucode_patch entry when it's replaced.

[ bp: Drop the memfree_patch() two-liner. ]

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Srinivas REDDY Eeda <srinivas.eeda@oracle.com>
Link: http://lkml.kernel.org/r/888102f0-fd22-459d-b090-a1bd8a00cb2b@default
---
 arch/x86/kernel/cpu/microcode/intel.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 1c2cfa0644aa..97ccf4c3b45b 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -190,8 +190,11 @@ static void save_microcode_patch(void *data, unsigned int size)
 			p = memdup_patch(data, size);
 			if (!p)
 				pr_err("Error allocating buffer %p\n", data);
-			else
+			else {
 				list_replace(&iter->plist, &p->plist);
+				kfree(iter->data);
+				kfree(iter);
+			}
 		}
 	}
 

From 0447378a4a793da008451fad50bc0f93e9675ae6 Mon Sep 17 00:00:00 2001
From: Marc Orr <marcorr@google.com>
Date: Wed, 20 Jun 2018 17:21:29 -0700
Subject: [PATCH 184/215] kvm: vmx: Nested VM-entry prereqs for event inj.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch extends the checks done prior to a nested VM entry.
Specifically, it extends the check_vmentry_prereqs function with checks
for fields relevant to the VM-entry event injection information, as
described in the Intel SDM, volume 3.

This patch is motivated by a syzkaller bug, where a bad VM-entry
interruption information field is generated in the VMCS02, which causes
the nested VM launch to fail. Then, KVM fails to resume L1.

While KVM should be improved to correctly resume L1 execution after a
failed nested launch, this change is justified because the existing code
to resume L1 is flaky/ad-hoc and the test coverage for resuming L1 is
sparse.

Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Marc Orr <marcorr@google.com>
[Removed comment whose parts were describing previous revisions and the
 rest was obvious from function/variable naming. - Radim]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/vmx.h |  3 ++
 arch/x86/kvm/vmx.c         | 67 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.h         |  9 +++++
 3 files changed, 79 insertions(+)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 425e6b8b9547..6aa8499e1f62 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -114,6 +114,7 @@
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
 #define VMX_MISC_ACTIVITY_HLT			0x00000040
+#define VMX_MISC_ZERO_LEN_INS			0x40000000
 
 /* VMFUNC functions */
 #define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
@@ -351,11 +352,13 @@ enum vmcs_field {
 #define VECTORING_INFO_VALID_MASK       	INTR_INFO_VALID_MASK
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
+#define INTR_TYPE_RESERVED              (1 << 8) /* reserved */
 #define INTR_TYPE_NMI_INTR		(2 << 8) /* NMI */
 #define INTR_TYPE_HARD_EXCEPTION	(3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
 #define INTR_TYPE_PRIV_SW_EXCEPTION	(5 << 8) /* ICE breakpoint - undocumented */
 #define INTR_TYPE_SOFT_EXCEPTION	(6 << 8) /* software exception */
+#define INTR_TYPE_OTHER_EVENT           (7 << 8) /* other event */
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define GUEST_INTR_STATE_STI		0x00000001
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 559a12b6184d..1689f433f3a0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1705,6 +1705,17 @@ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
 		MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
 }
 
+static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
+}
+
+static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
+			CPU_BASED_MONITOR_TRAP_FLAG;
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
 	return vmcs12->cpu_based_vm_exec_control & bit;
@@ -11620,6 +11631,62 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	    !nested_cr3_valid(vcpu, vmcs12->host_cr3))
 		return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
 
+	/*
+	 * From the Intel SDM, volume 3:
+	 * Fields relevant to VM-entry event injection must be set properly.
+	 * These fields are the VM-entry interruption-information field, the
+	 * VM-entry exception error code, and the VM-entry instruction length.
+	 */
+	if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
+		u32 intr_info = vmcs12->vm_entry_intr_info_field;
+		u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
+		u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
+		bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
+		bool should_have_error_code;
+		bool urg = nested_cpu_has2(vmcs12,
+					   SECONDARY_EXEC_UNRESTRICTED_GUEST);
+		bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
+
+		/* VM-entry interruption-info field: interruption type */
+		if (intr_type == INTR_TYPE_RESERVED ||
+		    (intr_type == INTR_TYPE_OTHER_EVENT &&
+		     !nested_cpu_supports_monitor_trap_flag(vcpu)))
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry interruption-info field: vector */
+		if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
+		    (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
+		    (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry interruption-info field: deliver error code */
+		should_have_error_code =
+			intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
+			x86_exception_has_error_code(vector);
+		if (has_error_code != should_have_error_code)
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry exception error code */
+		if (has_error_code &&
+		    vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry interruption-info field: reserved bits */
+		if (intr_info & INTR_INFO_RESVD_BITS_MASK)
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		/* VM-entry instruction length */
+		switch (intr_type) {
+		case INTR_TYPE_SOFT_EXCEPTION:
+		case INTR_TYPE_SOFT_INTR:
+		case INTR_TYPE_PRIV_SW_EXCEPTION:
+			if ((vmcs12->vm_entry_instruction_len > 15) ||
+			    (vmcs12->vm_entry_instruction_len == 0 &&
+			     !nested_cpu_has_zero_length_injection(vcpu)))
+				return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+		}
+	}
+
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 331993c49dae..257f27620bc2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -110,6 +110,15 @@ static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static inline bool x86_exception_has_error_code(unsigned int vector)
+{
+	static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+			BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) |
+			BIT(PF_VECTOR) | BIT(AC_VECTOR);
+
+	return (1U << vector) & exception_has_error_code;
+}
+
 static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;

From 2ddc649810133fcf8e5282eea898ee7ececf161e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 22 Jun 2018 16:56:14 +0200
Subject: [PATCH 185/215] KVM: fix KVM_CAP_HYPERV_TLBFLUSH paragraph number
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

KVM_CAP_HYPERV_TLBFLUSH collided with KVM_CAP_S390_PSW-BPB, its paragraph
number should now be 8.18.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 495b7742ab58..d10944e619d3 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4610,7 +4610,7 @@ This capability indicates that kvm will implement the interfaces to handle
 reset, migration and nested KVM for branch prediction blocking. The stfle
 facility 82 should not be provided to the guest without this capability.
 
-8.14 KVM_CAP_HYPERV_TLBFLUSH
+8.18 KVM_CAP_HYPERV_TLBFLUSH
 
 Architectures: x86
 

From abcbcb80cd09cd40f2089d912764e315459b71f7 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 22 Jun 2018 16:33:57 +0200
Subject: [PATCH 186/215] time: Make sure jiffies_to_msecs() preserves non-zero
 time periods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For the common cases where 1000 is a multiple of HZ, or HZ is a multiple of
1000, jiffies_to_msecs() never returns zero when passed a non-zero time
period.

However, if HZ > 1000 and not an integer multiple of 1000 (e.g. 1024 or
1200, as used on alpha and DECstation), jiffies_to_msecs() may return zero
for small non-zero time periods.  This may break code that relies on
receiving back a non-zero value.

jiffies_to_usecs() does not need such a fix: one jiffy can only be less
than one µs if HZ > 1000000, and such large values of HZ are already
rejected at build time, twice:

  - include/linux/jiffies.h does #error if HZ >= 12288,
  - kernel/time/time.c has BUILD_BUG_ON(HZ > USEC_PER_SEC).

Broken since forever.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: linux-alpha@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180622143357.7495-1-geert@linux-m68k.org
---
 kernel/time/time.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/time/time.c b/kernel/time/time.c
index 6fa99213fc72..2b41e8e2d31d 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -28,6 +28,7 @@
  */
 
 #include <linux/export.h>
+#include <linux/kernel.h>
 #include <linux/timex.h>
 #include <linux/capability.h>
 #include <linux/timekeeper_internal.h>
@@ -314,9 +315,10 @@ unsigned int jiffies_to_msecs(const unsigned long j)
 	return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
 #else
 # if BITS_PER_LONG == 32
-	return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
+	return (HZ_TO_MSEC_MUL32 * j + (1ULL << HZ_TO_MSEC_SHR32) - 1) >>
+	       HZ_TO_MSEC_SHR32;
 # else
-	return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
+	return DIV_ROUND_UP(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN);
 # endif
 #endif
 }

From 48e315618dc4dc8904182cd221e3d395d5d97005 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 22 Jun 2018 12:08:20 +0200
Subject: [PATCH 187/215] MAINTAINERS: Add file patterns for x86 device tree
 bindings

Submitters of device tree binding documentation may forget to CC
the subsystem maintainer if this is missing.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: devicetree@vger.kernel.org
Link: https://lkml.kernel.org/r/20180622100820.29616-1-geert@linux-m68k.org
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9c125f705f78..60929873b900 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15366,6 +15366,7 @@ M:	x86@kernel.org
 L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
 S:	Maintained
+F:	Documentation/devicetree/bindings/x86/
 F:	Documentation/x86/
 F:	arch/x86/
 

From b5b7dd647f2d21b93f734ce890671cd908e69b0a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 22 Jun 2018 10:25:25 +0100
Subject: [PATCH 188/215] arm64: kpti: Use early_param for kpti= command-line
 option

We inspect __kpti_forced early on as part of the cpufeature enable
callback which remaps the swapper page table using non-global entries.

Ensure that __kpti_forced has been updated to reflect the kpti=
command-line option before we start using it.

Fixes: ea1e3de85e94 ("arm64: entry: Add fake CPU feature for unmapping the kernel at EL0")
Cc: <stable@vger.kernel.org> # 4.16.x-
Reported-by: Wei Xu <xuwei5@hisilicon.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Wei Xu <xuwei5@hisilicon.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d2856b129097..f24892a40d2c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -937,7 +937,7 @@ static int __init parse_kpti(char *str)
 	__kpti_forced = enabled ? 1 : -1;
 	return 0;
 }
-__setup("kpti=", parse_kpti);
+early_param("kpti", parse_kpti);
 #endif	/* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
 #ifdef CONFIG_ARM64_HW_AFDBM

From 71c8fc0c96abf8e53e74ed4d891d671e585f9076 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 22 Jun 2018 16:23:45 +0100
Subject: [PATCH 189/215] arm64: mm: Ensure writes to swapper are ordered wrt
 subsequent cache maintenance

When rewriting swapper using nG mappings, we must performance cache
maintenance around each page table access in order to avoid coherency
problems with the host's cacheable alias under KVM. To ensure correct
ordering of the maintenance with respect to Device memory accesses made
with the Stage-1 MMU disabled, DMBs need to be added between the
maintenance and the corresponding memory access.

This patch adds a missing DMB between writing a new page table entry and
performing a clean+invalidate on the same line.

Fixes: f992b4dfd58b ("arm64: kpti: Add ->enable callback to remap swapper using nG mappings")
Cc: <stable@vger.kernel.org> # 4.16.x-
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 5f9a73a4452c..03646e6a2ef4 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -217,8 +217,9 @@ ENDPROC(idmap_cpu_replace_ttbr1)
 
 	.macro __idmap_kpti_put_pgtable_ent_ng, type
 	orr	\type, \type, #PTE_NG		// Same bit for blocks and pages
-	str	\type, [cur_\()\type\()p]	// Update the entry and ensure it
-	dc	civac, cur_\()\type\()p		// is visible to all CPUs.
+	str	\type, [cur_\()\type\()p]	// Update the entry and ensure
+	dmb	sy				// that it is visible to all
+	dc	civac, cur_\()\type\()p		// CPUs.
 	.endm
 
 /*

From 784e0300fe9fe4aa81bd7df9d59e138f56bb605b Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 22 Jun 2018 11:45:07 +0100
Subject: [PATCH 190/215] rseq: Avoid infinite recursion when delivering
 SIGSEGV

When delivering a signal to a task that is using rseq, we call into
__rseq_handle_notify_resume() so that the registers pushed in the
sigframe are updated to reflect the state of the restartable sequence
(for example, ensuring that the signal returns to the abort handler if
necessary).

However, if the rseq management fails due to an unrecoverable fault when
accessing userspace or certain combinations of RSEQ_CS_* flags, then we
will attempt to deliver a SIGSEGV. This has the potential for infinite
recursion if the rseq code continuously fails on signal delivery.

Avoid this problem by using force_sigsegv() instead of force_sig(), which
is explicitly designed to reset the SEGV handler to SIG_DFL in the case
of a recursive fault. In doing so, remove rseq_signal_deliver() from the
internal rseq API and have an optional struct ksignal * parameter to
rseq_handle_notify_resume() instead.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: peterz@infradead.org
Cc: paulmck@linux.vnet.ibm.com
Cc: boqun.feng@gmail.com
Link: https://lkml.kernel.org/r/1529664307-983-1-git-send-email-will.deacon@arm.com
---
 arch/arm/kernel/signal.c     |  4 ++--
 arch/powerpc/kernel/signal.c |  4 ++--
 arch/x86/entry/common.c      |  2 +-
 arch/x86/kernel/signal.c     |  2 +-
 include/linux/sched.h        | 18 +++++++++++-------
 kernel/rseq.c                |  7 ++++---
 6 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index f09e9d66d605..dec130e7078c 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -544,7 +544,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	 * Increment event counter and perform fixup for the pre-signal
 	 * frame.
 	 */
-	rseq_signal_deliver(regs);
+	rseq_signal_deliver(ksig, regs);
 
 	/*
 	 * Set up the stack frame
@@ -666,7 +666,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 			} else {
 				clear_thread_flag(TIF_NOTIFY_RESUME);
 				tracehook_notify_resume(regs);
-				rseq_handle_notify_resume(regs);
+				rseq_handle_notify_resume(NULL, regs);
 			}
 		}
 		local_irq_disable();
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 17fe4339ba59..b3e8db376ecd 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -134,7 +134,7 @@ static void do_signal(struct task_struct *tsk)
 	/* Re-enable the breakpoints for the signal stack */
 	thread_change_pc(tsk, tsk->thread.regs);
 
-	rseq_signal_deliver(tsk->thread.regs);
+	rseq_signal_deliver(&ksig, tsk->thread.regs);
 
 	if (is32) {
         	if (ksig.ka.sa.sa_flags & SA_SIGINFO)
@@ -170,7 +170,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		rseq_handle_notify_resume(regs);
+		rseq_handle_notify_resume(NULL, regs);
 	}
 
 	user_enter();
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 92190879b228..3b2490b81918 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -164,7 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
 		if (cached_flags & _TIF_NOTIFY_RESUME) {
 			clear_thread_flag(TIF_NOTIFY_RESUME);
 			tracehook_notify_resume(regs);
-			rseq_handle_notify_resume(regs);
+			rseq_handle_notify_resume(NULL, regs);
 		}
 
 		if (cached_flags & _TIF_USER_RETURN_NOTIFY)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 445ca11ff863..92a3b312a53c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -692,7 +692,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 	 * Increment event counter and perform fixup for the pre-signal
 	 * frame.
 	 */
-	rseq_signal_deliver(regs);
+	rseq_signal_deliver(ksig, regs);
 
 	/* Set up the stack frame */
 	if (is_ia32_frame(ksig)) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1882643d455..9256118bd40c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1799,20 +1799,22 @@ static inline void rseq_set_notify_resume(struct task_struct *t)
 		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
 }
 
-void __rseq_handle_notify_resume(struct pt_regs *regs);
+void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
 
-static inline void rseq_handle_notify_resume(struct pt_regs *regs)
+static inline void rseq_handle_notify_resume(struct ksignal *ksig,
+					     struct pt_regs *regs)
 {
 	if (current->rseq)
-		__rseq_handle_notify_resume(regs);
+		__rseq_handle_notify_resume(ksig, regs);
 }
 
-static inline void rseq_signal_deliver(struct pt_regs *regs)
+static inline void rseq_signal_deliver(struct ksignal *ksig,
+				       struct pt_regs *regs)
 {
 	preempt_disable();
 	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
 	preempt_enable();
-	rseq_handle_notify_resume(regs);
+	rseq_handle_notify_resume(ksig, regs);
 }
 
 /* rseq_preempt() requires preemption to be disabled. */
@@ -1861,10 +1863,12 @@ static inline void rseq_execve(struct task_struct *t)
 static inline void rseq_set_notify_resume(struct task_struct *t)
 {
 }
-static inline void rseq_handle_notify_resume(struct pt_regs *regs)
+static inline void rseq_handle_notify_resume(struct ksignal *ksig,
+					     struct pt_regs *regs)
 {
 }
-static inline void rseq_signal_deliver(struct pt_regs *regs)
+static inline void rseq_signal_deliver(struct ksignal *ksig,
+				       struct pt_regs *regs)
 {
 }
 static inline void rseq_preempt(struct task_struct *t)
diff --git a/kernel/rseq.c b/kernel/rseq.c
index ae306f90c514..22b6acf1ad63 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -251,10 +251,10 @@ static int rseq_ip_fixup(struct pt_regs *regs)
  * respect to other threads scheduled on the same CPU, and with respect
  * to signal handlers.
  */
-void __rseq_handle_notify_resume(struct pt_regs *regs)
+void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
 {
 	struct task_struct *t = current;
-	int ret;
+	int ret, sig;
 
 	if (unlikely(t->flags & PF_EXITING))
 		return;
@@ -268,7 +268,8 @@ void __rseq_handle_notify_resume(struct pt_regs *regs)
 	return;
 
 error:
-	force_sig(SIGSEGV, t);
+	sig = ksig ? ksig->sig : 0;
+	force_sigsegv(sig, t);
 }
 
 #ifdef CONFIG_DEBUG_RSEQ

From 0ae52ddf5bd7f685bb43d7687290f6c2eeacfb31 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 22 Jun 2018 13:05:35 +0200
Subject: [PATCH 191/215] lightnvm: Remove depends on HAS_DMA in case of
 platform dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove dependencies on HAS_DMA where a Kconfig symbol depends on another
symbol that implies HAS_DMA, and, optionally, on "|| COMPILE_TEST".
In most cases this other symbol is an architecture or platform specific
symbol, or PCI.

Generic symbols and drivers without platform dependencies keep their
dependencies on HAS_DMA, to prevent compiling subsystems or drivers that
cannot work anyway.

This simplifies the dependencies, and allows to improve compile-testing.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig
index 10c08982185a..9c03f35d9df1 100644
--- a/drivers/lightnvm/Kconfig
+++ b/drivers/lightnvm/Kconfig
@@ -4,7 +4,7 @@
 
 menuconfig NVM
 	bool "Open-Channel SSD target support"
-	depends on BLOCK && HAS_DMA && PCI
+	depends on BLOCK && PCI
 	select BLK_DEV_NVME
 	help
 	  Say Y here to get to enable Open-channel SSDs.

From 3ee7e8697d5860b173132606d80a9cd35e7113ee Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 18 Jun 2018 15:46:58 +0200
Subject: [PATCH 192/215] bdi: Fix another oops in wb_workfn()

syzbot is reporting NULL pointer dereference at wb_workfn() [1] due to
wb->bdi->dev being NULL. And Dmitry confirmed that wb->state was
WB_shutting_down after wb->bdi->dev became NULL. This indicates that
unregister_bdi() failed to call wb_shutdown() on one of wb objects.

The problem is in cgwb_bdi_unregister() which does cgwb_kill() and thus
drops bdi's reference to wb structures before going through the list of
wbs again and calling wb_shutdown() on each of them. This way the loop
iterating through all wbs can easily miss a wb if that wb has already
passed through cgwb_remove_from_bdi_list() called from wb_shutdown()
from cgwb_release_workfn() and as a result fully shutdown bdi although
wb_workfn() for this wb structure is still running. In fact there are
also other ways cgwb_bdi_unregister() can race with
cgwb_release_workfn() leading e.g. to use-after-free issues:

CPU1                            CPU2
                                cgwb_bdi_unregister()
                                  cgwb_kill(*slot);

cgwb_release()
  queue_work(cgwb_release_wq, &wb->release_work);
cgwb_release_workfn()
                                  wb = list_first_entry(&bdi->wb_list, ...)
                                  spin_unlock_irq(&cgwb_lock);
  wb_shutdown(wb);
  ...
  kfree_rcu(wb, rcu);
                                  wb_shutdown(wb); -> oops use-after-free

We solve these issues by synchronizing writeback structure shutdown from
cgwb_bdi_unregister() with cgwb_release_workfn() using a new mutex. That
way we also no longer need synchronization using WB_shutting_down as the
mutex provides it for CONFIG_CGROUP_WRITEBACK case and without
CONFIG_CGROUP_WRITEBACK wb_shutdown() can be called only once from
bdi_unregister().

Reported-by: syzbot <syzbot+4a7438e774b21ddd8eca@syzkaller.appspotmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/backing-dev-defs.h |  2 +-
 mm/backing-dev.c                 | 20 +++++++-------------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 0bd432a4d7bd..24251762c20c 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -22,7 +22,6 @@ struct dentry;
  */
 enum wb_state {
 	WB_registered,		/* bdi_register() was done */
-	WB_shutting_down,	/* wb_shutdown() in progress */
 	WB_writeback_running,	/* Writeback is in progress */
 	WB_has_dirty_io,	/* Dirty inodes on ->b_{dirty|io|more_io} */
 	WB_start_all,		/* nr_pages == 0 (all) work pending */
@@ -189,6 +188,7 @@ struct backing_dev_info {
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
 	struct rb_root cgwb_congested_tree; /* their congested states */
+	struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
 #else
 	struct bdi_writeback_congested *wb_congested;
 #endif
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 347cc834c04a..2e5d3df0853d 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -359,15 +359,8 @@ static void wb_shutdown(struct bdi_writeback *wb)
 	spin_lock_bh(&wb->work_lock);
 	if (!test_and_clear_bit(WB_registered, &wb->state)) {
 		spin_unlock_bh(&wb->work_lock);
-		/*
-		 * Wait for wb shutdown to finish if someone else is just
-		 * running wb_shutdown(). Otherwise we could proceed to wb /
-		 * bdi destruction before wb_shutdown() is finished.
-		 */
-		wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE);
 		return;
 	}
-	set_bit(WB_shutting_down, &wb->state);
 	spin_unlock_bh(&wb->work_lock);
 
 	cgwb_remove_from_bdi_list(wb);
@@ -379,12 +372,6 @@ static void wb_shutdown(struct bdi_writeback *wb)
 	mod_delayed_work(bdi_wq, &wb->dwork, 0);
 	flush_delayed_work(&wb->dwork);
 	WARN_ON(!list_empty(&wb->work_list));
-	/*
-	 * Make sure bit gets cleared after shutdown is finished. Matches with
-	 * the barrier provided by test_and_clear_bit() above.
-	 */
-	smp_wmb();
-	clear_and_wake_up_bit(WB_shutting_down, &wb->state);
 }
 
 static void wb_exit(struct bdi_writeback *wb)
@@ -508,10 +495,12 @@ static void cgwb_release_workfn(struct work_struct *work)
 	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
 						release_work);
 
+	mutex_lock(&wb->bdi->cgwb_release_mutex);
 	wb_shutdown(wb);
 
 	css_put(wb->memcg_css);
 	css_put(wb->blkcg_css);
+	mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
@@ -697,6 +686,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 
 	INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
 	bdi->cgwb_congested_tree = RB_ROOT;
+	mutex_init(&bdi->cgwb_release_mutex);
 
 	ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
 	if (!ret) {
@@ -717,7 +707,10 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
 	spin_lock_irq(&cgwb_lock);
 	radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
 		cgwb_kill(*slot);
+	spin_unlock_irq(&cgwb_lock);
 
+	mutex_lock(&bdi->cgwb_release_mutex);
+	spin_lock_irq(&cgwb_lock);
 	while (!list_empty(&bdi->wb_list)) {
 		wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
 				      bdi_node);
@@ -726,6 +719,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
 		spin_lock_irq(&cgwb_lock);
 	}
 	spin_unlock_irq(&cgwb_lock);
+	mutex_unlock(&bdi->cgwb_release_mutex);
 }
 
 /**

From 964d978433a4b9aa1368ff71227ca0027dd1e32f Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Wed, 13 Jun 2018 13:43:10 -0500
Subject: [PATCH 193/215] x86/CPU/AMD: Fix LLC ID bit-shift calculation

The current logic incorrectly calculates the LLC ID from the APIC ID.

Unless specified otherwise, the LLC ID should be calculated by removing
the Core and Thread ID bits from the least significant end of the APIC
ID. For more info, see "ApicId Enumeration Requirements" in any Fam17h
PPR document.

[ bp: Improve commit message. ]

Fixes: 68091ee7ac3c ("Calculate last level cache ID from number of sharing threads")
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1528915390-30533-1-git-send-email-suravee.suthikulpanit@amd.com
---
 arch/x86/kernel/cpu/cacheinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 38354c66df81..0c5fcbd998cf 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -671,7 +671,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
 			num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
 
 		if (num_sharing_cache) {
-			int bits = get_count_order(num_sharing_cache) - 1;
+			int bits = get_count_order(num_sharing_cache);
 
 			per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
 		}

From b8f1f65882f07913157c44673af7ec0b308d03eb Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 21 Jun 2018 13:11:31 +0800
Subject: [PATCH 194/215] vhost_net: validate sock before trying to put its fd

Sock will be NULL if we pass -1 to vhost_net_set_backend(), but when
we meet errors during ubuf allocation, the code does not check for
NULL before calling sockfd_put(), this will lead NULL
dereferencing. Fixing by checking sock pointer before.

Fixes: bab632d69ee4 ("vhost: vhost TX zero-copy support")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/net.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 686dc670fd29..29756d88799b 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1226,7 +1226,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	if (ubufs)
 		vhost_net_ubuf_put_wait_and_free(ubufs);
 err_ubufs:
-	sockfd_put(sock);
+	if (sock)
+		sockfd_put(sock);
 err_vq:
 	mutex_unlock(&vq->mutex);
 err:

From 6c6da92808442908287fae8ebb0ca041a52469f4 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 21 Jun 2018 19:49:36 +0800
Subject: [PATCH 195/215] ipv6: mcast: fix unsolicited report interval after
 receiving querys

After recieving MLD querys, we update idev->mc_maxdelay with max_delay
from query header. This make the later unsolicited reports have the same
interval with mc_maxdelay, which means we may send unsolicited reports with
long interval time instead of default configured interval time.

Also as we will not call ipv6_mc_reset() after device up. This issue will
be there even after leave the group and join other groups.

Fixes: fc4eba58b4c14 ("ipv6: make unsolicited report intervals configurable for mld")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 975021df7c1c..c0c74088f2af 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2082,7 +2082,8 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev)
 		mld_send_initial_cr(idev);
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
-			mld_dad_start_timer(idev, idev->mc_maxdelay);
+			mld_dad_start_timer(idev,
+					    unsolicited_report_interval(idev));
 	}
 }
 
@@ -2094,7 +2095,8 @@ static void mld_dad_timer_expire(struct timer_list *t)
 	if (idev->mc_dad_count) {
 		idev->mc_dad_count--;
 		if (idev->mc_dad_count)
-			mld_dad_start_timer(idev, idev->mc_maxdelay);
+			mld_dad_start_timer(idev,
+					    unsolicited_report_interval(idev));
 	}
 	in6_dev_put(idev);
 }
@@ -2452,7 +2454,8 @@ static void mld_ifc_timer_expire(struct timer_list *t)
 	if (idev->mc_ifc_count) {
 		idev->mc_ifc_count--;
 		if (idev->mc_ifc_count)
-			mld_ifc_start_timer(idev, idev->mc_maxdelay);
+			mld_ifc_start_timer(idev,
+					    unsolicited_report_interval(idev));
 	}
 	in6_dev_put(idev);
 }

From 32e6996ca697099a4be5cd0dbd4b436699ca81a9 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Fri, 22 Jun 2018 10:37:05 +1000
Subject: [PATCH 196/215] Documentation: e100: Use correct heading adornment

Recently documentation file was converted to rst.  The document title
has the incorrect heading adornment.  From kernel docs:

	* Please stick to this order of heading adornments:

	  1. ``=`` with overline for document title::

	       ==============
	       Document title
	       ==============

Add  overline heading adornment to document title.

Fixes commit (85d63445f411 Documentation: e100: Update the Intel 10/100 driver doc)

CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Tobin C. Harding <me@tobin.cc>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/e100.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/networking/e100.rst b/Documentation/networking/e100.rst
index d4d837027925..59b80608e27d 100644
--- a/Documentation/networking/e100.rst
+++ b/Documentation/networking/e100.rst
@@ -1,3 +1,4 @@
+==============================================================
 Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters
 ==============================================================
 

From 3be40e54764d3ac9c004dc1646ab2c4cc0f0905e Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Fri, 22 Jun 2018 10:37:06 +1000
Subject: [PATCH 197/215] Documentation: e1000: Use correct heading adornment

Recently documentation file was converted to rst.  The document title
has the incorrect heading adornment.  From kernel docs:

	* Please stick to this order of heading adornments:

	  1. ``=`` with overline for document title::

	       ==============
	       Document title
	       ==============

Add  overline heading adornment to document title.

Fixes commit (228046e76189 Documentation: e1000: Update kernel documentation)

CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Tobin C. Harding <me@tobin.cc>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/e1000.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/networking/e1000.rst b/Documentation/networking/e1000.rst
index 616848940e63..55f28e5043b6 100644
--- a/Documentation/networking/e1000.rst
+++ b/Documentation/networking/e1000.rst
@@ -1,3 +1,4 @@
+===========================================================
 Linux* Base Driver for Intel(R) Ethernet Network Connection
 ===========================================================
 

From 3b0c3ebe2a42ce18a59828acc4578166367dc7b5 Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Fri, 22 Jun 2018 10:37:07 +1000
Subject: [PATCH 198/215] Documentation: e100: Fix docs build error

Recent patch updated e100 docs to rst format.  Docs build (`make
htmldocs`) is currently failing due to this file with error:

	(SEVERE/4) Unexpected section title.

This is because a section of the file is indented 2 spaces.  Build error
can be cleared by aligning the text with column 0.  While we are changing
these lines we can make sure line length does not exceed 72, that
newlines following headings are uniform, and that full stops are
followed by two spaces.

Align text with column 0, limit line length to 72, ensure two spaces
follow all full stops, ensure uniform use of newlines after heading.

Fixes commit (85d63445f411 Documentation: e100: Update the Intel 10/100 driver doc)

CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Tobin C. Harding <me@tobin.cc>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/e100.rst | 111 +++++++++++++++---------------
 1 file changed, 56 insertions(+), 55 deletions(-)

diff --git a/Documentation/networking/e100.rst b/Documentation/networking/e100.rst
index 59b80608e27d..9708f5fa76de 100644
--- a/Documentation/networking/e100.rst
+++ b/Documentation/networking/e100.rst
@@ -87,83 +87,84 @@ Event Log Message Level:  The driver uses the message level flag to log events
 Additional Configurations
 =========================
 
-  Configuring the Driver on Different Distributions
-  -------------------------------------------------
+Configuring the Driver on Different Distributions
+-------------------------------------------------
 
-  Configuring a network driver to load properly when the system is started is
-  distribution dependent. Typically, the configuration process involves adding
-  an alias line to /etc/modprobe.d/*.conf as well as editing other system
-  startup scripts and/or configuration files.  Many popular Linux
-  distributions ship with tools to make these changes for you. To learn the
-  proper way to configure a network device for your system, refer to your
-  distribution documentation.  If during this process you are asked for the
-  driver or module name, the name for the Linux Base Driver for the Intel
-  PRO/100 Family of Adapters is e100.
+Configuring a network driver to load properly when the system is started
+is distribution dependent.  Typically, the configuration process involves
+adding an alias line to /etc/modprobe.d/*.conf as well as editing other
+system startup scripts and/or configuration files.  Many popular Linux
+distributions ship with tools to make these changes for you.  To learn
+the proper way to configure a network device for your system, refer to
+your distribution documentation.  If during this process you are asked
+for the driver or module name, the name for the Linux Base Driver for
+the Intel PRO/100 Family of Adapters is e100.
 
-  As an example, if you install the e100 driver for two PRO/100 adapters
-  (eth0 and eth1), add the following to a configuration file in /etc/modprobe.d/
+As an example, if you install the e100 driver for two PRO/100 adapters
+(eth0 and eth1), add the following to a configuration file in
+/etc/modprobe.d/::
 
        alias eth0 e100
        alias eth1 e100
 
-  Viewing Link Messages
-  ---------------------
-  In order to see link messages and other Intel driver information on your
-  console, you must set the dmesg level up to six. This can be done by
-  entering the following on the command line before loading the e100 driver::
+Viewing Link Messages
+---------------------
+
+In order to see link messages and other Intel driver information on your
+console, you must set the dmesg level up to six.  This can be done by
+entering the following on the command line before loading the e100
+driver::
 
        dmesg -n 6
 
-  If you wish to see all messages issued by the driver, including debug
-  messages, set the dmesg level to eight.
+If you wish to see all messages issued by the driver, including debug
+messages, set the dmesg level to eight.
 
-  NOTE: This setting is not saved across reboots.
+NOTE: This setting is not saved across reboots.
 
+ethtool
+-------
 
-  ethtool
-  -------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information.  The ethtool
+version 1.6 or later is required for this functionality.
 
-  The driver utilizes the ethtool interface for driver configuration and
-  diagnostics, as well as displaying statistical information.  The ethtool
-  version 1.6 or later is required for this functionality.
+The latest release of ethtool can be found from
+https://www.kernel.org/pub/software/network/ethtool/
 
-  The latest release of ethtool can be found from
-  https://www.kernel.org/pub/software/network/ethtool/
+Enabling Wake on LAN* (WoL)
+---------------------------
+WoL is provided through the ethtool* utility.  For instructions on
+enabling WoL with ethtool, refer to the ethtool man page.  WoL will be
+enabled on the system during the next shut down or reboot.  For this
+driver version, in order to enable WoL, the e100 driver must be loaded
+when shutting down or rebooting the system.
 
-  Enabling Wake on LAN* (WoL)
-  ---------------------------
-  WoL is provided through the ethtool* utility.  For instructions on enabling
-  WoL with ethtool, refer to the ethtool man page.
+NAPI
+----
 
-  WoL will be enabled on the system during the next shut down or reboot. For
-  this driver version, in order to enable WoL, the e100 driver must be
-  loaded when shutting down or rebooting the system.
+NAPI (Rx polling mode) is supported in the e100 driver.
 
-  NAPI
-  ----
+See https://wiki.linuxfoundation.org/networking/napi for more
+information on NAPI.
 
-  NAPI (Rx polling mode) is supported in the e100 driver.
+Multiple Interfaces on Same Ethernet Broadcast Network
+------------------------------------------------------
 
-  See https://wiki.linuxfoundation.org/networking/napi for more information
-  on NAPI.
+Due to the default ARP behavior on Linux, it is not possible to have one
+system on two IP networks in the same Ethernet broadcast domain
+(non-partitioned switch) behave as expected.  All Ethernet interfaces
+will respond to IP traffic for any IP address assigned to the system.
+This results in unbalanced receive traffic.
 
-  Multiple Interfaces on Same Ethernet Broadcast Network
-  ------------------------------------------------------
+If you have multiple interfaces in a server, either turn on ARP
+filtering by
 
-  Due to the default ARP behavior on Linux, it is not possible to have
-  one system on two IP networks in the same Ethernet broadcast domain
-  (non-partitioned switch) behave as expected. All Ethernet interfaces
-  will respond to IP traffic for any IP address assigned to the system.
-  This results in unbalanced receive traffic.
+(1) entering:: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+    (this only works if your kernel's version is higher than 2.4.5), or
 
-  If you have multiple interfaces in a server, either turn on ARP
-  filtering by
-
-  (1) entering:: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
-      (this only works if your kernel's version is higher than 2.4.5), or
-
-  (2) installing the interfaces in separate broadcast domains (either
-      in different switches or in a switch partitioned to VLANs).
+(2) installing the interfaces in separate broadcast domains (either
+    in different switches or in a switch partitioned to VLANs).
 
 
 Support

From 805f16a5f12fd68e10841013ccfaceb2f4d7066a Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <me@tobin.cc>
Date: Fri, 22 Jun 2018 10:37:08 +1000
Subject: [PATCH 199/215] Documentation: e1000: Fix docs build error

Recent patch updated e1000 docs to rst format.  Docs build (`make
htmldocs`) is currently failing due to this file with error:

        (SEVERE/4) Unexpected section title.

This is because a section of the file is indented 2 spaces.  Build error
can be cleared by aligning the text with column 0.  While we are changing
these lines we can make sure line length does not exceed 72, that
newlines following headings are uniform, and that full stops are
followed by two spaces.

Align text with column 0, limit line length to 72, ensure two spaces
follow all full stops, ensure uniform use of newlines after heading.

Fixes commit (228046e76189 Documentation: e1000: Update kernel documentation)

CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Tobin C. Harding <me@tobin.cc>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/e1000.rst | 75 +++++++++++++++---------------
 1 file changed, 38 insertions(+), 37 deletions(-)

diff --git a/Documentation/networking/e1000.rst b/Documentation/networking/e1000.rst
index 55f28e5043b6..144b87eef153 100644
--- a/Documentation/networking/e1000.rst
+++ b/Documentation/networking/e1000.rst
@@ -355,57 +355,58 @@ previously mentioned to force the adapter to the same speed and duplex.
 Additional Configurations
 =========================
 
-  Jumbo Frames
-  ------------
-  Jumbo Frames support is enabled by changing the MTU to a value larger than
-  the default of 1500.  Use the ifconfig command to increase the MTU size.
-  For example::
+Jumbo Frames
+------------
+Jumbo Frames support is enabled by changing the MTU to a value larger
+than the default of 1500.  Use the ifconfig command to increase the MTU
+size.  For example::
 
        ifconfig eth<x> mtu 9000 up
 
-  This setting is not saved across reboots.  It can be made permanent if
-  you add::
+This setting is not saved across reboots.  It can be made permanent if
+you add::
 
        MTU=9000
 
-   to the file /etc/sysconfig/network-scripts/ifcfg-eth<x>.  This example
-   applies to the Red Hat distributions; other distributions may store this
-   setting in a different location.
+to the file /etc/sysconfig/network-scripts/ifcfg-eth<x>.  This example
+applies to the Red Hat distributions; other distributions may store this
+setting in a different location.
 
-  Notes:
-  Degradation in throughput performance may be observed in some Jumbo frames
-  environments. If this is observed, increasing the application's socket buffer
-  size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help.
-  See the specific application manual and /usr/src/linux*/Documentation/
-  networking/ip-sysctl.txt for more details.
+Notes: Degradation in throughput performance may be observed in some
+Jumbo frames environments.  If this is observed, increasing the
+application's socket buffer size and/or increasing the
+/proc/sys/net/ipv4/tcp_*mem entry values may help.  See the specific
+application manual and /usr/src/linux*/Documentation/
+networking/ip-sysctl.txt for more details.
 
-  - The maximum MTU setting for Jumbo Frames is 16110.  This value coincides
-    with the maximum Jumbo Frames size of 16128.
+- The maximum MTU setting for Jumbo Frames is 16110.  This value
+  coincides with the maximum Jumbo Frames size of 16128.
 
-  - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
-    poor performance or loss of link.
+- Using Jumbo frames at 10 or 100 Mbps is not supported and may result
+  in poor performance or loss of link.
 
-  - Adapters based on the Intel(R) 82542 and 82573V/E controller do not
-    support Jumbo Frames. These correspond to the following product names:
-     Intel(R) PRO/1000 Gigabit Server Adapter
-     Intel(R) PRO/1000 PM Network Connection
+- Adapters based on the Intel(R) 82542 and 82573V/E controller do not
+  support Jumbo Frames.  These correspond to the following product names:
+  Intel(R) PRO/1000 Gigabit Server Adapter Intel(R) PRO/1000 PM Network
+  Connection
 
-  ethtool
-  -------
-  The driver utilizes the ethtool interface for driver configuration and
-  diagnostics, as well as displaying statistical information.  The ethtool
-  version 1.6 or later is required for this functionality.
+ethtool
+-------
+The driver utilizes the ethtool interface for driver configuration and
+diagnostics, as well as displaying statistical information.  The ethtool
+version 1.6 or later is required for this functionality.
 
-  The latest release of ethtool can be found from
-  https://www.kernel.org/pub/software/network/ethtool/
+The latest release of ethtool can be found from
+https://www.kernel.org/pub/software/network/ethtool/
 
-  Enabling Wake on LAN* (WoL)
-  ---------------------------
-  WoL is configured through the ethtool* utility.
+Enabling Wake on LAN* (WoL)
+---------------------------
+WoL is configured through the ethtool* utility.
+
+WoL will be enabled on the system during the next shut down or reboot.
+For this driver version, in order to enable WoL, the e1000 driver must be
+loaded when shutting down or rebooting the system.
 
-  WoL will be enabled on the system during the next shut down or reboot.
-  For this driver version, in order to enable WoL, the e1000 driver must be
-  loaded when shutting down or rebooting the system.
 
 Support
 =======

From 271f7ff5aa5a73488b7a9d8b84b5205fb5b2f7cc Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Fri, 22 Jun 2018 10:15:39 +0200
Subject: [PATCH 200/215] net: mvneta: fix the Rx desc DMA address in the Rx
 path

When using s/w buffer management, buffers are allocated and DMA mapped.
When doing so on an arm64 platform, an offset correction is applied on
the DMA address, before storing it in an Rx descriptor. The issue is
this DMA address is then used later in the Rx path without removing the
offset correction. Thus the DMA address is wrong, which can led to
various issues.

This patch fixes this by removing the offset correction from the DMA
address retrieved from the Rx descriptor before using it in the Rx path.

Fixes: 8d5047cf9ca2 ("net: mvneta: Convert to be 64 bits compatible")
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 17a904cc6a5e..0ad2f3f7da85 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1932,7 +1932,7 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 		rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
 		index = rx_desc - rxq->descs;
 		data = rxq->buf_virt_addr[index];
-		phys_addr = rx_desc->buf_phys_addr;
+		phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction;
 
 		if (!mvneta_rxq_desc_is_first_last(rx_status) ||
 		    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {

From c2cd650b5b2bd6b57f2f62be65b1a8a576d4de6d Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Fri, 22 Jun 2018 11:50:52 +0200
Subject: [PATCH 201/215] net: mscc: make sparse happy

This patch fixes a sparse warning about using an incorrect type in
argument 2 of ocelot_write_rix(), as an u32 was expected but a __be32
was given. The conversion to u32 is forced, which is safe as the value
will be written as-is in the hardware without any modification.

Fixes: 08d02364b12f ("net: mscc: fix the injection header")
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 52c57e0ff617..776a8a9be8e3 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -374,7 +374,8 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
 	ocelot_gen_ifh(ifh, &info);
 
 	for (i = 0; i < IFH_LEN; i++)
-		ocelot_write_rix(ocelot, cpu_to_be32(ifh[i]), QS_INJ_WR, grp);
+		ocelot_write_rix(ocelot, (__force u32)cpu_to_be32(ifh[i]),
+				 QS_INJ_WR, grp);
 
 	count = (skb->len + 3) / 4;
 	last = skb->len % 4;

From 935c5e3eafe8186dd13e1eda24ad82ecae942852 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 22 Jun 2018 12:08:13 +0200
Subject: [PATCH 202/215] MAINTAINERS: Add file patterns for dsa device tree
 bindings

Submitters of device tree binding documentation may forget to CC
the subsystem maintainer if this is missing.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index edf3cf5ea691..977f9a431f77 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9882,6 +9882,7 @@ M:	Andrew Lunn <andrew@lunn.ch>
 M:	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
 M:	Florian Fainelli <f.fainelli@gmail.com>
 S:	Maintained
+F:	Documentation/devicetree/bindings/net/dsa/
 F:	net/dsa/
 F:	include/net/dsa.h
 F:	include/linux/dsa/

From e020797b7def11c8feeb3ee5c1f48c12cb959def Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Fri, 22 Jun 2018 13:08:43 +0200
Subject: [PATCH 203/215] net: Remove depends on HAS_DMA in case of platform
 dependency

Remove dependencies on HAS_DMA where a Kconfig symbol depends on another
symbol that implies HAS_DMA, and, optionally, on "|| COMPILE_TEST".
In most cases this other symbol is an architecture or platform specific
symbol, or PCI.

Generic symbols and drivers without platform dependencies keep their
dependencies on HAS_DMA, to prevent compiling subsystems or drivers that
cannot work anyway.

This simplifies the dependencies, and allows to improve compile-testing.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Mark Brown <broonie@kernel.org>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/Kconfig                | 2 +-
 drivers/net/ethernet/apm/xgene-v2/Kconfig       | 1 -
 drivers/net/ethernet/apm/xgene/Kconfig          | 1 -
 drivers/net/ethernet/arc/Kconfig                | 6 ++++--
 drivers/net/ethernet/broadcom/Kconfig           | 2 --
 drivers/net/ethernet/calxeda/Kconfig            | 2 +-
 drivers/net/ethernet/hisilicon/Kconfig          | 2 +-
 drivers/net/ethernet/marvell/Kconfig            | 8 +++-----
 drivers/net/ethernet/mellanox/mlxsw/Kconfig     | 2 +-
 drivers/net/ethernet/renesas/Kconfig            | 2 --
 drivers/net/wireless/broadcom/brcm80211/Kconfig | 1 -
 drivers/net/wireless/quantenna/qtnfmac/Kconfig  | 2 +-
 12 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index d5c15e8bb3de..f273af136fc7 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -173,7 +173,7 @@ config SUNLANCE
 
 config AMD_XGBE
 	tristate "AMD 10GbE Ethernet driver"
-	depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM && HAS_DMA
+	depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM
 	depends on X86 || ARM64 || COMPILE_TEST
 	select BITREVERSE
 	select CRC32
diff --git a/drivers/net/ethernet/apm/xgene-v2/Kconfig b/drivers/net/ethernet/apm/xgene-v2/Kconfig
index 1205861b6318..eedd3f3dd22e 100644
--- a/drivers/net/ethernet/apm/xgene-v2/Kconfig
+++ b/drivers/net/ethernet/apm/xgene-v2/Kconfig
@@ -1,6 +1,5 @@
 config NET_XGENE_V2
 	tristate "APM X-Gene SoC Ethernet-v2 Driver"
-	depends on HAS_DMA
 	depends on ARCH_XGENE || COMPILE_TEST
 	help
 	  This is the Ethernet driver for the on-chip ethernet interface
diff --git a/drivers/net/ethernet/apm/xgene/Kconfig b/drivers/net/ethernet/apm/xgene/Kconfig
index afccb033177b..e4e33c900b57 100644
--- a/drivers/net/ethernet/apm/xgene/Kconfig
+++ b/drivers/net/ethernet/apm/xgene/Kconfig
@@ -1,6 +1,5 @@
 config NET_XGENE
 	tristate "APM X-Gene SoC Ethernet Driver"
-	depends on HAS_DMA
 	depends on ARCH_XGENE || COMPILE_TEST
 	select PHYLIB
 	select MDIO_XGENE
diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig
index e743ddf46343..5d0ab8e74b68 100644
--- a/drivers/net/ethernet/arc/Kconfig
+++ b/drivers/net/ethernet/arc/Kconfig
@@ -24,7 +24,8 @@ config ARC_EMAC_CORE
 config ARC_EMAC
 	tristate "ARC EMAC support"
 	select ARC_EMAC_CORE
-	depends on OF_IRQ && OF_NET && HAS_DMA && (ARC || COMPILE_TEST)
+	depends on OF_IRQ && OF_NET
+	depends on ARC || COMPILE_TEST
 	---help---
 	  On some legacy ARC (Synopsys) FPGA boards such as ARCAngel4/ML50x
 	  non-standard on-chip ethernet device ARC EMAC 10/100 is used.
@@ -33,7 +34,8 @@ config ARC_EMAC
 config EMAC_ROCKCHIP
 	tristate "Rockchip EMAC support"
 	select ARC_EMAC_CORE
-	depends on OF_IRQ && OF_NET && REGULATOR && HAS_DMA && (ARCH_ROCKCHIP || COMPILE_TEST)
+	depends on OF_IRQ && OF_NET && REGULATOR
+	depends on ARCH_ROCKCHIP || COMPILE_TEST
 	---help---
 	  Support for Rockchip RK3036/RK3066/RK3188 EMAC ethernet controllers.
 	  This selects Rockchip SoC glue layer support for the
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index af75156919ed..4c3bfde6e8de 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -157,7 +157,6 @@ config BGMAC
 config BGMAC_BCMA
 	tristate "Broadcom iProc GBit BCMA support"
 	depends on BCMA && BCMA_HOST_SOC
-	depends on HAS_DMA
 	depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST
 	select BGMAC
 	select PHYLIB
@@ -170,7 +169,6 @@ config BGMAC_BCMA
 
 config BGMAC_PLATFORM
 	tristate "Broadcom iProc GBit platform support"
-	depends on HAS_DMA
 	depends on ARCH_BCM_IPROC || COMPILE_TEST
 	depends on OF
 	select BGMAC
diff --git a/drivers/net/ethernet/calxeda/Kconfig b/drivers/net/ethernet/calxeda/Kconfig
index 07d2201530d2..9fdd496b90ff 100644
--- a/drivers/net/ethernet/calxeda/Kconfig
+++ b/drivers/net/ethernet/calxeda/Kconfig
@@ -1,6 +1,6 @@
 config NET_CALXEDA_XGMAC
 	tristate "Calxeda 1G/10G XGMAC Ethernet driver"
-	depends on HAS_IOMEM && HAS_DMA
+	depends on HAS_IOMEM
 	depends on ARCH_HIGHBANK || COMPILE_TEST
 	select CRC32
 	help
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 8bcf470ff5f3..fb1a7251f45d 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -5,7 +5,7 @@
 config NET_VENDOR_HISILICON
 	bool "Hisilicon devices"
 	default y
-	depends on (OF || ACPI) && HAS_DMA
+	depends on OF || ACPI
 	depends on ARM || ARM64 || COMPILE_TEST
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index cc2f7701e71e..f33fd22b351c 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -18,8 +18,8 @@ if NET_VENDOR_MARVELL
 
 config MV643XX_ETH
 	tristate "Marvell Discovery (643XX) and Orion ethernet support"
-	depends on (MV64X60 || PPC32 || PLAT_ORION || COMPILE_TEST) && INET
-	depends on HAS_DMA
+	depends on MV64X60 || PPC32 || PLAT_ORION || COMPILE_TEST
+	depends on INET
 	select PHYLIB
 	select MVMDIO
 	---help---
@@ -58,7 +58,6 @@ config MVNETA_BM_ENABLE
 config MVNETA
 	tristate "Marvell Armada 370/38x/XP/37xx network interface support"
 	depends on ARCH_MVEBU || COMPILE_TEST
-	depends on HAS_DMA
 	select MVMDIO
 	select PHYLINK
 	---help---
@@ -84,7 +83,6 @@ config MVNETA_BM
 config MVPP2
 	tristate "Marvell Armada 375/7K/8K network interface support"
 	depends on ARCH_MVEBU || COMPILE_TEST
-	depends on HAS_DMA
 	select MVMDIO
 	select PHYLINK
 	---help---
@@ -93,7 +91,7 @@ config MVPP2
 
 config PXA168_ETH
 	tristate "Marvell pxa168 ethernet support"
-	depends on HAS_IOMEM && HAS_DMA
+	depends on HAS_IOMEM
 	depends on CPU_PXA168 || ARCH_BERLIN || COMPILE_TEST
 	select PHYLIB
 	---help---
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index f4d9c9975ac3..82827a8d3d67 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -30,7 +30,7 @@ config MLXSW_CORE_THERMAL
 
 config MLXSW_PCI
 	tristate "PCI bus implementation for Mellanox Technologies Switch ASICs"
-	depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE
+	depends on PCI && HAS_IOMEM && MLXSW_CORE
 	default m
 	---help---
 	  This is PCI bus implementation for Mellanox Technologies Switch ASICs.
diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig
index 27be51f0a421..f3f7477043ce 100644
--- a/drivers/net/ethernet/renesas/Kconfig
+++ b/drivers/net/ethernet/renesas/Kconfig
@@ -17,7 +17,6 @@ if NET_VENDOR_RENESAS
 
 config SH_ETH
 	tristate "Renesas SuperH Ethernet support"
-	depends on HAS_DMA
 	depends on ARCH_RENESAS || SUPERH || COMPILE_TEST
 	select CRC32
 	select MII
@@ -31,7 +30,6 @@ config SH_ETH
 
 config RAVB
 	tristate "Renesas Ethernet AVB support"
-	depends on HAS_DMA
 	depends on ARCH_RENESAS || COMPILE_TEST
 	select CRC32
 	select MII
diff --git a/drivers/net/wireless/broadcom/brcm80211/Kconfig b/drivers/net/wireless/broadcom/brcm80211/Kconfig
index 9d99eb42d917..6acba67bca07 100644
--- a/drivers/net/wireless/broadcom/brcm80211/Kconfig
+++ b/drivers/net/wireless/broadcom/brcm80211/Kconfig
@@ -60,7 +60,6 @@ config BRCMFMAC_PCIE
 	bool "PCIE bus interface support for FullMAC driver"
 	depends on BRCMFMAC
 	depends on PCI
-	depends on HAS_DMA
 	select BRCMFMAC_PROTO_MSGBUF
 	select FW_LOADER
 	---help---
diff --git a/drivers/net/wireless/quantenna/qtnfmac/Kconfig b/drivers/net/wireless/quantenna/qtnfmac/Kconfig
index 025fa6018550..8d1492a90bd1 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/Kconfig
+++ b/drivers/net/wireless/quantenna/qtnfmac/Kconfig
@@ -7,7 +7,7 @@ config QTNFMAC
 config QTNFMAC_PEARL_PCIE
 	tristate "Quantenna QSR10g PCIe support"
 	default n
-	depends on HAS_DMA && PCI && CFG80211
+	depends on PCI && CFG80211
 	select QTNFMAC
 	select FW_LOADER
 	select CRC32

From 74174fe5634ffbf645a7ca5a261571f700b2f332 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 22 Jun 2018 06:44:14 -0700
Subject: [PATCH 204/215] net: dccp: avoid crash in ccid3_hc_rx_send_feedback()

On fast hosts or malicious bots, we trigger a DCCP_BUG() which
seems excessive.

syzbot reported :

BUG: delta (-6195) <= 0 at net/dccp/ccids/ccid3.c:628/ccid3_hc_rx_send_feedback()
CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.18.0-rc1+ #112
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 ccid3_hc_rx_send_feedback net/dccp/ccids/ccid3.c:628 [inline]
 ccid3_hc_rx_packet_recv.cold.16+0x38/0x71 net/dccp/ccids/ccid3.c:793
 ccid_hc_rx_packet_recv net/dccp/ccid.h:185 [inline]
 dccp_deliver_input_to_ccids+0xf0/0x280 net/dccp/input.c:180
 dccp_rcv_established+0x87/0xb0 net/dccp/input.c:378
 dccp_v4_do_rcv+0x153/0x180 net/dccp/ipv4.c:654
 sk_backlog_rcv include/net/sock.h:914 [inline]
 __sk_receive_skb+0x3ba/0xd80 net/core/sock.c:517
 dccp_v4_rcv+0x10f9/0x1f58 net/dccp/ipv4.c:875
 ip_local_deliver_finish+0x2eb/0xda0 net/ipv4/ip_input.c:215
 NF_HOOK include/linux/netfilter.h:287 [inline]
 ip_local_deliver+0x1e9/0x750 net/ipv4/ip_input.c:256
 dst_input include/net/dst.h:450 [inline]
 ip_rcv_finish+0x823/0x2220 net/ipv4/ip_input.c:396
 NF_HOOK include/linux/netfilter.h:287 [inline]
 ip_rcv+0xa18/0x1284 net/ipv4/ip_input.c:492
 __netif_receive_skb_core+0x2488/0x3680 net/core/dev.c:4628
 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693
 process_backlog+0x219/0x760 net/core/dev.c:5373
 napi_poll net/core/dev.c:5771 [inline]
 net_rx_action+0x7da/0x1980 net/core/dev.c:5837
 __do_softirq+0x2e8/0xb17 kernel/softirq.c:284
 run_ksoftirqd+0x86/0x100 kernel/softirq.c:645
 smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164
 kthread+0x345/0x410 kernel/kthread.c:240
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Cc: dccp@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 8b5ba6dffac7..d57a2be1e2e0 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -625,9 +625,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 	case CCID3_FBACK_PERIODIC:
 		delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback);
 		if (delta <= 0)
-			DCCP_BUG("delta (%ld) <= 0", (long)delta);
-		else
-			hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
+			delta = 1;
+		hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
 		break;
 	default:
 		return;

From 0ce4e70ff00662ad7490e545ba0cd8c1fa179fca Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 22 Jun 2018 06:44:15 -0700
Subject: [PATCH 205/215] net: dccp: switch rx_tstamp_last_feedback to
 monotonic clock

To compute delays, better not use time of the day which can
be changed by admins or malicious programs.

Also change ccid3_first_li() to use s64 type for delta variable
to avoid potential overflows.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Cc: dccp@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d57a2be1e2e0..12877a1514e7 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -600,7 +600,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 {
 	struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	ktime_t now = ktime_get_real();
+	ktime_t now = ktime_get();
 	s64 delta = 0;
 
 	switch (fbtype) {
@@ -632,7 +632,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
 		return;
 	}
 
-	ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
+	ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta,
 		       hc->rx_x_recv, hc->rx_pinv);
 
 	hc->rx_tstamp_last_feedback = now;
@@ -679,7 +679,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 static u32 ccid3_first_li(struct sock *sk)
 {
 	struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
-	u32 x_recv, p, delta;
+	u32 x_recv, p;
+	s64 delta;
 	u64 fval;
 
 	if (hc->rx_rtt == 0) {
@@ -687,7 +688,9 @@ static u32 ccid3_first_li(struct sock *sk)
 		hc->rx_rtt = DCCP_FALLBACK_RTT;
 	}
 
-	delta  = ktime_to_us(net_timedelta(hc->rx_tstamp_last_feedback));
+	delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback);
+	if (delta <= 0)
+		delta = 1;
 	x_recv = scaled_div32(hc->rx_bytes_recv, delta);
 	if (x_recv == 0) {		/* would also trigger divide-by-zero */
 		DCCP_WARN("X_recv==0\n");

From 35b42da69e35536da603a50e40aa6c41b2f7b0f8 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 22 Jun 2018 14:33:16 -0700
Subject: [PATCH 206/215] net_sched: remove a bogus warning in hfsc

In update_vf():

  cftree_remove(cl);
  update_cfmin(cl->cl_parent);

the cl_cfmin of cl->cl_parent is intentionally updated to 0
when that parent only has one child. And if this parent is
root qdisc, we could end up, in hfsc_schedule_watchdog(),
that we can't decide the next schedule time for qdisc watchdog.
But it seems safe that we can just skip it, as this watchdog is
not always scheduled anyway.

Thanks to Marco for testing all the cases, nothing is broken.

Reported-by: Marco Berizzi <pupilla@libero.it>
Tested-by: Marco Berizzi <pupilla@libero.it>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hfsc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ae9877ea205..3278a76f6861 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1385,8 +1385,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
 		if (next_time == 0 || next_time > q->root.cl_cfmin)
 			next_time = q->root.cl_cfmin;
 	}
-	WARN_ON(next_time == 0);
-	qdisc_watchdog_schedule(&q->watchdog, next_time);
+	if (next_time)
+		qdisc_watchdog_schedule(&q->watchdog, next_time);
 }
 
 static int

From 51be1335151771075dcb19f3464ca9f331134285 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Sat, 23 Jun 2018 01:08:40 +0300
Subject: [PATCH 207/215] Revert "x86/mm: Mark __pgtable_l5_enabled __initdata"

This reverts commit e4e961e36f063484c48bed919013c106d178995d.

We need to use early version of pgtable_l5_enabled() in
early_identify_cpu() as this code runs before cpu_feature_enabled() is
usable.

But it leads to section mismatch:

cpu_init()
  load_mm_ldt()
    ldt_slot_va()
      LDT_BASE_ADDR
        LDT_PGD_ENTRY
	  pgtable_l5_enabled()
	    __pgtable_l5_enabled

__pgtable_l5_enabled marked as __initdata, but cpu_init() is not __init.

It's fixable: early code can be isolated into a separate translation unit,
but such change collides with other work in the area.  That's too much
hassle to save 4 bytes of memory.

Return __pgtable_l5_enabled back to be __ro_after_init.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180622220841.54135-2-kirill.shutemov@linux.intel.com
---
 arch/x86/kernel/head64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index a21d6ace648e..8047379e575a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -44,7 +44,7 @@ static unsigned int __initdata next_early_pgt;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
 
 #ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled __initdata;
+unsigned int __pgtable_l5_enabled __ro_after_init;
 unsigned int pgdir_shift __ro_after_init = 39;
 EXPORT_SYMBOL(pgdir_shift);
 unsigned int ptrs_per_p4d __ro_after_init = 1;

From 2458e53ff74cd1063ed3e00459da1d35c559d369 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Sat, 23 Jun 2018 01:08:41 +0300
Subject: [PATCH 208/215] x86/mm: Fix 'no5lvl' handling

early_identify_cpu() has to use early version of pgtable_l5_enabled()
that doesn't rely on cpu_feature_enabled().

Defining USE_EARLY_PGTABLE_L5 before all includes does the trick.

I lost the define in one of reworks of the original patch.

Fixes: 372fddf70904 ("x86/mm: Introduce the 'no5lvl' kernel parameter")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lkml.kernel.org/r/20180622220841.54135-3-kirill.shutemov@linux.intel.com
---
 arch/x86/kernel/cpu/common.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0df7151cfef4..eb4cb3efd20e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,3 +1,6 @@
+/* cpu_feature_enabled() cannot be used this early */
+#define USE_EARLY_PGTABLE_L5
+
 #include <linux/bootmem.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>

From f5e350f021e04ea41d2e5d58487c33b05ba3d25b Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@wdc.com>
Date: Fri, 22 Jun 2018 13:18:09 -0700
Subject: [PATCH 209/215] blk-mq: Fix timeout handling in case the timeout
 handler returns BLK_EH_DONE

Make sure that RQF_TIMED_OUT is cleared when a request is reused
after a block driver timeout handler has returned BLK_EH_DONE.

Fixes: da6612673988 ("blk-mq: don't time out requests again that are in the timeout handler")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Cc: Andrew Randrianasulu <randrianasulu@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c      | 1 -
 block/blk-timeout.c | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8e57b84e50e9..b6888ff556cf 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -781,7 +781,6 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 		WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
 	}
 
-	req->rq_flags &= ~RQF_TIMED_OUT;
 	blk_add_timer(req);
 }
 
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 4b8a48d48ba1..f2cfd56e1606 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -210,6 +210,7 @@ void blk_add_timer(struct request *req)
 	if (!req->timeout)
 		req->timeout = q->rq_timeout;
 
+	req->rq_flags &= ~RQF_TIMED_OUT;
 	blk_rq_set_deadline(req, jiffies + req->timeout);
 
 	/*

From 5ce36338a30f9814fc4824f9fe6c20cd83d872c7 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Sat, 23 Jun 2018 20:28:26 +0530
Subject: [PATCH 210/215] cxgb4: when disabling dcb set txq dcb priority to 0

When we are disabling DCB, store "0" in txq->dcb_prio
since that's used for future TX Work Request "OVLAN_IDX"
values. Setting non zero priority upon disabling DCB
would halt the traffic.

Reported-by: AMG Zollner Robert <robert@cloudmedia.eu>
CC: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: Casey Leedom <leedom@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index dd04a2f89ce6..bc03c175a3cd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -263,7 +263,7 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable)
 				"Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n",
 				enable ? "set" : "unset", pi->port_id, i, -err);
 		else
-			txq->dcb_prio = value;
+			txq->dcb_prio = enable ? value : 0;
 	}
 }
 

From e7e197edd09c25774b4f12cab19f9d5462f240f4 Mon Sep 17 00:00:00 2001
From: Aleksander Morgado <aleksander@aleksander.es>
Date: Sat, 23 Jun 2018 23:22:52 +0200
Subject: [PATCH 211/215] qmi_wwan: add support for the Dell Wireless 5821e
 module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This module exposes two USB configurations: a QMI+AT capable setup on
USB config #1 and a MBIM capable setup on USB config #2.

By default the kernel will choose the MBIM capable configuration as
long as the cdc_mbim driver is available. This patch adds support for
the QMI port in the secondary configuration.

Signed-off-by: Aleksander Morgado <aleksander@aleksander.es>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 8e8b51f171f4..8fac8e132c5b 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1246,6 +1246,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x413c, 0x81b3, 8)},	/* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
 	{QMI_FIXED_INTF(0x413c, 0x81b6, 8)},	/* Dell Wireless 5811e */
 	{QMI_FIXED_INTF(0x413c, 0x81b6, 10)},	/* Dell Wireless 5811e */
+	{QMI_FIXED_INTF(0x413c, 0x81d7, 1)},	/* Dell Wireless 5821e */
 	{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)},	/* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
 	{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)},	/* HP lt4120 Snapdragon X5 LTE */
 	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */

From 2e6eb40ca5eb53836d18f3b9ac61ff2e0b417038 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 23 Jun 2018 23:19:03 +0200
Subject: [PATCH 212/215] efi/x86: Fix incorrect invocation of
 PciIo->Attributes()

The following commit:

  2c3625cb9fa2 ("efi/x86: Fold __setup_efi_pci32() and __setup_efi_pci64() into one function")

... merged the two versions of __setup_efi_pciXX(), without taking into
account that the 32-bit version used a rather dodgy trick to pass an
immediate 0 constant as argument for a uint64_t parameter.

The issue is caused by the fact that on x86, UEFI protocol method calls
are redirected via struct efi_config::call(), which is a variadic function,
and so the compiler has to infer the types of the parameters from the
arguments rather than from the prototype.

As the 32-bit x86 calling convention passes arguments via the stack,
passing the unqualified constant 0 twice is the same as passing 0ULL,
which is why the 32-bit code in __setup_efi_pci32() contained the
following call:

  status = efi_early->call(pci->attributes, pci,
                           EfiPciIoAttributeOperationGet, 0, 0,
                           &attributes);

to invoke this UEFI protocol method:

  typedef
  EFI_STATUS
  (EFIAPI *EFI_PCI_IO_PROTOCOL_ATTRIBUTES) (
    IN  EFI_PCI_IO_PROTOCOL                     *This,
    IN  EFI_PCI_IO_PROTOCOL_ATTRIBUTE_OPERATION Operation,
    IN  UINT64                                  Attributes,
    OUT UINT64                                  *Result OPTIONAL
    );

After the merge, we inadvertently ended up with this version for both
32-bit and 64-bit builds, breaking the latter.

So replace the two zeroes with the explicitly typed constant 0ULL,
which works as expected on both 32-bit and 64-bit builds.

Wilfried tested the 64-bit build, and I checked the generated assembly
of a 32-bit build with and without this patch, and they are identical.

Reported-by: Wilfried Klaebe <linux-kernel@lebenslange-mailadresse.de>
Tested-by: Wilfried Klaebe <linux-kernel@lebenslange-mailadresse.de>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: hdegoede@redhat.com
Cc: linux-efi@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/boot/compressed/eboot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index a8a8642d2b0b..e57665b4ba1c 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -118,7 +118,7 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
 	void *romimage;
 
 	status = efi_call_proto(efi_pci_io_protocol, attributes, pci,
-				EfiPciIoAttributeOperationGet, 0, 0,
+				EfiPciIoAttributeOperationGet, 0ULL,
 				&attributes);
 	if (status != EFI_SUCCESS)
 		return status;

From 3531456aba6c8a8c905730af96dbb83608538b71 Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Sun, 24 Jun 2018 18:14:01 +0530
Subject: [PATCH 213/215] strparser: Corrected typo in documentation.

Replaced strp_pause() with strp_unpause() to correct a seemingly copy
paste documentation mistake.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/strparser.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt
index 13081b3decef..a7d354ddda7b 100644
--- a/Documentation/networking/strparser.txt
+++ b/Documentation/networking/strparser.txt
@@ -48,7 +48,7 @@ void strp_pause(struct strparser *strp)
      Temporarily pause a stream parser. Message parsing is suspended
      and no new messages are delivered to the upper layer.
 
-void strp_pause(struct strparser *strp)
+void strp_unpause(struct strparser *strp)
 
      Unpause a paused stream parser.
 

From 7daf201d7fe8334e2d2364d4e8ed3394ec9af819 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 24 Jun 2018 20:54:29 +0800
Subject: [PATCH 214/215] Linux 4.18-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index ca2af1ab91eb..c9132594860b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 18
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Merciless Moray
 
 # *DOCUMENTATION*

From 829eb05365ff06e8adc23f2541597d0cc3c18348 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 24 Jun 2018 11:57:31 +0100
Subject: [PATCH 215/215] sfc: make function efx_rps_hash_bucket static

The function efx_rps_hash_bucket is local to the source and
does not need to be in global scope, so make it static.

Cleans up sparse warning:
symbol 'efx_rps_hash_bucket' was not declared. Should it be static?

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/efx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index ad4a354ce570..570ec72266f3 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -3180,6 +3180,7 @@ bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
 	return true;
 }
 
+static
 struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
 				       const struct efx_filter_spec *spec)
 {