From 7ae69d2aa4ed3ee8cef18a072346366f019d6a4a Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 13 Jan 2006 10:03:58 -0800
Subject: [PATCH 01/16] [IA64] Add stub entry to fsys.S for sys_migrate_pages

When this new syscall was added to ia64 in commit

  39743889aaf76725152f16aa90ca3c45f6d52da3

fsys.S was forgotten.  Add a ".data8 0" there to keep
it in step.  [Reported by Stephane Eranian]

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/fsys.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 2ddbac6f4999..ce423910ca97 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -903,5 +903,6 @@ fsyscall_table:
 	data8 0
 	data8 0
 	data8 0
+	data8 0							// 1280
 
 	.org fsyscall_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls

From 246c7e33d51afe99890b2caab7ad482c0296d5ba Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Thu, 22 Dec 2005 14:32:56 -0600
Subject: [PATCH 02/16] [IA64-SGI] ensure XPC disengage request is processed

This patch fixes a problem in XPC disengage processing whereby it was not
seeing the request to disengage from a remote partition, so the disengage
wasn't happening. The disengagement is supposed to transpire during the time
an XPC channel is disconnecting, and should be completed before the channel
is declared to be disconnected.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc.h           |  2 +-
 arch/ia64/sn/kernel/xpc_channel.c   | 20 ++++++++++++--------
 arch/ia64/sn/kernel/xpc_main.c      |  2 +-
 arch/ia64/sn/kernel/xpc_partition.c |  3 ++-
 include/asm-ia64/sn/xp.h            |  4 +++-
 5 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index 5483a9f227d4..66b17b6aa81f 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -707,7 +707,7 @@ extern void xpc_connected_callout(struct xpc_channel *);
 extern void xpc_deliver_msg(struct xpc_channel *);
 extern void xpc_disconnect_channel(const int, struct xpc_channel *,
 					enum xpc_retval, unsigned long *);
-extern void xpc_disconnecting_callout(struct xpc_channel *);
+extern void xpc_disconnect_callout(struct xpc_channel *, enum xpc_retval);
 extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
 extern void xpc_teardown_infrastructure(struct xpc_partition *);
 
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index abf4fc2a87bb..272ab4deb573 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -779,6 +779,12 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 
 	/* both sides are disconnected now */
 
+	if (ch->flags & XPC_C_CONNECTCALLOUT) {
+		spin_unlock_irqrestore(&ch->lock, *irq_flags);
+		xpc_disconnect_callout(ch, xpcDisconnected);
+		spin_lock_irqsave(&ch->lock, *irq_flags);
+	}
+
 	/* it's now safe to free the channel's message queues */
 	xpc_free_msgqueues(ch);
 
@@ -1645,7 +1651,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
 
 
 void
-xpc_disconnecting_callout(struct xpc_channel *ch)
+xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
 {
 	/*
 	 * Let the channel's registerer know that the channel is being
@@ -1654,15 +1660,13 @@ xpc_disconnecting_callout(struct xpc_channel *ch)
 	 */
 
 	if (ch->func != NULL) {
-		dev_dbg(xpc_chan, "ch->func() called, reason=xpcDisconnecting,"
-			" partid=%d, channel=%d\n", ch->partid, ch->number);
+		dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
 
-		ch->func(xpcDisconnecting, ch->partid, ch->number, NULL,
-								ch->key);
+		ch->func(reason, ch->partid, ch->number, NULL, ch->key);
 
-		dev_dbg(xpc_chan, "ch->func() returned, reason="
-			"xpcDisconnecting, partid=%d, channel=%d\n",
-			ch->partid, ch->number);
+		dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
+			"channel=%d\n", reason, ch->partid, ch->number);
 	}
 }
 
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index b617236524c6..6708ef6e0618 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -773,7 +773,7 @@ xpc_daemonize_kthread(void *args)
 			ch->flags |= XPC_C_DISCONNECTCALLOUT;
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 
-			xpc_disconnecting_callout(ch);
+			xpc_disconnect_callout(ch, xpcDisconnecting);
 		} else {
 			spin_unlock_irqrestore(&ch->lock, irq_flags);
 		}
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index cdd6431853a1..cf02a9bcd245 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -771,7 +771,8 @@ xpc_identify_act_IRQ_req(int nasid)
 		}
 	}
 
-	if (!xpc_partition_disengaged(part)) {
+	if (part->disengage_request_timeout > 0 &&
+					!xpc_partition_disengaged(part)) {
 		/* still waiting on other side to disengage from us */
 		return;
 	}
diff --git a/include/asm-ia64/sn/xp.h b/include/asm-ia64/sn/xp.h
index 49faf8f26430..203945ae034e 100644
--- a/include/asm-ia64/sn/xp.h
+++ b/include/asm-ia64/sn/xp.h
@@ -227,7 +227,9 @@ enum xpc_retval {
 
 	xpcOpenCloseError,	/* 50: channel open/close protocol error */
 
-	xpcUnknownReason	/* 51: unknown reason -- must be last in list */
+	xpcDisconnected,	/* 51: channel disconnected (closed) */
+
+	xpcUnknownReason	/* 52: unknown reason -- must be last in list */
 };
 
 

From 1ecaded80f94f2779160529aef7d6f37a22c2f60 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Fri, 6 Jan 2006 09:48:21 -0600
Subject: [PATCH 03/16] [IA64-SGI] cleanup XPC disengage related messages

Cleanup the XPC disengage related messages that are printed to the log.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc.h           |  1 +
 arch/ia64/sn/kernel/xpc_main.c      | 89 +++++++++++++++++++++--------
 arch/ia64/sn/kernel/xpc_partition.c |  3 +
 3 files changed, 69 insertions(+), 24 deletions(-)

diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index 66b17b6aa81f..82e7430be789 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -663,6 +663,7 @@ extern struct xpc_registration xpc_registrations[];
 extern struct device *xpc_part;
 extern struct device *xpc_chan;
 extern int xpc_disengage_request_timelimit;
+extern int xpc_disengage_request_timedout;
 extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *);
 extern void xpc_dropped_IPI_check(struct xpc_partition *);
 extern void xpc_activate_partition(struct xpc_partition *);
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 6708ef6e0618..948206b13f68 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -162,6 +162,8 @@ static ctl_table xpc_sys_dir[] = {
 };
 static struct ctl_table_header *xpc_sysctl;
 
+/* non-zero if any remote partition disengage request was timed out */
+int xpc_disengage_request_timedout;
 
 /* #of IRQs received */
 static atomic_t xpc_act_IRQ_rcvd;
@@ -921,9 +923,9 @@ static void
 xpc_do_exit(enum xpc_retval reason)
 {
 	partid_t partid;
-	int active_part_count;
+	int active_part_count, printed_waiting_msg = 0;
 	struct xpc_partition *part;
-	unsigned long printmsg_time;
+	unsigned long printmsg_time, disengage_request_timeout = 0;
 
 
 	/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
@@ -953,7 +955,8 @@ xpc_do_exit(enum xpc_retval reason)
 
 	/* wait for all partitions to become inactive */
 
-	printmsg_time = jiffies;
+	printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+	xpc_disengage_request_timedout = 0;
 
 	do {
 		active_part_count = 0;
@@ -969,20 +972,39 @@ xpc_do_exit(enum xpc_retval reason)
 			active_part_count++;
 
 			XPC_DEACTIVATE_PARTITION(part, reason);
+
+			if (part->disengage_request_timeout >
+						disengage_request_timeout) {
+				disengage_request_timeout =
+						part->disengage_request_timeout;
+			}
 		}
 
-		if (active_part_count == 0) {
-			break;
-		}
-
-		if (jiffies >= printmsg_time) {
-			dev_info(xpc_part, "waiting for partitions to "
-				"deactivate/disengage, active count=%d, remote "
-				"engaged=0x%lx\n", active_part_count,
-				xpc_partition_engaged(1UL << partid));
-
-			printmsg_time = jiffies +
+		if (xpc_partition_engaged(-1UL)) {
+			if (time_after(jiffies, printmsg_time)) {
+				dev_info(xpc_part, "waiting for remote "
+					"partitions to disengage, timeout in "
+					"%ld seconds\n",
+					(disengage_request_timeout - jiffies)
+									/ HZ);
+				printmsg_time = jiffies +
 					(XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+				printed_waiting_msg = 1;
+			}
+
+		} else if (active_part_count > 0) {
+			if (printed_waiting_msg) {
+				dev_info(xpc_part, "waiting for local partition"
+					" to disengage\n");
+				printed_waiting_msg = 0;
+			}
+
+		} else {
+			if (!xpc_disengage_request_timedout) {
+				dev_info(xpc_part, "all partitions have "
+					"disengaged\n");
+			}
+			break;
 		}
 
 		/* sleep for a 1/3 of a second or so */
@@ -1028,7 +1050,7 @@ xpc_die_disengage(void)
 	struct xpc_partition *part;
 	partid_t partid;
 	unsigned long engaged;
-	long time, print_time, disengage_request_timeout;
+	long time, printmsg_time, disengage_request_timeout;
 
 
 	/* keep xpc_hb_checker thread from doing anything (just in case) */
@@ -1055,24 +1077,43 @@ xpc_die_disengage(void)
 		}
 	}
 
-	print_time = rtc_time();
-	disengage_request_timeout = print_time +
+	time = rtc_time();
+	printmsg_time = time +
+		(XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
+	disengage_request_timeout = time +
 		(xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
 
 	/* wait for all other partitions to disengage from us */
 
-	while ((engaged = xpc_partition_engaged(-1UL)) &&
-			(time = rtc_time()) < disengage_request_timeout) {
+	while (1) {
+		engaged = xpc_partition_engaged(-1UL);
+		if (!engaged) {
+			dev_info(xpc_part, "all partitions have disengaged\n");
+			break;
+		}
 
-		if (time >= print_time) {
+		time = rtc_time();
+		if (time >= disengage_request_timeout) {
+			for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+				if (engaged & (1UL << partid)) {
+					dev_info(xpc_part, "disengage from "
+						"remote partition %d timed "
+						"out\n", partid);
+				}
+			}
+			break;
+		}
+
+		if (time >= printmsg_time) {
 			dev_info(xpc_part, "waiting for remote partitions to "
-				"disengage, engaged=0x%lx\n", engaged);
-			print_time = time + (XPC_DISENGAGE_PRINTMSG_INTERVAL *
+				"disengage, timeout in %ld seconds\n",
+				(disengage_request_timeout - time) /
+						sn_rtc_cycles_per_second);
+			printmsg_time = time +
+					(XPC_DISENGAGE_PRINTMSG_INTERVAL *
 						sn_rtc_cycles_per_second);
 		}
 	}
-	dev_info(xpc_part, "finished waiting for remote partitions to "
-				"disengage, engaged=0x%lx\n", engaged);
 }
 
 
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index cf02a9bcd245..6bc0409628c5 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -874,6 +874,9 @@ xpc_partition_disengaged(struct xpc_partition *part)
 			 * request in a timely fashion, so assume it's dead.
 			 */
 
+			dev_info(xpc_part, "disengage from remote partition %d "
+				"timed out\n", partid);
+			xpc_disengage_request_timedout = 1;
 			xpc_clear_partition_engaged(1UL << partid);
 			disengaged = 1;
 		}

From 0752c670d83362609c7f3f59ffa0e180709c60c2 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 10 Jan 2006 11:07:19 -0600
Subject: [PATCH 04/16] [IA64-SGI] XPC and unregistering from notifier lists

Only unregister from notifier lists if XPC is unloading.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc_main.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 948206b13f68..dcac286dab32 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -1022,11 +1022,13 @@ xpc_do_exit(enum xpc_retval reason)
 	del_timer_sync(&xpc_hb_timer);
 	DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
 
-	/* take ourselves off of the reboot_notifier_list */
-	(void) unregister_reboot_notifier(&xpc_reboot_notifier);
+	if (reason == xpcUnloading) {
+		/* take ourselves off of the reboot_notifier_list */
+		(void) unregister_reboot_notifier(&xpc_reboot_notifier);
 
-	/* take ourselves off of the die_notifier list */
-	(void) unregister_die_notifier(&xpc_die_notifier);
+		/* take ourselves off of the die_notifier list */
+		(void) unregister_die_notifier(&xpc_die_notifier);
+	}
 
 	/* close down protections for IPI operations */
 	xpc_restrict_IPI_ops();

From 1f4674b2d5f63bac4c393ac4de1d6c1b6b72c09c Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 10 Jan 2006 11:08:00 -0600
Subject: [PATCH 05/16] [IA64-SGI] ignoring loss of heartbeat while XPC is in
 kdebug

Allow for the loss of heartbeat while in kdebug to be ignored by remote
partitions.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc_main.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index dcac286dab32..db57b46bddd5 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -82,6 +82,9 @@ struct device *xpc_part = &xpc_part_dbg_subname;
 struct device *xpc_chan = &xpc_chan_dbg_subname;
 
 
+static int xpc_kdebug_ignore;
+
+
 /* systune related variables for /proc/sys directories */
 
 static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
@@ -1148,7 +1151,12 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
 
 
 /*
- * This function is called when the system is being rebooted.
+ * This function is called when the system is being restarted or halted due
+ * to some sort of system failure. If this is the case we need to notify the
+ * other partitions to disengage from all references to our memory.
+ * This function can also be called when our heartbeater could be offlined
+ * for a time. In this case we need to notify other partitions to not worry
+ * about the lack of a heartbeat.
  */
 static int
 xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
@@ -1158,11 +1166,25 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
 	case DIE_MACHINE_HALT:
 		xpc_die_disengage();
 		break;
+
+	case DIE_KDEBUG_ENTER:
+		/* Should lack of heartbeat be ignored by other partitions? */
+		if (!xpc_kdebug_ignore) {
+			break;
+		}
+		/* fall through */
 	case DIE_MCA_MONARCH_ENTER:
 	case DIE_INIT_MONARCH_ENTER:
 		xpc_vars->heartbeat++;
 		xpc_vars->heartbeat_offline = 1;
 		break;
+
+	case DIE_KDEBUG_LEAVE:
+		/* Is lack of heartbeat being ignored by other partitions? */
+		if (!xpc_kdebug_ignore) {
+			break;
+		}
+		/* fall through */
 	case DIE_MCA_MONARCH_LEAVE:
 	case DIE_INIT_MONARCH_LEAVE:
 		xpc_vars->heartbeat++;
@@ -1387,3 +1409,7 @@ module_param(xpc_disengage_request_timelimit, int, 0);
 MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
 		"for disengage request to complete.");
 
+module_param(xpc_kdebug_ignore, int, 0);
+MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
+		"other partitions when dropping into kdebug.");
+

From d6ad033a88b42420ddb6c62c95e42f88d862b246 Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 10 Jan 2006 11:08:55 -0600
Subject: [PATCH 06/16] [IA64-SGI] move xpc_system_reboot()

Move xpc_system_reboot() to be closer to the function it calls, for
readability reasons (which are indeed subjective).

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc_main.c | 58 +++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index db57b46bddd5..50a0c09bca80 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -1047,7 +1047,35 @@ xpc_do_exit(enum xpc_retval reason)
 
 
 /*
- * Called when the system is about to be either restarted or halted.
+ * This function is called when the system is being rebooted.
+ */
+static int
+xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+{
+	enum xpc_retval reason;
+
+
+	switch (event) {
+	case SYS_RESTART:
+		reason = xpcSystemReboot;
+		break;
+	case SYS_HALT:
+		reason = xpcSystemHalt;
+		break;
+	case SYS_POWER_OFF:
+		reason = xpcSystemPoweroff;
+		break;
+	default:
+		reason = xpcSystemGoingDown;
+	}
+
+	xpc_do_exit(reason);
+	return NOTIFY_DONE;
+}
+
+
+/*
+ * Notify other partitions to disengage from all references to our memory.
  */
 static void
 xpc_die_disengage(void)
@@ -1122,34 +1150,6 @@ xpc_die_disengage(void)
 }
 
 
-/*
- * This function is called when the system is being rebooted.
- */
-static int
-xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
-{
-	enum xpc_retval reason;
-
-
-	switch (event) {
-	case SYS_RESTART:
-		reason = xpcSystemReboot;
-		break;
-	case SYS_HALT:
-		reason = xpcSystemHalt;
-		break;
-	case SYS_POWER_OFF:
-		reason = xpcSystemPoweroff;
-		break;
-	default:
-		reason = xpcSystemGoingDown;
-	}
-
-	xpc_do_exit(reason);
-	return NOTIFY_DONE;
-}
-
-
 /*
  * This function is called when the system is being restarted or halted due
  * to some sort of system failure. If this is the case we need to notify the

From 87a149d6bba5949fbc53b8a21189b54748ac9e2a Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 10 Jan 2006 11:09:48 -0600
Subject: [PATCH 07/16] [IA64-SGI] move xpc.h to include/asm-ia64/sn

Move xpc.h from arch/ia64/sn/kernel to include/asm-ia64/sn without change.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 {arch/ia64/sn/kernel => include/asm-ia64/sn}/xpc.h | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {arch/ia64/sn/kernel => include/asm-ia64/sn}/xpc.h (100%)

diff --git a/arch/ia64/sn/kernel/xpc.h b/include/asm-ia64/sn/xpc.h
similarity index 100%
rename from arch/ia64/sn/kernel/xpc.h
rename to include/asm-ia64/sn/xpc.h

From 9335d48e10d2d07eacaddf889ec1efb8a5a5082e Mon Sep 17 00:00:00 2001
From: Dean Nelson <dcn@sgi.com>
Date: Tue, 10 Jan 2006 11:12:32 -0600
Subject: [PATCH 08/16] [IA64-SGI] move xpc.h to include/asm-ia64/sn (cleanup)

Cleanup a few items after moving xpc.h from arch/ia64/sn/kernel to
include/asm-ia64/sn.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/xpc_channel.c   | 4 ++--
 arch/ia64/sn/kernel/xpc_main.c      | 4 ++--
 arch/ia64/sn/kernel/xpc_partition.c | 4 ++--
 include/asm-ia64/sn/xpc.h           | 8 ++++----
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index 272ab4deb573..0c0a68902409 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
@@ -24,7 +24,7 @@
 #include <linux/slab.h>
 #include <asm/sn/bte.h>
 #include <asm/sn/sn_sal.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
 
 
 /*
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 50a0c09bca80..8930586e0eb4 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
@@ -59,7 +59,7 @@
 #include <asm/sn/sn_sal.h>
 #include <asm/kdebug.h>
 #include <asm/uaccess.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
 
 
 /* define two XPC debug device structures to be used with dev_dbg() et al */
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 6bc0409628c5..88a730e6cfdb 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
@@ -28,7 +28,7 @@
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/nodepda.h>
 #include <asm/sn/addrs.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
 
 
 /* XPC is exiting flag */
diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h
index 82e7430be789..87e9cd588510 100644
--- a/include/asm-ia64/sn/xpc.h
+++ b/include/asm-ia64/sn/xpc.h
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
@@ -11,8 +11,8 @@
  * Cross Partition Communication (XPC) structures and macros.
  */
 
-#ifndef _IA64_SN_KERNEL_XPC_H
-#define _IA64_SN_KERNEL_XPC_H
+#ifndef _ASM_IA64_SN_XPC_H
+#define _ASM_IA64_SN_XPC_H
 
 
 #include <linux/config.h>
@@ -1270,5 +1270,5 @@ xpc_check_for_channel_activity(struct xpc_partition *part)
 }
 
 
-#endif /* _IA64_SN_KERNEL_XPC_H */
+#endif /* _ASM_IA64_SN_XPC_H */
 

From 8a4b7b6f187f2967bff222e8c3758ab47efdb14f Mon Sep 17 00:00:00 2001
From: Francois Wellenrieter <francois.wellenreiter@bull.net>
Date: Fri, 13 Jan 2006 14:01:01 -0800
Subject: [PATCH 09/16] [IA64] Fix conversion of pal_min_state physical address

On return from INIT handler we must convert the address of the
minstate area from a kernel virtual uncached address (0xC...)
to physical uncached (0x8...).  A typo (or thinko?) in the code
converted to physical cached.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/mca_asm.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index db32fc1d3935..403a80a58c13 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -847,7 +847,7 @@ ia64_state_restore:
 	;;
 	mov cr.iim=temp3
 	mov cr.iha=temp4
-	dep r22=0,r22,62,2	// pal_min_state, physical, uncached
+	dep r22=0,r22,62,1	// pal_min_state, physical, uncached
 	mov IA64_KR(CURRENT)=r21
 	ld8 r8=[temp1]		// os_status
 	ld8 r10=[temp2]		// context

From 17e8ce0e9417eee1f57f9b3d4aad168425e043c3 Mon Sep 17 00:00:00 2001
From: Russ Anderson <rja@efs.americas.sgi.com>
Date: Fri, 16 Dec 2005 17:19:01 -0600
Subject: [PATCH 10/16] [IA64-SGI] Altix BTE error handling fixes

Altix (shub2) pushes the BTE clean-up into SAL.
This patch correctly interfaces with the now implemented SAL call.
It also fixes a bug when delaying clean-up to allow busy BTEs to
complete (or error out).

Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/kernel/bte_error.c | 58 +++++++++++++++++++++++++++------
 arch/ia64/sn/kernel/huberror.c  | 11 ++++---
 include/asm-ia64/sn/sn_sal.h    |  2 +-
 3 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
index fcbc748ae433..f1ec1370b3e3 100644
--- a/arch/ia64/sn/kernel/bte_error.c
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -33,7 +33,7 @@ void bte_error_handler(unsigned long);
  * Wait until all BTE related CRBs are completed
  * and then reset the interfaces.
  */
-void shub1_bte_error_handler(unsigned long _nodepda)
+int shub1_bte_error_handler(unsigned long _nodepda)
 {
 	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
 	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
@@ -53,7 +53,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 	    (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
 		BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
 			    smp_processor_id()));
-		return;
+		return 1;
 	}
 
 	/* Determine information about our hub */
@@ -81,7 +81,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 		mod_timer(recovery_timer, HZ * 5);
 		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
 			    smp_processor_id()));
-		return;
+		return 1;
 	}
 	if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
 
@@ -99,7 +99,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 				BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
 					    err_nodepda, smp_processor_id(),
 					    i));
-				return;
+				return 1;
 			}
 		}
 	}
@@ -124,6 +124,42 @@ void shub1_bte_error_handler(unsigned long _nodepda)
 	REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
 
 	del_timer(recovery_timer);
+	return 0;
+}
+
+/*
+ * Wait until all BTE related CRBs are completed
+ * and then reset the interfaces.
+ */
+int shub2_bte_error_handler(unsigned long _nodepda)
+{
+	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
+	struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
+	struct bteinfo_s *bte;
+	nasid_t nasid;
+	u64 status;
+	int i;
+
+	nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
+
+	/*
+	 * Verify that all the BTEs are complete
+	 */
+	for (i = 0; i < BTES_PER_NODE; i++) {
+		bte = &err_nodepda->bte_if[i];
+		status = BTE_LNSTAT_LOAD(bte);
+		if ((status & IBLS_ERROR) || !(status & IBLS_BUSY))
+			continue;
+		mod_timer(recovery_timer, HZ * 5);
+		BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
+			    smp_processor_id()));
+		return 1;
+	}
+	if (ia64_sn_bte_recovery(nasid))
+		panic("bte_error_handler(): Fatal BTE Error");
+
+	del_timer(recovery_timer);
+	return 0;
 }
 
 /*
@@ -135,7 +171,6 @@ void bte_error_handler(unsigned long _nodepda)
 	struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
 	spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
 	int i;
-	nasid_t nasid;
 	unsigned long irq_flags;
 	volatile u64 *notify;
 	bte_result_t bh_error;
@@ -160,12 +195,15 @@ void bte_error_handler(unsigned long _nodepda)
 	}
 
 	if (is_shub1()) {
-		shub1_bte_error_handler(_nodepda);
+		if (shub1_bte_error_handler(_nodepda)) {
+			spin_unlock_irqrestore(recovery_lock, irq_flags);
+			return;
+		}
 	} else {
-		nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
-
-		if (ia64_sn_bte_recovery(nasid))
-			panic("bte_error_handler(): Fatal BTE Error");
+		if (shub2_bte_error_handler(_nodepda)) {
+			spin_unlock_irqrestore(recovery_lock, irq_flags);
+			return;
+		}
 	}
 
 	for (i = 0; i < BTES_PER_NODE; i++) {
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
index 5c5eb01c50f0..56ab6bae00ee 100644
--- a/arch/ia64/sn/kernel/huberror.c
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -32,13 +32,14 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
 	ret_stuff.v0 = 0;
 	hubdev_info = (struct hubdev_info *)arg;
 	nasid = hubdev_info->hdi_nasid;
-	SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
-			(u64) nasid, 0, 0, 0, 0, 0, 0);
-
-	if ((int)ret_stuff.v0)
-		panic("hubii_eint_handler(): Fatal TIO Error");
 
 	if (is_shub1()) {
+		SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
+			(u64) nasid, 0, 0, 0, 0, 0, 0);
+
+		if ((int)ret_stuff.v0)
+			panic("hubii_eint_handler(): Fatal TIO Error");
+
 		if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
 			(void)hubiio_crb_error_handler(hubdev_info);
 	} else 
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 2a8b0d92a5d6..4363ed3598ad 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -1100,7 +1100,7 @@ ia64_sn_bte_recovery(nasid_t nasid)
 	struct ia64_sal_retval rv;
 
 	rv.status = 0;
-	SAL_CALL_NOLOCK(rv, SN_SAL_BTE_RECOVER, 0, 0, 0, 0, 0, 0, 0);
+	SAL_CALL_NOLOCK(rv, SN_SAL_BTE_RECOVER, (u64)nasid, 0, 0, 0, 0, 0, 0);
 	if (rv.status == SALRET_NOT_IMPLEMENTED)
 		return 0;
 	return (int) rv.status;

From cfbb1426bd76c4ba6ec4491c8df2a5dd3d984750 Mon Sep 17 00:00:00 2001
From: Jack Steiner <steiner@sgi.com>
Date: Thu, 22 Dec 2005 13:45:41 -0600
Subject: [PATCH 11/16] [IA64] Hole in IA64 TLB flushing from system threads

I originally thought this was a bug only in the SN code, but I think I
also see a hole in the generic IA64 tlb code. (Separate patch was sent
for the SN problem).

It looks like there is a bug in the TLB flushing code. During context switch,
kernel threads (kswapd, for example) inherit the mm of the task that was
previously running on the cpu. Normally, this is ok because the previous context
is still loaded into the RR registers. However, if the owner of the mm
migrates to another cpu, changes its context number, and references a
page before kswapd issues a tlb_purge for that same page, the purge will be
done with a stale context number (& RR registers).

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/mm/tlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 41105d454423..6a4eec9113e8 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -90,7 +90,7 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
 {
 	static DEFINE_SPINLOCK(ptcg_lock);
 
-	if (mm != current->active_mm) {
+	if (mm != current->active_mm || !current->mm) {
 		flush_tlb_all();
 		return;
 	}

From 6d6e420005f3753392b608a614eee8475bdc16f7 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@sgi.com>
Date: Fri, 23 Dec 2005 13:33:25 -0500
Subject: [PATCH 12/16] [IA64-SGI] Fix sn_flush_device_kernel & spinlock
 initialization

This patch separates the sn_flush_device_list struct into kernel and
common (both kernel and PROM accessible) structures.  As it was, if the
size of a spinlock_t changed (due to additional CONFIG options, etc.) the
sal call which populated the sn_flush_device_list structs would erroneously
write data (and cause memory corruption and/or a panic).

This patch does the following:

1.  Removes sn_flush_device_list and adds sn_flush_device_common and
sn_flush_device_kernel.

2.  Adds a new SAL call to populate a sn_flush_device_common struct per
device, not per widget as previously done.

3.  Correctly initializes each device's sn_flush_device_kernel spinlock_t
struct (before it was only doing each widget's first device).

Signed-off-by: Prarit Bhargava <prarit@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/sn/include/xtalk/hubdev.h     | 16 +++--
 arch/ia64/sn/kernel/io_init.c           | 92 ++++++++++++++-----------
 arch/ia64/sn/pci/pcibr/pcibr_dma.c      | 34 ++++-----
 arch/ia64/sn/pci/pcibr/pcibr_provider.c | 20 +++---
 include/asm-ia64/sn/sn_sal.h            |  3 +-
 5 files changed, 94 insertions(+), 71 deletions(-)

diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h
index 71c2b271b4c6..4d417c301201 100644
--- a/arch/ia64/sn/include/xtalk/hubdev.h
+++ b/arch/ia64/sn/include/xtalk/hubdev.h
@@ -26,11 +26,14 @@
 #define IIO_NUM_ITTES   7
 #define HUB_NUM_BIG_WINDOW      (IIO_NUM_ITTES - 1)
 
-struct sn_flush_device_list {
+/* This struct is shared between the PROM and the kernel.
+ * Changes to this struct will require corresponding changes to the kernel.
+ */
+struct sn_flush_device_common {
 	int sfdl_bus;
 	int sfdl_slot;
 	int sfdl_pin;
-	struct bar_list {
+	struct common_bar_list {
 		unsigned long start;
 		unsigned long end;
 	} sfdl_bar_list[6];
@@ -40,14 +43,19 @@ struct sn_flush_device_list {
 	uint32_t sfdl_persistent_busnum;
 	uint32_t sfdl_persistent_segment;
 	struct pcibus_info *sfdl_pcibus_info;
+};
+
+/* This struct is kernel only and is not used by the PROM */
+struct sn_flush_device_kernel {
 	spinlock_t sfdl_flush_lock;
+	struct sn_flush_device_common *common;
 };
 
 /*
- * **widget_p - Used as an array[wid_num][device] of sn_flush_device_list.
+ * **widget_p - Used as an array[wid_num][device] of sn_flush_device_kernel.
  */
 struct sn_flush_nasid_entry  {
-	struct sn_flush_device_list **widget_p; /* Used as a array of wid_num */
+	struct sn_flush_device_kernel **widget_p; // Used as an array of wid_num
 	uint64_t iio_itte[8];
 };
 
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 318087e35b66..258d9d7aff98 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -76,11 +76,12 @@ static struct sn_pcibus_provider sn_pci_default_provider = {
 };
 
 /*
- * Retrieve the DMA Flush List given nasid.  This list is needed 
- * to implement the WAR - Flush DMA data on PIO Reads.
+ * Retrieve the DMA Flush List given nasid, widget, and device.
+ * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
  */
-static inline uint64_t
-sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address)
+static inline u64
+sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
+			     u64 address)
 {
 
 	struct ia64_sal_retval ret_stuff;
@@ -88,17 +89,17 @@ sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address)
 	ret_stuff.v0 = 0;
 
 	SAL_CALL_NOLOCK(ret_stuff,
-			(u64) SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
-			(u64) nasid, (u64) widget_num, (u64) address, 0, 0, 0,
-			0);
-	return ret_stuff.v0;
+			(u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
+			(u64) nasid, (u64) widget_num,
+			(u64) device_num, (u64) address, 0, 0, 0);
+	return ret_stuff.status;
 
 }
 
 /*
  * Retrieve the hub device info structure for the given nasid.
  */
-static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
+static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
 {
 
 	struct ia64_sal_retval ret_stuff;
@@ -114,7 +115,7 @@ static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
 /*
  * Retrieve the pci bus information given the bus number.
  */
-static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
+static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
 {
 
 	struct ia64_sal_retval ret_stuff;
@@ -130,7 +131,7 @@ static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
 /*
  * Retrieve the pci device information given the bus and device|function number.
  */
-static inline uint64_t
+static inline u64
 sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, 
 			u64 sn_irq_info)
 {
@@ -170,12 +171,12 @@ sn_pcidev_info_get(struct pci_dev *dev)
  */
 static void sn_fixup_ionodes(void)
 {
-
-	struct sn_flush_device_list *sn_flush_device_list;
+	struct sn_flush_device_kernel *sn_flush_device_kernel;
+	struct sn_flush_device_kernel *dev_entry;
 	struct hubdev_info *hubdev;
-	uint64_t status;
-	uint64_t nasid;
-	int i, widget;
+	u64 status;
+	u64 nasid;
+	int i, widget, device;
 
 	/*
 	 * Get SGI Specific HUB chipset information.
@@ -186,7 +187,7 @@ static void sn_fixup_ionodes(void)
 		nasid = cnodeid_to_nasid(i);
 		hubdev->max_segment_number = 0xffffffff;
 		hubdev->max_pcibus_number = 0xff;
-		status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev));
+		status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev));
 		if (status)
 			continue;
 
@@ -213,38 +214,49 @@ static void sn_fixup_ionodes(void)
 
 		hubdev->hdi_flush_nasid_list.widget_p =
 		    kmalloc((HUB_WIDGET_ID_MAX + 1) *
-			    sizeof(struct sn_flush_device_list *), GFP_KERNEL);
-
+			    sizeof(struct sn_flush_device_kernel *),
+			    GFP_KERNEL);
 		memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0,
 		       (HUB_WIDGET_ID_MAX + 1) *
-		       sizeof(struct sn_flush_device_list *));
+		       sizeof(struct sn_flush_device_kernel *));
 
 		for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
-			sn_flush_device_list = kmalloc(DEV_PER_WIDGET *
-						       sizeof(struct
-							      sn_flush_device_list),
-						       GFP_KERNEL);
-			memset(sn_flush_device_list, 0x0,
+			sn_flush_device_kernel = kmalloc(DEV_PER_WIDGET *
+						         sizeof(struct
+						        sn_flush_device_kernel),
+						        GFP_KERNEL);
+			if (!sn_flush_device_kernel)
+				BUG();
+			memset(sn_flush_device_kernel, 0x0,
 			       DEV_PER_WIDGET *
-			       sizeof(struct sn_flush_device_list));
+			       sizeof(struct sn_flush_device_kernel));
 
-			status =
-			    sal_get_widget_dmaflush_list(nasid, widget,
-							 (uint64_t)
-							 __pa
-							 (sn_flush_device_list));
-			if (status) {
-				kfree(sn_flush_device_list);
-				continue;
+			dev_entry = sn_flush_device_kernel;
+			for (device = 0; device < DEV_PER_WIDGET;
+			     device++,dev_entry++) {
+				dev_entry->common = kmalloc(sizeof(struct
+					      	        sn_flush_device_common),
+					                    GFP_KERNEL);
+				if (!dev_entry->common)
+					BUG();
+				memset(dev_entry->common, 0x0, sizeof(struct
+					     	       sn_flush_device_common));
+
+				status = sal_get_device_dmaflush_list(nasid,
+									widget,
+								       	device,
+						      (u64)(dev_entry->common));
+				if (status)
+					BUG();
+
+				spin_lock_init(&dev_entry->sfdl_flush_lock);
 			}
 
-			spin_lock_init(&sn_flush_device_list->sfdl_flush_lock);
-			hubdev->hdi_flush_nasid_list.widget_p[widget] =
-			    sn_flush_device_list;
-		}
-
+			if (sn_flush_device_kernel)
+				hubdev->hdi_flush_nasid_list.widget_p[widget] =
+						       sn_flush_device_kernel;
+	        }
 	}
-
 }
 
 /*
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index 34093476e965..e68332d93171 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -218,7 +218,9 @@ void sn_dma_flush(uint64_t addr)
 	uint64_t flags;
 	uint64_t itte;
 	struct hubdev_info *hubinfo;
-	volatile struct sn_flush_device_list *p;
+	volatile struct sn_flush_device_kernel *p;
+	volatile struct sn_flush_device_common *common;
+
 	struct sn_flush_nasid_entry *flush_nasid_list;
 
 	if (!sn_ioif_inited)
@@ -268,17 +270,17 @@ void sn_dma_flush(uint64_t addr)
 	p = &flush_nasid_list->widget_p[wid_num][0];
 
 	/* find a matching BAR */
-	for (i = 0; i < DEV_PER_WIDGET; i++) {
+	for (i = 0; i < DEV_PER_WIDGET; i++,p++) {
+		common = p->common;
 		for (j = 0; j < PCI_ROM_RESOURCE; j++) {
-			if (p->sfdl_bar_list[j].start == 0)
+			if (common->sfdl_bar_list[j].start == 0)
 				break;
-			if (addr >= p->sfdl_bar_list[j].start
-			    && addr <= p->sfdl_bar_list[j].end)
+			if (addr >= common->sfdl_bar_list[j].start
+			    && addr <= common->sfdl_bar_list[j].end)
 				break;
 		}
-		if (j < PCI_ROM_RESOURCE && p->sfdl_bar_list[j].start != 0)
+		if (j < PCI_ROM_RESOURCE && common->sfdl_bar_list[j].start != 0)
 			break;
-		p++;
 	}
 
 	/* if no matching BAR, return without doing anything. */
@@ -304,24 +306,24 @@ void sn_dma_flush(uint64_t addr)
 		if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516) {
 			return;
 		} else {
-			pcireg_wrb_flush_get(p->sfdl_pcibus_info,
-					     (p->sfdl_slot - 1));
+			pcireg_wrb_flush_get(common->sfdl_pcibus_info,
+					     (common->sfdl_slot - 1));
 		}
 	} else {
-		spin_lock_irqsave(&((struct sn_flush_device_list *)p)->
-				  sfdl_flush_lock, flags);
-
-		*p->sfdl_flush_addr = 0;
+		spin_lock_irqsave((spinlock_t *)&p->sfdl_flush_lock,
+				  flags);
+		*common->sfdl_flush_addr = 0;
 
 		/* force an interrupt. */
-		*(volatile uint32_t *)(p->sfdl_force_int_addr) = 1;
+		*(volatile uint32_t *)(common->sfdl_force_int_addr) = 1;
 
 		/* wait for the interrupt to come back. */
-		while (*(p->sfdl_flush_addr) != 0x10f)
+		while (*(common->sfdl_flush_addr) != 0x10f)
 			cpu_relax();
 
 		/* okay, everything is synched up. */
-		spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, flags);
+		spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock,
+				       flags);
 	}
 	return;
 }
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 1f500c81002c..e328e948175d 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -92,7 +92,8 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 	cnodeid_t near_cnode;
 	struct hubdev_info *hubdev_info;
 	struct pcibus_info *soft;
-	struct sn_flush_device_list *sn_flush_device_list;
+	struct sn_flush_device_kernel *sn_flush_device_kernel;
+	struct sn_flush_device_common *common;
 
 	if (! IS_PCI_BRIDGE_ASIC(prom_bussoft->bs_asic_type)) {
 		return NULL;
@@ -137,20 +138,19 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
 	hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
 
 	if (hubdev_info->hdi_flush_nasid_list.widget_p) {
-		sn_flush_device_list = hubdev_info->hdi_flush_nasid_list.
+		sn_flush_device_kernel = hubdev_info->hdi_flush_nasid_list.
 		    widget_p[(int)soft->pbi_buscommon.bs_xid];
-		if (sn_flush_device_list) {
+		if (sn_flush_device_kernel) {
 			for (j = 0; j < DEV_PER_WIDGET;
-			     j++, sn_flush_device_list++) {
-				if (sn_flush_device_list->sfdl_slot == -1)
+			     j++, sn_flush_device_kernel++) {
+				common = sn_flush_device_kernel->common;
+				if (common->sfdl_slot == -1)
 					continue;
-				if ((sn_flush_device_list->
-				     sfdl_persistent_segment ==
+				if ((common->sfdl_persistent_segment ==
 				     soft->pbi_buscommon.bs_persist_segment) &&
-				     (sn_flush_device_list->
-				     sfdl_persistent_busnum ==
+				     (common->sfdl_persistent_busnum ==
 				     soft->pbi_buscommon.bs_persist_busnum))
-					sn_flush_device_list->sfdl_pcibus_info =
+					common->sfdl_pcibus_info =
 					    soft;
 			}
 		}
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 4363ed3598ad..8b9e10e7cdba 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -75,7 +75,8 @@
 #define  SN_SAL_IOIF_GET_HUBDEV_INFO		   0x02000055
 #define  SN_SAL_IOIF_GET_PCIBUS_INFO		   0x02000056
 #define  SN_SAL_IOIF_GET_PCIDEV_INFO		   0x02000057
-#define  SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST	   0x02000058
+#define  SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST	   0x02000058	// deprecated
+#define  SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST	   0x0200005a
 
 #define SN_SAL_HUB_ERROR_INTERRUPT		   0x02000060
 #define SN_SAL_BTE_RECOVER			   0x02000061

From 15029285dc977a392e74eacb7625984b71d4f605 Mon Sep 17 00:00:00 2001
From: Jason Uhlenkott <jasonuhl@sgi.com>
Date: Fri, 30 Dec 2005 02:27:01 -0800
Subject: [PATCH 13/16] [IA64] Handle debug traps in fsys mode

We need to handle debug traps in fsys mode non-fatally.  They can
happen now that we have fsyscalls which contain probe instructions.

Signed-off-by: Jason Uhlenkott <jasonuhl@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/traps.c       | 26 +++++++++++++++++++-------
 include/asm-ia64/thread_info.h |  4 +++-
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index d3e0ecb56d62..55391901b013 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -530,12 +530,15 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 		if (fsys_mode(current, &regs)) {
 			extern char __kernel_syscall_via_break[];
 			/*
-			 * Got a trap in fsys-mode: Taken Branch Trap and Single Step trap
-			 * need special handling; Debug trap is not supposed to happen.
+			 * Got a trap in fsys-mode: Taken Branch Trap
+			 * and Single Step trap need special handling;
+			 * Debug trap is ignored (we disable it here
+			 * and re-enable it in the lower-privilege trap).
 			 */
 			if (unlikely(vector == 29)) {
-				die("Got debug trap in fsys-mode---not supposed to happen!",
-				    &regs, 0);
+				set_thread_flag(TIF_DB_DISABLED);
+				ia64_psr(&regs)->db = 0;
+				ia64_psr(&regs)->lp = 1;
 				return;
 			}
 			/* re-do the system call via break 0x100000: */
@@ -589,10 +592,19 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
 	      case 34:
 		if (isr & 0x2) {
 			/* Lower-Privilege Transfer Trap */
+
+			/* If we disabled debug traps during an fsyscall,
+			 * re-enable them here.
+			 */
+			if (test_thread_flag(TIF_DB_DISABLED)) {
+				clear_thread_flag(TIF_DB_DISABLED);
+				ia64_psr(&regs)->db = 1;
+			}
+
 			/*
-			 * Just clear PSR.lp and then return immediately: all the
-			 * interesting work (e.g., signal delivery is done in the kernel
-			 * exit path).
+			 * Just clear PSR.lp and then return immediately:
+			 * all the interesting work (e.g., signal delivery)
+			 * is done in the kernel exit path.
 			 */
 			ia64_psr(&regs)->lp = 0;
 			return;
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 653bb7f9a753..1d6518fe1f02 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -93,6 +93,7 @@ struct thread_info {
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17
 #define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
+#define TIF_DB_DISABLED		19	/* debug trap disabled for fsyscall */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
@@ -100,9 +101,10 @@ struct thread_info {
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_SIGDELAYED	(1 << TIF_SIGDELAYED)
+#define _TIF_SIGDELAYED		(1 << TIF_SIGDELAYED)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_MCA_INIT		(1 << TIF_MCA_INIT)
+#define _TIF_DB_DISABLED	(1 << TIF_DB_DISABLED)
 
 /* "work to do on user-return" bits */
 #define TIF_ALLWORK_MASK	(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED)

From e026cca0f2c09c4c28c902db6384fd8a412671d6 Mon Sep 17 00:00:00 2001
From: Keith Owens <kaos@sgi.com>
Date: Fri, 6 Jan 2006 10:36:06 +1100
Subject: [PATCH 14/16] [IA64] Add hotplug cpu to salinfo.c, replace semaphore
 with mutex

Add hotplug cpu support to salinfo.c.

The cpu_event field is a cpumask so use the cpu_* macros consistently,
replacing the existing mixture of cpu_* and *_bit macros.

Instead of counting the number of outstanding events in a semaphore and
trying to track that count over user space context, interrupt context,
non-maskable interrupt context and cpu hotplug, replace the semaphore
with a test for "any bits set" combined with a mutex.

Modify the locking to make the test for "work to do" an atomic
operation.

Signed-off-by: Keith Owens <kaos@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/salinfo.c | 174 ++++++++++++++++++++++++++-----------
 1 file changed, 121 insertions(+), 53 deletions(-)

diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index a87a162a3086..9d5a823479a3 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -3,7 +3,7 @@
  *
  * Creates entries in /proc/sal for various system features.
  *
- * Copyright (c) 2003 Silicon Graphics, Inc.  All rights reserved.
+ * Copyright (c) 2003, 2006 Silicon Graphics, Inc.  All rights reserved.
  * Copyright (c) 2003 Hewlett-Packard Co
  *	Bjorn Helgaas <bjorn.helgaas@hp.com>
  *
@@ -27,9 +27,17 @@
  *   mca.c may not pass a buffer, a NULL buffer just indicates that a new
  *   record is available in SAL.
  *   Replace some NR_CPUS by cpus_online, for hotplug cpu.
+ *
+ * Jan  5 2006        kaos@sgi.com
+ *   Handle hotplug cpus coming online.
+ *   Handle hotplug cpus going offline while they still have outstanding records.
+ *   Use the cpu_* macros consistently.
+ *   Replace the counting semaphore with a mutex and a test if the cpumask is non-empty.
+ *   Modify the locking to make the test for "work to do" an atomic operation.
  */
 
 #include <linux/capability.h>
+#include <linux/cpu.h>
 #include <linux/types.h>
 #include <linux/proc_fs.h>
 #include <linux/module.h>
@@ -132,8 +140,8 @@ enum salinfo_state {
 };
 
 struct salinfo_data {
-	volatile cpumask_t	cpu_event;	/* which cpus have outstanding events */
-	struct semaphore	sem;		/* count of cpus with outstanding events (bits set in cpu_event) */
+	cpumask_t		cpu_event;	/* which cpus have outstanding events */
+	struct semaphore	mutex;
 	u8			*log_buffer;
 	u64			log_size;
 	u8			*oemdata;	/* decoded oem data */
@@ -174,6 +182,21 @@ struct salinfo_platform_oemdata_parms {
 	int ret;
 };
 
+/* Kick the mutex that tells user space that there is work to do.  Instead of
+ * trying to track the state of the mutex across multiple cpus, in user
+ * context, interrupt context, non-maskable interrupt context and hotplug cpu,
+ * it is far easier just to grab the mutex if it is free then release it.
+ *
+ * This routine must be called with data_saved_lock held, to make the down/up
+ * operation atomic.
+ */
+static void
+salinfo_work_to_do(struct salinfo_data *data)
+{
+	down_trylock(&data->mutex);
+	up(&data->mutex);
+}
+
 static void
 salinfo_platform_oemdata_cpu(void *context)
 {
@@ -212,9 +235,9 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
 
 	BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
 
+	if (irqsafe)
+		spin_lock_irqsave(&data_saved_lock, flags);
 	if (buffer) {
-		if (irqsafe)
-			spin_lock_irqsave(&data_saved_lock, flags);
 		for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
 			if (!data_saved->buffer)
 				break;
@@ -232,13 +255,11 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
 			data_saved->size = size;
 			data_saved->buffer = buffer;
 		}
-		if (irqsafe)
-			spin_unlock_irqrestore(&data_saved_lock, flags);
 	}
-
-	if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
-		if (irqsafe)
-			up(&data->sem);
+	cpu_set(smp_processor_id(), data->cpu_event);
+	if (irqsafe) {
+		salinfo_work_to_do(data);
+		spin_unlock_irqrestore(&data_saved_lock, flags);
 	}
 }
 
@@ -249,20 +270,17 @@ static struct timer_list salinfo_timer;
 static void
 salinfo_timeout_check(struct salinfo_data *data)
 {
-	int i;
+	unsigned long flags;
 	if (!data->open)
 		return;
-	for_each_online_cpu(i) {
-		if (test_bit(i, &data->cpu_event)) {
-			/* double up() is not a problem, user space will see no
-			 * records for the additional "events".
-			 */
-			up(&data->sem);
-		}
+	if (!cpus_empty(data->cpu_event)) {
+		spin_lock_irqsave(&data_saved_lock, flags);
+		salinfo_work_to_do(data);
+		spin_unlock_irqrestore(&data_saved_lock, flags);
 	}
 }
 
-static void 
+static void
 salinfo_timeout (unsigned long arg)
 {
 	salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
@@ -290,16 +308,20 @@ salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t
 	int i, n, cpu = -1;
 
 retry:
-	if (down_trylock(&data->sem)) {
+	if (cpus_empty(data->cpu_event) && down_trylock(&data->mutex)) {
 		if (file->f_flags & O_NONBLOCK)
 			return -EAGAIN;
-		if (down_interruptible(&data->sem))
+		if (down_interruptible(&data->mutex))
 			return -EINTR;
 	}
 
 	n = data->cpu_check;
 	for (i = 0; i < NR_CPUS; i++) {
-		if (test_bit(n, &data->cpu_event) && cpu_online(n)) {
+		if (cpu_isset(n, data->cpu_event)) {
+			if (!cpu_online(n)) {
+				cpu_clear(n, data->cpu_event);
+				continue;
+			}
 			cpu = n;
 			break;
 		}
@@ -310,9 +332,6 @@ salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t
 	if (cpu == -1)
 		goto retry;
 
-	/* events are sticky until the user says "clear" */
-	up(&data->sem);
-
 	/* for next read, start checking at next CPU */
 	data->cpu_check = cpu;
 	if (++data->cpu_check == NR_CPUS)
@@ -381,10 +400,8 @@ salinfo_log_release(struct inode *inode, struct file *file)
 static void
 call_on_cpu(int cpu, void (*fn)(void *), void *arg)
 {
-	cpumask_t save_cpus_allowed, new_cpus_allowed;
-	memcpy(&save_cpus_allowed, &current->cpus_allowed, sizeof(save_cpus_allowed));
-	memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed));
-	set_bit(cpu, &new_cpus_allowed);
+	cpumask_t save_cpus_allowed = current->cpus_allowed;
+	cpumask_t new_cpus_allowed = cpumask_of_cpu(cpu);
 	set_cpus_allowed(current, new_cpus_allowed);
 	(*fn)(arg);
 	set_cpus_allowed(current, save_cpus_allowed);
@@ -433,10 +450,10 @@ salinfo_log_new_read(int cpu, struct salinfo_data *data)
 	if (!data->saved_num)
 		call_on_cpu(cpu, salinfo_log_read_cpu, data);
 	if (!data->log_size) {
-	        data->state = STATE_NO_DATA;
-	        clear_bit(cpu, &data->cpu_event);
+		data->state = STATE_NO_DATA;
+		cpu_clear(cpu, data->cpu_event);
 	} else {
-	        data->state = STATE_LOG_RECORD;
+		data->state = STATE_LOG_RECORD;
 	}
 }
 
@@ -473,27 +490,31 @@ static int
 salinfo_log_clear(struct salinfo_data *data, int cpu)
 {
 	sal_log_record_header_t *rh;
+	unsigned long flags;
+	spin_lock_irqsave(&data_saved_lock, flags);
 	data->state = STATE_NO_DATA;
-	if (!test_bit(cpu, &data->cpu_event))
-		return 0;
-	down(&data->sem);
-	clear_bit(cpu, &data->cpu_event);
-	if (data->saved_num) {
-		unsigned long flags;
-		spin_lock_irqsave(&data_saved_lock, flags);
-		shift1_data_saved(data, data->saved_num - 1 );
-		data->saved_num = 0;
+	if (!cpu_isset(cpu, data->cpu_event)) {
 		spin_unlock_irqrestore(&data_saved_lock, flags);
+		return 0;
 	}
+	cpu_clear(cpu, data->cpu_event);
+	if (data->saved_num) {
+		shift1_data_saved(data, data->saved_num - 1);
+		data->saved_num = 0;
+	}
+	spin_unlock_irqrestore(&data_saved_lock, flags);
 	rh = (sal_log_record_header_t *)(data->log_buffer);
 	/* Corrected errors have already been cleared from SAL */
 	if (rh->severity != sal_log_severity_corrected)
 		call_on_cpu(cpu, salinfo_log_clear_cpu, data);
 	/* clearing a record may make a new record visible */
 	salinfo_log_new_read(cpu, data);
-	if (data->state == STATE_LOG_RECORD &&
-	    !test_and_set_bit(cpu,  &data->cpu_event))
-		up(&data->sem);
+	if (data->state == STATE_LOG_RECORD) {
+		spin_lock_irqsave(&data_saved_lock, flags);
+		cpu_set(cpu, data->cpu_event);
+		salinfo_work_to_do(data);
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+	}
 	return 0;
 }
 
@@ -550,6 +571,53 @@ static struct file_operations salinfo_data_fops = {
 	.write   = salinfo_log_write,
 };
 
+#ifdef	CONFIG_HOTPLUG_CPU
+static int __devinit
+salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
+{
+	unsigned int i, cpu = (unsigned long)hcpu;
+	unsigned long flags;
+	struct salinfo_data *data;
+	switch (action) {
+	case CPU_ONLINE:
+		spin_lock_irqsave(&data_saved_lock, flags);
+		for (i = 0, data = salinfo_data;
+		     i < ARRAY_SIZE(salinfo_data);
+		     ++i, ++data) {
+			cpu_set(cpu, data->cpu_event);
+			salinfo_work_to_do(data);
+		}
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+		break;
+	case CPU_DEAD:
+		spin_lock_irqsave(&data_saved_lock, flags);
+		for (i = 0, data = salinfo_data;
+		     i < ARRAY_SIZE(salinfo_data);
+		     ++i, ++data) {
+			struct salinfo_data_saved *data_saved;
+			int j;
+			for (j = ARRAY_SIZE(data->data_saved) - 1, data_saved = data->data_saved + j;
+			     j >= 0;
+			     --j, --data_saved) {
+				if (data_saved->buffer && data_saved->cpu == cpu) {
+					shift1_data_saved(data, j);
+				}
+			}
+			cpu_clear(cpu, data->cpu_event);
+		}
+		spin_unlock_irqrestore(&data_saved_lock, flags);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block salinfo_cpu_notifier =
+{
+	.notifier_call = salinfo_cpu_callback,
+	.priority = 0,
+};
+#endif	/* CONFIG_HOTPLUG_CPU */
+
 static int __init
 salinfo_init(void)
 {
@@ -557,7 +625,7 @@ salinfo_init(void)
 	struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
 	struct proc_dir_entry *dir, *entry;
 	struct salinfo_data *data;
-	int i, j, online;
+	int i, j;
 
 	salinfo_dir = proc_mkdir("sal", NULL);
 	if (!salinfo_dir)
@@ -572,7 +640,7 @@ salinfo_init(void)
 	for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
 		data = salinfo_data + i;
 		data->type = i;
-		sema_init(&data->sem, 0);
+		init_MUTEX(&data->mutex);
 		dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
 		if (!dir)
 			continue;
@@ -592,12 +660,8 @@ salinfo_init(void)
 		*sdir++ = entry;
 
 		/* we missed any events before now */
-		online = 0;
-		for_each_online_cpu(j) {
-			set_bit(j, &data->cpu_event);
-			++online;
-		}
-		sema_init(&data->sem, online);
+		for_each_online_cpu(j)
+			cpu_set(j, data->cpu_event);
 
 		*sdir++ = dir;
 	}
@@ -609,6 +673,10 @@ salinfo_init(void)
 	salinfo_timer.function = &salinfo_timeout;
 	add_timer(&salinfo_timer);
 
+#ifdef	CONFIG_HOTPLUG_CPU
+	register_cpu_notifier(&salinfo_cpu_notifier);
+#endif
+
 	return 0;
 }
 

From d3ef1f5aafcf7a4129eb2078c70bc9e577bc3af1 Mon Sep 17 00:00:00 2001
From: Zhang Yanmin <yanmin.zhang@intel.com>
Date: Fri, 13 Jan 2006 14:45:21 -0800
Subject: [PATCH 15/16] [IA64] prevent accidental modification of args in
 jprobe handler

When a jprobe is hit, the function parameters of the original function
should be saved before the jprobe handler is executed and restored after
the jprobe handler has run, because the jprobe handler might change the
register values due to tail call optimization by gcc.

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/jprobes.S | 27 ++++++++++++++++++
 arch/ia64/kernel/kprobes.c | 57 ++++++++++++++++++++++++++++++++++++++
 include/asm-ia64/kprobes.h |  6 ++++
 3 files changed, 90 insertions(+)

diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S
index 2323377e3695..5cd6226f44f2 100644
--- a/arch/ia64/kernel/jprobes.S
+++ b/arch/ia64/kernel/jprobes.S
@@ -60,3 +60,30 @@ END(jprobe_break)
 GLOBAL_ENTRY(jprobe_inst_return)
 	br.call.sptk.many b0=jprobe_break
 END(jprobe_inst_return)
+
+GLOBAL_ENTRY(invalidate_stacked_regs)
+	movl r16=invalidate_restore_cfm
+	;;
+	mov b6=r16
+	;;
+	br.ret.sptk.many b6
+	;;
+invalidate_restore_cfm:
+	mov r16=ar.rsc
+	;;
+	mov ar.rsc=r0
+	;;
+	loadrs
+	;;
+	mov ar.rsc=r16
+	;;
+	br.cond.sptk.many rp
+END(invalidate_stacked_regs)
+
+GLOBAL_ENTRY(flush_register_stack)
+	// flush dirty regs to backing store (must be first in insn group)
+	flushrs
+	;;
+	br.ret.sptk.many rp
+END(flush_register_stack)
+
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 346fedf9ea47..50ae8c7d453d 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -766,11 +766,56 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 	return ret;
 }
 
+struct param_bsp_cfm {
+	unsigned long ip;
+	unsigned long *bsp;
+	unsigned long cfm;
+};
+
+static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg)
+{
+	unsigned long ip;
+	struct param_bsp_cfm *lp = arg;
+
+	do {
+		unw_get_ip(info, &ip);
+		if (ip == 0)
+			break;
+		if (ip == lp->ip) {
+			unw_get_bsp(info, (unsigned long*)&lp->bsp);
+			unw_get_cfm(info, (unsigned long*)&lp->cfm);
+			return;
+		}
+	} while (unw_unwind(info) >= 0);
+	lp->bsp = 0;
+	lp->cfm = 0;
+	return;
+}
+
 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	struct param_bsp_cfm pa;
+	int bytes;
+
+	/*
+	 * Callee owns the argument space and could overwrite it, e.g.
+	 * through tail call optimization. So to be absolutely safe
+	 * we save the argument space before transferring control
+	 * to the instrumented jprobe function, which runs in
+	 * the process context.
+	 */
+	pa.ip = regs->cr_iip;
+	unw_init_running(ia64_get_bsp_cfm, &pa);
+	bytes = (char *)ia64_rse_skip_regs(pa.bsp, pa.cfm & 0x3f)
+				- (char *)pa.bsp;
+	memcpy( kcb->jprobes_saved_stacked_regs,
+		pa.bsp,
+		bytes );
+	kcb->bsp = pa.bsp;
+	kcb->cfm = pa.cfm;
 
 	/* save architectural state */
 	kcb->jprobe_saved_regs = *regs;
@@ -792,8 +837,20 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	int bytes;
 
+	/* restoring architectural state */
 	*regs = kcb->jprobe_saved_regs;
+
+	/* restoring the original argument space */
+	flush_register_stack();
+	bytes = (char *)ia64_rse_skip_regs(kcb->bsp, kcb->cfm & 0x3f)
+				- (char *)kcb->bsp;
+	memcpy( kcb->bsp,
+		kcb->jprobes_saved_stacked_regs,
+		bytes );
+	invalidate_stacked_regs();
+
 	preempt_enable_no_resched();
 	return 1;
 }
diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
index a74b68104559..8c0fc227f0fb 100644
--- a/include/asm-ia64/kprobes.h
+++ b/include/asm-ia64/kprobes.h
@@ -68,10 +68,14 @@ struct prev_kprobe {
 	unsigned long status;
 };
 
+#define	MAX_PARAM_RSE_SIZE	(0x60+0x60/0x3f)
 /* per-cpu kprobe control block */
 struct kprobe_ctlblk {
 	unsigned long kprobe_status;
 	struct pt_regs jprobe_saved_regs;
+	unsigned long jprobes_saved_stacked_regs[MAX_PARAM_RSE_SIZE];
+	unsigned long *bsp;
+	unsigned long cfm;
 	struct prev_kprobe prev_kprobe;
 };
 
@@ -118,5 +122,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
 static inline void jprobe_return(void)
 {
 }
+extern void invalidate_stacked_regs(void);
+extern void flush_register_stack(void);
 
 #endif				/* _ASM_KPROBES_H */

From d50f5c5ca0c3426669fbe11ad4d5708d333eb9fb Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Fri, 13 Jan 2006 23:46:38 +0100
Subject: [PATCH 16/16] [IA64] build broken for ia64 simserial.c

TTY layer buffering revamp broke ia64 in commit
 33f0f88f1c51ae5c2d593d26960c760ea154c2e2

  CC      arch/ia64/hp/sim/simserial.o
arch/ia64/hp/sim/simserial.c: In function `receive_chars':
arch/ia64/hp/sim/simserial.c:170: error: structure has no member named `flip'
 ... and so on ...
make[1]: *** [arch/ia64/hp/sim/simserial.o] Error 1

Patch from Andreas Schwab.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/hp/sim/simserial.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index a346e1833bf2..27f23fa5ca15 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -167,15 +167,9 @@ static  void receive_chars(struct tty_struct *tty, struct pt_regs *regs)
 			}
 		}
 		seen_esc = 0;
-		if (tty->flip.count >= TTY_FLIPBUF_SIZE) break;
 
-		*tty->flip.char_buf_ptr = ch;
-
-		*tty->flip.flag_buf_ptr = 0;
-
-		tty->flip.flag_buf_ptr++;
-		tty->flip.char_buf_ptr++;
-		tty->flip.count++;
+		if (tty_insert_flip_char(tty, ch, TTY_NORMAL) == 0)
+			break;
 	}
 	tty_flip_buffer_push(tty);
 }