From f64ae042d94d376b54e7a343d93c48561e9d2e16 Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Fri, 11 Nov 2011 08:33:48 +0800
Subject: [PATCH 1/4] slub: use correct parameter to add a page to partial list tail

unfreeze_partials() needs to add the page to the tail of the partial list,
since such a page does not have many free objects left. We now explicitly
pass DEACTIVATE_TO_TAIL for this; the old call passed 1, and since
DEACTIVATE_TO_TAIL != 1 the page was not actually added to the tail.
Without this fix there is a performance regression (e.g. more lock
contention on node->list_lock).

Signed-off-by: Shaohua Li
Acked-by: Christoph Lameter
Acked-by: David Rientjes
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 7d2a996c307e..60e16c43f88c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1904,7 +1904,8 @@ static void unfreeze_partials(struct kmem_cache *s)
 			if (l == M_PARTIAL)
 				remove_partial(n, page);
 			else
-				add_partial(n, page, 1);
+				add_partial(n, page,
+					DEACTIVATE_TO_TAIL);
 
 			l = m;
 		}

From 9ada19342b2441f290f0043ed7c562682c8c4ede Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Mon, 14 Nov 2011 13:34:13 +0800
Subject: [PATCH 2/4] slub: move discard_slab out of node lock

Lockdep reports a potential deadlock on the slub node list_lock:
discard_slab() is called with the lock held in unfreeze_partials(), and it
can trigger a slab allocation, which can take the lock again.

discard_slab() does not actually need to hold the lock once the slab has
been removed from the partial list, so collect the empty slabs on a local
list and discard them after the lock is dropped.

Acked-by: Christoph Lameter
Reported-and-tested-by: Yong Zhang
Reported-and-tested-by: Julie Sullivan
Signed-off-by: Shaohua Li
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 60e16c43f88c..00efbb56a268 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1862,7 +1862,7 @@ static void unfreeze_partials(struct kmem_cache *s)
 {
 	struct kmem_cache_node *n = NULL;
 	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
-	struct page *page;
+	struct page *page, *discard_page = NULL;
 
 	while ((page = c->partial)) {
 		enum slab_modes { M_PARTIAL, M_FREE };
@@ -1916,14 +1916,22 @@ static void unfreeze_partials(struct kmem_cache *s)
 				"unfreezing slab"));
 
 		if (m == M_FREE) {
-			stat(s, DEACTIVATE_EMPTY);
-			discard_slab(s, page);
-			stat(s, FREE_SLAB);
+			page->next = discard_page;
+			discard_page = page;
 		}
 	}
 
 	if (n)
 		spin_unlock(&n->list_lock);
+
+	while (discard_page) {
+		page = discard_page;
+		discard_page = discard_page->next;
+
+		stat(s, DEACTIVATE_EMPTY);
+		discard_slab(s, page);
+		stat(s, FREE_SLAB);
+	}
 }
 
 /*
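The locking pattern introduced in patch 2 (unlink the empty slabs onto a
private singly linked list while node->list_lock is held, then call
discard_slab() only after the lock is dropped) is a general way to keep
potentially re-entrant cleanup out of a critical section. The following is
a minimal userspace sketch of the same idea, built around a pthread mutex;
the struct, variable, and function names are illustrative only and are not
taken from mm/slub.c.

#include <pthread.h>
#include <stdlib.h>

struct item {
	struct item *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *shared_list;	/* protected by list_lock */

static void drain_and_free(void)
{
	struct item *discard = NULL;	/* local list, like discard_page */
	struct item *it;

	pthread_mutex_lock(&list_lock);
	while ((it = shared_list) != NULL) {
		shared_list = it->next;
		/* Chain onto the local list instead of freeing under the lock. */
		it->next = discard;
		discard = it;
	}
	pthread_mutex_unlock(&list_lock);

	/* The lock is released; cleanup that might want it again is now safe. */
	while (discard) {
		it = discard;
		discard = discard->next;
		free(it);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct item *it = malloc(sizeof(*it));

		it->next = shared_list;
		shared_list = it;
	}
	drain_and_free();
	return 0;
}

The same structure appears in unfreeze_partials() above: discard_page
plays the role of the local discard list, and discard_slab() runs only
after spin_unlock(&n->list_lock).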
From 42d623a8cd08eb93ab221d22cee5a62618895bbf Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Wed, 23 Nov 2011 09:14:38 -0600
Subject: [PATCH 3/4] slub: use irqsafe_cpu_cmpxchg for put_cpu_partial

The cmpxchg must be irq safe. The fallback for this_cpu_cmpxchg only
disables preemption, which means the per cpu partial page operation can
fail on non-x86 platforms.

This patch fixes the following problem reported by Christian Kujau:

  I seem to hit it when heavy disk & cpu IO is in progress on this
  PowerBook G4. Full dmesg & .config:
  http://nerdbynature.de/bits/3.2.0-rc1/oops/

  I've enabled some debug options and now it really points to slub.c:2166
  http://nerdbynature.de/bits/3.2.0-rc1/oops/oops4m.jpg

  With debug options enabled I'm currently in the xmon debugger, not sure
  what to make of it yet, I'll try to get something useful out of it :)

Reported-by: Christian Kujau
Tested-by: Christian Kujau
Acked-by: Eric Dumazet
Acked-by: David Rientjes
Signed-off-by: Christoph Lameter
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 00efbb56a268..2a9cfd72a3d7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1978,7 +1978,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 		page->pobjects = pobjects;
 		page->next = oldpage;
 
-	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+	} while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
 	stat(s, CPU_PARTIAL_FREE);
 	return pobjects;
 }

From bc6697d8a506dedf09e8e9974ffa3a316183e608 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 22 Nov 2011 16:02:02 +0100
Subject: [PATCH 4/4] slub: avoid potential NULL dereference or corruption

show_slab_objects() can trigger a NULL dereference or memory corruption:
another cpu can change its c->page to NULL or its c->node to NUMA_NO_NODE
while we use them.

Use ACCESS_ONCE(c->page) and ACCESS_ONCE(c->node) to make sure this
cannot happen.

Acked-by: Christoph Lameter
Acked-by: David Rientjes
Signed-off-by: Eric Dumazet
Signed-off-by: Pekka Enberg
---
 mm/slub.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 2a9cfd72a3d7..ed3334d9b6da 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4444,30 +4444,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 
 		for_each_possible_cpu(cpu) {
 			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+			int node = ACCESS_ONCE(c->node);
 			struct page *page;
 
-			if (!c || c->node < 0)
+			if (node < 0)
 				continue;
-
-			if (c->page) {
-				if (flags & SO_TOTAL)
-					x = c->page->objects;
+			page = ACCESS_ONCE(c->page);
+			if (page) {
+				if (flags & SO_TOTAL)
+					x = page->objects;
 				else if (flags & SO_OBJECTS)
-					x = c->page->inuse;
+					x = page->inuse;
 				else
 					x = 1;
 
 				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
 			}
 			page = c->partial;
 
 			if (page) {
 				x = page->pobjects;
-				total += x;
-				nodes[c->node] += x;
+				total += x;
+				nodes[node] += x;
 			}
-			per_cpu[c->node]++;
+			per_cpu[node]++;
 		}
 	}
 
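The fix in patch 4 relies on snapshotting a shared field exactly once
before testing and using it. Below is a minimal userspace sketch of that
pattern, assuming a hypothetical READ_ONCE_PTR macro as a stand-in for the
kernel's ACCESS_ONCE; the struct and variable names are illustrative only
and are not taken from mm/slub.c.

#include <stdio.h>

/* Crude stand-in for the kernel's ACCESS_ONCE: force a single load. */
#define READ_ONCE_PTR(p) (*(void * volatile *)&(p))

struct cache_stats {
	int objects;
};

/* Shared with other threads, which may set it to NULL at any time. */
static struct cache_stats *shared_stats;

static int count_objects(void)
{
	/*
	 * Snapshot the pointer once.  Testing shared_stats and then
	 * dereferencing it again would let another thread NULL it in
	 * between, which is the window show_slab_objects() had before
	 * the fix.
	 */
	struct cache_stats *s = READ_ONCE_PTR(shared_stats);

	if (!s)
		return 0;
	return s->objects;	/* uses the snapshot, never re-reads */
}

int main(void)
{
	struct cache_stats st = { .objects = 42 };

	shared_stats = &st;
	printf("%d\n", count_objects());	/* 42 */
	shared_stats = NULL;
	printf("%d\n", count_objects());	/* 0 */
	return 0;
}

In the patch itself the snapshots are the local variables node and page;
every later use of c->node and c->page goes through them, so a concurrent
writer can no longer be observed halfway.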