dm bufio: do buffer cleanup from a workqueue

Until now, DM bufio's waiting for IO from reclaim context in its
shrinker has caused kswapd to block; which results in systemic IO
stalls and even deadlock, e.g.:
https://www.redhat.com/archives/dm-devel/2020-March/msg00025.html

Here is Dave Chinner's problem description that motivated this fix,
from: https://lore.kernel.org/linux-fsdevel/20190809215733.GZ7777@dread.disaster.area/

"Waiting for IO in kswapd reclaim context is considered harmful -
kswapd context shrinker reclaim should be as non-blocking as possible,
and any back-off to wait for IO to complete should be done by the high
level reclaim core once it's completed an entire reclaim scan cycle of
everything....

What follows from that, and is pertinent in this situation, is that if
you don't block kswapd, then other reclaim contexts are not going to
get stuck waiting for it regardless of the reclaim context they use."

Continued elsewhere:

"The only way to fix this problem once and for all is to stop using
the shrinker as a mechanism to issue and wait on IO. If you need
background writeback of dirty buffers, do it from a WQ_MEM_RECLAIM
workqueue that isn't directly in the memory reclaim path and so can
issue writeback and block safely from a GFP_KERNEL context. Kick the
workqueue from the shrinker context, but get rid of the IO submission
and waiting from the shrinker and all the GFP_NOFS memory reclaim
recursion problems go away."

As such, this commit moves buffer cleanup to a workqueue.

Suggested-by: Dave Chinner <dchinner@redhat.com>
Reported-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Tested-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
Mikulas Patocka 2020-07-03 10:26:46 -04:00 committed by Mike Snitzer
parent e766668c6c
commit 70704c33db

View File

@ -108,7 +108,10 @@ struct dm_bufio_client {
int async_write_error;
struct list_head client_list;
struct shrinker shrinker;
struct work_struct shrink_work;
atomic_long_t need_shrink;
};
/*
@ -1634,8 +1637,7 @@ static unsigned long get_retain_buffers(struct dm_bufio_client *c)
return retain_bytes;
}
static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
gfp_t gfp_mask)
static void __scan(struct dm_bufio_client *c)
{
int l;
struct dm_buffer *b, *tmp;
@ -1646,42 +1648,58 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
for (l = 0; l < LIST_SIZE; l++) {
list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
if (__try_evict_buffer(b, gfp_mask))
if (count - freed <= retain_target)
atomic_long_set(&c->need_shrink, 0);
if (!atomic_long_read(&c->need_shrink))
return;
if (__try_evict_buffer(b, GFP_KERNEL)) {
atomic_long_dec(&c->need_shrink);
freed++;
if (!--nr_to_scan || ((count - freed) <= retain_target))
return freed;
}
cond_resched();
}
}
return freed;
}
static unsigned long
dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
static void shrink_work(struct work_struct *w)
{
struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work);
dm_bufio_lock(c);
__scan(c);
dm_bufio_unlock(c);
}
static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
struct dm_bufio_client *c;
unsigned long freed;
c = container_of(shrink, struct dm_bufio_client, shrinker);
if (sc->gfp_mask & __GFP_FS)
dm_bufio_lock(c);
else if (!dm_bufio_trylock(c))
return SHRINK_STOP;
atomic_long_add(sc->nr_to_scan, &c->need_shrink);
queue_work(dm_bufio_wq, &c->shrink_work);
freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
dm_bufio_unlock(c);
return freed;
return sc->nr_to_scan;
}
static unsigned long
dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
READ_ONCE(c->n_buffers[LIST_DIRTY]);
unsigned long retain_target = get_retain_buffers(c);
unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);
return (count < retain_target) ? 0 : (count - retain_target);
if (unlikely(count < retain_target))
count = 0;
else
count -= retain_target;
if (unlikely(count < queued_for_cleanup))
count = 0;
else
count -= queued_for_cleanup;
return count;
}
/*
@ -1772,6 +1790,9 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
__free_buffer_wake(b);
}
INIT_WORK(&c->shrink_work, shrink_work);
atomic_long_set(&c->need_shrink, 0);
c->shrinker.count_objects = dm_bufio_shrink_count;
c->shrinker.scan_objects = dm_bufio_shrink_scan;
c->shrinker.seeks = 1;
@ -1817,6 +1838,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
drop_buffers(c);
unregister_shrinker(&c->shrinker);
flush_work(&c->shrink_work);
mutex_lock(&dm_bufio_clients_lock);