Merge tag 'for-4.12/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - a couple DM thin provisioning fixes

 - a few request-based DM and DM multipath fixes for issues that were
   introduced when merging Christoph's changes with Bart's changes for 4.12

 - a DM bufio unsigned overflow fix

 - a couple pure fixes for the DM cache target

 - various very small tweaks to the DM cache target that enable
   considerable speed improvements in the face of continuous IO. Given
   that the cache target was significantly reworked for 4.12 I see no
   reason to sit on these advances until 4.13 considering the favorable
   results associated with such minimalist tweaks.

* tag 'for-4.12/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm cache: handle kmalloc failure allocating background_tracker struct
  dm bufio: make the parameter "retain_bytes" unsigned long
  dm mpath: multipath_clone_and_map must not return -EIO
  dm mpath: don't return -EIO from dm_report_EIO
  dm rq: add a missing break to map_request
  dm space map disk: fix some book keeping in the disk space map
  dm thin metadata: call precommit before saving the roots
  dm cache policy smq: don't do any writebacks unless IDLE
  dm cache: simplify the IDLE vs BUSY state calculation
  dm cache: track all IO to the cache rather than just the origin device's IO
  dm cache policy smq: stop preemptively demoting blocks
  dm cache policy smq: put newly promoted entries at the top of the multiqueue
  dm cache policy smq: be more aggressive about triggering a writeback
  dm cache policy smq: only demote entries in bottom half of the clean multiqueue
  dm cache: fix incorrect 'idle_time' reset in IO tracker
commit dac94e2911
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -218,7 +218,7 @@ static DEFINE_SPINLOCK(param_spinlock);
  * Buffers are freed after this timeout
  */
 static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
-static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
+static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
 
 static unsigned long dm_bufio_peak_allocated;
 static unsigned long dm_bufio_allocated_kmem_cache;
@@ -1558,10 +1558,10 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
         return true;
 }
 
-static unsigned get_retain_buffers(struct dm_bufio_client *c)
+static unsigned long get_retain_buffers(struct dm_bufio_client *c)
 {
-        unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
-        return retain_bytes / c->block_size;
+        unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
+        return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT);
 }
 
 static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
@@ -1571,7 +1571,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
         struct dm_buffer *b, *tmp;
         unsigned long freed = 0;
         unsigned long count = nr_to_scan;
-        unsigned retain_target = get_retain_buffers(c);
+        unsigned long retain_target = get_retain_buffers(c);
 
         for (l = 0; l < LIST_SIZE; l++) {
                 list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
@@ -1794,8 +1794,8 @@ static bool older_than(struct dm_buffer *b, unsigned long age_hz)
 static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
 {
         struct dm_buffer *b, *tmp;
-        unsigned retain_target = get_retain_buffers(c);
-        unsigned count;
+        unsigned long retain_target = get_retain_buffers(c);
+        unsigned long count;
         LIST_HEAD(write_list);
 
         dm_bufio_lock(c);
@@ -1955,7 +1955,7 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
 module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
 
-module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR);
+module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
 
 module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
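The bufio hunks above are the "unsigned overflow fix" from the pull summary: retain_bytes used to be a 32-bit unsigned, so values of 4 GiB and up silently wrapped, and the retained-buffer count was derived with a division by block_size instead of a shift. A minimal userspace sketch of the failure mode on a 64-bit build (the 8 GiB / 64 KiB sizes and the SECTOR_SHIFT value of 9 are assumptions for illustration, not taken from the diff):

    #include <stdio.h>

    int main(void)
    {
        /* Illustrative values: ask to retain 8 GiB of 64 KiB buffers. */
        unsigned long requested = 8UL * 1024 * 1024 * 1024;
        unsigned old_retain_bytes = requested;      /* old 32-bit type: wraps to 0 */
        unsigned long new_retain_bytes = requested; /* new type keeps the value */
        unsigned sectors_per_block_bits = 7;        /* 64 KiB = 128 sectors */

        printf("old retain_bytes   = %u\n", old_retain_bytes);
        printf("old retain buffers = %u\n", old_retain_bytes / (64u * 1024));
        printf("new retain buffers = %lu\n",
               new_retain_bytes >> (sectors_per_block_bits + 9 /* SECTOR_SHIFT */));
        return 0;
    }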
--- a/drivers/md/dm-cache-background-tracker.c
+++ b/drivers/md/dm-cache-background-tracker.c
@@ -33,6 +33,11 @@ struct background_tracker *btracker_create(unsigned max_work)
 {
         struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL);
 
+        if (!b) {
+                DMERR("couldn't create background_tracker");
+                return NULL;
+        }
+
         b->max_work = max_work;
         atomic_set(&b->pending_promotes, 0);
         atomic_set(&b->pending_writebacks, 0);
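The btracker_create() hunk is the plain "handle kmalloc failure" fix: the allocation result is now checked before the struct is initialised, and the caller gets NULL back. A generic sketch of the same pattern outside the kernel (hypothetical names, not the DM code):

    #include <stdio.h>
    #include <stdlib.h>

    struct tracker {
        unsigned max_work;
    };

    static struct tracker *tracker_create(unsigned max_work)
    {
        struct tracker *t = malloc(sizeof(*t));

        if (!t) {
            fprintf(stderr, "couldn't create tracker\n");
            return NULL;    /* caller must cope with NULL */
        }

        t->max_work = max_work;
        return t;
    }

    int main(void)
    {
        struct tracker *t = tracker_create(4096);

        if (!t)
            return 1;
        free(t);
        return 0;
    }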
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1120,8 +1120,6 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
          * Cache entries may not be populated. So we cannot rely on the
          * size of the clean queue.
          */
-        unsigned nr_clean;
-
         if (idle) {
                 /*
                  * We'd like to clean everything.
@@ -1129,18 +1127,16 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
                 return q_size(&mq->dirty) == 0u;
         }
 
-        nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty);
-        return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >=
-                percent_to_target(mq, CLEAN_TARGET);
+        /*
+         * If we're busy we don't worry about cleaning at all.
+         */
+        return true;
 }
 
-static bool free_target_met(struct smq_policy *mq, bool idle)
+static bool free_target_met(struct smq_policy *mq)
 {
         unsigned nr_free;
 
-        if (!idle)
-                return true;
-
         nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated;
         return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >=
                 percent_to_target(mq, FREE_TARGET);
@@ -1190,9 +1186,9 @@ static void queue_demotion(struct smq_policy *mq)
         if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
                 return;
 
-        e = q_peek(&mq->clean, mq->clean.nr_levels, true);
+        e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
         if (!e) {
-                if (!clean_target_met(mq, false))
+                if (!clean_target_met(mq, true))
                         queue_writeback(mq);
                 return;
         }
@@ -1220,7 +1216,7 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock,
                  * We always claim to be 'idle' to ensure some demotions happen
                  * with continuous loads.
                  */
-                if (!free_target_met(mq, true))
+                if (!free_target_met(mq))
                         queue_demotion(mq);
                 return;
         }
@@ -1421,14 +1417,10 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle,
         spin_lock_irqsave(&mq->lock, flags);
         r = btracker_issue(mq->bg_work, result);
         if (r == -ENODATA) {
-                /* find some writeback work to do */
-                if (mq->migrations_allowed && !free_target_met(mq, idle))
-                        queue_demotion(mq);
-
-                else if (!clean_target_met(mq, idle))
+                if (!clean_target_met(mq, idle)) {
                         queue_writeback(mq);
-
-                r = btracker_issue(mq->bg_work, result);
+                        r = btracker_issue(mq->bg_work, result);
+                }
         }
         spin_unlock_irqrestore(&mq->lock, flags);
 
@@ -1452,6 +1444,7 @@ static void __complete_background_work(struct smq_policy *mq,
                 clear_pending(mq, e);
                 if (success) {
                         e->oblock = work->oblock;
+                        e->level = NR_CACHE_LEVELS - 1;
                         push(mq, e);
                         // h, q, a
                 } else {
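Taken together, the policy hunks implement the "don't do any writebacks unless IDLE" behaviour: when the cache is busy, clean_target_met() simply reports the target as met, so smq_get_background_work() queues no writeback work and user IO never competes with background copies. A condensed model of that decision (illustrative only, not the kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    /* Model: writeback work is only generated while the device is idle. */
    static bool clean_target_met(unsigned nr_dirty, bool idle)
    {
        if (idle)
            return nr_dirty == 0;   /* idle: keep cleaning until nothing is dirty */
        return true;                /* busy: report the target as met, skip writebacks */
    }

    int main(void)
    {
        printf("busy, 100 dirty -> queue writeback? %s\n",
               clean_target_met(100, false) ? "no" : "yes");
        printf("idle, 100 dirty -> queue writeback? %s\n",
               clean_target_met(100, true) ? "no" : "yes");
        return 0;
    }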
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -94,6 +94,9 @@ static void iot_io_begin(struct io_tracker *iot, sector_t len)
 
 static void __iot_io_end(struct io_tracker *iot, sector_t len)
 {
+        if (!len)
+                return;
+
         iot->in_flight -= len;
         if (!iot->in_flight)
                 iot->idle_time = jiffies;
@@ -474,7 +477,7 @@ struct cache {
         spinlock_t invalidation_lock;
         struct list_head invalidation_requests;
 
-        struct io_tracker origin_tracker;
+        struct io_tracker tracker;
 
         struct work_struct commit_ws;
         struct batcher committer;
@@ -901,8 +904,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 
 static bool accountable_bio(struct cache *cache, struct bio *bio)
 {
-        return ((bio->bi_bdev == cache->origin_dev->bdev) &&
-                bio_op(bio) != REQ_OP_DISCARD);
+        return bio_op(bio) != REQ_OP_DISCARD;
 }
 
 static void accounted_begin(struct cache *cache, struct bio *bio)
@@ -912,7 +914,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio)
 
         if (accountable_bio(cache, bio)) {
                 pb->len = bio_sectors(bio);
-                iot_io_begin(&cache->origin_tracker, pb->len);
+                iot_io_begin(&cache->tracker, pb->len);
         }
 }
 
@@ -921,7 +923,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio)
         size_t pb_data_size = get_per_bio_data_size(cache);
         struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
-        iot_io_end(&cache->origin_tracker, pb->len);
+        iot_io_end(&cache->tracker, pb->len);
 }
 
 static void accounted_request(struct cache *cache, struct bio *bio)
@@ -1716,20 +1718,19 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
 
 enum busy {
         IDLE,
-        MODERATE,
         BUSY
 };
 
 static enum busy spare_migration_bandwidth(struct cache *cache)
 {
-        bool idle = iot_idle_for(&cache->origin_tracker, HZ);
+        bool idle = iot_idle_for(&cache->tracker, HZ);
         sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
                 cache->sectors_per_block;
 
-        if (current_volume <= cache->migration_threshold)
-                return idle ? IDLE : MODERATE;
+        if (idle && current_volume <= cache->migration_threshold)
+                return IDLE;
         else
-                return idle ? MODERATE : BUSY;
+                return BUSY;
 }
 
 static void inc_hit_counter(struct cache *cache, struct bio *bio)
@@ -2045,8 +2046,6 @@ static void check_migrations(struct work_struct *ws)
 
         for (;;) {
                 b = spare_migration_bandwidth(cache);
-                if (b == BUSY)
-                        break;
 
                 r = policy_get_background_work(cache->policy, b == IDLE, &op);
                 if (r == -ENODATA)
@@ -2717,7 +2716,7 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
         batcher_init(&cache->committer, commit_op, cache,
                      issue_op, cache, cache->wq);
-        iot_init(&cache->origin_tracker);
+        iot_init(&cache->tracker);
 
         init_rwsem(&cache->background_work_lock);
         prevent_background_work(cache);
@@ -2941,7 +2940,7 @@ static void cache_postsuspend(struct dm_target *ti)
 
         cancel_delayed_work(&cache->waker);
         flush_workqueue(cache->wq);
-        WARN_ON(cache->origin_tracker.in_flight);
+        WARN_ON(cache->tracker.in_flight);
 
         /*
          * If it's a flush suspend there won't be any deferred bios, so this
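The dm-cache-target.c hunks rename origin_tracker to tracker and account every accountable bio, not just IO aimed at the origin device, which is what lets spare_migration_bandwidth() make the simplified two-state IDLE/BUSY call. A small model of the new decision, assuming a one-second idle window as in the diff (iot_idle_for(..., HZ)); illustrative only, not the kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    enum busy { IDLE, BUSY };

    /*
     * Model of the simplified calculation: IDLE only when all tracked IO has
     * been quiet for the window *and* migration volume is under the threshold.
     */
    static enum busy spare_migration_bandwidth(bool all_io_idle,
                                               unsigned long migration_sectors,
                                               unsigned long threshold_sectors)
    {
        if (all_io_idle && migration_sectors <= threshold_sectors)
            return IDLE;
        return BUSY;
    }

    int main(void)
    {
        printf("%d\n", spare_migration_bandwidth(true, 1024, 2048));  /* 0: IDLE */
        printf("%d\n", spare_migration_bandwidth(false, 1024, 2048)); /* 1: BUSY */
        return 0;
    }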
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -447,7 +447,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
  * it has been invoked.
  */
 #define dm_report_EIO(m) \
-({ \
+do { \
         struct mapped_device *md = dm_table_get_md((m)->ti->table); \
  \
         pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \
@@ -455,8 +455,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
                  test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \
                  test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \
                  dm_noflush_suspending((m)->ti)); \
-        -EIO; \
-})
+} while (0)
 
 /*
  * Map cloned requests (request-based multipath)
@@ -481,7 +480,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
         if (!pgpath) {
                 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
                         return DM_MAPIO_DELAY_REQUEUE;
-                return dm_report_EIO(m);        /* Failed */
+                dm_report_EIO(m);       /* Failed */
+                return DM_MAPIO_KILL;
         } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
                    test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
                 if (pg_init_all_paths(m))
@@ -558,7 +558,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
         if (!pgpath) {
                 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
                         return DM_MAPIO_REQUEUE;
-                return dm_report_EIO(m);
+                dm_report_EIO(m);
+                return -EIO;
         }
 
         mpio->pgpath = pgpath;
@@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
         if (atomic_read(&m->nr_valid_paths) == 0 &&
             !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
                 if (error == -EIO)
-                        error = dm_report_EIO(m);
+                        dm_report_EIO(m);
                 /* complete with the original error */
                 r = DM_ENDIO_DONE;
         }
@@ -1524,8 +1525,10 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
                 fail_path(mpio->pgpath);
 
         if (atomic_read(&m->nr_valid_paths) == 0 &&
-            !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
-                return dm_report_EIO(m);
+            !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+                dm_report_EIO(m);
+                return -EIO;
+        }
 
         /* Queue for the daemon to resubmit */
         dm_bio_restore(get_bio_details_from_bio(clone), clone);
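The mpath hunks stop dm_report_EIO() from being usable as a value: the old statement-expression form evaluated to -EIO, so callers could write "return dm_report_EIO(m);" even from hooks that must return DM_MAPIO_* codes. A hedged userspace sketch of the two macro shapes (GCC statement-expression extension, illustrative names, -5 standing in for -EIO):

    #include <stdio.h>

    /* Old shape: a statement expression that also yields a value (-EIO). */
    #define report_eio_old(dev) ({ fprintf(stderr, "%s: EIO\n", (dev)); -5; })

    /* New shape: a plain statement; the caller picks its own return code. */
    #define report_eio_new(dev) do { fprintf(stderr, "%s: EIO\n", (dev)); } while (0)

    static int map_like_hook(const char *dev)
    {
        report_eio_new(dev);
        return 4;   /* stand-in for DM_MAPIO_KILL in the real request-based path */
    }

    int main(void)
    {
        int old_style = report_eio_old("dm-0"); /* compiles: easy to misuse as a return value */

        return (map_like_hook("dm-0") == 4 && old_style == -5) ? 0 : 1;
    }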
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -507,6 +507,7 @@ static int map_request(struct dm_rq_target_io *tio)
         case DM_MAPIO_KILL:
                 /* The target wants to complete the I/O */
                 dm_kill_unmapped_request(rq, -EIO);
+                break;
         default:
                 DMWARN("unimplemented target map return value: %d", r);
                 BUG();
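The one-line dm-rq change adds the break that keeps a DM_MAPIO_KILL result from falling through into the default/BUG() branch. A tiny fall-through demonstration (the enum value here is an assumption for illustration, not copied from the kernel headers):

    #include <stdio.h>

    enum { MAPIO_KILL = 4 };    /* assumed stand-in for DM_MAPIO_KILL */

    static void handle(int r, int with_break)
    {
        switch (r) {
        case MAPIO_KILL:
            puts("kill the request with -EIO");
            if (with_break)
                break;  /* the fix: stop here */
            /* fall through */
        default:
            printf("unimplemented target map return value: %d (BUG in the kernel)\n", r);
        }
    }

    int main(void)
    {
        handle(MAPIO_KILL, 0);  /* old behaviour: also prints the BUG line */
        handle(MAPIO_KILL, 1);  /* fixed: stops after the kill message */
        return 0;
    }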
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -484,11 +484,11 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd)
         if (r < 0)
                 return r;
 
-        r = save_sm_roots(pmd);
+        r = dm_tm_pre_commit(pmd->tm);
         if (r < 0)
                 return r;
 
-        r = dm_tm_pre_commit(pmd->tm);
+        r = save_sm_roots(pmd);
         if (r < 0)
                 return r;
 
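The dm-thin-metadata.c hunk only swaps the order of two calls: pre-commit is what finalises the space-map roots, so saving the roots first recorded stale values in the initial superblock. A toy model of why the ordering matters (hypothetical types, not the persistent-data API):

    #include <stdio.h>

    struct space_map { int root; int pending; };
    struct superblock { int saved_root; };

    static void pre_commit(struct space_map *sm)
    {
        sm->root += sm->pending;    /* bring the root up to date */
        sm->pending = 0;
    }

    static void save_roots(const struct space_map *sm, struct superblock *sb)
    {
        sb->saved_root = sm->root;  /* copy the root into the superblock */
    }

    int main(void)
    {
        struct space_map sm = { .root = 1, .pending = 1 };
        struct superblock sb = { 0 };

        pre_commit(&sm);    /* fixed order: finalise the roots ... */
        save_roots(&sm, &sb);   /* ... before recording them */
        printf("superblock root = %d\n", sb.saved_root);    /* 2; the old order saved 1 */
        return 0;
    }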
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -142,10 +142,23 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
 
 static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
 {
+        int r;
+        uint32_t old_count;
         enum allocation_event ev;
         struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-        return sm_ll_dec(&smd->ll, b, &ev);
+        r = sm_ll_dec(&smd->ll, b, &ev);
+        if (!r && (ev == SM_FREE)) {
+                /*
+                 * It's only free if it's also free in the last
+                 * transaction.
+                 */
+                r = sm_ll_lookup(&smd->old_ll, b, &old_count);
+                if (!r && !old_count)
+                        smd->nr_allocated_this_transaction--;
+        }
+
+        return r;
 }
 
 static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
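The sm_disk_dec_block() change is the "book keeping" fix: nr_allocated_this_transaction should only be decremented when the freed block was also free at the start of the transaction, i.e. when the free really undoes an allocation made in this transaction. A simplified model with array-backed reference counts (hypothetical names, not the kernel structures):

    #include <stdio.h>

    #define NR_BLOCKS 8

    static unsigned cur_count[NR_BLOCKS];   /* reference counts now */
    static unsigned old_count[NR_BLOCKS];   /* reference counts at transaction start */
    static unsigned nr_allocated_this_transaction;

    static void alloc_block(unsigned b)
    {
        if (cur_count[b]++ == 0 && old_count[b] == 0)
            nr_allocated_this_transaction++;
    }

    static void dec_block(unsigned b)
    {
        if (cur_count[b] == 0)
            return;
        /* Only count it as un-allocated if it was free in the old transaction too. */
        if (--cur_count[b] == 0 && old_count[b] == 0)
            nr_allocated_this_transaction--;
    }

    int main(void)
    {
        alloc_block(3);         /* allocated this transaction */
        old_count[5] = 1;       /* block 5 was already allocated before */
        cur_count[5] = 1;

        dec_block(5);           /* freeing an old block: counter unchanged */
        dec_block(3);           /* freeing a new block: counter drops back to 0 */
        printf("allocated this transaction: %u\n", nr_allocated_this_transaction);
        return 0;
    }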