forked from luck/tmp_suning_uos_patched
dm thin metadata: try to avoid ever aborting transactions
Committing a transaction can consume some metadata of its own, so we now reserve a small amount of metadata to cover this. Free metadata reported by the kernel will not include this reserve. If any of the reserve has been used after a commit we enter a new internal state PM_OUT_OF_METADATA_SPACE. This is reported as PM_READ_ONLY, so no userland changes are needed. If the metadata device is resized the pool will move back to PM_WRITE. These changes mean we never need to abort and roll back a transaction due to running out of metadata space. This is particularly important because there have been a handful of reports of data corruption against DM thin-provisioning that can all be attributed to the thin-pool having run out of metadata space. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
parent
5380c05b68
commit
3ab9182816
|
@ -188,6 +188,12 @@ struct dm_pool_metadata {
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
sector_t data_block_size;
|
sector_t data_block_size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We reserve a section of the metadata for commit overhead.
|
||||||
|
* All reported space does *not* include this.
|
||||||
|
*/
|
||||||
|
dm_block_t metadata_reserve;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set if a transaction has to be aborted but the attempt to roll back
|
* Set if a transaction has to be aborted but the attempt to roll back
|
||||||
* to the previous (good) transaction failed. The only pool metadata
|
* to the previous (good) transaction failed. The only pool metadata
|
||||||
|
@ -816,6 +822,22 @@ static int __commit_transaction(struct dm_pool_metadata *pmd)
|
||||||
return dm_tm_commit(pmd->tm, sblock);
|
return dm_tm_commit(pmd->tm, sblock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void __set_metadata_reserve(struct dm_pool_metadata *pmd)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
dm_block_t total;
|
||||||
|
dm_block_t max_blocks = 4096; /* 16M */
|
||||||
|
|
||||||
|
r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total);
|
||||||
|
if (r) {
|
||||||
|
DMERR("could not get size of metadata device");
|
||||||
|
pmd->metadata_reserve = max_blocks;
|
||||||
|
} else {
|
||||||
|
sector_div(total, 10);
|
||||||
|
pmd->metadata_reserve = min(max_blocks, total);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
|
struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
|
||||||
sector_t data_block_size,
|
sector_t data_block_size,
|
||||||
bool format_device)
|
bool format_device)
|
||||||
|
@ -849,6 +871,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
|
||||||
return ERR_PTR(r);
|
return ERR_PTR(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__set_metadata_reserve(pmd);
|
||||||
|
|
||||||
return pmd;
|
return pmd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1820,6 +1844,13 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
|
||||||
down_read(&pmd->root_lock);
|
down_read(&pmd->root_lock);
|
||||||
if (!pmd->fail_io)
|
if (!pmd->fail_io)
|
||||||
r = dm_sm_get_nr_free(pmd->metadata_sm, result);
|
r = dm_sm_get_nr_free(pmd->metadata_sm, result);
|
||||||
|
|
||||||
|
if (!r) {
|
||||||
|
if (*result < pmd->metadata_reserve)
|
||||||
|
*result = 0;
|
||||||
|
else
|
||||||
|
*result -= pmd->metadata_reserve;
|
||||||
|
}
|
||||||
up_read(&pmd->root_lock);
|
up_read(&pmd->root_lock);
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
|
@ -1932,8 +1963,11 @@ int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_cou
|
||||||
int r = -EINVAL;
|
int r = -EINVAL;
|
||||||
|
|
||||||
down_write(&pmd->root_lock);
|
down_write(&pmd->root_lock);
|
||||||
if (!pmd->fail_io)
|
if (!pmd->fail_io) {
|
||||||
r = __resize_space_map(pmd->metadata_sm, new_count);
|
r = __resize_space_map(pmd->metadata_sm, new_count);
|
||||||
|
if (!r)
|
||||||
|
__set_metadata_reserve(pmd);
|
||||||
|
}
|
||||||
up_write(&pmd->root_lock);
|
up_write(&pmd->root_lock);
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
|
|
|
@ -200,7 +200,13 @@ struct dm_thin_new_mapping;
|
||||||
/*
 * Operating modes of a pool.
 *
 * The declaration order is significant: code in this file compares modes
 * numerically (e.g. get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE), so
 * the enumerators must stay in this order.
 */
enum pool_mode {
	/* metadata may be changed */
	PM_WRITE,

	/* metadata may be changed, though data may not be allocated */
	PM_OUT_OF_DATA_SPACE,

	/*
	 * Like READ_ONLY, except may switch back to WRITE on metadata resize.
	 * Reported as READ_ONLY.
	 */
	PM_OUT_OF_METADATA_SPACE,

	/* metadata may not be changed */
	PM_READ_ONLY,

	/* all I/O fails */
	PM_FAIL,
};
|
||||||
|
|
||||||
|
@ -1371,7 +1377,35 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
|
||||||
|
|
||||||
static void requeue_bios(struct pool *pool);
|
static void requeue_bios(struct pool *pool);
|
||||||
|
|
||||||
static void check_for_space(struct pool *pool)
|
static bool is_read_only_pool_mode(enum pool_mode mode)
|
||||||
|
{
|
||||||
|
return (mode == PM_OUT_OF_METADATA_SPACE || mode == PM_READ_ONLY);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_read_only(struct pool *pool)
|
||||||
|
{
|
||||||
|
return is_read_only_pool_mode(get_pool_mode(pool));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void check_for_metadata_space(struct pool *pool)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
const char *ooms_reason = NULL;
|
||||||
|
dm_block_t nr_free;
|
||||||
|
|
||||||
|
r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free);
|
||||||
|
if (r)
|
||||||
|
ooms_reason = "Could not get free metadata blocks";
|
||||||
|
else if (!nr_free)
|
||||||
|
ooms_reason = "No free metadata blocks";
|
||||||
|
|
||||||
|
if (ooms_reason && !is_read_only(pool)) {
|
||||||
|
DMERR("%s", ooms_reason);
|
||||||
|
set_pool_mode(pool, PM_OUT_OF_METADATA_SPACE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void check_for_data_space(struct pool *pool)
|
||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
dm_block_t nr_free;
|
dm_block_t nr_free;
|
||||||
|
@ -1397,14 +1431,16 @@ static int commit(struct pool *pool)
|
||||||
{
|
{
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
if (get_pool_mode(pool) >= PM_READ_ONLY)
|
if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
r = dm_pool_commit_metadata(pool->pmd);
|
r = dm_pool_commit_metadata(pool->pmd);
|
||||||
if (r)
|
if (r)
|
||||||
metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
|
metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
|
||||||
else
|
else {
|
||||||
check_for_space(pool);
|
check_for_metadata_space(pool);
|
||||||
|
check_for_data_space(pool);
|
||||||
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
@ -1470,6 +1506,19 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
r = dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks);
|
||||||
|
if (r) {
|
||||||
|
metadata_operation_failed(pool, "dm_pool_get_free_metadata_block_count", r);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!free_blocks) {
|
||||||
|
/* Let's commit before we use up the metadata reserve. */
|
||||||
|
r = commit(pool);
|
||||||
|
if (r)
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1501,6 +1550,7 @@ static blk_status_t should_error_unserviceable_bio(struct pool *pool)
|
||||||
case PM_OUT_OF_DATA_SPACE:
|
case PM_OUT_OF_DATA_SPACE:
|
||||||
return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
|
return pool->pf.error_if_no_space ? BLK_STS_NOSPC : 0;
|
||||||
|
|
||||||
|
case PM_OUT_OF_METADATA_SPACE:
|
||||||
case PM_READ_ONLY:
|
case PM_READ_ONLY:
|
||||||
case PM_FAIL:
|
case PM_FAIL:
|
||||||
return BLK_STS_IOERR;
|
return BLK_STS_IOERR;
|
||||||
|
@ -2464,8 +2514,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
|
||||||
error_retry_list(pool);
|
error_retry_list(pool);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PM_OUT_OF_METADATA_SPACE:
|
||||||
case PM_READ_ONLY:
|
case PM_READ_ONLY:
|
||||||
if (old_mode != new_mode)
|
if (!is_read_only_pool_mode(old_mode))
|
||||||
notify_of_pool_mode_change(pool, "read-only");
|
notify_of_pool_mode_change(pool, "read-only");
|
||||||
dm_pool_metadata_read_only(pool->pmd);
|
dm_pool_metadata_read_only(pool->pmd);
|
||||||
pool->process_bio = process_bio_read_only;
|
pool->process_bio = process_bio_read_only;
|
||||||
|
@ -3403,6 +3454,10 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
|
||||||
DMINFO("%s: growing the metadata device from %llu to %llu blocks",
|
DMINFO("%s: growing the metadata device from %llu to %llu blocks",
|
||||||
dm_device_name(pool->pool_md),
|
dm_device_name(pool->pool_md),
|
||||||
sb_metadata_dev_size, metadata_dev_size);
|
sb_metadata_dev_size, metadata_dev_size);
|
||||||
|
|
||||||
|
if (get_pool_mode(pool) == PM_OUT_OF_METADATA_SPACE)
|
||||||
|
set_pool_mode(pool, PM_WRITE);
|
||||||
|
|
||||||
r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
|
r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
|
||||||
if (r) {
|
if (r) {
|
||||||
metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
|
metadata_operation_failed(pool, "dm_pool_resize_metadata_dev", r);
|
||||||
|
@ -3707,7 +3762,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv,
|
||||||
struct pool_c *pt = ti->private;
|
struct pool_c *pt = ti->private;
|
||||||
struct pool *pool = pt->pool;
|
struct pool *pool = pt->pool;
|
||||||
|
|
||||||
if (get_pool_mode(pool) >= PM_READ_ONLY) {
|
if (get_pool_mode(pool) >= PM_OUT_OF_METADATA_SPACE) {
|
||||||
DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
|
DMERR("%s: unable to service pool target messages in READ_ONLY or FAIL mode",
|
||||||
dm_device_name(pool->pool_md));
|
dm_device_name(pool->pool_md));
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
|
@ -3781,6 +3836,7 @@ static void pool_status(struct dm_target *ti, status_type_t type,
|
||||||
dm_block_t nr_blocks_data;
|
dm_block_t nr_blocks_data;
|
||||||
dm_block_t nr_blocks_metadata;
|
dm_block_t nr_blocks_metadata;
|
||||||
dm_block_t held_root;
|
dm_block_t held_root;
|
||||||
|
enum pool_mode mode;
|
||||||
char buf[BDEVNAME_SIZE];
|
char buf[BDEVNAME_SIZE];
|
||||||
char buf2[BDEVNAME_SIZE];
|
char buf2[BDEVNAME_SIZE];
|
||||||
struct pool_c *pt = ti->private;
|
struct pool_c *pt = ti->private;
|
||||||
|
@ -3851,9 +3907,10 @@ static void pool_status(struct dm_target *ti, status_type_t type,
|
||||||
else
|
else
|
||||||
DMEMIT("- ");
|
DMEMIT("- ");
|
||||||
|
|
||||||
if (pool->pf.mode == PM_OUT_OF_DATA_SPACE)
|
mode = get_pool_mode(pool);
|
||||||
|
if (mode == PM_OUT_OF_DATA_SPACE)
|
||||||
DMEMIT("out_of_data_space ");
|
DMEMIT("out_of_data_space ");
|
||||||
else if (pool->pf.mode == PM_READ_ONLY)
|
else if (is_read_only_pool_mode(mode))
|
||||||
DMEMIT("ro ");
|
DMEMIT("ro ");
|
||||||
else
|
else
|
||||||
DMEMIT("rw ");
|
DMEMIT("rw ");
|
||||||
|
|
Loading…
Reference in New Issue
Block a user