Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next into for-linus-3.9

Signed-off-by: Chris Mason <chris.mason@fusionio.com>

Conflicts:
	fs/btrfs/disk-io.c
This commit is contained in:
Chris Mason 2013-02-20 14:05:45 -05:00
commit b2c6b3e061
38 changed files with 1644 additions and 716 deletions

View File

@ -19,7 +19,7 @@
#ifndef __BTRFS_BACKREF__
#define __BTRFS_BACKREF__
#include "ioctl.h"
#include <linux/btrfs.h>
#include "ulist.h"
#include "extent_io.h"

View File

@ -40,6 +40,8 @@
#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
#define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8
#define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
/* in memory btrfs inode */
struct btrfs_inode {
@ -216,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
return 0;
}
/*
* Disable DIO read nolock optimization, so new dio readers will be forced
* to grab i_mutex. It is used to avoid the endless truncate due to
* nonlocked dio read.
*/
static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
{
set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags);
smp_mb();
}
static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
{
smp_mb__before_clear_bit();
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags);
}
#endif

View File

@ -813,8 +813,7 @@ static int btrfsic_process_superblock_dev_mirror(
(bh->b_data + (dev_bytenr & 4095));
if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
sizeof(super_tmp->magic)) ||
super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) ||
memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
btrfs_super_nodesize(super_tmp) != state->metablock_size ||
btrfs_super_leafsize(super_tmp) != state->metablock_size ||

View File

@ -1138,6 +1138,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
switch (tm->op) {
case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
BUG_ON(tm->slot < n);
/* Fallthrough */
case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
case MOD_LOG_KEY_REMOVE:
btrfs_set_node_key(eb, &tm->key, tm->slot);
@ -1222,7 +1223,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
__tree_mod_log_rewind(eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
return eb_rewin;
}
@ -1441,7 +1442,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
*/
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, int cache_only, u64 *last_ret,
int start_slot, u64 *last_ret,
struct btrfs_key *progress)
{
struct extent_buffer *cur;
@ -1461,8 +1462,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
parent_level = btrfs_header_level(parent);
if (cache_only && parent_level != 1)
return 0;
WARN_ON(trans->transaction != root->fs_info->running_transaction);
WARN_ON(trans->transid != root->fs_info->generation);
@ -1508,10 +1507,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
else
uptodate = 0;
if (!cur || !uptodate) {
if (cache_only) {
free_extent_buffer(cur);
continue;
}
if (!cur) {
cur = read_tree_block(root, blocknr,
blocksize, gen);
@ -4825,8 +4820,8 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
/*
* A helper function to walk down the tree starting at min_key, and looking
* for nodes or leaves that are either in cache or have a minimum
* transaction id. This is used by the btree defrag code, and tree logging
* for nodes or leaves that are have a minimum transaction id.
* This is used by the btree defrag code, and tree logging
*
* This does not cow, but it does stuff the starting key it finds back
* into min_key, so you can call btrfs_search_slot with cow=1 on the
@ -4847,7 +4842,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
*/
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
struct btrfs_key *max_key,
struct btrfs_path *path, int cache_only,
struct btrfs_path *path,
u64 min_trans)
{
struct extent_buffer *cur;
@ -4887,15 +4882,12 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
if (sret && slot > 0)
slot--;
/*
* check this node pointer against the cache_only and
* min_trans parameters. If it isn't in cache or is too
* old, skip to the next one.
* check this node pointer against the min_trans parameters.
* If it is too old, old, skip to the next one.
*/
while (slot < nritems) {
u64 blockptr;
u64 gen;
struct extent_buffer *tmp;
struct btrfs_disk_key disk_key;
blockptr = btrfs_node_blockptr(cur, slot);
gen = btrfs_node_ptr_generation(cur, slot);
@ -4903,27 +4895,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
slot++;
continue;
}
if (!cache_only)
break;
if (max_key) {
btrfs_node_key(cur, &disk_key, slot);
if (comp_keys(&disk_key, max_key) >= 0) {
ret = 1;
goto out;
}
}
tmp = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
free_extent_buffer(tmp);
break;
}
if (tmp)
free_extent_buffer(tmp);
slot++;
break;
}
find_next_key:
/*
@ -4934,7 +4906,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
path->slots[level] = slot;
btrfs_set_path_blocking(path);
sret = btrfs_find_next_key(root, path, min_key, level,
cache_only, min_trans);
min_trans);
if (sret == 0) {
btrfs_release_path(path);
goto again;
@ -5399,8 +5371,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
/*
* this is similar to btrfs_next_leaf, but does not try to preserve
* and fixup the path. It looks for and returns the next key in the
* tree based on the current path and the cache_only and min_trans
* parameters.
* tree based on the current path and the min_trans parameters.
*
* 0 is returned if another key is found, < 0 if there are any errors
* and 1 is returned if there are no higher keys in the tree
@ -5409,8 +5380,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
* calling this function.
*/
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int level,
int cache_only, u64 min_trans)
struct btrfs_key *key, int level, u64 min_trans)
{
int slot;
struct extent_buffer *c;
@ -5461,22 +5431,8 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
if (level == 0)
btrfs_item_key_to_cpu(c, key, slot);
else {
u64 blockptr = btrfs_node_blockptr(c, slot);
u64 gen = btrfs_node_ptr_generation(c, slot);
if (cache_only) {
struct extent_buffer *cur;
cur = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
if (!cur ||
btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
slot++;
if (cur)
free_extent_buffer(cur);
goto next;
}
free_extent_buffer(cur);
}
if (gen < min_trans) {
slot++;
goto next;

View File

@ -31,10 +31,10 @@
#include <trace/events/btrfs.h>
#include <asm/kmap_types.h>
#include <linux/pagemap.h>
#include <linux/btrfs.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
#include "ioctl.h"
struct btrfs_trans_handle;
struct btrfs_transaction;
@ -46,7 +46,7 @@ extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep;
struct btrfs_ordered_sum;
#define BTRFS_MAGIC "_BHRfS_M"
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
#define BTRFS_MAX_MIRRORS 3
@ -191,6 +191,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
/* ioprio of readahead is set to idle */
#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024)
/*
* The key defines the order in the tree, and so it also defines (optimal)
* block layout.
@ -336,7 +338,9 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
/*
* File system states
*/
#define BTRFS_FS_STATE_ERROR 0
/* Super block flags */
/* Errors detected */
#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
@ -953,7 +957,15 @@ struct btrfs_dev_replace_item {
#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
#define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE
#define BTRFS_NR_RAID_TYPES 5
enum btrfs_raid_types {
BTRFS_RAID_RAID10,
BTRFS_RAID_RAID1,
BTRFS_RAID_DUP,
BTRFS_RAID_RAID0,
BTRFS_RAID_SINGLE,
BTRFS_NR_RAID_TYPES
};
#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
BTRFS_BLOCK_GROUP_SYSTEM | \
@ -1225,6 +1237,11 @@ struct seq_list {
u64 seq;
};
enum btrfs_orphan_cleanup_state {
ORPHAN_CLEANUP_STARTED = 1,
ORPHAN_CLEANUP_DONE = 2,
};
/* fs_info */
struct reloc_control;
struct btrfs_device;
@ -1250,6 +1267,7 @@ struct btrfs_fs_info {
/* block group cache stuff */
spinlock_t block_group_cache_lock;
u64 first_logical_byte;
struct rb_root block_group_cache_tree;
/* keep track of unallocated space */
@ -1288,7 +1306,23 @@ struct btrfs_fs_info {
u64 last_trans_log_full_commit;
unsigned long mount_opt;
unsigned long compress_type:4;
/*
* It is a suggestive number, the read side is safe even it gets a
* wrong number because we will write out the data into a regular
* extent. The write side(mount/remount) is under ->s_umount lock,
* so it is also safe.
*/
u64 max_inline;
/*
* Protected by ->chunk_mutex and sb->s_umount.
*
* The reason that we use two lock to protect it is because only
* remount and mount operations can change it and these two operations
* are under sb->s_umount, but the read side (chunk allocation) can not
* acquire sb->s_umount or the deadlock would happen. So we use two
* locks to protect it. On the write side, we must acquire two locks,
* and on the read side, we just need acquire one of them.
*/
u64 alloc_start;
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
@ -1365,6 +1399,7 @@ struct btrfs_fs_info {
*/
struct list_head ordered_extents;
spinlock_t delalloc_lock;
/*
* all of the inodes that have delalloc bytes. It is possible for
* this list to be empty even when there is still dirty data=ordered
@ -1372,13 +1407,6 @@ struct btrfs_fs_info {
*/
struct list_head delalloc_inodes;
/*
* special rename and truncate targets that must be on disk before
* we're allowed to commit. This is basically the ext3 style
* data=ordered list.
*/
struct list_head ordered_operations;
/*
* there is a pool of worker threads for checksumming during writes
* and a pool for checksumming after reads. This is because readers
@ -1423,10 +1451,12 @@ struct btrfs_fs_info {
u64 total_pinned;
/* protected by the delalloc lock, used to keep from writing
* metadata until there is a nice batch
*/
u64 dirty_metadata_bytes;
/* used to keep from writing metadata until there is a nice batch */
struct percpu_counter dirty_metadata_bytes;
struct percpu_counter delalloc_bytes;
s32 dirty_metadata_batch;
s32 delalloc_batch;
struct list_head dirty_cowonly_roots;
struct btrfs_fs_devices *fs_devices;
@ -1442,9 +1472,6 @@ struct btrfs_fs_info {
struct reloc_control *reloc_ctl;
spinlock_t delalloc_lock;
u64 delalloc_bytes;
/* data_alloc_cluster is only used in ssd mode */
struct btrfs_free_cluster data_alloc_cluster;
@ -1456,6 +1483,8 @@ struct btrfs_fs_info {
struct rb_root defrag_inodes;
atomic_t defrag_running;
/* Used to protect avail_{data, metadata, system}_alloc_bits */
seqlock_t profiles_lock;
/*
* these three are in extended format (availability of single
* chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
@ -1520,7 +1549,7 @@ struct btrfs_fs_info {
u64 qgroup_seq;
/* filesystem state */
u64 fs_state;
unsigned long fs_state;
struct btrfs_delayed_root *delayed_root;
@ -1623,6 +1652,9 @@ struct btrfs_root {
struct list_head root_list;
spinlock_t log_extents_lock[2];
struct list_head logged_list[2];
spinlock_t orphan_lock;
atomic_t orphan_inodes;
struct btrfs_block_rsv *orphan_block_rsv;
@ -2936,8 +2968,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
u64 num_bytes, u64 *refs, u64 *flags);
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num, int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
u64 bytenr, u64 num_bytes);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@ -3092,10 +3123,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int lowest_level,
int cache_only, u64 min_trans);
u64 min_trans);
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
struct btrfs_key *max_key,
struct btrfs_path *path, int cache_only,
struct btrfs_path *path,
u64 min_trans);
enum btrfs_compare_tree_result {
BTRFS_COMPARE_TREE_NEW,
@ -3148,7 +3179,7 @@ int btrfs_search_slot_for_read(struct btrfs_root *root,
int find_higher, int return_any);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
int start_slot, int cache_only, u64 *last_ret,
int start_slot, u64 *last_ret,
struct btrfs_key *progress);
void btrfs_release_path(struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void);
@ -3543,7 +3574,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int cache_only);
struct btrfs_root *root);
/* sysfs.c */
int btrfs_init_sysfs(void);
@ -3620,11 +3651,14 @@ __printf(5, 6)
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
/*
* If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic
* will panic(). Otherwise we BUG() here.
*/
#define btrfs_panic(fs_info, errno, fmt, args...) \
do { \
struct btrfs_fs_info *_i = (fs_info); \
__btrfs_panic(_i, __func__, __LINE__, errno, fmt, ##args); \
BUG_ON(!(_i->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)); \
__btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args); \
BUG(); \
} while (0)
/* acl.c */
@ -3745,4 +3779,11 @@ static inline int is_fstree(u64 rootid)
return 1;
return 0;
}
static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
{
return signal_pending(current);
}
#endif

View File

@ -875,7 +875,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_delayed_item *delayed_item)
{
struct extent_buffer *leaf;
struct btrfs_item *item;
char *ptr;
int ret;
@ -886,7 +885,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
item = btrfs_item_nr(leaf, path->slots[0]);
ptr = btrfs_item_ptr(leaf, path->slots[0], char);
write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
@ -1065,32 +1063,25 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
}
}
static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_delayed_node *node)
static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_delayed_node *node)
{
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
int ret;
mutex_lock(&node->mutex);
if (!node->inode_dirty) {
mutex_unlock(&node->mutex);
return 0;
}
key.objectid = node->inode_id;
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.offset = 0;
ret = btrfs_lookup_inode(trans, root, path, &key, 1);
if (ret > 0) {
btrfs_release_path(path);
mutex_unlock(&node->mutex);
return -ENOENT;
} else if (ret < 0) {
mutex_unlock(&node->mutex);
return ret;
}
@ -1105,11 +1096,47 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
btrfs_delayed_inode_release_metadata(root, node);
btrfs_release_delayed_inode(node);
mutex_unlock(&node->mutex);
return 0;
}
static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_delayed_node *node)
{
int ret;
mutex_lock(&node->mutex);
if (!node->inode_dirty) {
mutex_unlock(&node->mutex);
return 0;
}
ret = __btrfs_update_delayed_inode(trans, root, path, node);
mutex_unlock(&node->mutex);
return ret;
}
static inline int
__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_node *node)
{
int ret;
ret = btrfs_insert_delayed_items(trans, path, node->root, node);
if (ret)
return ret;
ret = btrfs_delete_delayed_items(trans, path, node->root, node);
if (ret)
return ret;
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
return ret;
}
/*
* Called when committing the transaction.
* Returns 0 on success.
@ -1119,7 +1146,6 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int nr)
{
struct btrfs_root *curr_root = root;
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_path *path;
@ -1142,15 +1168,8 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
curr_node = btrfs_first_delayed_node(delayed_root);
while (curr_node && (!count || (count && nr--))) {
curr_root = curr_node->root;
ret = btrfs_insert_delayed_items(trans, path, curr_root,
curr_node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path,
curr_root, curr_node);
if (!ret)
ret = btrfs_update_delayed_inode(trans, curr_root,
path, curr_node);
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
btrfs_release_delayed_node(curr_node);
curr_node = NULL;
@ -1183,36 +1202,12 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
return __btrfs_run_delayed_items(trans, root, nr);
}
static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &node->root->fs_info->delayed_block_rsv;
ret = btrfs_insert_delayed_items(trans, path, node->root, node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, node->root, node);
if (!ret)
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
}
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret;
if (!delayed_node)
@ -1226,8 +1221,74 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
}
mutex_unlock(&delayed_node->mutex);
ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
btrfs_release_delayed_node(delayed_node);
btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
}
int btrfs_commit_inode_delayed_inode(struct inode *inode)
{
struct btrfs_trans_handle *trans;
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret;
if (!delayed_node)
return 0;
mutex_lock(&delayed_node->mutex);
if (!delayed_node->inode_dirty) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
return 0;
}
mutex_unlock(&delayed_node->mutex);
trans = btrfs_join_transaction(delayed_node->root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
goto trans_out;
}
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
mutex_lock(&delayed_node->mutex);
if (delayed_node->inode_dirty)
ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
path, delayed_node);
else
ret = 0;
mutex_unlock(&delayed_node->mutex);
btrfs_free_path(path);
trans->block_rsv = block_rsv;
trans_out:
btrfs_end_transaction(trans, delayed_node->root);
btrfs_btree_balance_dirty(delayed_node->root);
out:
btrfs_release_delayed_node(delayed_node);
return ret;
}
@ -1258,7 +1319,6 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
int need_requeue = 0;
int ret;
async_node = container_of(work, struct btrfs_async_delayed_node, work);
@ -1277,14 +1337,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->delayed_block_rsv;
ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
if (!ret)
ret = btrfs_delete_delayed_items(trans, path, root,
delayed_node);
if (!ret)
btrfs_update_delayed_inode(trans, root, path, delayed_node);
__btrfs_commit_inode_delayed_items(trans, path, delayed_node);
/*
* Maybe new delayed items have been inserted, so we need requeue
* the work. Besides that, we must dequeue the empty delayed nodes

View File

@ -117,6 +117,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
/* Used for evicting the inode. */
void btrfs_remove_delayed_node(struct inode *inode);
void btrfs_kill_delayed_inode_items(struct inode *inode);
int btrfs_commit_inode_delayed_inode(struct inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,

View File

@ -23,6 +23,10 @@
#include "delayed-ref.h"
#include "transaction.h"
struct kmem_cache *btrfs_delayed_ref_head_cachep;
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
struct kmem_cache *btrfs_delayed_data_ref_cachep;
struct kmem_cache *btrfs_delayed_extent_op_cachep;
/*
* delayed back reference update tracking. For subvolume trees
* we queue up extent allocations and backref maintenance for
@ -422,6 +426,14 @@ int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
return 1;
}
void btrfs_release_ref_cluster(struct list_head *cluster)
{
struct list_head *pos, *q;
list_for_each_safe(pos, q, cluster)
list_del_init(pos);
}
/*
* helper function to update an extent delayed ref in the
* rbtree. existing and update must both have the same
@ -511,7 +523,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
ref->extent_op->flags_to_set;
existing_ref->extent_op->update_flags = 1;
}
kfree(ref->extent_op);
btrfs_free_delayed_extent_op(ref->extent_op);
}
}
/*
@ -592,7 +604,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
* we've updated the existing ref, free the newly
* allocated ref
*/
kfree(head_ref);
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
} else {
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
@ -653,7 +665,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
* we've updated the existing ref, free the newly
* allocated ref
*/
kfree(full_ref);
kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
} else {
delayed_refs->num_entries++;
trans->delayed_ref_updates++;
@ -714,7 +726,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
* we've updated the existing ref, free the newly
* allocated ref
*/
kfree(full_ref);
kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
} else {
delayed_refs->num_entries++;
trans->delayed_ref_updates++;
@ -738,13 +750,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
BUG_ON(extent_op && extent_op->is_data);
ref = kmalloc(sizeof(*ref), GFP_NOFS);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
kfree(ref);
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
return -ENOMEM;
}
@ -786,13 +798,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs;
BUG_ON(extent_op && !extent_op->is_data);
ref = kmalloc(sizeof(*ref), GFP_NOFS);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
return -ENOMEM;
head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref) {
kfree(ref);
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
return -ENOMEM;
}
@ -826,7 +838,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
if (!head_ref)
return -ENOMEM;
@ -860,3 +872,51 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
return btrfs_delayed_node_to_head(ref);
return NULL;
}
void btrfs_delayed_ref_exit(void)
{
if (btrfs_delayed_ref_head_cachep)
kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
if (btrfs_delayed_tree_ref_cachep)
kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
if (btrfs_delayed_data_ref_cachep)
kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
if (btrfs_delayed_extent_op_cachep)
kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}
int btrfs_delayed_ref_init(void)
{
btrfs_delayed_ref_head_cachep = kmem_cache_create(
"btrfs_delayed_ref_head",
sizeof(struct btrfs_delayed_ref_head), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_ref_head_cachep)
goto fail;
btrfs_delayed_tree_ref_cachep = kmem_cache_create(
"btrfs_delayed_tree_ref",
sizeof(struct btrfs_delayed_tree_ref), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_tree_ref_cachep)
goto fail;
btrfs_delayed_data_ref_cachep = kmem_cache_create(
"btrfs_delayed_data_ref",
sizeof(struct btrfs_delayed_data_ref), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_data_ref_cachep)
goto fail;
btrfs_delayed_extent_op_cachep = kmem_cache_create(
"btrfs_delayed_extent_op",
sizeof(struct btrfs_delayed_extent_op), 0,
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!btrfs_delayed_extent_op_cachep)
goto fail;
return 0;
fail:
btrfs_delayed_ref_exit();
return -ENOMEM;
}

View File

@ -141,12 +141,47 @@ struct btrfs_delayed_ref_root {
u64 run_delayed_start;
};
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int btrfs_delayed_ref_init(void);
void btrfs_delayed_ref_exit(void);
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)
{
return kmem_cache_alloc(btrfs_delayed_extent_op_cachep, GFP_NOFS);
}
static inline void
btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
{
if (op)
kmem_cache_free(btrfs_delayed_extent_op_cachep, op);
}
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
{
WARN_ON(atomic_read(&ref->refs) == 0);
if (atomic_dec_and_test(&ref->refs)) {
WARN_ON(ref->in_tree);
kfree(ref);
switch (ref->type) {
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_SHARED_BLOCK_REF_KEY:
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
break;
case BTRFS_EXTENT_DATA_REF_KEY:
case BTRFS_SHARED_DATA_REF_KEY:
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
break;
case 0:
kmem_cache_free(btrfs_delayed_ref_head_cachep, ref);
break;
default:
BUG();
}
}
}
@ -176,8 +211,14 @@ struct btrfs_delayed_ref_head *
btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head);
static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
{
mutex_unlock(&head->mutex);
}
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
void btrfs_release_ref_cluster(struct list_head *cluster);
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,

View File

@ -465,7 +465,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
btrfs_start_delalloc_inodes(root, 0);
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
}
btrfs_wait_ordered_extents(root, 0);
trans = btrfs_start_transaction(root, 0);

View File

@ -56,7 +56,8 @@ static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only);
static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
struct btrfs_root *root);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
@ -420,7 +421,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
struct extent_io_tree *tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 start = page_offset(page);
u64 found_start;
struct extent_buffer *eb;
@ -946,18 +947,20 @@ static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct extent_io_tree *tree;
struct btrfs_fs_info *fs_info;
int ret;
tree = &BTRFS_I(mapping->host)->io_tree;
if (wbc->sync_mode == WB_SYNC_NONE) {
struct btrfs_root *root = BTRFS_I(mapping->host)->root;
u64 num_dirty;
unsigned long thresh = 32 * 1024 * 1024;
if (wbc->for_kupdate)
return 0;
fs_info = BTRFS_I(mapping->host)->root->fs_info;
/* this is a bit racy, but that's ok */
num_dirty = root->fs_info->dirty_metadata_bytes;
if (num_dirty < thresh)
ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
BTRFS_DIRTY_METADATA_THRESH);
if (ret < 0)
return 0;
}
return btree_write_cache_pages(mapping, wbc);
@ -1125,24 +1128,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf)
{
struct btrfs_fs_info *fs_info = root->fs_info;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid) {
fs_info->running_transaction->transid) {
btrfs_assert_tree_locked(buf);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
spin_lock(&root->fs_info->delalloc_lock);
if (root->fs_info->dirty_metadata_bytes >= buf->len)
root->fs_info->dirty_metadata_bytes -= buf->len;
else {
spin_unlock(&root->fs_info->delalloc_lock);
btrfs_panic(root->fs_info, -EOVERFLOW,
"Can't clear %lu bytes from "
" dirty_mdatadata_bytes (%llu)",
buf->len,
root->fs_info->dirty_metadata_bytes);
}
spin_unlock(&root->fs_info->delalloc_lock);
__percpu_counter_add(&fs_info->dirty_metadata_bytes,
-buf->len,
fs_info->dirty_metadata_batch);
/* ugh, clear_extent_buffer_dirty needs to lock the page */
btrfs_set_lock_blocking(buf);
clear_extent_buffer_dirty(buf);
@ -1178,9 +1173,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->root_list);
INIT_LIST_HEAD(&root->logged_list[0]);
INIT_LIST_HEAD(&root->logged_list[1]);
spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
spin_lock_init(&root->accounting_lock);
spin_lock_init(&root->log_extents_lock[0]);
spin_lock_init(&root->log_extents_lock[1]);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
@ -2004,10 +2003,24 @@ int open_ctree(struct super_block *sb,
goto fail_srcu;
}
ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
if (ret) {
err = ret;
goto fail_bdi;
}
fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
(1 + ilog2(nr_cpu_ids));
ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
if (ret) {
err = ret;
goto fail_dirty_metadata_bytes;
}
fs_info->btree_inode = new_inode(sb);
if (!fs_info->btree_inode) {
err = -ENOMEM;
goto fail_bdi;
goto fail_delalloc_bytes;
}
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
@ -2017,7 +2030,6 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
INIT_LIST_HEAD(&fs_info->delalloc_inodes);
INIT_LIST_HEAD(&fs_info->ordered_operations);
INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->trans_lock);
@ -2028,6 +2040,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->tree_mod_seq_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
seqlock_init(&fs_info->profiles_lock);
init_completion(&fs_info->kobj_unregister);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@ -2126,6 +2139,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT;
fs_info->first_logical_byte = (u64)-1;
extent_io_tree_init(&fs_info->freed_extents[0],
fs_info->btree_inode->i_mapping);
@ -2187,7 +2201,8 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
/* check FS state, whether FS is broken. */
fs_info->fs_state |= btrfs_super_flags(disk_super);
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
if (ret) {
@ -2261,6 +2276,8 @@ int open_ctree(struct super_block *sb,
leafsize = btrfs_super_leafsize(disk_super);
sectorsize = btrfs_super_sectorsize(disk_super);
stripesize = btrfs_super_stripesize(disk_super);
fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
/*
* mixed block groups end up with duplicate but slightly offset
@ -2390,8 +2407,7 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
sizeof(disk_super->magic))) {
if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) {
printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
goto fail_sb_buffer;
}
@ -2694,13 +2710,13 @@ int open_ctree(struct super_block *sb,
* kthreads
*/
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
fail_block_groups:
btrfs_free_block_groups(fs_info);
fail_tree_roots:
free_root_pointers(fs_info, 1);
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
fail_sb_buffer:
btrfs_stop_workers(&fs_info->generic_worker);
@ -2721,8 +2737,11 @@ int open_ctree(struct super_block *sb,
fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
iput(fs_info->btree_inode);
fail_delalloc_bytes:
percpu_counter_destroy(&fs_info->delalloc_bytes);
fail_dirty_metadata_bytes:
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
fail_bdi:
bdi_destroy(&fs_info->bdi);
fail_srcu:
@ -2795,8 +2814,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
super = (struct btrfs_super_block *)bh->b_data;
if (btrfs_super_bytenr(super) != bytenr ||
strncmp((char *)(&super->magic), BTRFS_MAGIC,
sizeof(super->magic))) {
super->magic != cpu_to_le64(BTRFS_MAGIC)) {
brelse(bh);
continue;
}
@ -3339,7 +3357,7 @@ int close_ctree(struct btrfs_root *root)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
btrfs_error_commit_super(root);
btrfs_put_block_group_cache(fs_info);
@ -3352,9 +3370,9 @@ int close_ctree(struct btrfs_root *root)
btrfs_free_qgroup_config(root->fs_info);
if (fs_info->delalloc_bytes) {
printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
(unsigned long long)fs_info->delalloc_bytes);
if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
percpu_counter_sum(&fs_info->delalloc_bytes));
}
free_extent_buffer(fs_info->extent_root->node);
@ -3401,6 +3419,8 @@ int close_ctree(struct btrfs_root *root)
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
bdi_destroy(&fs_info->bdi);
cleanup_srcu_struct(&fs_info->subvol_srcu);
@ -3443,11 +3463,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
(unsigned long long)transid,
(unsigned long long)root->fs_info->generation);
was_dirty = set_extent_buffer_dirty(buf);
if (!was_dirty) {
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->dirty_metadata_bytes += buf->len;
spin_unlock(&root->fs_info->delalloc_lock);
}
if (!was_dirty)
__percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
buf->len,
root->fs_info->dirty_metadata_batch);
}
static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
@ -3457,8 +3476,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
* looks as though older kernels can get into trouble with
* this code, they end up stuck in balance_dirty_pages forever
*/
u64 num_dirty;
unsigned long thresh = 32 * 1024 * 1024;
int ret;
if (current->flags & PF_MEMALLOC)
return;
@ -3466,9 +3484,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
if (flush_delayed)
btrfs_balance_delayed_items(root);
num_dirty = root->fs_info->dirty_metadata_bytes;
if (num_dirty > thresh) {
ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
BTRFS_DIRTY_METADATA_THRESH);
if (ret > 0) {
balance_dirty_pages_ratelimited(
root->fs_info->btree_inode->i_mapping);
}
@ -3518,7 +3536,8 @@ void btrfs_error_commit_super(struct btrfs_root *root)
btrfs_cleanup_transaction(root);
}
static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
struct btrfs_root *root)
{
struct btrfs_inode *btrfs_inode;
struct list_head splice;
@ -3528,7 +3547,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_init(&root->fs_info->ordered_operations, &splice);
list_splice_init(&t->ordered_operations, &splice);
while (!list_empty(&splice)) {
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
ordered_operations);
@ -3544,35 +3563,16 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
{
struct list_head splice;
struct btrfs_ordered_extent *ordered;
struct inode *inode;
INIT_LIST_HEAD(&splice);
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_init(&root->fs_info->ordered_extents, &splice);
while (!list_empty(&splice)) {
ordered = list_entry(splice.next, struct btrfs_ordered_extent,
root_extent_list);
list_del_init(&ordered->root_extent_list);
atomic_inc(&ordered->refs);
/* the inode may be getting freed (in sys_unlink path). */
inode = igrab(ordered->inode);
spin_unlock(&root->fs_info->ordered_extent_lock);
if (inode)
iput(inode);
atomic_set(&ordered->refs, 1);
btrfs_put_ordered_extent(ordered);
spin_lock(&root->fs_info->ordered_extent_lock);
}
/*
* This will just short circuit the ordered completion stuff which will
* make sure the ordered extent gets properly cleaned up.
*/
list_for_each_entry(ordered, &root->fs_info->ordered_extents,
root_extent_list)
set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
spin_unlock(&root->fs_info->ordered_extent_lock);
}
@ -3594,11 +3594,11 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
}
while ((node = rb_first(&delayed_refs->root)) != NULL) {
ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
struct btrfs_delayed_ref_head *head = NULL;
ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
atomic_set(&ref->refs, 1);
if (btrfs_delayed_ref_is_head(ref)) {
struct btrfs_delayed_ref_head *head;
head = btrfs_delayed_node_to_head(ref);
if (!mutex_trylock(&head->mutex)) {
@ -3614,16 +3614,18 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
continue;
}
kfree(head->extent_op);
btrfs_free_delayed_extent_op(head->extent_op);
delayed_refs->num_heads--;
if (list_empty(&head->cluster))
delayed_refs->num_heads_ready--;
list_del_init(&head->cluster);
}
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
if (head)
mutex_unlock(&head->mutex);
spin_unlock(&delayed_refs->lock);
btrfs_put_delayed_ref(ref);
@ -3671,6 +3673,8 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
delalloc_inodes);
list_del_init(&btrfs_inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&btrfs_inode->runtime_flags);
btrfs_invalidate_inodes(btrfs_inode->root);
}
@ -3823,10 +3827,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
while (!list_empty(&list)) {
t = list_entry(list.next, struct btrfs_transaction, list);
if (!t)
break;
btrfs_destroy_ordered_operations(root);
btrfs_destroy_ordered_operations(t, root);
btrfs_destroy_ordered_extents(root);

View File

@ -72,8 +72,7 @@ enum {
RESERVE_ALLOC_NO_ACCOUNT = 2,
};
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
static int update_block_group(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@ -103,6 +102,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@ -162,6 +163,10 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
rb_link_node(&block_group->cache_node, parent, p);
rb_insert_color(&block_group->cache_node,
&info->block_group_cache_tree);
if (info->first_logical_byte > block_group->key.objectid)
info->first_logical_byte = block_group->key.objectid;
spin_unlock(&info->block_group_cache_lock);
return 0;
@ -203,8 +208,11 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
break;
}
}
if (ret)
if (ret) {
btrfs_get_block_group(ret);
if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
info->first_logical_byte = ret->key.objectid;
}
spin_unlock(&info->block_group_cache_lock);
return ret;
@ -468,8 +476,6 @@ static noinline void caching_thread(struct btrfs_work *work)
}
static int cache_block_group(struct btrfs_block_group_cache *cache,
struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int load_cache_only)
{
DEFINE_WAIT(wait);
@ -527,12 +533,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
cache->cached = BTRFS_CACHE_FAST;
spin_unlock(&cache->lock);
/*
* We can't do the read from on-disk cache during a commit since we need
* to have the normal tree locking. Also if we are currently trying to
* allocate blocks for the tree root we can't do the fast caching since
* we likely hold important locks.
*/
if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
ret = load_free_space_cache(fs_info, cache);
@ -2143,7 +2143,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
node->num_bytes);
}
}
mutex_unlock(&head->mutex);
return ret;
}
@ -2258,7 +2257,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
* process of being added. Don't run this ref yet.
*/
list_del_init(&locked_ref->cluster);
mutex_unlock(&locked_ref->mutex);
btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
@ -2285,7 +2284,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ref = &locked_ref->node;
if (extent_op && must_insert_reserved) {
kfree(extent_op);
btrfs_free_delayed_extent_op(extent_op);
extent_op = NULL;
}
@ -2294,28 +2293,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ret = run_delayed_extent_op(trans, root,
ref, extent_op);
kfree(extent_op);
btrfs_free_delayed_extent_op(extent_op);
if (ret) {
list_del_init(&locked_ref->cluster);
mutex_unlock(&locked_ref->mutex);
printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
printk(KERN_DEBUG
"btrfs: run_delayed_extent_op "
"returned %d\n", ret);
spin_lock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(locked_ref);
return ret;
}
goto next;
}
list_del_init(&locked_ref->cluster);
locked_ref = NULL;
}
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
if (locked_ref) {
if (!btrfs_delayed_ref_is_head(ref)) {
/*
* when we play the delayed ref, also correct the
* ref_mod on head
@ -2337,20 +2333,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
ret = run_one_delayed_ref(trans, root, ref, extent_op,
must_insert_reserved);
btrfs_put_delayed_ref(ref);
kfree(extent_op);
count++;
btrfs_free_delayed_extent_op(extent_op);
if (ret) {
if (locked_ref) {
list_del_init(&locked_ref->cluster);
mutex_unlock(&locked_ref->mutex);
}
printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
btrfs_delayed_ref_unlock(locked_ref);
btrfs_put_delayed_ref(ref);
printk(KERN_DEBUG
"btrfs: run_one_delayed_ref returned %d\n", ret);
spin_lock(&delayed_refs->lock);
return ret;
}
/*
* If this node is a head, that means all the refs in this head
* have been dealt with, and we will pick the next head to deal
* with, so we must unlock the head and drop it from the cluster
* list before we release it.
*/
if (btrfs_delayed_ref_is_head(ref)) {
list_del_init(&locked_ref->cluster);
btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
}
btrfs_put_delayed_ref(ref);
count++;
next:
cond_resched();
spin_lock(&delayed_refs->lock);
@ -2500,6 +2505,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
ret = run_clustered_refs(trans, root, &cluster);
if (ret < 0) {
btrfs_release_ref_cluster(&cluster);
spin_unlock(&delayed_refs->lock);
btrfs_abort_transaction(trans, root, ret);
return ret;
@ -2586,7 +2592,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_delayed_extent_op *extent_op;
int ret;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
extent_op = btrfs_alloc_delayed_extent_op();
if (!extent_op)
return -ENOMEM;
@ -2598,7 +2604,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
num_bytes, extent_op);
if (ret)
kfree(extent_op);
btrfs_free_delayed_extent_op(extent_op);
return ret;
}
@ -3223,12 +3229,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
u64 extra_flags = chunk_to_extended(flags) &
BTRFS_EXTENDED_PROFILE_MASK;
write_seqlock(&fs_info->profiles_lock);
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_METADATA)
fs_info->avail_metadata_alloc_bits |= extra_flags;
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
fs_info->avail_system_alloc_bits |= extra_flags;
write_sequnlock(&fs_info->profiles_lock);
}
/*
@ -3320,12 +3328,18 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
{
if (flags & BTRFS_BLOCK_GROUP_DATA)
flags |= root->fs_info->avail_data_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
flags |= root->fs_info->avail_system_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_METADATA)
flags |= root->fs_info->avail_metadata_alloc_bits;
unsigned seq;
do {
seq = read_seqbegin(&root->fs_info->profiles_lock);
if (flags & BTRFS_BLOCK_GROUP_DATA)
flags |= root->fs_info->avail_data_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
flags |= root->fs_info->avail_system_alloc_bits;
else if (flags & BTRFS_BLOCK_GROUP_METADATA)
flags |= root->fs_info->avail_metadata_alloc_bits;
} while (read_seqretry(&root->fs_info->profiles_lock, seq));
return btrfs_reduce_alloc_profile(root, flags);
}
@ -3564,6 +3578,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
int wait_for_alloc = 0;
int ret = 0;
/* Don't re-enter if we're already allocating a chunk */
if (trans->allocating_chunk)
return -ENOSPC;
space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
ret = update_space_info(extent_root->fs_info, flags,
@ -3606,6 +3624,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
goto again;
}
trans->allocating_chunk = true;
/*
* If we have mixed data/metadata chunks we want to make sure we keep
* allocating mixed chunks instead of individual chunks.
@ -3632,6 +3652,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
check_system_chunk(trans, extent_root, flags);
ret = btrfs_alloc_chunk(trans, extent_root, flags);
trans->allocating_chunk = false;
if (ret < 0 && ret != -ENOSPC)
goto out;
@ -3653,13 +3674,31 @@ static int can_overcommit(struct btrfs_root *root,
struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush)
{
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
u64 profile = btrfs_get_alloc_profile(root, 0);
u64 rsv_size = 0;
u64 avail;
u64 used;
u64 to_add;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly +
space_info->bytes_may_use;
space_info->bytes_pinned + space_info->bytes_readonly;
spin_lock(&global_rsv->lock);
rsv_size = global_rsv->size;
spin_unlock(&global_rsv->lock);
/*
* We only want to allow over committing if we have lots of actual space
* free, but if we don't have enough space to handle the global reserve
* space then we could end up having a real enospc problem when trying
* to allocate a chunk or some other such important allocation.
*/
rsv_size <<= 1;
if (used + rsv_size >= space_info->total_bytes)
return 0;
used += space_info->bytes_may_use;
spin_lock(&root->fs_info->free_chunk_lock);
avail = root->fs_info->free_chunk_space;
@ -3674,27 +3713,38 @@ static int can_overcommit(struct btrfs_root *root,
BTRFS_BLOCK_GROUP_RAID10))
avail >>= 1;
to_add = space_info->total_bytes;
/*
* If we aren't flushing all things, let us overcommit up to
* 1/2th of the space. If we can flush, don't let us overcommit
* too much, let it overcommit up to 1/8 of the space.
*/
if (flush == BTRFS_RESERVE_FLUSH_ALL)
avail >>= 3;
to_add >>= 3;
else
avail >>= 1;
to_add >>= 1;
if (used + bytes < space_info->total_bytes + avail)
/*
* Limit the overcommit to the amount of free space we could possibly
* allocate for chunks.
*/
to_add = min(avail, to_add);
if (used + bytes < space_info->total_bytes + to_add)
return 1;
return 0;
}
static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
unsigned long nr_pages,
enum wb_reason reason)
static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
unsigned long nr_pages,
enum wb_reason reason)
{
if (!writeback_in_progress(sb->s_bdi) &&
down_read_trylock(&sb->s_umount)) {
/* the flusher is dealing with the dirty inodes now. */
if (writeback_in_progress(sb->s_bdi))
return 1;
if (down_read_trylock(&sb->s_umount)) {
writeback_inodes_sb_nr(sb, nr_pages, reason);
up_read(&sb->s_umount);
return 1;
@ -3703,6 +3753,28 @@ static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
return 0;
}
void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
unsigned long nr_pages)
{
struct super_block *sb = root->fs_info->sb;
int started;
/* If we can not start writeback, just sync all the delalloc file. */
started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
WB_REASON_FS_FREE_SPACE);
if (!started) {
/*
* We needn't worry the filesystem going from r/w to r/o though
* we don't acquire ->s_umount mutex, because the filesystem
* should guarantee the delalloc inodes list be empty after
* the filesystem is readonly(all dirty pages are written to
* the disk).
*/
btrfs_start_delalloc_inodes(root, 0);
btrfs_wait_ordered_extents(root, 0);
}
}
/*
* shrink metadata reservation for delalloc
*/
@ -3724,7 +3796,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
space_info = block_rsv->space_info;
smp_mb();
delalloc_bytes = root->fs_info->delalloc_bytes;
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
if (delalloc_bytes == 0) {
if (trans)
return;
@ -3735,10 +3808,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
while (delalloc_bytes && loops < 3) {
max_reclaim = min(delalloc_bytes, to_reclaim);
nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb,
nr_pages,
WB_REASON_FS_FREE_SPACE);
btrfs_writeback_inodes_sb_nr(root, nr_pages);
/*
* We need to wait for the async pages to actually start before
* we do anything.
@ -3766,7 +3836,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
break;
}
smp_mb();
delalloc_bytes = root->fs_info->delalloc_bytes;
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
}
}
@ -4030,6 +4101,15 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
goto again;
out:
if (ret == -ENOSPC &&
unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
struct btrfs_block_rsv *global_rsv =
&root->fs_info->global_block_rsv;
if (block_rsv != global_rsv &&
!block_rsv_use_bytes(global_rsv, orig_bytes))
ret = 0;
}
if (flushing) {
spin_lock(&space_info->lock);
space_info->flush = 0;
@ -4668,7 +4748,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
spin_lock(&BTRFS_I(inode)->lock);
dropped = drop_outstanding_extent(inode);
to_free = calc_csum_metadata_size(inode, num_bytes, 0);
if (num_bytes)
to_free = calc_csum_metadata_size(inode, num_bytes, 0);
spin_unlock(&BTRFS_I(inode)->lock);
if (dropped > 0)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
@ -4735,8 +4816,7 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
btrfs_free_reserved_data_space(inode, num_bytes);
}
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
static int update_block_group(struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc)
{
struct btrfs_block_group_cache *cache = NULL;
@ -4773,7 +4853,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
* space back to the block group, otherwise we will leak space.
*/
if (!alloc && cache->cached == BTRFS_CACHE_NO)
cache_block_group(cache, trans, NULL, 1);
cache_block_group(cache, 1);
byte_in_group = bytenr - cache->key.objectid;
WARN_ON(byte_in_group > cache->key.offset);
@ -4823,6 +4903,13 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
struct btrfs_block_group_cache *cache;
u64 bytenr;
spin_lock(&root->fs_info->block_group_cache_lock);
bytenr = root->fs_info->first_logical_byte;
spin_unlock(&root->fs_info->block_group_cache_lock);
if (bytenr < (u64)-1)
return bytenr;
cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
if (!cache)
return 0;
@ -4873,8 +4960,7 @@ int btrfs_pin_extent(struct btrfs_root *root,
/*
* this function must be called within transaction
*/
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
u64 bytenr, u64 num_bytes)
{
struct btrfs_block_group_cache *cache;
@ -4888,7 +4974,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
* to one because the slow code to read in the free extents does check
* the pinned extents.
*/
cache_block_group(cache, trans, root, 1);
cache_block_group(cache, 1);
pin_down_extent(root, cache, bytenr, num_bytes, 0);
@ -5285,7 +5371,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
}
ret = update_block_group(trans, root, bytenr, num_bytes, 0);
ret = update_block_group(root, bytenr, num_bytes, 0);
if (ret) {
btrfs_abort_transaction(trans, extent_root, ret);
goto out;
@ -5330,7 +5416,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
if (head->extent_op) {
if (!head->must_insert_reserved)
goto out;
kfree(head->extent_op);
btrfs_free_delayed_extent_op(head->extent_op);
head->extent_op = NULL;
}
@ -5476,7 +5562,6 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
u64 num_bytes)
{
struct btrfs_caching_control *caching_ctl;
DEFINE_WAIT(wait);
caching_ctl = get_caching_control(cache);
if (!caching_ctl)
@ -5493,7 +5578,6 @@ static noinline int
wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
{
struct btrfs_caching_control *caching_ctl;
DEFINE_WAIT(wait);
caching_ctl = get_caching_control(cache);
if (!caching_ctl)
@ -5507,20 +5591,16 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
int __get_raid_index(u64 flags)
{
int index;
if (flags & BTRFS_BLOCK_GROUP_RAID10)
index = 0;
return BTRFS_RAID_RAID10;
else if (flags & BTRFS_BLOCK_GROUP_RAID1)
index = 1;
return BTRFS_RAID_RAID1;
else if (flags & BTRFS_BLOCK_GROUP_DUP)
index = 2;
return BTRFS_RAID_DUP;
else if (flags & BTRFS_BLOCK_GROUP_RAID0)
index = 3;
return BTRFS_RAID_RAID0;
else
index = 4;
return index;
return BTRFS_RAID_SINGLE;
}
static int get_block_group_index(struct btrfs_block_group_cache *cache)
@ -5678,8 +5758,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
cached = block_group_cache_done(block_group);
if (unlikely(!cached)) {
found_uncached_bg = true;
ret = cache_block_group(block_group, trans,
orig_root, 0);
ret = cache_block_group(block_group, 0);
BUG_ON(ret < 0);
ret = 0;
}
@ -6108,7 +6187,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
ret = update_block_group(root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
printk(KERN_ERR "btrfs update block group failed for %llu "
"%llu\n", (unsigned long long)ins->objectid,
@ -6172,7 +6251,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
ret = update_block_group(root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
printk(KERN_ERR "btrfs update block group failed for %llu "
"%llu\n", (unsigned long long)ins->objectid,
@ -6215,7 +6294,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
u64 num_bytes = ins->offset;
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
cache_block_group(block_group, trans, NULL, 0);
cache_block_group(block_group, 0);
caching_ctl = get_caching_control(block_group);
if (!caching_ctl) {
@ -6329,12 +6408,14 @@ use_block_rsv(struct btrfs_trans_handle *trans,
if (!ret)
return block_rsv;
if (ret && !block_rsv->failfast) {
static DEFINE_RATELIMIT_STATE(_rs,
DEFAULT_RATELIMIT_INTERVAL,
/*DEFAULT_RATELIMIT_BURST*/ 2);
if (__ratelimit(&_rs))
WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n",
ret);
if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
static DEFINE_RATELIMIT_STATE(_rs,
DEFAULT_RATELIMIT_INTERVAL * 10,
/*DEFAULT_RATELIMIT_BURST*/ 1);
if (__ratelimit(&_rs))
WARN(1, KERN_DEBUG
"btrfs: block rsv returned %d\n", ret);
}
ret = reserve_metadata_bytes(root, block_rsv, blocksize,
BTRFS_RESERVE_NO_FLUSH);
if (!ret) {
@ -6400,7 +6481,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
extent_op = btrfs_alloc_delayed_extent_op();
BUG_ON(!extent_op); /* -ENOMEM */
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
@ -7481,16 +7562,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
index = get_block_group_index(block_group);
}
if (index == 0) {
if (index == BTRFS_RAID_RAID10) {
dev_min = 4;
/* Divide by 2 */
min_free >>= 1;
} else if (index == 1) {
} else if (index == BTRFS_RAID_RAID1) {
dev_min = 2;
} else if (index == 2) {
} else if (index == BTRFS_RAID_DUP) {
/* Multiply by 2 */
min_free <<= 1;
} else if (index == 3) {
} else if (index == BTRFS_RAID_RAID0) {
dev_min = fs_devices->rw_devices;
do_div(min_free, dev_min);
}
@ -7651,11 +7732,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
list);
if (space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0) {
WARN_ON(1);
dump_space_info(space_info, 0, 0);
if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
if (space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
space_info->bytes_may_use > 0) {
WARN_ON(1);
dump_space_info(space_info, 0, 0);
}
}
list_del(&space_info->list);
kfree(space_info);
@ -7932,12 +8015,14 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
u64 extra_flags = chunk_to_extended(flags) &
BTRFS_EXTENDED_PROFILE_MASK;
write_seqlock(&fs_info->profiles_lock);
if (flags & BTRFS_BLOCK_GROUP_DATA)
fs_info->avail_data_alloc_bits &= ~extra_flags;
if (flags & BTRFS_BLOCK_GROUP_METADATA)
fs_info->avail_metadata_alloc_bits &= ~extra_flags;
if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
fs_info->avail_system_alloc_bits &= ~extra_flags;
write_sequnlock(&fs_info->profiles_lock);
}
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
@ -8036,6 +8121,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_lock(&root->fs_info->block_group_cache_lock);
rb_erase(&block_group->cache_node,
&root->fs_info->block_group_cache_tree);
if (root->fs_info->first_logical_byte == block_group->key.objectid)
root->fs_info->first_logical_byte = (u64)-1;
spin_unlock(&root->fs_info->block_group_cache_lock);
down_write(&block_group->space_info->groups_sem);
@ -8158,7 +8246,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
if (end - start >= range->minlen) {
if (!block_group_cache_done(cache)) {
ret = cache_block_group(cache, NULL, root, 0);
ret = cache_block_group(cache, 0);
if (!ret)
wait_block_group_cache_done(cache);
}

View File

@ -1834,7 +1834,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
*/
static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
SetPageUptodate(page);
@ -1846,7 +1846,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
*/
static void check_page_locked(struct extent_io_tree *tree, struct page *page)
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
unlock_page(page);
@ -1960,7 +1960,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
return -EIO;
}
bio->bi_bdev = dev->bdev;
bio_add_page(bio, page, length, start-page_offset(page));
bio_add_page(bio, page, length, start - page_offset(page));
btrfsic_submit_bio(WRITE_SYNC, bio);
wait_for_completion(&compl);
@ -2293,8 +2293,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
struct page *page = bvec->bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
start = ((u64)page->index << PAGE_CACHE_SHIFT) +
bvec->bv_offset;
start = page_offset(page) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@ -2353,8 +2352,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
(long int)bio->bi_bdev);
tree = &BTRFS_I(page->mapping->host)->io_tree;
start = ((u64)page->index << PAGE_CACHE_SHIFT) +
bvec->bv_offset;
start = page_offset(page) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@ -2471,7 +2469,7 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
struct extent_io_tree *tree = bio->bi_private;
u64 start;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
start = page_offset(page) + bvec->bv_offset;
bio->bi_private = NULL;
@ -2595,7 +2593,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
unsigned long *bio_flags)
{
struct inode *inode = page->mapping->host;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 start = page_offset(page);
u64 page_end = start + PAGE_CACHE_SIZE - 1;
u64 end;
u64 cur = start;
@ -2648,6 +2646,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
}
}
while (cur <= end) {
unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
if (cur >= last_byte) {
char *userpage;
struct extent_state *cached = NULL;
@ -2735,26 +2735,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
continue;
}
ret = 0;
if (tree->ops && tree->ops->readpage_io_hook) {
ret = tree->ops->readpage_io_hook(page, cur,
cur + iosize - 1);
}
if (!ret) {
unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
pnr -= page->index;
ret = submit_extent_page(READ, tree, page,
pnr -= page->index;
ret = submit_extent_page(READ, tree, page,
sector, disk_io_size, pg_offset,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
*bio_flags,
this_bio_flag);
if (!ret) {
nr++;
*bio_flags = this_bio_flag;
}
}
if (ret) {
if (!ret) {
nr++;
*bio_flags = this_bio_flag;
} else {
SetPageError(page);
unlock_extent(tree, cur, cur + iosize - 1);
}
@ -2806,7 +2797,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
struct inode *inode = page->mapping->host;
struct extent_page_data *epd = data;
struct extent_io_tree *tree = epd->tree;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 start = page_offset(page);
u64 delalloc_start;
u64 page_end = start + PAGE_CACHE_SIZE - 1;
u64 end;
@ -3124,12 +3115,9 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
spin_unlock(&eb->refs_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
spin_lock(&fs_info->delalloc_lock);
if (fs_info->dirty_metadata_bytes >= eb->len)
fs_info->dirty_metadata_bytes -= eb->len;
else
WARN_ON(1);
spin_unlock(&fs_info->delalloc_lock);
__percpu_counter_add(&fs_info->dirty_metadata_bytes,
-eb->len,
fs_info->dirty_metadata_batch);
ret = 1;
} else {
spin_unlock(&eb->refs_lock);
@ -3446,15 +3434,9 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
* swizzled back from swapper_space to tmpfs file
* mapping
*/
if (tree->ops &&
tree->ops->write_cache_pages_lock_hook) {
tree->ops->write_cache_pages_lock_hook(page,
data, flush_fn);
} else {
if (!trylock_page(page)) {
flush_fn(data);
lock_page(page);
}
if (!trylock_page(page)) {
flush_fn(data);
lock_page(page);
}
if (unlikely(page->mapping != mapping)) {
@ -3674,7 +3656,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset)
{
struct extent_state *cached_state = NULL;
u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
size_t blocksize = page->mapping->host->i_sb->s_blocksize;
@ -3700,7 +3682,7 @@ int try_release_extent_state(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask)
{
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
int ret = 1;
@ -3739,7 +3721,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
gfp_t mask)
{
struct extent_map *em;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
if ((mask & __GFP_WAIT) &&

View File

@ -75,7 +75,6 @@ struct extent_io_ops {
int (*merge_bio_hook)(struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int mirror);
@ -90,8 +89,6 @@ struct extent_io_ops {
struct extent_state *other);
void (*split_extent_hook)(struct inode *inode,
struct extent_state *orig, u64 split);
int (*write_cache_pages_lock_hook)(struct page *page, void *data,
void (*flush_fn)(void *));
};
struct extent_io_tree {

View File

@ -684,6 +684,24 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
return ret;
}
static u64 btrfs_sector_sum_left(struct btrfs_ordered_sum *sums,
struct btrfs_sector_sum *sector_sum,
u64 total_bytes, u64 sectorsize)
{
u64 tmp = sectorsize;
u64 next_sector = sector_sum->bytenr;
struct btrfs_sector_sum *next = sector_sum + 1;
while ((tmp + total_bytes) < sums->len) {
if (next_sector + sectorsize != next->bytenr)
break;
tmp += sectorsize;
next_sector = next->bytenr;
next++;
}
return tmp;
}
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums)
@ -789,20 +807,32 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
goto insert;
}
if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) /
if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) /
csum_size) {
u32 diff = (csum_offset + 1) * csum_size;
int extend_nr;
u64 tmp;
u32 diff;
u32 free_space;
/*
* is the item big enough already? we dropped our lock
* before and need to recheck
*/
if (diff < btrfs_item_size_nr(leaf, path->slots[0]))
goto csum;
if (btrfs_leaf_free_space(root, leaf) <
sizeof(struct btrfs_item) + csum_size * 2)
goto insert;
free_space = btrfs_leaf_free_space(root, leaf) -
sizeof(struct btrfs_item) - csum_size;
tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes,
root->sectorsize);
tmp >>= root->fs_info->sb->s_blocksize_bits;
WARN_ON(tmp < 1);
extend_nr = max_t(int, 1, (int)tmp);
diff = (csum_offset + extend_nr) * csum_size;
diff = min(diff, MAX_CSUM_ITEMS(root, csum_size) * csum_size);
diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
if (diff != csum_size)
goto insert;
diff = min(free_space, diff);
diff /= csum_size;
diff *= csum_size;
btrfs_extend_item(trans, root, path, diff);
goto csum;
@ -812,19 +842,14 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
csum_offset = 0;
if (found_next) {
u64 tmp = total_bytes + root->sectorsize;
u64 next_sector = sector_sum->bytenr;
struct btrfs_sector_sum *next = sector_sum + 1;
u64 tmp;
while (tmp < sums->len) {
if (next_sector + root->sectorsize != next->bytenr)
break;
tmp += root->sectorsize;
next_sector = next->bytenr;
next++;
}
tmp = min(tmp, next_offset - file_key.offset);
tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes,
root->sectorsize);
tmp >>= root->fs_info->sb->s_blocksize_bits;
tmp = min(tmp, (next_offset - file_key.offset) >>
root->fs_info->sb->s_blocksize_bits);
tmp = max((u64)1, tmp);
tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
ins_size = csum_size * tmp;

View File

@ -30,11 +30,11 @@
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/btrfs.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
@ -1544,7 +1544,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
* although we have opened a file as writable, we have
* to stop this write operation to ensure FS consistency.
*/
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
mutex_unlock(&inode->i_mutex);
err = -EROFS;
goto out;
@ -1627,7 +1627,20 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
*/
if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
/*
* We need to block on a committing transaction to keep us from
* throwing a ordered operation on to the list and causing
* something like sync to deadlock trying to flush out this
* inode.
*/
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans))
return PTR_ERR(trans);
btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
btrfs_end_transaction(trans, root);
if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
filemap_flush(inode->i_mapping);
}
@ -1654,16 +1667,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
struct btrfs_trans_handle *trans;
bool full_sync = 0;
trace_btrfs_sync_file(file, datasync);
/*
* We write the dirty pages in the range and wait until they complete
* out of the ->i_mutex. If so, we can flush the dirty pages by
* multi-task, and make the performance up.
* multi-task, and make the performance up. See
* btrfs_wait_ordered_range for an explanation of the ASYNC check.
*/
atomic_inc(&BTRFS_I(inode)->sync_writers);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags))
ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
atomic_dec(&BTRFS_I(inode)->sync_writers);
if (ret)
return ret;
@ -1675,7 +1693,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* range being left.
*/
atomic_inc(&root->log_batch);
btrfs_wait_ordered_range(inode, start, end - start + 1);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
if (full_sync)
btrfs_wait_ordered_range(inode, start, end - start + 1);
atomic_inc(&root->log_batch);
/*
@ -1742,13 +1763,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (ret != BTRFS_NO_LOG_SYNC) {
if (ret > 0) {
/*
* If we didn't already wait for ordered extents we need
* to do that now.
*/
if (!full_sync)
btrfs_wait_ordered_range(inode, start,
end - start + 1);
ret = btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_sync_log(trans, root);
if (ret == 0)
if (ret == 0) {
ret = btrfs_end_transaction(trans, root);
else
} else {
if (!full_sync)
btrfs_wait_ordered_range(inode, start,
end -
start + 1);
ret = btrfs_commit_transaction(trans, root);
}
}
} else {
ret = btrfs_end_transaction(trans, root);

View File

@ -1356,6 +1356,8 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
max_bitmaps = max(max_bitmaps, 1);
BUG_ON(ctl->total_bitmaps > max_bitmaps);
/*
@ -1636,10 +1638,14 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
}
/*
* some block groups are so tiny they can't be enveloped by a bitmap, so
* don't even bother to create a bitmap for this
* The original block groups from mkfs can be really small, like 8
* megabytes, so don't bother with a bitmap for those entries. However
* some block groups can be smaller than what a bitmap would cover but
* are still large enough that they could overflow the 32k memory limit,
* so allow those block groups to still be allowed to have a bitmap
* entry.
*/
if (BITS_PER_BITMAP * ctl->unit > block_group->key.offset)
if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
return false;
return true;

View File

@ -39,12 +39,12 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/mount.h>
#include <linux/btrfs.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
#include "ordered-data.h"
#include "xattr.h"
@ -608,7 +608,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
if (list_empty(&async_cow->extents))
return 0;
again:
while (!list_empty(&async_cow->extents)) {
async_extent = list_entry(async_cow->extents.next,
struct async_extent, list);
@ -648,6 +648,8 @@ static noinline int submit_compressed_extents(struct inode *inode,
async_extent->ram_size - 1,
btrfs_get_extent,
WB_SYNC_ALL);
else if (ret)
unlock_page(async_cow->locked_page);
kfree(async_extent);
cond_resched();
continue;
@ -672,6 +674,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
if (ret) {
int i;
for (i = 0; i < async_extent->nr_pages; i++) {
WARN_ON(async_extent->pages[i]->mapping);
page_cache_release(async_extent->pages[i]);
@ -679,12 +682,10 @@ static noinline int submit_compressed_extents(struct inode *inode,
kfree(async_extent->pages);
async_extent->nr_pages = 0;
async_extent->pages = NULL;
unlock_extent(io_tree, async_extent->start,
async_extent->start +
async_extent->ram_size - 1);
if (ret == -ENOSPC)
goto retry;
goto out_free; /* JDM: Requeue? */
goto out_free;
}
/*
@ -696,10 +697,13 @@ static noinline int submit_compressed_extents(struct inode *inode,
async_extent->ram_size - 1, 0);
em = alloc_extent_map();
BUG_ON(!em); /* -ENOMEM */
if (!em)
goto out_free_reserve;
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
em->mod_start = em->start;
em->mod_len = em->len;
em->block_start = ins.objectid;
em->block_len = ins.offset;
@ -726,6 +730,9 @@ static noinline int submit_compressed_extents(struct inode *inode,
async_extent->ram_size - 1, 0);
}
if (ret)
goto out_free_reserve;
ret = btrfs_add_ordered_extent_compress(inode,
async_extent->start,
ins.objectid,
@ -733,7 +740,8 @@ static noinline int submit_compressed_extents(struct inode *inode,
ins.offset,
BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
BUG_ON(ret); /* -ENOMEM */
if (ret)
goto out_free_reserve;
/*
* clear dirty, set writeback and unlock the pages.
@ -754,18 +762,30 @@ static noinline int submit_compressed_extents(struct inode *inode,
ins.objectid,
ins.offset, async_extent->pages,
async_extent->nr_pages);
BUG_ON(ret); /* -ENOMEM */
alloc_hint = ins.objectid + ins.offset;
kfree(async_extent);
if (ret)
goto out;
cond_resched();
}
ret = 0;
out:
return ret;
out_free_reserve:
btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
out_free:
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
NULL, EXTENT_CLEAR_UNLOCK_PAGE |
EXTENT_CLEAR_UNLOCK |
EXTENT_CLEAR_DELALLOC |
EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK |
EXTENT_END_WRITEBACK);
kfree(async_extent);
goto out;
goto again;
}
static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@ -892,6 +912,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
em->orig_start = em->start;
ram_size = ins.offset;
em->len = ins.offset;
em->mod_start = em->start;
em->mod_len = em->len;
em->block_start = ins.objectid;
em->block_len = ins.offset;
@ -1338,6 +1360,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
em->block_start = disk_bytenr;
em->orig_block_len = disk_num_bytes;
em->bdev = root->fs_info->fs_devices->latest_bdev;
em->mod_start = em->start;
em->mod_len = em->len;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
set_bit(EXTENT_FLAG_FILLING, &em->flags);
em->generation = -1;
@ -1508,14 +1532,22 @@ static void btrfs_set_bit_hook(struct inode *inode,
spin_unlock(&BTRFS_I(inode)->lock);
}
spin_lock(&root->fs_info->delalloc_lock);
__percpu_counter_add(&root->fs_info->delalloc_bytes, len,
root->fs_info->delalloc_batch);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->delalloc_bytes += len;
root->fs_info->delalloc_bytes += len;
if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&BTRFS_I(inode)->runtime_flags)) {
spin_lock(&root->fs_info->delalloc_lock);
if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&BTRFS_I(inode)->runtime_flags);
}
spin_unlock(&root->fs_info->delalloc_lock);
}
spin_unlock(&root->fs_info->delalloc_lock);
spin_unlock(&BTRFS_I(inode)->lock);
}
}
@ -1550,15 +1582,22 @@ static void btrfs_clear_bit_hook(struct inode *inode,
&& do_list)
btrfs_free_reserved_data_space(inode, len);
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->delalloc_bytes -= len;
__percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
root->fs_info->delalloc_batch);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->delalloc_bytes -= len;
if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&BTRFS_I(inode)->runtime_flags)) {
spin_lock(&root->fs_info->delalloc_lock);
if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&BTRFS_I(inode)->runtime_flags);
}
spin_unlock(&root->fs_info->delalloc_lock);
}
spin_unlock(&root->fs_info->delalloc_lock);
spin_unlock(&BTRFS_I(inode)->lock);
}
}
@ -2001,11 +2040,23 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
if (trans)
btrfs_end_transaction(trans, root);
if (ret)
if (ret) {
clear_extent_uptodate(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, NULL, GFP_NOFS);
/*
* If the ordered extent had an IOERR or something else went
* wrong we need to return the space for this ordered extent
* back to the allocator.
*/
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
btrfs_free_reserved_extent(root, ordered_extent->start,
ordered_extent->disk_len);
}
/*
* This needs to be done to make sure anybody waiting knows we are done
* updating everything for this ordered extent.
@ -2062,7 +2113,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int mirror)
{
size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
size_t offset = start - page_offset(page);
struct inode *inode = page->mapping->host;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
char *kaddr;
@ -2167,11 +2218,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
}
}
enum btrfs_orphan_cleanup_state {
ORPHAN_CLEANUP_STARTED = 1,
ORPHAN_CLEANUP_DONE = 2,
};
/*
* This is called in transaction commit time. If there are no orphan
* files in the subvolume, it removes orphan item and frees block_rsv
@ -2469,6 +2515,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
*/
set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags);
atomic_inc(&root->orphan_inodes);
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
@ -2491,6 +2538,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
goto out;
ret = btrfs_truncate(inode);
if (ret)
btrfs_orphan_del(NULL, inode);
} else {
nr_unlink++;
}
@ -2709,34 +2758,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *item,
struct inode *inode)
{
btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
btrfs_set_inode_mode(leaf, item, inode->i_mode);
btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
struct btrfs_map_token token;
btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
inode->i_atime.tv_sec);
btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
inode->i_atime.tv_nsec);
btrfs_init_map_token(&token);
btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
inode->i_mtime.tv_sec);
btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
inode->i_mtime.tv_nsec);
btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
&token);
btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
inode->i_ctime.tv_sec);
btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
inode->i_ctime.tv_nsec);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
inode->i_atime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
inode->i_atime.tv_nsec, &token);
btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
btrfs_set_inode_sequence(leaf, item, inode->i_version);
btrfs_set_inode_transid(leaf, item, trans->transid);
btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
btrfs_set_inode_block_group(leaf, item, 0);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
inode->i_mtime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
inode->i_mtime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
inode->i_ctime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
inode->i_ctime.tv_nsec, &token);
btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
&token);
btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
&token);
btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
btrfs_set_token_inode_block_group(leaf, item, 0, &token);
}
/*
@ -3832,6 +3888,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
/* we don't support swapfiles, so vmtruncate shouldn't fail */
truncate_setsize(inode, newsize);
/* Disable nonlocked read DIO to avoid the end less truncate */
btrfs_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
btrfs_inode_resume_unlocked_dio(inode);
ret = btrfs_truncate(inode);
if (ret && inode->i_nlink)
btrfs_orphan_del(NULL, inode);
@ -3904,6 +3966,12 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
ret = btrfs_commit_inode_delayed_inode(inode);
if (ret) {
btrfs_orphan_del(NULL, inode);
goto no_delete;
}
rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
if (!rsv) {
btrfs_orphan_del(NULL, inode);
@ -3941,7 +4009,7 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
trans = btrfs_start_transaction_lflush(root, 1);
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
btrfs_orphan_del(NULL, inode);
btrfs_free_block_rsv(root, rsv);
@ -3955,9 +4023,6 @@ void btrfs_evict_inode(struct inode *inode)
break;
trans->block_rsv = &root->fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
btrfs_end_transaction(trans, root);
trans = NULL;
btrfs_btree_balance_dirty(root);
@ -5006,12 +5071,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
err = btrfs_update_inode(trans, root, inode);
if (err) {
drop_inode = 1;
goto out_unlock;
}
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
@ -5949,6 +6008,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
em->start = start;
em->orig_start = orig_start;
em->mod_start = start;
em->mod_len = len;
em->len = len;
em->block_len = block_len;
em->block_start = block_start;
@ -5990,16 +6051,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
u64 len = bh_result->b_size;
struct btrfs_trans_handle *trans;
int unlock_bits = EXTENT_LOCKED;
int ret;
int ret = 0;
if (create) {
ret = btrfs_delalloc_reserve_space(inode, len);
if (ret)
return ret;
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
} else {
} else
len = min_t(u64, len, root->sectorsize);
}
lockstart = start;
lockend = start + len - 1;
@ -6011,14 +6071,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
return -ENOTBLK;
if (create) {
ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_DELALLOC, NULL,
&cached_state, GFP_NOFS);
if (ret)
goto unlock_err;
}
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@ -6050,7 +6102,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (!create && (em->block_start == EXTENT_MAP_HOLE ||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
free_extent_map(em);
ret = 0;
goto unlock_err;
}
@ -6148,6 +6199,11 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
*/
if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockstart + len - 1, EXTENT_DELALLOC, NULL,
&cached_state, GFP_NOFS);
BUG_ON(ret);
}
/*
@ -6156,24 +6212,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
* aren't using if there is any left over space.
*/
if (lockstart < lockend) {
if (create && len < lockend - lockstart) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockstart + len - 1,
unlock_bits | EXTENT_DEFRAG, 1, 0,
&cached_state, GFP_NOFS);
/*
* Beside unlock, we also need to cleanup reserved space
* for the left range by attaching EXTENT_DO_ACCOUNTING.
*/
clear_extent_bit(&BTRFS_I(inode)->io_tree,
lockstart + len, lockend,
unlock_bits | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
} else {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, unlock_bits, 1, 0,
&cached_state, GFP_NOFS);
}
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, unlock_bits, 1, 0,
&cached_state, GFP_NOFS);
} else {
free_extent_state(cached_state);
}
@ -6183,9 +6224,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
return 0;
unlock_err:
if (create)
unlock_bits |= EXTENT_DO_ACCOUNTING;
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
return ret;
@ -6623,15 +6661,63 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
size_t count = 0;
int flags = 0;
bool wakeup = true;
bool relock = false;
ssize_t ret;
if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
offset, nr_segs))
return 0;
return __blockdev_direct_IO(rw, iocb, inode,
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, 0);
atomic_inc(&inode->i_dio_count);
smp_mb__after_atomic_inc();
if (rw & WRITE) {
count = iov_length(iov, nr_segs);
/*
* If the write DIO is beyond the EOF, we need update
* the isize, but it is protected by i_mutex. So we can
* not unlock the i_mutex at this case.
*/
if (offset + count <= inode->i_size) {
mutex_unlock(&inode->i_mutex);
relock = true;
}
ret = btrfs_delalloc_reserve_space(inode, count);
if (ret)
goto out;
} else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags))) {
inode_dio_done(inode);
flags = DIO_LOCKING | DIO_SKIP_HOLES;
wakeup = false;
}
ret = __blockdev_direct_IO(rw, iocb, inode,
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, flags);
if (rw & WRITE) {
if (ret < 0 && ret != -EIOCBQUEUED)
btrfs_delalloc_release_space(inode, count);
else if (ret > 0 && (size_t)ret < count) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode,
count - (size_t)ret);
}
btrfs_delalloc_release_metadata(inode, 0);
}
out:
if (wakeup)
inode_dio_done(inode);
if (relock)
mutex_lock(&inode->i_mutex);
return ret;
}
#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
@ -6735,8 +6821,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
return;
}
lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
ordered = btrfs_lookup_ordered_extent(inode,
page_offset(page));
ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
if (ordered) {
/*
* IO on this page will never be started, so we need
@ -7216,8 +7301,9 @@ int btrfs_drop_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
/* the snap/subvol tree is on deleting */
if (btrfs_root_refs(&root->root_item) == 0 &&
!btrfs_is_free_space_inode(inode))
root != root->fs_info->tree_root)
return 1;
else
return generic_drop_inode(inode);
@ -7299,14 +7385,19 @@ int btrfs_init_cachep(void)
static int btrfs_getattr(struct vfsmount *mnt,
struct dentry *dentry, struct kstat *stat)
{
u64 delalloc_bytes;
struct inode *inode = dentry->d_inode;
u32 blocksize = inode->i_sb->s_blocksize;
generic_fillattr(inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
stat->blksize = PAGE_CACHE_SIZE;
spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
ALIGN(delalloc_bytes, blocksize)) >> 9;
return 0;
}
@ -7583,7 +7674,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
INIT_LIST_HEAD(&works);
INIT_LIST_HEAD(&splice);
again:
spin_lock(&root->fs_info->delalloc_lock);
list_splice_init(&root->fs_info->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
@ -7593,8 +7684,11 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
list_del_init(&binode->delalloc_inodes);
inode = igrab(&binode->vfs_inode);
if (!inode)
if (!inode) {
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&binode->runtime_flags);
continue;
}
list_add_tail(&binode->delalloc_inodes,
&root->fs_info->delalloc_inodes);
@ -7619,13 +7713,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
btrfs_wait_and_free_delalloc_work(work);
}
spin_lock(&root->fs_info->delalloc_lock);
if (!list_empty(&root->fs_info->delalloc_inodes)) {
spin_unlock(&root->fs_info->delalloc_lock);
goto again;
}
spin_unlock(&root->fs_info->delalloc_lock);
/* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that
* ordered extents get created before we return

View File

@ -42,12 +42,12 @@
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include <linux/btrfs.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
#include "volumes.h"
#include "locking.h"
@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root,
struct dentry *dentry,
char *name, int namelen,
u64 *async_transid,
struct btrfs_qgroup_inherit **inherit)
struct btrfs_qgroup_inherit *inherit)
{
struct btrfs_trans_handle *trans;
struct btrfs_key key;
@ -401,8 +401,7 @@ static noinline int create_subvol(struct btrfs_root *root,
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid,
inherit ? *inherit : NULL);
ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, inherit);
if (ret)
goto fail;
@ -533,7 +532,7 @@ static noinline int create_subvol(struct btrfs_root *root,
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
char *name, int namelen, u64 *async_transid,
bool readonly, struct btrfs_qgroup_inherit **inherit)
bool readonly, struct btrfs_qgroup_inherit *inherit)
{
struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
@ -552,10 +551,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
pending_snapshot->readonly = readonly;
if (inherit) {
pending_snapshot->inherit = *inherit;
*inherit = NULL; /* take responsibility to free it */
}
pending_snapshot->inherit = inherit;
trans = btrfs_start_transaction(root->fs_info->extent_root, 6);
if (IS_ERR(trans)) {
@ -695,7 +691,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src,
u64 *async_transid, bool readonly,
struct btrfs_qgroup_inherit **inherit)
struct btrfs_qgroup_inherit *inherit)
{
struct inode *dir = parent->dentry->d_inode;
struct dentry *dentry;
@ -818,7 +814,7 @@ static int find_new_extents(struct btrfs_root *root,
while(1) {
ret = btrfs_search_forward(root, &min_key, &max_key,
path, 0, newer_than);
path, newer_than);
if (ret != 0)
goto none;
if (min_key.objectid != ino)
@ -1206,6 +1202,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
if (!(inode->i_sb->s_flags & MS_ACTIVE))
break;
if (btrfs_defrag_cancelled(root->fs_info)) {
printk(KERN_DEBUG "btrfs: defrag_file cancelled\n");
ret = -EAGAIN;
break;
}
if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
extent_thresh, &last_len, &skip,
&defrag_end, range->flags &
@ -1329,9 +1331,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
int ret = 0;
int mod = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@ -1363,6 +1362,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
*devstr = '\0';
devstr = vol_args->name;
devid = simple_strtoull(devstr, &end, 10);
if (!devid) {
ret = -EINVAL;
goto out_free;
}
printk(KERN_INFO "btrfs: resizing devid %llu\n",
(unsigned long long)devid);
}
@ -1371,7 +1374,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (!device) {
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
(unsigned long long)devid);
ret = -EINVAL;
ret = -ENODEV;
goto out_free;
}
@ -1379,7 +1382,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
printk(KERN_INFO "btrfs: resizer unable to apply on "
"readonly device %llu\n",
(unsigned long long)devid);
ret = -EINVAL;
ret = -EPERM;
goto out_free;
}
@ -1401,7 +1404,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
}
if (device->is_tgtdev_for_dev_replace) {
ret = -EINVAL;
ret = -EPERM;
goto out_free;
}
@ -1457,7 +1460,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
char *name, unsigned long fd, int subvol,
u64 *transid, bool readonly,
struct btrfs_qgroup_inherit **inherit)
struct btrfs_qgroup_inherit *inherit)
{
int namelen;
int ret = 0;
@ -1566,7 +1569,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
vol_args->fd, subvol, ptr,
readonly, &inherit);
readonly, inherit);
if (ret == 0 && ptr &&
copy_to_user(arg +
@ -1863,7 +1866,7 @@ static noinline int search_ioctl(struct inode *inode,
path->keep_locks = 1;
while(1) {
ret = btrfs_search_forward(root, &key, &max_key, path, 0,
ret = btrfs_search_forward(root, &key, &max_key, path,
sk->min_transid);
if (ret != 0) {
if (ret > 0)
@ -2171,6 +2174,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
shrink_dcache_sb(root->fs_info->sb);
btrfs_invalidate_inodes(dest);
d_delete(dentry);
/* the last ref */
if (dest->cache_inode) {
iput(dest->cache_inode);
dest->cache_inode = NULL;
}
}
out_dput:
dput(dentry);
@ -2211,10 +2220,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = -EPERM;
goto out;
}
ret = btrfs_defrag_root(root, 0);
ret = btrfs_defrag_root(root);
if (ret)
goto out;
ret = btrfs_defrag_root(root->fs_info->extent_root, 0);
ret = btrfs_defrag_root(root->fs_info->extent_root);
break;
case S_IFREG:
if (!(file->f_mode & FMODE_WRITE)) {
@ -3111,7 +3120,7 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
u64 transid;
int ret;
trans = btrfs_attach_transaction(root);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT)
return PTR_ERR(trans);
@ -3289,7 +3298,7 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
struct inode_fs_paths *ipath = NULL;
struct btrfs_path *path;
if (!capable(CAP_SYS_ADMIN))
if (!capable(CAP_DAC_READ_SEARCH))
return -EPERM;
path = btrfs_alloc_path();
@ -3914,6 +3923,65 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
return ret;
}
static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
const char *label = root->fs_info->super_copy->label;
size_t len = strnlen(label, BTRFS_LABEL_SIZE);
int ret;
if (len == BTRFS_LABEL_SIZE) {
pr_warn("btrfs: label is too long, return the first %zu bytes\n",
--len);
}
mutex_lock(&root->fs_info->volume_mutex);
ret = copy_to_user(arg, label, len);
mutex_unlock(&root->fs_info->volume_mutex);
return ret ? -EFAULT : 0;
}
static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
struct btrfs_super_block *super_block = root->fs_info->super_copy;
struct btrfs_trans_handle *trans;
char label[BTRFS_LABEL_SIZE];
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user(label, arg, sizeof(label)))
return -EFAULT;
if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
pr_err("btrfs: unable to set label with more than %d bytes\n",
BTRFS_LABEL_SIZE - 1);
return -EINVAL;
}
ret = mnt_want_write_file(file);
if (ret)
return ret;
mutex_lock(&root->fs_info->volume_mutex);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_unlock;
}
strcpy(super_block->label, label);
ret = btrfs_end_transaction(trans, root);
out_unlock:
mutex_unlock(&root->fs_info->volume_mutex);
mnt_drop_write_file(file);
return ret;
}
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
@ -4014,6 +4082,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_qgroup_limit(file, argp);
case BTRFS_IOC_DEV_REPLACE:
return btrfs_ioctl_dev_replace(root, argp);
case BTRFS_IOC_GET_FSLABEL:
return btrfs_ioctl_get_fslabel(file, argp);
case BTRFS_IOC_SET_FSLABEL:
return btrfs_ioctl_set_fslabel(file, argp);
}
return -ENOTTY;

View File

@ -113,11 +113,10 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
read_unlock(&eb->lock);
return;
}
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
atomic_read(&eb->blocking_writers) == 0);
goto again;
}
atomic_inc(&eb->read_locks);

View File

@ -196,6 +196,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->file_offset = file_offset;
entry->start = start;
entry->len = len;
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
!(type == BTRFS_ORDERED_NOCOW))
entry->csum_bytes_left = disk_len;
entry->disk_len = disk_len;
entry->bytes_left = len;
entry->inode = igrab(inode);
@ -213,6 +216,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
INIT_LIST_HEAD(&entry->root_extent_list);
INIT_LIST_HEAD(&entry->work_list);
init_completion(&entry->completion);
INIT_LIST_HEAD(&entry->log_list);
trace_btrfs_ordered_extent_add(inode, entry);
@ -270,6 +274,10 @@ void btrfs_add_ordered_sum(struct inode *inode,
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
list_add_tail(&sum->list, &entry->list);
WARN_ON(entry->csum_bytes_left < sum->len);
entry->csum_bytes_left -= sum->len;
if (entry->csum_bytes_left == 0)
wake_up(&entry->wait);
spin_unlock_irq(&tree->lock);
}
@ -405,6 +413,66 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
return ret == 0;
}
/* Needs to either be called under a log transaction or the log_mutex */
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode)
{
struct btrfs_ordered_inode_tree *tree;
struct btrfs_ordered_extent *ordered;
struct rb_node *n;
int index = log->log_transid % 2;
tree = &BTRFS_I(inode)->ordered_tree;
spin_lock_irq(&tree->lock);
for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
spin_lock(&log->log_extents_lock[index]);
if (list_empty(&ordered->log_list)) {
list_add_tail(&ordered->log_list, &log->logged_list[index]);
atomic_inc(&ordered->refs);
}
spin_unlock(&log->log_extents_lock[index]);
}
spin_unlock_irq(&tree->lock);
}
void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
{
struct btrfs_ordered_extent *ordered;
int index = transid % 2;
spin_lock_irq(&log->log_extents_lock[index]);
while (!list_empty(&log->logged_list[index])) {
ordered = list_first_entry(&log->logged_list[index],
struct btrfs_ordered_extent,
log_list);
list_del_init(&ordered->log_list);
spin_unlock_irq(&log->log_extents_lock[index]);
wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
&ordered->flags));
btrfs_put_ordered_extent(ordered);
spin_lock_irq(&log->log_extents_lock[index]);
}
spin_unlock_irq(&log->log_extents_lock[index]);
}
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
{
struct btrfs_ordered_extent *ordered;
int index = transid % 2;
spin_lock_irq(&log->log_extents_lock[index]);
while (!list_empty(&log->logged_list[index])) {
ordered = list_first_entry(&log->logged_list[index],
struct btrfs_ordered_extent,
log_list);
list_del_init(&ordered->log_list);
spin_unlock_irq(&log->log_extents_lock[index]);
btrfs_put_ordered_extent(ordered);
spin_lock_irq(&log->log_extents_lock[index]);
}
spin_unlock_irq(&log->log_extents_lock[index]);
}
/*
* used to drop a reference on an ordered extent. This will free
* the extent if the last reference is dropped
@ -544,10 +612,12 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
* extra check to make sure the ordered operation list really is empty
* before we return
*/
int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int wait)
{
struct btrfs_inode *btrfs_inode;
struct inode *inode;
struct btrfs_transaction *cur_trans = trans->transaction;
struct list_head splice;
struct list_head works;
struct btrfs_delalloc_work *work, *next;
@ -558,14 +628,10 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->fs_info->ordered_extent_lock);
again:
list_splice_init(&root->fs_info->ordered_operations, &splice);
list_splice_init(&cur_trans->ordered_operations, &splice);
while (!list_empty(&splice)) {
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
ordered_operations);
inode = &btrfs_inode->vfs_inode;
list_del_init(&btrfs_inode->ordered_operations);
@ -574,24 +640,22 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
* the inode may be getting freed (in sys_unlink path).
*/
inode = igrab(inode);
if (!wait && inode) {
list_add_tail(&BTRFS_I(inode)->ordered_operations,
&root->fs_info->ordered_operations);
}
if (!inode)
continue;
if (!wait)
list_add_tail(&BTRFS_I(inode)->ordered_operations,
&cur_trans->ordered_operations);
spin_unlock(&root->fs_info->ordered_extent_lock);
work = btrfs_alloc_delalloc_work(inode, wait, 1);
if (!work) {
spin_lock(&root->fs_info->ordered_extent_lock);
if (list_empty(&BTRFS_I(inode)->ordered_operations))
list_add_tail(&btrfs_inode->ordered_operations,
&splice);
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_tail(&splice,
&root->fs_info->ordered_operations);
&cur_trans->ordered_operations);
spin_unlock(&root->fs_info->ordered_extent_lock);
ret = -ENOMEM;
goto out;
@ -603,9 +667,6 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
cond_resched();
spin_lock(&root->fs_info->ordered_extent_lock);
}
if (wait && !list_empty(&root->fs_info->ordered_operations))
goto again;
spin_unlock(&root->fs_info->ordered_extent_lock);
out:
list_for_each_entry_safe(work, next, &works, list) {
@ -974,6 +1035,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode)
{
struct btrfs_transaction *cur_trans = trans->transaction;
u64 last_mod;
last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
@ -988,7 +1050,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
spin_lock(&root->fs_info->ordered_extent_lock);
if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
list_add_tail(&BTRFS_I(inode)->ordered_operations,
&root->fs_info->ordered_operations);
&cur_trans->ordered_operations);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
}

View File

@ -79,6 +79,8 @@ struct btrfs_ordered_sum {
#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
* has done its due diligence in updating
* the isize. */
#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
ordered extent */
struct btrfs_ordered_extent {
/* logical offset in the file */
@ -96,6 +98,9 @@ struct btrfs_ordered_extent {
/* number of bytes that still need writing */
u64 bytes_left;
/* number of bytes that still need csumming */
u64 csum_bytes_left;
/*
* the end of the ordered extent which is behind it but
* didn't update disk_i_size. Please see the comment of
@ -118,6 +123,9 @@ struct btrfs_ordered_extent {
/* list of checksums for insertion when the extent io is done */
struct list_head list;
/* If we need to wait on this to be done */
struct list_head log_list;
/* used to wait for the BTRFS_ORDERED_COMPLETE bit */
wait_queue_head_t wait;
@ -189,11 +197,15 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int wait);
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
int __init ordered_data_init(void);
void ordered_data_exit(void);
#endif

View File

@ -294,6 +294,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_dev_extent_chunk_offset(l, dev_extent),
(unsigned long long)
btrfs_dev_extent_length(l, dev_extent));
break;
case BTRFS_DEV_STATS_KEY:
printk(KERN_INFO "\t\tdevice stats\n");
break;

View File

@ -23,13 +23,13 @@
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>
#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "locking.h"
#include "ulist.h"
#include "ioctl.h"
#include "backref.h"
/* TODO XXX FIXME
@ -847,6 +847,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
int ret = 0;
spin_lock(&fs_info->qgroup_lock);
if (!fs_info->quota_root) {
spin_unlock(&fs_info->qgroup_lock);
return 0;
}
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
quota_root = fs_info->quota_root;

View File

@ -3017,7 +3017,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
}
}
page_start = (u64)page->index << PAGE_CACHE_SHIFT;
page_start = page_offset(page);
page_end = page_start + PAGE_CACHE_SIZE - 1;
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);

View File

@ -2708,7 +2708,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
int ret;
struct btrfs_root *root = sctx->dev_root;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
return -EIO;
gen = root->fs_info->last_trans_committed;

View File

@ -85,6 +85,7 @@ struct send_ctx {
u32 send_max_size;
u64 total_send_size;
u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */
struct vfsmount *mnt;
@ -3709,6 +3710,39 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
return ret;
}
/*
* Send an update extent command to user space.
*/
static int send_update_extent(struct send_ctx *sctx,
u64 offset, u32 len)
{
int ret = 0;
struct fs_path *p;
p = fs_path_alloc(sctx);
if (!p)
return -ENOMEM;
ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
if (ret < 0)
goto out;
ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
if (ret < 0)
goto out;
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
ret = send_cmd(sctx);
tlv_put_failure:
out:
fs_path_free(sctx, p);
return ret;
}
static int send_write_or_clone(struct send_ctx *sctx,
struct btrfs_path *path,
struct btrfs_key *key,
@ -3744,7 +3778,11 @@ static int send_write_or_clone(struct send_ctx *sctx,
goto out;
}
if (!clone_root) {
if (clone_root) {
ret = send_clone(sctx, offset, len, clone_root);
} else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
ret = send_update_extent(sctx, offset, len);
} else {
while (pos < len) {
l = len - pos;
if (l > BTRFS_SEND_READ_SIZE)
@ -3757,10 +3795,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
pos += ret;
}
ret = 0;
} else {
ret = send_clone(sctx, offset, len, clone_root);
}
out:
return ret;
}
@ -4536,7 +4571,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
struct btrfs_fs_info *fs_info;
struct btrfs_ioctl_send_args *arg = NULL;
struct btrfs_key key;
struct file *filp = NULL;
struct send_ctx *sctx = NULL;
u32 i;
u64 *clone_sources_tmp = NULL;
@ -4561,6 +4595,11 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
goto out;
}
if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) {
ret = -EINVAL;
goto out;
}
sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
if (!sctx) {
ret = -ENOMEM;
@ -4572,6 +4611,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
INIT_LIST_HEAD(&sctx->name_cache_list);
sctx->flags = arg->flags;
sctx->send_filp = fget(arg->send_fd);
if (IS_ERR(sctx->send_filp)) {
ret = PTR_ERR(sctx->send_filp);
@ -4673,8 +4714,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
goto out;
out:
if (filp)
fput(filp);
kfree(arg);
vfree(clone_sources_tmp);

View File

@ -86,6 +86,7 @@ enum btrfs_send_cmd {
BTRFS_SEND_C_UTIMES,
BTRFS_SEND_C_END,
BTRFS_SEND_C_UPDATE_EXTENT,
__BTRFS_SEND_C_MAX,
};
#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)

View File

@ -41,13 +41,13 @@
#include <linux/slab.h>
#include <linux/cleancache.h>
#include <linux/ratelimit.h>
#include <linux/btrfs.h>
#include "compat.h"
#include "delayed-inode.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
#include "xattr.h"
#include "volumes.h"
@ -63,8 +63,7 @@
static const struct super_operations btrfs_super_ops;
static struct file_system_type btrfs_fs_type;
static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
char nbuf[16])
static const char *btrfs_decode_error(int errno, char nbuf[16])
{
char *errstr = NULL;
@ -98,7 +97,7 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
* today we only save the error info into ram. Long term we'll
* also send it down to the disk
*/
fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
}
static void save_error_info(struct btrfs_fs_info *fs_info)
@ -114,7 +113,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
if (sb->s_flags & MS_RDONLY)
return;
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
sb->s_flags |= MS_RDONLY;
printk(KERN_INFO "btrfs is forced readonly\n");
/*
@ -142,8 +141,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
struct super_block *sb = fs_info->sb;
char nbuf[16];
const char *errstr;
va_list args;
va_start(args, fmt);
/*
* Special case: if the error is EROFS, and we're already
@ -152,15 +149,18 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
return;
errstr = btrfs_decode_error(fs_info, errno, nbuf);
errstr = btrfs_decode_error(errno, nbuf);
if (fmt) {
struct va_format vaf = {
.fmt = fmt,
.va = &args,
};
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n",
sb->s_id, function, line, errstr, &vaf);
va_end(args);
} else {
printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
sb->s_id, function, line, errstr);
@ -171,7 +171,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
save_error_info(fs_info);
btrfs_handle_error(fs_info);
}
va_end(args);
}
static const char * const logtypes[] = {
@ -261,7 +260,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
char nbuf[16];
const char *errstr;
errstr = btrfs_decode_error(root->fs_info, errno, nbuf);
errstr = btrfs_decode_error(errno, nbuf);
btrfs_printk(root->fs_info,
"%s:%d: Aborting unused transaction(%s).\n",
function, line, errstr);
@ -289,8 +288,8 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
va_start(args, fmt);
vaf.va = &args;
errstr = btrfs_decode_error(fs_info, errno, nbuf);
if (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)
errstr = btrfs_decode_error(errno, nbuf);
if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
s_id, function, line, &vaf, errstr);
@ -438,6 +437,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_compress_force:
case Opt_compress_force_type:
compress_force = true;
/* Fallthrough */
case Opt_compress:
case Opt_compress_type:
if (token == Opt_compress ||
@ -519,7 +519,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_alloc_start:
num = match_strdup(&args[0]);
if (num) {
mutex_lock(&info->chunk_mutex);
info->alloc_start = memparse(num, NULL);
mutex_unlock(&info->chunk_mutex);
kfree(num);
printk(KERN_INFO
"btrfs: allocations start at %llu\n",
@ -876,7 +878,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
btrfs_wait_ordered_extents(root, 0);
trans = btrfs_attach_transaction(root);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
/* no transaction, don't bother */
if (PTR_ERR(trans) == -ENOENT)
@ -1289,7 +1291,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
fs_info->mount_opt = old_opts;
fs_info->compress_type = old_compress_type;
fs_info->max_inline = old_max_inline;
mutex_lock(&fs_info->chunk_mutex);
fs_info->alloc_start = old_alloc_start;
mutex_unlock(&fs_info->chunk_mutex);
btrfs_resize_thread_pool(fs_info,
old_thread_pool_size, fs_info->thread_pool_size);
fs_info->metadata_ratio = old_metadata_ratio;
@ -1559,7 +1563,7 @@ static int btrfs_freeze(struct super_block *sb)
struct btrfs_trans_handle *trans;
struct btrfs_root *root = btrfs_sb(sb)->tree_root;
trans = btrfs_attach_transaction(root);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
/* no transaction, don't bother */
if (PTR_ERR(trans) == -ENOENT)
@ -1684,10 +1688,14 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_delayed_inode;
err = btrfs_interface_init();
err = btrfs_delayed_ref_init();
if (err)
goto free_auto_defrag;
err = btrfs_interface_init();
if (err)
goto free_delayed_ref;
err = register_filesystem(&btrfs_fs_type);
if (err)
goto unregister_ioctl;
@ -1699,6 +1707,8 @@ static int __init init_btrfs_fs(void)
unregister_ioctl:
btrfs_interface_exit();
free_delayed_ref:
btrfs_delayed_ref_exit();
free_auto_defrag:
btrfs_auto_defrag_exit();
free_delayed_inode:
@ -1720,6 +1730,7 @@ static int __init init_btrfs_fs(void)
static void __exit exit_btrfs_fs(void)
{
btrfs_destroy_cachep();
btrfs_delayed_ref_exit();
btrfs_auto_defrag_exit();
btrfs_delayed_inode_exit();
ordered_data_exit();

View File

@ -40,7 +40,6 @@ void put_transaction(struct btrfs_transaction *transaction)
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(transaction->delayed_refs.root.rb_node);
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
}
@ -51,6 +50,14 @@ static noinline void switch_commit_root(struct btrfs_root *root)
root->commit_root = btrfs_root_node(root);
}
static inline int can_join_transaction(struct btrfs_transaction *trans,
int type)
{
return !(trans->in_commit &&
type != TRANS_JOIN &&
type != TRANS_JOIN_NOLOCK);
}
/*
* either allocate a new transaction or hop into the existing one
*/
@ -62,7 +69,7 @@ static noinline int join_transaction(struct btrfs_root *root, int type)
spin_lock(&fs_info->trans_lock);
loop:
/* The file system has been taken offline. No new transactions. */
if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
spin_unlock(&fs_info->trans_lock);
return -EROFS;
}
@ -86,6 +93,10 @@ static noinline int join_transaction(struct btrfs_root *root, int type)
spin_unlock(&fs_info->trans_lock);
return cur_trans->aborted;
}
if (!can_join_transaction(cur_trans, type)) {
spin_unlock(&fs_info->trans_lock);
return -EBUSY;
}
atomic_inc(&cur_trans->use_count);
atomic_inc(&cur_trans->num_writers);
cur_trans->num_joined++;
@ -114,7 +125,7 @@ static noinline int join_transaction(struct btrfs_root *root, int type)
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
cur_trans = fs_info->running_transaction;
goto loop;
} else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
} else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
spin_unlock(&fs_info->trans_lock);
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
return -EROFS;
@ -158,6 +169,7 @@ static noinline int join_transaction(struct btrfs_root *root, int type)
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
INIT_LIST_HEAD(&cur_trans->ordered_operations);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
fs_info->btree_inode->i_mapping);
@ -302,7 +314,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
int ret;
u64 qgroup_reserved = 0;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
return ERR_PTR(-EROFS);
if (current->journal_info) {
@ -360,8 +372,11 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
do {
ret = join_transaction(root, type);
if (ret == -EBUSY)
if (ret == -EBUSY) {
wait_current_trans(root);
if (unlikely(type == TRANS_ATTACH))
ret = -ENOENT;
}
} while (ret == -EBUSY);
if (ret < 0) {
@ -383,9 +398,10 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
h->block_rsv = NULL;
h->orig_rsv = NULL;
h->aborted = 0;
h->qgroup_reserved = qgroup_reserved;
h->qgroup_reserved = 0;
h->delayed_ref_elem.seq = 0;
h->type = type;
h->allocating_chunk = false;
INIT_LIST_HEAD(&h->qgroup_ref_list);
INIT_LIST_HEAD(&h->new_bgs);
@ -401,6 +417,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
h->block_rsv = &root->fs_info->trans_block_rsv;
h->bytes_reserved = num_bytes;
}
h->qgroup_reserved = qgroup_reserved;
got_it:
btrfs_record_root_in_trans(h, root);
@ -452,11 +469,43 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
return start_transaction(root, 0, TRANS_USERSPACE, 0);
}
/*
* btrfs_attach_transaction() - catch the running transaction
*
* It is used when we want to commit the current the transaction, but
* don't want to start a new one.
*
* Note: If this function return -ENOENT, it just means there is no
* running transaction. But it is possible that the inactive transaction
* is still in the memory, not fully on disk. If you hope there is no
* inactive transaction in the fs when -ENOENT is returned, you should
* invoke
* btrfs_attach_transaction_barrier()
*/
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_ATTACH, 0);
}
/*
* btrfs_attach_transaction() - catch the running transaction
*
* It is similar to the above function, the differentia is this one
* will wait for all the inactive transactions until they fully
* complete.
*/
struct btrfs_trans_handle *
btrfs_attach_transaction_barrier(struct btrfs_root *root)
{
struct btrfs_trans_handle *trans;
trans = start_transaction(root, 0, TRANS_ATTACH, 0);
if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
btrfs_wait_for_commit(root, 0);
return trans;
}
/* wait for a transaction commit to be fully complete */
static noinline void wait_for_commit(struct btrfs_root *root,
struct btrfs_transaction *commit)
@ -645,12 +694,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_run_delayed_iputs(root);
if (trans->aborted ||
root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
err = -EIO;
}
assert_qgroups_uptodate(trans);
memset(trans, 0, sizeof(*trans));
kmem_cache_free(btrfs_trans_handle_cachep, trans);
return err;
}
@ -961,10 +1008,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
}
/*
* defrag a given btree. If cacheonly == 1, this won't read from the disk,
* otherwise every leaf in the btree is read and defragged.
* defrag a given btree.
* Every leaf in the btree is read and defragged.
*/
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
int btrfs_defrag_root(struct btrfs_root *root)
{
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_trans_handle *trans;
@ -978,7 +1025,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
if (IS_ERR(trans))
return PTR_ERR(trans);
ret = btrfs_defrag_leaves(trans, root, cacheonly);
ret = btrfs_defrag_leaves(trans, root);
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(info->tree_root);
@ -986,6 +1033,12 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
break;
if (btrfs_defrag_cancelled(root->fs_info)) {
printk(KERN_DEBUG "btrfs: defrag_root cancelled\n");
ret = -EAGAIN;
break;
}
}
root->defrag_running = 0;
return ret;
@ -1307,13 +1360,13 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
struct btrfs_async_commit {
struct btrfs_trans_handle *newtrans;
struct btrfs_root *root;
struct delayed_work work;
struct work_struct work;
};
static void do_async_commit(struct work_struct *work)
{
struct btrfs_async_commit *ac =
container_of(work, struct btrfs_async_commit, work.work);
container_of(work, struct btrfs_async_commit, work);
/*
* We've got freeze protection passed with the transaction.
@ -1341,7 +1394,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
if (!ac)
return -ENOMEM;
INIT_DELAYED_WORK(&ac->work, do_async_commit);
INIT_WORK(&ac->work, do_async_commit);
ac->root = root;
ac->newtrans = btrfs_join_transaction(root);
if (IS_ERR(ac->newtrans)) {
@ -1365,7 +1418,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1, _THIS_IP_);
schedule_delayed_work(&ac->work, 0);
schedule_work(&ac->work);
/* wait for transaction to start and unblock */
if (wait_for_unblock)
@ -1428,7 +1481,9 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
}
if (flush_on_commit || snap_pending) {
btrfs_start_delalloc_inodes(root, 1);
ret = btrfs_start_delalloc_inodes(root, 1);
if (ret)
return ret;
btrfs_wait_ordered_extents(root, 1);
}
@ -1450,9 +1505,9 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
* it here and no for sure that nothing new will be added
* to the list
*/
btrfs_run_ordered_operations(root, 1);
ret = btrfs_run_ordered_operations(trans, root, 1);
return 0;
return ret;
}
/*
@ -1473,27 +1528,35 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
int should_grow = 0;
unsigned long now = get_seconds();
ret = btrfs_run_ordered_operations(root, 0);
ret = btrfs_run_ordered_operations(trans, root, 0);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto cleanup_transaction;
btrfs_end_transaction(trans, root);
return ret;
}
/* Stop the commit early if ->aborted is set */
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
goto cleanup_transaction;
btrfs_end_transaction(trans, root);
return ret;
}
/* make a pass through all the delayed refs we have so far
* any runnings procs may add more while we are here
*/
ret = btrfs_run_delayed_refs(trans, root, 0);
if (ret)
goto cleanup_transaction;
if (ret) {
btrfs_end_transaction(trans, root);
return ret;
}
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
if (trans->qgroup_reserved) {
btrfs_qgroup_free(root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
cur_trans = trans->transaction;
@ -1507,8 +1570,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_create_pending_block_groups(trans, root);
ret = btrfs_run_delayed_refs(trans, root, 0);
if (ret)
goto cleanup_transaction;
if (ret) {
btrfs_end_transaction(trans, root);
return ret;
}
spin_lock(&cur_trans->commit_lock);
if (cur_trans->in_commit) {
@ -1772,6 +1837,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
cleanup_transaction:
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
if (trans->qgroup_reserved) {
btrfs_qgroup_free(root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
// WARN_ON(1);
if (current->journal_info == trans)

View File

@ -43,6 +43,7 @@ struct btrfs_transaction {
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
struct list_head pending_snapshots;
struct list_head ordered_operations;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
};
@ -68,6 +69,7 @@ struct btrfs_trans_handle {
struct btrfs_block_rsv *orig_rsv;
short aborted;
short adding_csums;
bool allocating_chunk;
enum btrfs_trans_type type;
/*
* this root is only needed to validate that the root passed to
@ -110,13 +112,15 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush(
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_add_dead_root(struct btrfs_root *root);
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
int btrfs_defrag_root(struct btrfs_root *root);
int btrfs_clean_old_snapshots(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);

View File

@ -23,13 +23,14 @@
#include "transaction.h"
#include "locking.h"
/* defrag all the leaves in a given btree. If cache_only == 1, don't read
* things from disk, otherwise read all the leaves and try to get key order to
/*
* Defrag all the leaves in a given btree.
* Read all the leaves and try to get key order to
* better reflect disk order
*/
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int cache_only)
struct btrfs_root *root)
{
struct btrfs_path *path = NULL;
struct btrfs_key key;
@ -41,9 +42,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
u64 last_ret = 0;
u64 min_trans = 0;
if (cache_only)
goto out;
if (root->fs_info->extent_root == root) {
/*
* there's recursion here right now in the tree locking,
@ -86,11 +84,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
}
path->keep_locks = 1;
if (cache_only)
min_trans = root->defrag_trans_start;
ret = btrfs_search_forward(root, &key, NULL, path,
cache_only, min_trans);
ret = btrfs_search_forward(root, &key, NULL, path, min_trans);
if (ret < 0)
goto out;
if (ret > 0) {
@ -109,11 +104,11 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
}
path->slots[1] = btrfs_header_nritems(path->nodes[1]);
next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only,
next_key_ret = btrfs_find_next_key(root, path, &key, 1,
min_trans);
ret = btrfs_realloc_node(trans, root,
path->nodes[1], 0,
cache_only, &last_ret,
&last_ret,
&root->defrag_progress);
if (ret) {
WARN_ON(ret == -EAGAIN);

View File

@ -278,8 +278,7 @@ static int process_one_buffer(struct btrfs_root *log,
struct walk_control *wc, u64 gen)
{
if (wc->pin)
btrfs_pin_extent_for_log_replay(wc->trans,
log->fs_info->extent_root,
btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
eb->start, eb->len);
if (btrfs_buffer_uptodate(eb, gen, 0)) {
@ -2281,6 +2280,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
unsigned long log_transid = 0;
mutex_lock(&root->log_mutex);
log_transid = root->log_transid;
index1 = root->log_transid % 2;
if (atomic_read(&root->log_commit[index1])) {
wait_log_commit(trans, root, root->log_transid);
@ -2308,11 +2308,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
/* bail out if we need to do a full commit */
if (root->fs_info->last_trans_log_full_commit == trans->transid) {
ret = -EAGAIN;
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&root->log_mutex);
goto out;
}
log_transid = root->log_transid;
if (log_transid % 2 == 0)
mark = EXTENT_DIRTY;
else
@ -2324,6 +2324,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&root->log_mutex);
goto out;
}
@ -2363,6 +2364,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
root->fs_info->last_trans_log_full_commit = trans->transid;
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out;
@ -2373,6 +2375,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = 0;
goto out;
@ -2392,6 +2395,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
*/
if (root->fs_info->last_trans_log_full_commit == trans->transid) {
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out_wake_log_root;
@ -2402,10 +2406,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
goto out_wake_log_root;
}
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
btrfs_wait_logged_extents(log, log_transid);
btrfs_set_super_log_root(root->fs_info->super_for_commit,
log_root_tree->node->start);
@ -2475,6 +2481,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
/*
* We may have short-circuited the log tree with the full commit logic
* and left ordered extents on our list, so clear these out to keep us
* from leaking inodes and memory.
*/
btrfs_free_logged_extents(log, 0);
btrfs_free_logged_extents(log, 1);
free_extent_buffer(log->node);
kfree(log);
}
@ -2724,7 +2738,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
ret = btrfs_search_forward(root, &min_key, &max_key,
path, 0, trans->transid);
path, trans->transid);
/*
* we didn't find anything from this transaction, see if there
@ -3271,14 +3285,18 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *log = root->log_root;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
struct btrfs_ordered_extent *ordered;
struct list_head ordered_sums;
struct btrfs_map_token token;
struct btrfs_key key;
u64 csum_offset = em->mod_start - em->start;
u64 csum_len = em->mod_len;
u64 mod_start = em->mod_start;
u64 mod_len = em->mod_len;
u64 csum_offset;
u64 csum_len;
u64 extent_offset = em->start - em->orig_start;
u64 block_len;
int ret;
int index = log->log_transid % 2;
bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
INIT_LIST_HEAD(&ordered_sums);
@ -3362,6 +3380,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
csum_len = block_len;
}
/*
* First check and see if our csums are on our outstanding ordered
* extents.
*/
again:
spin_lock_irq(&log->log_extents_lock[index]);
list_for_each_entry(ordered, &log->logged_list[index], log_list) {
struct btrfs_ordered_sum *sum;
if (!mod_len)
break;
if (ordered->inode != inode)
continue;
if (ordered->file_offset + ordered->len <= mod_start ||
mod_start + mod_len <= ordered->file_offset)
continue;
/*
* We are going to copy all the csums on this ordered extent, so
* go ahead and adjust mod_start and mod_len in case this
* ordered extent has already been logged.
*/
if (ordered->file_offset > mod_start) {
if (ordered->file_offset + ordered->len >=
mod_start + mod_len)
mod_len = ordered->file_offset - mod_start;
/*
* If we have this case
*
* |--------- logged extent ---------|
* |----- ordered extent ----|
*
* Just don't mess with mod_start and mod_len, we'll
* just end up logging more csums than we need and it
* will be ok.
*/
} else {
if (ordered->file_offset + ordered->len <
mod_start + mod_len) {
mod_len = (mod_start + mod_len) -
(ordered->file_offset + ordered->len);
mod_start = ordered->file_offset +
ordered->len;
} else {
mod_len = 0;
}
}
/*
* To keep us from looping for the above case of an ordered
* extent that falls inside of the logged extent.
*/
if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
&ordered->flags))
continue;
atomic_inc(&ordered->refs);
spin_unlock_irq(&log->log_extents_lock[index]);
/*
* we've dropped the lock, we must either break or
* start over after this.
*/
wait_event(ordered->wait, ordered->csum_bytes_left == 0);
list_for_each_entry(sum, &ordered->list, list) {
ret = btrfs_csum_file_blocks(trans, log, sum);
if (ret) {
btrfs_put_ordered_extent(ordered);
goto unlocked;
}
}
btrfs_put_ordered_extent(ordered);
goto again;
}
spin_unlock_irq(&log->log_extents_lock[index]);
unlocked:
if (!mod_len || ret)
return ret;
csum_offset = mod_start - em->start;
csum_len = mod_len;
/* block start is already adjusted for the file extent offset. */
ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
em->block_start + csum_offset,
@ -3393,6 +3497,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
u64 test_gen;
int ret = 0;
int num = 0;
INIT_LIST_HEAD(&extents);
@ -3401,16 +3506,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
list_del_init(&em->list);
/*
* Just an arbitrary number, this can be really CPU intensive
* once we start getting a lot of extents, and really once we
* have a bunch of extents we just want to commit since it will
* be faster.
*/
if (++num > 32768) {
list_del_init(&tree->modified_extents);
ret = -EFBIG;
goto process;
}
if (em->generation <= test_gen)
continue;
/* Need a ref to keep it from getting evicted from cache */
atomic_inc(&em->refs);
set_bit(EXTENT_FLAG_LOGGING, &em->flags);
list_add_tail(&em->list, &extents);
num++;
}
list_sort(NULL, &extents, extent_cmp);
process:
while (!list_empty(&extents)) {
em = list_entry(extents.next, struct extent_map, list);
@ -3513,6 +3633,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
mutex_lock(&BTRFS_I(inode)->log_mutex);
btrfs_get_logged_extents(log, inode);
/*
* a brute force approach to making sure we get the most uptodate
* copies of everything.
@ -3558,7 +3680,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
while (1) {
ins_nr = 0;
ret = btrfs_search_forward(root, &min_key, &max_key,
path, 0, trans->transid);
path, trans->transid);
if (ret != 0)
break;
again:
@ -3656,6 +3778,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->logged_trans = trans->transid;
BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
out_unlock:
if (err)
btrfs_free_logged_extents(log, log->log_transid);
mutex_unlock(&BTRFS_I(inode)->log_mutex);
btrfs_free_path(path);
@ -3822,7 +3946,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
end_trans:
dput(old_parent);
if (ret < 0) {
WARN_ON(ret != -ENOSPC);
root->fs_info->last_trans_log_full_commit = trans->transid;
ret = 1;
}

View File

@ -792,26 +792,76 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
return ret;
}
/*
* Look for a btrfs signature on a device. This may be called out of the mount path
* and we are not allowed to call set_blocksize during the scan. The superblock
* is read via pagecache
*/
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
struct btrfs_fs_devices **fs_devices_ret)
{
struct btrfs_super_block *disk_super;
struct block_device *bdev;
struct buffer_head *bh;
int ret;
struct page *page;
void *p;
int ret = -EINVAL;
u64 devid;
u64 transid;
u64 total_devices;
u64 bytenr;
pgoff_t index;
/*
* we would like to check all the supers, but that would make
* a btrfs mount succeed after a mkfs from a different FS.
* So, we need to add a special mount option to scan for
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
*/
bytenr = btrfs_sb_offset(0);
flags |= FMODE_EXCL;
mutex_lock(&uuid_mutex);
ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh);
if (ret)
bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev)) {
ret = PTR_ERR(bdev);
printk(KERN_INFO "btrfs: open %s failed\n", path);
goto error;
disk_super = (struct btrfs_super_block *)bh->b_data;
}
/* make sure our super fits in the device */
if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode))
goto error_bdev_put;
/* make sure our super fits in the page */
if (sizeof(*disk_super) > PAGE_CACHE_SIZE)
goto error_bdev_put;
/* make sure our super doesn't straddle pages on disk */
index = bytenr >> PAGE_CACHE_SHIFT;
if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index)
goto error_bdev_put;
/* pull in the page with our super */
page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
index, GFP_NOFS);
if (IS_ERR_OR_NULL(page))
goto error_bdev_put;
p = kmap(page);
/* align our pointer to the offset of the super block */
disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
if (btrfs_super_bytenr(disk_super) != bytenr ||
disk_super->magic != cpu_to_le64(BTRFS_MAGIC))
goto error_unmap;
devid = btrfs_stack_device_id(&disk_super->dev_item);
transid = btrfs_super_generation(disk_super);
total_devices = btrfs_super_num_devices(disk_super);
if (disk_super->label[0]) {
if (disk_super->label[BTRFS_LABEL_SIZE - 1])
disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
@ -819,12 +869,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
} else {
printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
}
printk(KERN_CONT "devid %llu transid %llu %s\n",
(unsigned long long)devid, (unsigned long long)transid, path);
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
if (!ret && fs_devices_ret)
(*fs_devices_ret)->total_devices = total_devices;
brelse(bh);
error_unmap:
kunmap(page);
page_cache_release(page);
error_bdev_put:
blkdev_put(bdev, flags);
error:
mutex_unlock(&uuid_mutex);
@ -1372,14 +1429,19 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
u64 devid;
u64 num_devices;
u8 *dev_uuid;
unsigned seq;
int ret = 0;
bool clear_super = false;
mutex_lock(&uuid_mutex);
all_avail = root->fs_info->avail_data_alloc_bits |
root->fs_info->avail_system_alloc_bits |
root->fs_info->avail_metadata_alloc_bits;
do {
seq = read_seqbegin(&root->fs_info->profiles_lock);
all_avail = root->fs_info->avail_data_alloc_bits |
root->fs_info->avail_system_alloc_bits |
root->fs_info->avail_metadata_alloc_bits;
} while (read_seqretry(&root->fs_info->profiles_lock, seq));
num_devices = root->fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&root->fs_info->dev_replace);
@ -2616,7 +2678,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
chunk_used = btrfs_block_group_used(&cache->item);
if (bargs->usage == 0)
user_thresh = 0;
user_thresh = 1;
else if (bargs->usage > 100)
user_thresh = cache->key.offset;
else
@ -2985,6 +3047,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
int mixed = 0;
int ret;
u64 num_devices;
unsigned seq;
if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req) ||
@ -3068,22 +3131,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
/* allow to reduce meta or sys integrity only if force set */
allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10;
if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(fs_info->avail_system_alloc_bits & allowed) &&
!(bctl->sys.target & allowed)) ||
((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(fs_info->avail_metadata_alloc_bits & allowed) &&
!(bctl->meta.target & allowed))) {
if (bctl->flags & BTRFS_BALANCE_FORCE) {
printk(KERN_INFO "btrfs: force reducing metadata "
"integrity\n");
} else {
printk(KERN_ERR "btrfs: balance will reduce metadata "
"integrity, use force if you want this\n");
ret = -EINVAL;
goto out;
do {
seq = read_seqbegin(&fs_info->profiles_lock);
if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(fs_info->avail_system_alloc_bits & allowed) &&
!(bctl->sys.target & allowed)) ||
((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(fs_info->avail_metadata_alloc_bits & allowed) &&
!(bctl->meta.target & allowed))) {
if (bctl->flags & BTRFS_BALANCE_FORCE) {
printk(KERN_INFO "btrfs: force reducing metadata "
"integrity\n");
} else {
printk(KERN_ERR "btrfs: balance will reduce metadata "
"integrity, use force if you want this\n");
ret = -EINVAL;
goto out;
}
}
}
} while (read_seqretry(&fs_info->profiles_lock, seq));
if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
int num_tolerated_disk_barrier_failures;
@ -3127,6 +3194,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
mutex_lock(&fs_info->balance_mutex);
atomic_dec(&fs_info->balance_running);
if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
fs_info->num_tolerated_disk_barrier_failures =
btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
}
if (bargs) {
memset(bargs, 0, sizeof(*bargs));
update_ioctl_balance_args(fs_info, 0, bargs);
@ -3137,11 +3209,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
__cancel_balance(fs_info);
}
if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
fs_info->num_tolerated_disk_barrier_failures =
btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
}
wake_up(&fs_info->balance_wait_q);
return ret;
@ -3504,13 +3571,48 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
}
struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
{ 2, 1, 0, 4, 2, 2 /* raid10 */ },
{ 1, 1, 2, 2, 2, 2 /* raid1 */ },
{ 1, 2, 1, 1, 1, 2 /* dup */ },
{ 1, 1, 0, 2, 1, 1 /* raid0 */ },
{ 1, 1, 1, 1, 1, 1 /* single */ },
[BTRFS_RAID_RAID10] = {
.sub_stripes = 2,
.dev_stripes = 1,
.devs_max = 0, /* 0 == as many as possible */
.devs_min = 4,
.devs_increment = 2,
.ncopies = 2,
},
[BTRFS_RAID_RAID1] = {
.sub_stripes = 1,
.dev_stripes = 1,
.devs_max = 2,
.devs_min = 2,
.devs_increment = 2,
.ncopies = 2,
},
[BTRFS_RAID_DUP] = {
.sub_stripes = 1,
.dev_stripes = 2,
.devs_max = 1,
.devs_min = 1,
.devs_increment = 1,
.ncopies = 2,
},
[BTRFS_RAID_RAID0] = {
.sub_stripes = 1,
.dev_stripes = 1,
.devs_max = 0,
.devs_min = 2,
.devs_increment = 1,
.ncopies = 1,
},
[BTRFS_RAID_SINGLE] = {
.sub_stripes = 1,
.dev_stripes = 1,
.devs_max = 1,
.devs_min = 1,
.devs_increment = 1,
.ncopies = 1,
},
};
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root,
struct map_lookup **map_ret,
@ -3631,12 +3733,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
continue;
if (ndevs == fs_devices->rw_devices) {
WARN(1, "%s: found more than %llu devices\n",
__func__, fs_devices->rw_devices);
break;
}
devices_info[ndevs].dev_offset = dev_offset;
devices_info[ndevs].max_avail = max_avail;
devices_info[ndevs].total_avail = total_avail;
devices_info[ndevs].dev = device;
++ndevs;
WARN_ON(ndevs > fs_devices->rw_devices);
}
/*
@ -3718,15 +3824,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
free_extent_map(em);
if (ret)
goto error;
ret = btrfs_make_block_group(trans, extent_root, 0, type,
BTRFS_FIRST_CHUNK_TREE_OBJECTID,
start, num_bytes);
if (ret)
if (ret) {
free_extent_map(em);
goto error;
}
for (i = 0; i < map->num_stripes; ++i) {
struct btrfs_device *device;
@ -3739,15 +3840,42 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
info->chunk_root->root_key.objectid,
BTRFS_FIRST_CHUNK_TREE_OBJECTID,
start, dev_offset, stripe_size);
if (ret) {
btrfs_abort_transaction(trans, extent_root, ret);
goto error;
}
if (ret)
goto error_dev_extent;
}
ret = btrfs_make_block_group(trans, extent_root, 0, type,
BTRFS_FIRST_CHUNK_TREE_OBJECTID,
start, num_bytes);
if (ret) {
i = map->num_stripes - 1;
goto error_dev_extent;
}
free_extent_map(em);
kfree(devices_info);
return 0;
error_dev_extent:
for (; i >= 0; i--) {
struct btrfs_device *device;
int err;
device = map->stripes[i].dev;
err = btrfs_free_dev_extent(trans, device, start);
if (err) {
btrfs_abort_transaction(trans, extent_root, err);
break;
}
}
write_lock(&em_tree->lock);
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
/* One for our allocation */
free_extent_map(em);
/* One for the tree reference */
free_extent_map(em);
error:
kfree(map);
kfree(devices_info);
@ -3887,10 +4015,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
if (ret)
return ret;
alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
fs_info->avail_metadata_alloc_bits;
alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
&stripe_size, chunk_offset, alloc_profile);
if (ret)
@ -3898,10 +4023,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
sys_chunk_offset = chunk_offset + chunk_size;
alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
fs_info->avail_system_alloc_bits;
alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
&sys_chunk_size, &sys_stripe_size,
sys_chunk_offset, alloc_profile);

View File

@ -21,8 +21,8 @@
#include <linux/bio.h>
#include <linux/sort.h>
#include <linux/btrfs.h>
#include "async-thread.h"
#include "ioctl.h"
#define BTRFS_STRIPE_LEN (64 * 1024)

6
include/linux/btrfs.h Normal file
View File

@ -0,0 +1,6 @@
#ifndef _LINUX_BTRFS_H
#define _LINUX_BTRFS_H
#include <uapi/linux/btrfs.h>
#endif /* _LINUX_BTRFS_H */

View File

@ -68,6 +68,7 @@ header-y += blkpg.h
header-y += blktrace_api.h
header-y += bpqether.h
header-y += bsg.h
header-y += btrfs.h
header-y += can.h
header-y += capability.h
header-y += capi.h

View File

@ -16,8 +16,9 @@
* Boston, MA 021110-1307, USA.
*/
#ifndef __IOCTL_
#define __IOCTL_
#ifndef _UAPI_LINUX_BTRFS_H
#define _UAPI_LINUX_BTRFS_H
#include <linux/types.h>
#include <linux/ioctl.h>
#define BTRFS_IOCTL_MAGIC 0x94
@ -406,6 +407,13 @@ struct btrfs_ioctl_received_subvol_args {
__u64 reserved[16]; /* in */
};
/*
* Caller doesn't want file data in the send stream, even if the
* search of clone sources doesn't find an extent. UPDATE_EXTENT
* commands will be sent instead of WRITE commands.
*/
#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1
struct btrfs_ioctl_send_args {
__s64 send_fd; /* in */
__u64 clone_sources_count; /* in */
@ -494,9 +502,13 @@ struct btrfs_ioctl_send_args {
struct btrfs_ioctl_qgroup_create_args)
#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
struct btrfs_ioctl_qgroup_limit_args)
#define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
char[BTRFS_LABEL_SIZE])
#define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
char[BTRFS_LABEL_SIZE])
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
struct btrfs_ioctl_dev_replace_args)
#endif
#endif /* _UAPI_LINUX_BTRFS_H */