kernel_optimize_test/fs/ext4/readpage.c
Linus Torvalds 93061f390f These changes contain a fix for overlayfs interacting with some
(badly behaved) dentry code in various file systems.  These have been
 reviewed by Al and the respective file system maintainers and are going
 through the ext4 tree for convenience.
 
 This also has a few ext4 encryption bug fixes that were discovered in
 Android testing (yes, we will need to get these sync'ed up with the
 fs/crypto code; I'll take care of that).  It also has some bug fixes
 and a change to ignore the legacy quota options to allow for xfstests
 regression testing of ext4's internal quota feature and to be more
 consistent with how xfs handles this case.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQEcBAABCAAGBQJXBn4aAAoJEPL5WVaVDYGjHWgH/2wXnlQnC2ndJhblBWtPzprz
 OQW4dawdnhxqbTEGUqWe942tZivSb/liu/lF+urCGbWsbgz9jNOCmEAg7JPwlccY
 mjzwDvtVq5U4d2rP+JDWXLy/Gi8XgUclhbQDWFVIIIea6fS7IuFWqoVBR+HPMhra
 9tEygpiy5lNtJA/hqq3/z9x0AywAjwrYR491CuWreo2Uu1aeKg0YZsiDsuAcGioN
 Waa2TgbC/ZZyJuJcPBP8If+VOFAa0ea3F+C/o7Tb9bOqwuz0qSTcaMRgt6eQ2KUt
 P4b9Ecp1XLjJTC7IYOknUOScY3lCyREx/Xya9oGZfFNTSHzbOlLBoplCr3aUpYQ=
 =/HHR
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 bugfixes from Ted Ts'o:
 "These changes contain a fix for overlayfs interacting with some
  (badly behaved) dentry code in various file systems.  These have been
  reviewed by Al and the respective file system maintainers and are going
  through the ext4 tree for convenience.

  This also has a few ext4 encryption bug fixes that were discovered in
  Android testing (yes, we will need to get these sync'ed up with the
  fs/crypto code; I'll take care of that).  It also has some bug fixes
  and a change to ignore the legacy quota options to allow for xfstests
  regression testing of ext4's internal quota feature and to be more
  consistent with how xfs handles this case"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: ignore quota mount options if the quota feature is enabled
  ext4 crypto: fix some error handling
  ext4: avoid calling dquot_get_next_id() if quota is not enabled
  ext4: retry block allocation for failed DIO and DAX writes
  ext4: add lockdep annotations for i_data_sem
  ext4: allow readdir()'s of large empty directories to be interrupted
  btrfs: fix crash/invalid memory access on fsync when using overlayfs
  ext4 crypto: use dget_parent() in ext4_d_revalidate()
  ext4: use file_dentry()
  ext4: use dget_parent() in ext4_file_open()
  nfs: use file_dentry()
  fs: add file_dentry()
  ext4 crypto: don't let data integrity writebacks fail with ENOMEM
  ext4: check if in-inode xattr is corrupted in ext4_expand_extra_isize_ea()
2016-04-07 17:22:20 -07:00

329 lines
8.2 KiB
C

/*
* linux/fs/ext4/readpage.c
*
* Copyright (C) 2002, Linus Torvalds.
* Copyright (C) 2015, Google, Inc.
*
* This was originally taken from fs/mpage.c
*
* The ext4_mpage_readpages() function here is intended to replace
* mpage_readpages() in the general case, not just for encrypted
* files. It has some limitations (see below), where it will fall
* back to block_read_full_page(), but these limitations should only
* be hit when page_size != block_size.
*
* This will allow us to attach a callback function to support ext4
* encryption.
*
* If anything unusual happens, such as:
*
* - encountering a page which has buffers
* - encountering a page which has a non-hole after a hole
* - encountering a page with non-contiguous blocks
*
* then this code just gives up and calls the buffer_head-based read function.
* It does handle a page which has holes at the end - that is a common case:
* the end-of-file on blocksize < PAGE_SIZE setups.
*
*/
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/kdev_t.h>
#include <linux/gfp.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/prefetch.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/cleancache.h>
#include "ext4.h"
/*
 * Work handler: run ext4_decrypt() on each page of a finished read bio,
 * sharing the one crypto context that was attached to that bio.
 *
 * A page whose decryption succeeds is marked up to date; a page whose
 * decryption fails is flagged with an error (and a one-time warning is
 * emitted).  Every page is unlocked either way.  Afterwards the crypto
 * context is released and the bio reference is dropped.
 *
 * Without CONFIG_EXT4_FS_ENCRYPTION this handler must never run.
 */
static void completion_pages(struct work_struct *work)
{
#ifdef CONFIG_EXT4_FS_ENCRYPTION
	struct ext4_crypto_ctx *ctx;
	struct bio_vec *bvec;
	struct bio *bio;
	int idx;

	/* The work item is embedded in the crypto context; recover both. */
	ctx = container_of(work, struct ext4_crypto_ctx, r.work);
	bio = ctx->r.bio;

	bio_for_each_segment_all(bvec, bio, idx) {
		struct page *pg = bvec->bv_page;

		if (ext4_decrypt(pg) == 0) {
			SetPageUptodate(pg);
		} else {
			WARN_ON_ONCE(1);
			SetPageError(pg);
		}
		unlock_page(pg);
	}

	ext4_release_crypto_ctx(ctx);
	bio_put(bio);
#else
	BUG();
#endif
}
/*
 * Tell whether a read bio has something stored in bi_private, which is
 * where ext4_mpage_readpages() places the crypto context for encrypted
 * regular files.  Always false when ext4 encryption is compiled out.
 */
static inline bool ext4_bio_encrypted(struct bio *bio)
{
#ifndef CONFIG_EXT4_FS_ENCRYPTION
	return false;
#else
	return unlikely(bio->bi_private != NULL);
#endif
}
/*
 * Completion callback for the multipage read bios built in this file.
 *
 * Apart from the end-of-file case, the mpage code only ever places whole
 * pages in a bio.  A page that does not map to one contiguous run of
 * blocks is not handled here at all; it is handed to
 * block_read_full_page() instead.
 *
 * The reason: if one page's completion depended on several bios, which
 * may finish in any order (or simultaneously), working out the final
 * state of that page would be hard.  end_buffer_async_read() shows what
 * that takes, and there is no point in duplicating that complexity here.
 *
 * For a bio that carries a crypto context, a successful read is passed to
 * the read workqueue, where completion_pages() finishes the pages; a
 * failed read drops the context and is completed right here.
 */
static void mpage_end_io(struct bio *bio)
{
	struct bio_vec *bvec;
	int idx;

	if (ext4_bio_encrypted(bio)) {
		struct ext4_crypto_ctx *ctx = bio->bi_private;

		if (!bio->bi_error) {
			/* Pages and bio are now owned by the queued work. */
			INIT_WORK(&ctx->r.work, completion_pages);
			ctx->r.bio = bio;
			queue_work(ext4_read_workqueue, &ctx->r.work);
			return;
		}
		/* Read failed: no decryption will happen, release the context. */
		ext4_release_crypto_ctx(ctx);
	}

	bio_for_each_segment_all(bvec, bio, idx) {
		struct page *pg = bvec->bv_page;

		if (bio->bi_error) {
			ClearPageUptodate(pg);
			SetPageError(pg);
		} else {
			SetPageUptodate(pg);
		}
		unlock_page(pg);
	}

	bio_put(bio);
}
/*
 * ext4_mpage_readpages - read one page, or a list of pages, using bios
 * @mapping:  address space being read; mapping->host supplies the inode
 * @pages:    list of pages to read, or NULL to read only @page.  When
 *            non-NULL, pages are taken from the tail of the list, removed
 *            from it, inserted into the page cache, and have one reference
 *            dropped once they have been dealt with.  The list must be
 *            empty after @nr_pages iterations (BUG_ON otherwise).
 * @page:     the page to read when @pages is NULL; ignored (overwritten)
 *            when @pages is non-NULL
 * @nr_pages: number of loop iterations, i.e. pages to process
 *
 * For each page the logical blocks are translated with ext4_map_blocks(),
 * reusing the previous mapping result where it still covers the page.
 * Pages whose blocks are physically contiguous are appended to a shared
 * bio; the bio is submitted whenever the next page does not directly
 * follow it on disk, when the bio is full, at a mapping boundary, after a
 * page with a trailing hole, and at the end of the call.
 *
 * Pages that already have buffers, that have a mapped block after a hole,
 * or whose blocks are not contiguous take the "confused" path and are read
 * with block_read_full_page().
 *
 * For encrypted regular files a crypto context is obtained for every new
 * bio and stored in bio->bi_private for mpage_end_io().
 *
 * Always returns 0.  Mapping or allocation failures are reported per page:
 * the page is flagged with SetPageError(), zero-filled and unlocked.
 */
int ext4_mpage_readpages(struct address_space *mapping,
			 struct list_head *pages, struct page *page,
			 unsigned nr_pages)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;

	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t blocks[MAX_BUF_PER_PAGE];
	unsigned page_block;
	struct block_device *bdev = inode->i_sb->s_bdev;
	int length;
	unsigned relative_block = 0;
	struct ext4_map_blocks map;

	/* Start with an empty mapping so nothing is reused for the first page. */
	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;

	for (; nr_pages; nr_pages--) {
		int fully_mapped = 1;
		/* blocks_per_page doubles as the "no hole seen" marker. */
		unsigned first_hole = blocks_per_page;

		if (pages) {
			page = list_entry(pages->prev, struct page, lru);
			/*
			 * Prefetch only once the page for this iteration is
			 * known; doing it earlier would touch the previous
			 * iteration's page (or whatever the caller passed in
			 * @page, possibly NULL).
			 */
			prefetchw(&page->flags);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping, page->index,
				  mapping_gfp_constraint(mapping, GFP_KERNEL)))
				goto next_page;
		} else {
			prefetchw(&page->flags);
		}

		if (page_has_buffers(page))
			goto confused;

		/*
		 * First block of this page, and one past the last block worth
		 * mapping: the end of the remaining pages, clamped to the
		 * block that contains i_size.
		 */
		block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
		last_block = block_in_file + nr_pages * blocks_per_page;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;
		page_block = 0;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & EXT4_MAP_MAPPED) &&
		    block_in_file > map.m_lblk &&
		    block_in_file < (map.m_lblk + map.m_len)) {
			unsigned map_offset = block_in_file - map.m_lblk;
			unsigned last = map.m_len - map_offset;

			for (relative_block = 0; ; relative_block++) {
				if (relative_block == last) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				}
				if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk + map_offset +
					relative_block;
				page_block++;
				block_in_file++;
			}
		}

		/*
		 * Then do more ext4_map_blocks() calls until we are
		 * done with this page.
		 */
		while (page_block < blocks_per_page) {
			if (block_in_file < last_block) {
				map.m_lblk = block_in_file;
				map.m_len = last_block - block_in_file;

				if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
				/*
				 * Also the target for crypto-context and bio
				 * allocation failures further down.
				 */
				set_error_page:
					SetPageError(page);
					zero_user_segment(page, 0,
							  PAGE_SIZE);
					unlock_page(page);
					goto next_page;
				}
			}
			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
				/* Unmapped block: remember where the first hole is. */
				fully_mapped = 0;
				if (first_hole == blocks_per_page)
					first_hole = page_block;
				page_block++;
				block_in_file++;
				continue;
			}
			if (first_hole != blocks_per_page)
				goto confused;		/* hole -> non-hole */

			/* Contiguous blocks? */
			if (page_block && blocks[page_block-1] != map.m_pblk-1)
				goto confused;
			for (relative_block = 0; ; relative_block++) {
				if (relative_block == map.m_len) {
					/* needed? */
					map.m_flags &= ~EXT4_MAP_MAPPED;
					break;
				} else if (page_block == blocks_per_page)
					break;
				blocks[page_block] = map.m_pblk+relative_block;
				page_block++;
				block_in_file++;
			}
		}

		if (first_hole != blocks_per_page) {
			/* Zero from the first hole to the end of the page. */
			zero_user_segment(page, first_hole << blkbits,
					  PAGE_SIZE);
			if (first_hole == 0) {
				/* Whole page is a hole: nothing to read. */
				SetPageUptodate(page);
				unlock_page(page);
				goto next_page;
			}
		} else if (fully_mapped) {
			SetPageMappedToDisk(page);
		}

		/*
		 * Single-block page that cleancache can supply (presumably a
		 * return of 0 means the page was filled -- confirm against
		 * the cleancache API): mark it up to date and leave through
		 * "confused", which then only flushes any pending bio and
		 * unlocks the page.
		 */
		if (fully_mapped && blocks_per_page == 1 &&
		    !PageUptodate(page) && cleancache_get_page(page) == 0) {
			SetPageUptodate(page);
			goto confused;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != blocks[0] - 1)) {
		/* Also reached when bio_add_page() below cannot take the page. */
		submit_and_realloc:
			submit_bio(READ, bio);
			bio = NULL;
		}
		if (bio == NULL) {
			struct ext4_crypto_ctx *ctx = NULL;

			if (ext4_encrypted_inode(inode) &&
			    S_ISREG(inode->i_mode)) {
				ctx = ext4_get_crypto_ctx(inode, GFP_NOFS);
				if (IS_ERR(ctx))
					goto set_error_page;
			}
			bio = bio_alloc(GFP_KERNEL,
				min_t(int, nr_pages, BIO_MAX_PAGES));
			if (!bio) {
				if (ctx)
					ext4_release_crypto_ctx(ctx);
				goto set_error_page;
			}
			bio->bi_bdev = bdev;
			/* Filesystem block number -> 512-byte sector number. */
			bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
			bio->bi_end_io = mpage_end_io;
			bio->bi_private = ctx;
		}

		/* Bytes to read: up to the first hole, or the whole page. */
		length = first_hole << blkbits;
		if (bio_add_page(bio, page, length, 0) < length)
			goto submit_and_realloc;

		/*
		 * Submit right away at a mapping boundary or after a page
		 * with a trailing hole; otherwise remember where this bio
		 * ends so the next page can be checked for adjacency.
		 */
		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
		     (relative_block == map.m_len)) ||
		    (first_hole != blocks_per_page)) {
			submit_bio(READ, bio);
			bio = NULL;
		} else
			last_block_in_bio = blocks[blocks_per_page - 1];
		goto next_page;
	confused:
		/* Flush what has been gathered, then use the buffer_head path. */
		if (bio) {
			submit_bio(READ, bio);
			bio = NULL;
		}
		if (!PageUptodate(page))
			block_read_full_page(page, ext4_get_block);
		else
			unlock_page(page);
	next_page:
		/* Drop one reference on pages that came from the list. */
		if (pages)
			put_page(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		submit_bio(READ, bio);
	return 0;
}