kernel_optimize_test/fs/fuse/readdir.c
Miklos Szeredi 261aaba72f fuse: use iversion for readdir cache verification
Use the internal iversion counter to make sure modifications of the
directory through this filesystem are not missed by the mtime check (due to
mtime granularity).

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2018-10-01 10:07:05 +02:00

570 lines
13 KiB
C

/*
FUSE: Filesystem in Userspace
Copyright (C) 2001-2018 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
*/
#include "fuse_i.h"
#include <linux/iversion.h>
#include <linux/posix_acl.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
/*
 * Decide whether the next directory read on @dir should be issued as
 * READDIRPLUS rather than plain READDIR.
 *
 * Returns false if the connection has readdirplus disabled.  Otherwise returns
 * true when "auto" mode is off, when the inode carries the
 * FUSE_I_ADVISE_RDPLUS hint (the hint is consumed here), or when the read
 * starts at position zero.
 */
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
	struct fuse_conn *conn = get_fuse_conn(dir);
	struct fuse_inode *dir_fi = get_fuse_inode(dir);

	if (!conn->do_readdirplus)
		return false;

	if (!conn->readdirplus_auto)
		return true;

	/* Clears the hint as a side effect, so it must precede the pos test. */
	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &dir_fi->state))
		return true;

	return ctx->pos == 0;
}
/*
 * Append @dirent to the readdir cache stored in the page cache of @file's
 * mapping.
 *
 * @pos is the directory position the entry was read at.  The entry is only
 * added if the cache is still being filled and @pos is exactly where the
 * cache currently ends; otherwise the call is a no-op.  Entries never
 * straddle a page boundary: one that does not fit in the remainder of the
 * current page starts at offset zero of the next page.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t entry_len = FUSE_DIRENT_SIZE(dirent);
	u64 seen_version;
	loff_t seen_size;
	unsigned int in_page;
	pgoff_t pgidx;
	struct page *page;

	spin_lock(&fi->rdc.lock);
	/* Cache already complete, or this entry is not the next one to append. */
	if (fi->rdc.cached || fi->rdc.pos != pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}

	/* Snapshot the cache state; it is re-validated after taking the page. */
	seen_version = fi->rdc.version;
	seen_size = fi->rdc.size;
	pgidx = seen_size >> PAGE_SHIFT;
	in_page = seen_size & ~PAGE_MASK;

	/* No room left in the current page: start a fresh one. */
	if (in_page + entry_len > PAGE_SIZE) {
		pgidx++;
		in_page = 0;
	}
	spin_unlock(&fi->rdc.lock);

	/* A partially filled page must already exist; a new one may be created. */
	if (in_page)
		page = find_lock_page(file->f_mapping, pgidx);
	else
		page = find_or_create_page(file->f_mapping, pgidx,
					   mapping_gfp_mask(file->f_mapping));
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Only store the entry if no other readdir changed the cache meanwhile. */
	if (fi->rdc.version == seen_version && fi->rdc.size == seen_size &&
	    !WARN_ON(fi->rdc.pos != pos)) {
		void *kaddr = kmap_atomic(page);

		/* First entry in this page: zero it so unused space is empty. */
		if (!in_page)
			clear_page(kaddr);
		memcpy(kaddr + in_page, dirent, entry_len);
		kunmap_atomic(kaddr);

		fi->rdc.size = (pgidx << PAGE_SHIFT) + in_page + entry_len;
		fi->rdc.pos = dirent->off;
	}
	spin_unlock(&fi->rdc.lock);

	unlock_page(page);
	put_page(page);
}
/*
 * Mark the readdir cache of @file's directory as complete.
 *
 * Called when a read at @pos returned no more entries.  The cache is only
 * finalized if it ends exactly at @pos; in that case any page cache beyond
 * the page-aligned end of the cached data is truncated away.
 */
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	bool finalized = false;
	loff_t tail = 0;

	spin_lock(&fi->rdc.lock);
	/* Only a reader that followed the cache to its end may complete it. */
	if (fi->rdc.pos == pos) {
		fi->rdc.cached = true;
		tail = ALIGN(fi->rdc.size, PAGE_SIZE);
		finalized = true;
	}
	spin_unlock(&fi->rdc.lock);

	/* Drop pages past the cached data (done outside the spinlock). */
	if (finalized)
		truncate_inode_pages(file->f_mapping, tail);
}
/*
 * Hand one directory entry to the caller's dir_context, first offering it to
 * the readdir cache when the file was opened with FOPEN_CACHE_DIR.
 *
 * Returns the result of dir_emit().
 */
static bool fuse_emit(struct file *file, struct dir_context *ctx,
		      struct fuse_dirent *dirent)
{
	struct fuse_file *ff = file->private_data;
	bool caching = ff->open_flags & FOPEN_CACHE_DIR;

	if (caching)
		fuse_add_dirent_to_cache(file, dirent, ctx->pos);

	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
			dirent->type);
}
/*
 * Walk a READDIR reply of @nbytes bytes at @buf and emit each entry.
 *
 * Stops quietly at a truncated trailing record or when the consumer refuses
 * an entry.  ctx->pos is advanced to the offset of every entry that was
 * emitted.
 *
 * Returns 0, or -EIO if an entry has an empty or over-long name or a name
 * containing '/'.
 */
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
			 struct dir_context *ctx)
{
	char *cursor = buf;
	size_t remaining = nbytes;

	while (remaining >= FUSE_NAME_OFFSET) {
		struct fuse_dirent *ent = (struct fuse_dirent *)cursor;
		size_t ent_size = FUSE_DIRENT_SIZE(ent);

		if (!ent->namelen || ent->namelen > FUSE_NAME_MAX)
			return -EIO;

		/* Record does not fit in what is left of the reply. */
		if (ent_size > remaining)
			break;

		if (memchr(ent->name, '/', ent->namelen))
			return -EIO;

		/* Consumer has no more room. */
		if (!fuse_emit(file, ctx, ent))
			break;

		ctx->pos = ent->off;
		cursor += ent_size;
		remaining -= ent_size;
	}

	return 0;
}
/*
 * Instantiate or refresh the dentry/inode described by one READDIRPLUS entry.
 *
 * @file:         the open directory being read
 * @direntplus:   entry returned by the server (dirent plus lookup result)
 * @attr_version: attribute version sampled before the request was sent
 *
 * A zero nodeid, "." and ".." are skipped.  If a dentry for the name already
 * exists and refers to the same node, its lookup count and attributes are
 * updated; otherwise a new inode is obtained with fuse_iget() and spliced into
 * the dcache.
 *
 * Returns 0 on success or a negative errno.  On a non-zero return the caller
 * is expected to send a FORGET for the nodeid, so every failure path must
 * leave the in-kernel nlookup count for this entry untouched.
 */
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (!fuse_valid_type(o->attr.mode))
		return -EIO;

	fc = get_fuse_conn(dir);

	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		struct fuse_inode *fi;

		inode = d_inode(dentry);
		/*
		 * Existing dentry is negative, or points at a different node
		 * or file type: drop it and allocate a fresh one.
		 */
		if (!inode ||
		    get_node_id(inode) != o->nodeid ||
		    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (is_bad_inode(inode)) {
			dput(dentry);
			return -EIO;
		}

		fi = get_fuse_inode(inode);
		spin_lock(&fc->lock);
		fi->nlookup++;
		spin_unlock(&fc->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr,
				       entry_attr_timeout(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, entry_attr_timeout(o),
				  attr_version);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry)) {
			/*
			 * fuse_iget() succeeded and bumped nlookup, but the
			 * splice failed.  The caller will send a FORGET for
			 * this nodeid, so give the count back here; otherwise
			 * the server is later told to forget one lookup more
			 * than it handed out.
			 *
			 * NOTE(review): d_splice_alias() drops the inode
			 * reference on failure; this relies on the inode
			 * still being pinned by its pre-existing alias.
			 */
			if (!IS_ERR(inode)) {
				struct fuse_inode *fi = get_fuse_inode(inode);

				spin_lock(&fc->lock);
				fi->nlookup--;
				spin_unlock(&fc->lock);
			}
			return PTR_ERR(dentry);
		}
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
/*
 * Walk a READDIRPLUS reply of @nbytes bytes at @buf.
 *
 * Entries are emitted to @ctx until the consumer refuses one; after that the
 * remaining entries are no longer emitted but are still linked into the
 * dcache.  An entry whose linking fails gets an explicit FORGET.
 *
 * Returns 0, or -EIO if an entry has an empty or over-long name or a name
 * containing '/'.
 */
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version)
{
	bool consumer_full = false;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		struct fuse_direntplus *plus = (struct fuse_direntplus *)buf;
		struct fuse_dirent *ent = &plus->dirent;
		size_t plus_size = FUSE_DIRENTPLUS_SIZE(plus);

		if (!ent->namelen || ent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (plus_size > nbytes)
			break;
		if (memchr(ent->name, '/', ent->namelen))
			return -EIO;

		/*
		 * Emit only while the consumer still accepts entries.  The
		 * walk continues regardless so that every returned entry is
		 * either linked or forgotten.
		 */
		if (!consumer_full) {
			if (fuse_emit(file, ctx, ent))
				ctx->pos = ent->off;
			else
				consumer_full = true;
		}

		buf += plus_size;
		nbytes -= plus_size;

		if (fuse_direntplus_link(file, plus, attr_version))
			fuse_force_forget(file, plus->entry_out.nodeid);
	}

	return 0;
}
/*
 * Read one page worth of directory entries from the server, starting at
 * ctx->pos, and feed them to @ctx.
 *
 * Uses READDIRPLUS or READDIR depending on fuse_use_readdirplus().  An empty
 * successful reply finalizes the readdir cache when the file was opened with
 * FOPEN_CACHE_DIR.
 *
 * Returns 0 on success or a negative errno (request allocation failure,
 * -ENOMEM, the error reported in the reply, or a parse error).
 */
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_req *req;
	struct page *page;
	u64 attr_version = 0;
	size_t nbytes;
	bool use_plus;
	bool locked;
	int err;

	req = fuse_get_req(fc, 1);
	if (IS_ERR(req))
		return PTR_ERR(req);

	page = alloc_page(GFP_KERNEL);
	if (!page) {
		fuse_put_request(fc, req);
		return -ENOMEM;
	}

	use_plus = fuse_use_readdirplus(inode, ctx);

	/* The reply payload lands in the single page allocated above. */
	req->out.argpages = 1;
	req->num_pages = 1;
	req->pages[0] = page;
	req->page_descs[0].length = PAGE_SIZE;

	/* Sample the attribute version before the request goes out. */
	if (use_plus)
		attr_version = fuse_get_attr_version(fc);
	fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
		       use_plus ? FUSE_READDIRPLUS : FUSE_READDIR);

	locked = fuse_lock_inode(inode);
	fuse_request_send(fc, req);
	fuse_unlock_inode(inode, locked);

	nbytes = req->out.args[0].size;
	err = req->out.h.error;
	fuse_put_request(fc, req);
	if (err)
		goto out;

	if (!nbytes) {
		struct fuse_file *ff = file->private_data;

		/* End of directory: the cache (if any) may now be complete. */
		if (ff->open_flags & FOPEN_CACHE_DIR)
			fuse_readdir_cache_end(file, ctx->pos);
	} else if (use_plus) {
		err = parse_dirplusfile(page_address(page), nbytes,
					file, ctx, attr_version);
	} else {
		err = parse_dirfile(page_address(page), nbytes, file, ctx);
	}

out:
	__free_page(page);
	fuse_invalidate_atime(inode);
	return err;
}
/* Outcome of scanning one page of the readdir cache (fuse_parse_cache()). */
enum fuse_parse_result {
	FOUND_ERR	= -1,	/* a cached entry failed validation */
	FOUND_NONE	= 0,	/* nothing was emitted from this page */
	FOUND_SOME	= 1,	/* at least one entry was emitted */
	FOUND_ALL	= 2,	/* consumer refused an entry: stop reading */
};
static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
void *addr, unsigned int size,
struct dir_context *ctx)
{
unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
enum fuse_parse_result res = FOUND_NONE;
WARN_ON(offset >= size);
for (;;) {
struct fuse_dirent *dirent = addr + offset;
unsigned int nbytes = size - offset;
size_t reclen = FUSE_DIRENT_SIZE(dirent);
if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
break;
if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
return FOUND_ERR;
if (WARN_ON(reclen > nbytes))
return FOUND_ERR;
if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
return FOUND_ERR;
if (ff->readdir.pos == ctx->pos) {
res = FOUND_SOME;
if (!dir_emit(ctx, dirent->name, dirent->namelen,
dirent->ino, dirent->type))
return FOUND_ALL;
ctx->pos = dirent->off;
}
ff->readdir.pos = dirent->off;
ff->readdir.cache_off += reclen;
offset += reclen;
}
return res;
}
/*
 * Throw away the readdir cache of @inode: bump the cache version so readers
 * holding the old one notice, and restart filling from an empty cache at
 * position zero.  The callers in this file invoke it with rdc.lock held.
 */
static void fuse_rdc_reset(struct inode *inode)
{
	struct fuse_inode *dir_fi = get_fuse_inode(inode);

	dir_fi->rdc.version++;
	dir_fi->rdc.cached = false;
	dir_fi->rdc.pos = 0;
	dir_fi->rdc.size = 0;
}
/*
 * Positive sentinel returned by fuse_readdir_cached() when the request cannot
 * be served from the cache; fuse_readdir() reacts to it by calling
 * fuse_readdir_uncached().
 */
#define UNCACHED 1
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
struct fuse_file *ff = file->private_data;
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
enum fuse_parse_result res;
pgoff_t index;
unsigned int size;
struct page *page;
void *addr;
/* Seeked? If so, reset the cache stream */
if (ff->readdir.pos != ctx->pos) {
ff->readdir.pos = 0;
ff->readdir.cache_off = 0;
}
/*
* We're just about to start reading into the cache or reading the
* cache; both cases require an up-to-date mtime value.
*/
if (!ctx->pos && fc->auto_inval_data) {
int err = fuse_update_attributes(inode, file);
if (err)
return err;
}
retry:
spin_lock(&fi->rdc.lock);
retry_locked:
if (!fi->rdc.cached) {
/* Starting cache? Set cache mtime. */
if (!ctx->pos && !fi->rdc.size) {
fi->rdc.mtime = inode->i_mtime;
fi->rdc.iversion = inode_query_iversion(inode);
}
spin_unlock(&fi->rdc.lock);
return UNCACHED;
}
/*
* When at the beginning of the directory (i.e. just after opendir(3) or
* rewinddir(3)), then need to check whether directory contents have
* changed, and reset the cache if so.
*/
if (!ctx->pos) {
if (inode_peek_iversion(inode) != fi->rdc.iversion ||
!timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
fuse_rdc_reset(inode);
goto retry_locked;
}
}
/*
* If cache version changed since the last getdents() call, then reset
* the cache stream.
*/
if (ff->readdir.version != fi->rdc.version) {
ff->readdir.pos = 0;
ff->readdir.cache_off = 0;
}
/*
* If at the beginning of the cache, than reset version to
* current.
*/
if (ff->readdir.pos == 0)
ff->readdir.version = fi->rdc.version;
WARN_ON(fi->rdc.size < ff->readdir.cache_off);
index = ff->readdir.cache_off >> PAGE_SHIFT;
if (index == (fi->rdc.size >> PAGE_SHIFT))
size = fi->rdc.size & ~PAGE_MASK;
else
size = PAGE_SIZE;
spin_unlock(&fi->rdc.lock);
/* EOF? */
if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
return 0;
page = find_get_page_flags(file->f_mapping, index,
FGP_ACCESSED | FGP_LOCK);
spin_lock(&fi->rdc.lock);
if (!page) {
/*
* Uh-oh: page gone missing, cache is useless
*/
if (fi->rdc.version == ff->readdir.version)
fuse_rdc_reset(inode);
goto retry_locked;
}
/* Make sure it's still the same version after getting the page. */
if (ff->readdir.version != fi->rdc.version) {
spin_unlock(&fi->rdc.lock);
unlock_page(page);
put_page(page);
goto retry;
}
spin_unlock(&fi->rdc.lock);
/*
* Contents of the page are now protected against changing by holding
* the page lock.
*/
addr = kmap(page);
res = fuse_parse_cache(ff, addr, size, ctx);
kunmap(page);
unlock_page(page);
put_page(page);
if (res == FOUND_ERR)
return -EIO;
if (res == FOUND_ALL)
return 0;
if (size == PAGE_SIZE) {
/* We hit end of page: skip to next page. */
ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
goto retry;
}
/*
* End of cache reached. If found position, then we are done, otherwise
* need to fall back to uncached, since the position we were looking for
* wasn't in the cache.
*/
return res == FOUND_SOME ? 0 : UNCACHED;
}
/*
 * readdir entry point for FUSE directories.
 *
 * Serializes readers of the same open file with ff->readdir.lock, tries the
 * readdir cache when the file was opened with FOPEN_CACHE_DIR, and falls back
 * to querying the server when the cache cannot serve the request.
 *
 * Returns 0 on success, -EIO for a bad inode, or the error of the path taken.
 */
int fuse_readdir(struct file *file, struct dir_context *ctx)
{
	struct inode *inode = file_inode(file);
	struct fuse_file *ff = file->private_data;
	int ret = UNCACHED;

	if (is_bad_inode(inode))
		return -EIO;

	mutex_lock(&ff->readdir.lock);

	if (ff->open_flags & FOPEN_CACHE_DIR)
		ret = fuse_readdir_cached(file, ctx);
	if (ret == UNCACHED)
		ret = fuse_readdir_uncached(file, ctx);

	mutex_unlock(&ff->readdir.lock);

	return ret;
}