forked from luck/tmp_suning_uos_patched
Merge branch 'fs-file-descriptor-optimization'
Merge file descriptor allocation speedup. Eric Dumazet has a test-case for a fairly common network daemon load pattern: opening and closing a lot of sockets that each have very little work done on them. It turns out that in that case, the cost of just finding the correct file descriptor number can be a dominating factor. We've long had a trivial optimization for allocating file descriptors sequentially, but that optimization ends up being not very effective when other file descriptors are being closed concurrently, and the fd patterns are not some simple FIFO pattern. In such cases we ended up spending a lot of time just scanning the bitmap of open file descriptors in order to find the next file descriptor number to open. This trivial patch-series mitigates that by simply introducing a second-level bitmap of which words in the first bitmap are already fully allocated. That cuts down the cost of scanning by an order of magnitude in some pathological (but realistic) cases. The second patch is an even more trivial patch to avoid unnecessarily dirtying the cacheline for the close-on-exec bit array that normally ends up being all empty. * fs-file-descriptor-optimization: vfs: conditionally clear close-on-exec flag vfs: Fix pathological performance case for __alloc_fd()
This commit is contained in:
commit
2e00266297
42
fs/file.c
42
fs/file.c
|
@ -56,6 +56,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
|
||||||
__free_fdtable(container_of(rcu, struct fdtable, rcu));
|
__free_fdtable(container_of(rcu, struct fdtable, rcu));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr))
|
||||||
|
#define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Expand the fdset in the files_struct. Called with the files spinlock
|
* Expand the fdset in the files_struct. Called with the files spinlock
|
||||||
* held for write.
|
* held for write.
|
||||||
|
@ -77,6 +80,11 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
|
||||||
memset((char *)(nfdt->open_fds) + cpy, 0, set);
|
memset((char *)(nfdt->open_fds) + cpy, 0, set);
|
||||||
memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
|
memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
|
||||||
memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
|
memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
|
||||||
|
|
||||||
|
cpy = BITBIT_SIZE(ofdt->max_fds);
|
||||||
|
set = BITBIT_SIZE(nfdt->max_fds) - cpy;
|
||||||
|
memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
|
||||||
|
memset(cpy+(char *)nfdt->full_fds_bits, 0, set);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct fdtable * alloc_fdtable(unsigned int nr)
|
static struct fdtable * alloc_fdtable(unsigned int nr)
|
||||||
|
@ -115,12 +123,14 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
|
||||||
fdt->fd = data;
|
fdt->fd = data;
|
||||||
|
|
||||||
data = alloc_fdmem(max_t(size_t,
|
data = alloc_fdmem(max_t(size_t,
|
||||||
2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
|
2 * nr / BITS_PER_BYTE + BITBIT_SIZE(nr), L1_CACHE_BYTES));
|
||||||
if (!data)
|
if (!data)
|
||||||
goto out_arr;
|
goto out_arr;
|
||||||
fdt->open_fds = data;
|
fdt->open_fds = data;
|
||||||
data += nr / BITS_PER_BYTE;
|
data += nr / BITS_PER_BYTE;
|
||||||
fdt->close_on_exec = data;
|
fdt->close_on_exec = data;
|
||||||
|
data += nr / BITS_PER_BYTE;
|
||||||
|
fdt->full_fds_bits = data;
|
||||||
|
|
||||||
return fdt;
|
return fdt;
|
||||||
|
|
||||||
|
@ -226,17 +236,22 @@ static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
|
||||||
|
|
||||||
static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
|
static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
|
||||||
{
|
{
|
||||||
__clear_bit(fd, fdt->close_on_exec);
|
if (test_bit(fd, fdt->close_on_exec))
|
||||||
|
__clear_bit(fd, fdt->close_on_exec);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __set_open_fd(int fd, struct fdtable *fdt)
|
static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
|
||||||
{
|
{
|
||||||
__set_bit(fd, fdt->open_fds);
|
__set_bit(fd, fdt->open_fds);
|
||||||
|
fd /= BITS_PER_LONG;
|
||||||
|
if (!~fdt->open_fds[fd])
|
||||||
|
__set_bit(fd, fdt->full_fds_bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __clear_open_fd(int fd, struct fdtable *fdt)
|
static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
|
||||||
{
|
{
|
||||||
__clear_bit(fd, fdt->open_fds);
|
__clear_bit(fd, fdt->open_fds);
|
||||||
|
__clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int count_open_files(struct fdtable *fdt)
|
static int count_open_files(struct fdtable *fdt)
|
||||||
|
@ -280,6 +295,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
||||||
new_fdt->max_fds = NR_OPEN_DEFAULT;
|
new_fdt->max_fds = NR_OPEN_DEFAULT;
|
||||||
new_fdt->close_on_exec = newf->close_on_exec_init;
|
new_fdt->close_on_exec = newf->close_on_exec_init;
|
||||||
new_fdt->open_fds = newf->open_fds_init;
|
new_fdt->open_fds = newf->open_fds_init;
|
||||||
|
new_fdt->full_fds_bits = newf->full_fds_bits_init;
|
||||||
new_fdt->fd = &newf->fd_array[0];
|
new_fdt->fd = &newf->fd_array[0];
|
||||||
|
|
||||||
spin_lock(&oldf->file_lock);
|
spin_lock(&oldf->file_lock);
|
||||||
|
@ -323,6 +339,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
|
||||||
|
|
||||||
memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
|
memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
|
||||||
memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
|
memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
|
||||||
|
memcpy(new_fdt->full_fds_bits, old_fdt->full_fds_bits, BITBIT_SIZE(open_files));
|
||||||
|
|
||||||
for (i = open_files; i != 0; i--) {
|
for (i = open_files; i != 0; i--) {
|
||||||
struct file *f = *old_fds++;
|
struct file *f = *old_fds++;
|
||||||
|
@ -454,10 +471,25 @@ struct files_struct init_files = {
|
||||||
.fd = &init_files.fd_array[0],
|
.fd = &init_files.fd_array[0],
|
||||||
.close_on_exec = init_files.close_on_exec_init,
|
.close_on_exec = init_files.close_on_exec_init,
|
||||||
.open_fds = init_files.open_fds_init,
|
.open_fds = init_files.open_fds_init,
|
||||||
|
.full_fds_bits = init_files.full_fds_bits_init,
|
||||||
},
|
},
|
||||||
.file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
|
.file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static unsigned long find_next_fd(struct fdtable *fdt, unsigned long start)
|
||||||
|
{
|
||||||
|
unsigned long maxfd = fdt->max_fds;
|
||||||
|
unsigned long maxbit = maxfd / BITS_PER_LONG;
|
||||||
|
unsigned long bitbit = start / BITS_PER_LONG;
|
||||||
|
|
||||||
|
bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
|
||||||
|
if (bitbit > maxfd)
|
||||||
|
return maxfd;
|
||||||
|
if (bitbit > start)
|
||||||
|
start = bitbit;
|
||||||
|
return find_next_zero_bit(fdt->open_fds, maxfd, start);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* allocate a file descriptor, mark it busy.
|
* allocate a file descriptor, mark it busy.
|
||||||
*/
|
*/
|
||||||
|
@ -476,7 +508,7 @@ int __alloc_fd(struct files_struct *files,
|
||||||
fd = files->next_fd;
|
fd = files->next_fd;
|
||||||
|
|
||||||
if (fd < fdt->max_fds)
|
if (fd < fdt->max_fds)
|
||||||
fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
|
fd = find_next_fd(fdt, fd);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* N.B. For clone tasks sharing a files structure, this test
|
* N.B. For clone tasks sharing a files structure, this test
|
||||||
|
|
|
@ -26,6 +26,7 @@ struct fdtable {
|
||||||
struct file __rcu **fd; /* current fd array */
|
struct file __rcu **fd; /* current fd array */
|
||||||
unsigned long *close_on_exec;
|
unsigned long *close_on_exec;
|
||||||
unsigned long *open_fds;
|
unsigned long *open_fds;
|
||||||
|
unsigned long *full_fds_bits;
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -59,6 +60,7 @@ struct files_struct {
|
||||||
int next_fd;
|
int next_fd;
|
||||||
unsigned long close_on_exec_init[1];
|
unsigned long close_on_exec_init[1];
|
||||||
unsigned long open_fds_init[1];
|
unsigned long open_fds_init[1];
|
||||||
|
unsigned long full_fds_bits_init[1];
|
||||||
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
|
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user