kernel_optimize_test/fs/proc/task_nommu.c
Siddhesh Poyarekar b76437579d procfs: mark thread stack correctly in proc/<pid>/maps
Stack for a new thread is mapped by userspace code and passed via
sys_clone.  This memory is currently seen as anonymous in
/proc/<pid>/maps, which makes it difficult to ascertain which mappings
are being used for thread stacks.  This patch uses the individual task
stack pointers to determine which vmas are actually thread stacks.

For a multithreaded program like the following:

	#include <pthread.h>

	void *thread_main(void *foo)
	{
		while(1);
	}

	int main()
	{
		pthread_t t;
		pthread_create(&t, NULL, thread_main, NULL);
		pthread_join(t, NULL);
	}

proc/PID/maps looks like the following:

    00400000-00401000 r-xp 00000000 fd:0a 3671804                            /home/siddhesh/a.out
    00600000-00601000 rw-p 00000000 fd:0a 3671804                            /home/siddhesh/a.out
    019ef000-01a10000 rw-p 00000000 00:00 0                                  [heap]
    7f8a44491000-7f8a44492000 ---p 00000000 00:00 0
    7f8a44492000-7f8a44c92000 rw-p 00000000 00:00 0
    7f8a44c92000-7f8a44e3d000 r-xp 00000000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a44e3d000-7f8a4503d000 ---p 001ab000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a4503d000-7f8a45041000 r--p 001ab000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a45041000-7f8a45043000 rw-p 001af000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a45043000-7f8a45048000 rw-p 00000000 00:00 0
    7f8a45048000-7f8a4505f000 r-xp 00000000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a4505f000-7f8a4525e000 ---p 00017000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a4525e000-7f8a4525f000 r--p 00016000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a4525f000-7f8a45260000 rw-p 00017000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a45260000-7f8a45264000 rw-p 00000000 00:00 0
    7f8a45264000-7f8a45286000 r-xp 00000000 fd:00 2097348                    /lib64/ld-2.14.90.so
    7f8a45457000-7f8a4545a000 rw-p 00000000 00:00 0
    7f8a45484000-7f8a45485000 rw-p 00000000 00:00 0
    7f8a45485000-7f8a45486000 r--p 00021000 fd:00 2097348                    /lib64/ld-2.14.90.so
    7f8a45486000-7f8a45487000 rw-p 00022000 fd:00 2097348                    /lib64/ld-2.14.90.so
    7f8a45487000-7f8a45488000 rw-p 00000000 00:00 0
    7fff6273b000-7fff6275c000 rw-p 00000000 00:00 0                          [stack]
    7fff627ff000-7fff62800000 r-xp 00000000 00:00 0                          [vdso]
    ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]

Here, one could guess that 7f8a44492000-7f8a44c92000 is a stack since
the earlier vma that has no permissions (7f8a44e3d000-7f8a4503d000) but
that is not always a reliable way to find out which vma is a thread
stack.  Also, /proc/PID/maps and /proc/PID/task/TID/maps has the same
content.

With this patch in place, /proc/PID/task/TID/maps are treated as 'maps
as the task would see it' and hence, only the vma that that task uses as
stack is marked as [stack].  All other 'stack' vmas are marked as
anonymous memory.  /proc/PID/maps acts as a thread group level view,
where all thread stack vmas are marked as [stack:TID] where TID is the
process ID of the task that uses that vma as stack, while the process
stack is marked as [stack].

So /proc/PID/maps will look like this:

    00400000-00401000 r-xp 00000000 fd:0a 3671804                            /home/siddhesh/a.out
    00600000-00601000 rw-p 00000000 fd:0a 3671804                            /home/siddhesh/a.out
    019ef000-01a10000 rw-p 00000000 00:00 0                                  [heap]
    7f8a44491000-7f8a44492000 ---p 00000000 00:00 0
    7f8a44492000-7f8a44c92000 rw-p 00000000 00:00 0                          [stack:1442]
    7f8a44c92000-7f8a44e3d000 r-xp 00000000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a44e3d000-7f8a4503d000 ---p 001ab000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a4503d000-7f8a45041000 r--p 001ab000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a45041000-7f8a45043000 rw-p 001af000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a45043000-7f8a45048000 rw-p 00000000 00:00 0
    7f8a45048000-7f8a4505f000 r-xp 00000000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a4505f000-7f8a4525e000 ---p 00017000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a4525e000-7f8a4525f000 r--p 00016000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a4525f000-7f8a45260000 rw-p 00017000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a45260000-7f8a45264000 rw-p 00000000 00:00 0
    7f8a45264000-7f8a45286000 r-xp 00000000 fd:00 2097348                    /lib64/ld-2.14.90.so
    7f8a45457000-7f8a4545a000 rw-p 00000000 00:00 0
    7f8a45484000-7f8a45485000 rw-p 00000000 00:00 0
    7f8a45485000-7f8a45486000 r--p 00021000 fd:00 2097348                    /lib64/ld-2.14.90.so
    7f8a45486000-7f8a45487000 rw-p 00022000 fd:00 2097348                    /lib64/ld-2.14.90.so
    7f8a45487000-7f8a45488000 rw-p 00000000 00:00 0
    7fff6273b000-7fff6275c000 rw-p 00000000 00:00 0                          [stack]
    7fff627ff000-7fff62800000 r-xp 00000000 00:00 0                          [vdso]
    ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]

Thus marking all vmas that are used as stacks by the threads in the
thread group along with the process stack.  The task level maps will
however like this:

    00400000-00401000 r-xp 00000000 fd:0a 3671804                            /home/siddhesh/a.out
    00600000-00601000 rw-p 00000000 fd:0a 3671804                            /home/siddhesh/a.out
    019ef000-01a10000 rw-p 00000000 00:00 0                                  [heap]
    7f8a44491000-7f8a44492000 ---p 00000000 00:00 0
    7f8a44492000-7f8a44c92000 rw-p 00000000 00:00 0                          [stack]
    7f8a44c92000-7f8a44e3d000 r-xp 00000000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a44e3d000-7f8a4503d000 ---p 001ab000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a4503d000-7f8a45041000 r--p 001ab000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a45041000-7f8a45043000 rw-p 001af000 fd:00 2097482                    /lib64/libc-2.14.90.so
    7f8a45043000-7f8a45048000 rw-p 00000000 00:00 0
    7f8a45048000-7f8a4505f000 r-xp 00000000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a4505f000-7f8a4525e000 ---p 00017000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a4525e000-7f8a4525f000 r--p 00016000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a4525f000-7f8a45260000 rw-p 00017000 fd:00 2099938                    /lib64/libpthread-2.14.90.so
    7f8a45260000-7f8a45264000 rw-p 00000000 00:00 0
    7f8a45264000-7f8a45286000 r-xp 00000000 fd:00 2097348                    /lib64/ld-2.14.90.so
    7f8a45457000-7f8a4545a000 rw-p 00000000 00:00 0
    7f8a45484000-7f8a45485000 rw-p 00000000 00:00 0
    7f8a45485000-7f8a45486000 r--p 00021000 fd:00 2097348                    /lib64/ld-2.14.90.so
    7f8a45486000-7f8a45487000 rw-p 00022000 fd:00 2097348                    /lib64/ld-2.14.90.so
    7f8a45487000-7f8a45488000 rw-p 00000000 00:00 0
    7fff6273b000-7fff6275c000 rw-p 00000000 00:00 0
    7fff627ff000-7fff62800000 r-xp 00000000 00:00 0                          [vdso]
    ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]

where only the vma that is being used as a stack by *that* task is
marked as [stack].

Analogous changes have been made to /proc/PID/smaps,
/proc/PID/numa_maps, /proc/PID/task/TID/smaps and
/proc/PID/task/TID/numa_maps. Relevant snippets from smaps and
numa_maps:

    [siddhesh@localhost ~ ]$ pgrep a.out
    1441
    [siddhesh@localhost ~ ]$ cat /proc/1441/smaps | grep "\[stack"
    7f8a44492000-7f8a44c92000 rw-p 00000000 00:00 0                          [stack:1442]
    7fff6273b000-7fff6275c000 rw-p 00000000 00:00 0                          [stack]
    [siddhesh@localhost ~ ]$ cat /proc/1441/task/1442/smaps | grep "\[stack"
    7f8a44492000-7f8a44c92000 rw-p 00000000 00:00 0                          [stack]
    [siddhesh@localhost ~ ]$ cat /proc/1441/task/1441/smaps | grep "\[stack"
    7fff6273b000-7fff6275c000 rw-p 00000000 00:00 0                          [stack]
    [siddhesh@localhost ~ ]$ cat /proc/1441/numa_maps | grep "stack"
    7f8a44492000 default stack:1442 anon=2 dirty=2 N0=2
    7fff6273a000 default stack anon=3 dirty=3 N0=3
    [siddhesh@localhost ~ ]$ cat /proc/1441/task/1442/numa_maps | grep "stack"
    7f8a44492000 default stack anon=2 dirty=2 N0=2
    [siddhesh@localhost ~ ]$ cat /proc/1441/task/1441/numa_maps | grep "stack"
    7fff6273a000 default stack anon=3 dirty=3 N0=3

[akpm@linux-foundation.org: checkpatch fixes]
[akpm@linux-foundation.org: fix build]
Signed-off-by: Siddhesh Poyarekar <siddhesh.poyarekar@gmail.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Jamie Lokier <jamie@shareable.org>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-03-21 17:54:58 -07:00

319 lines
7.0 KiB
C

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/fs_struct.h>
#include <linux/mount.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include "internal.h"
/*
* Logic: we've got two memory sums for each process, "shared", and
* "non-shared". Shared memory may get counted more than once, for
* each process that owns it. Non-shared memory is counted
* accurately.
*/
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *p;
unsigned long bytes = 0, sbytes = 0, slack = 0, size;
down_read(&mm->mmap_sem);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
bytes += kobjsize(vma);
region = vma->vm_region;
if (region) {
size = kobjsize(region);
size += region->vm_end - region->vm_start;
} else {
size = vma->vm_end - vma->vm_start;
}
if (atomic_read(&mm->mm_count) > 1 ||
vma->vm_flags & VM_MAYSHARE) {
sbytes += size;
} else {
bytes += size;
if (region)
slack = region->vm_end - vma->vm_end;
}
}
if (atomic_read(&mm->mm_count) > 1)
sbytes += kobjsize(mm);
else
bytes += kobjsize(mm);
if (current->fs && current->fs->users > 1)
sbytes += kobjsize(current->fs);
else
bytes += kobjsize(current->fs);
if (current->files && atomic_read(&current->files->count) > 1)
sbytes += kobjsize(current->files);
else
bytes += kobjsize(current->files);
if (current->sighand && atomic_read(&current->sighand->count) > 1)
sbytes += kobjsize(current->sighand);
else
bytes += kobjsize(current->sighand);
bytes += kobjsize(current); /* includes kernel stack */
seq_printf(m,
"Mem:\t%8lu bytes\n"
"Slack:\t%8lu bytes\n"
"Shared:\t%8lu bytes\n",
bytes, slack, sbytes);
up_read(&mm->mmap_sem);
}
unsigned long task_vsize(struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct rb_node *p;
unsigned long vsize = 0;
down_read(&mm->mmap_sem);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
vsize += vma->vm_end - vma->vm_start;
}
up_read(&mm->mmap_sem);
return vsize;
}
unsigned long task_statm(struct mm_struct *mm,
unsigned long *shared, unsigned long *text,
unsigned long *data, unsigned long *resident)
{
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *p;
unsigned long size = kobjsize(mm);
down_read(&mm->mmap_sem);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
size += kobjsize(vma);
region = vma->vm_region;
if (region) {
size += kobjsize(region);
size += region->vm_end - region->vm_start;
}
}
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
*data = (PAGE_ALIGN(mm->start_stack) - (mm->start_data & PAGE_MASK))
>> PAGE_SHIFT;
up_read(&mm->mmap_sem);
size >>= PAGE_SHIFT;
size += *text + *data;
*resident = size;
return size;
}
static void pad_len_spaces(struct seq_file *m, int len)
{
len = 25 + sizeof(void*) * 6 - len;
if (len < 1)
len = 1;
seq_printf(m, "%*c", len, ' ');
}
/*
* display a single VMA to a sequenced file
*/
static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
int is_pid)
{
struct mm_struct *mm = vma->vm_mm;
struct proc_maps_private *priv = m->private;
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
int flags, len;
unsigned long long pgoff = 0;
flags = vma->vm_flags;
file = vma->vm_file;
if (file) {
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
}
seq_printf(m,
"%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
vma->vm_start,
vma->vm_end,
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
pgoff,
MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
pad_len_spaces(m, len);
seq_path(m, &file->f_path, "");
} else if (mm) {
pid_t tid = vm_is_stack(priv->task, vma, is_pid);
if (tid != 0) {
pad_len_spaces(m, len);
/*
* Thread stack in /proc/PID/task/TID/maps or
* the main process stack.
*/
if (!is_pid || (vma->vm_start <= mm->start_stack &&
vma->vm_end >= mm->start_stack))
seq_printf(m, "[stack]");
else
seq_printf(m, "[stack:%d]", tid);
}
}
seq_putc(m, '\n');
return 0;
}
/*
* display mapping lines for a particular process's /proc/pid/maps
*/
static int show_map(struct seq_file *m, void *_p, int is_pid)
{
struct rb_node *p = _p;
return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb),
is_pid);
}
static int show_pid_map(struct seq_file *m, void *_p)
{
return show_map(m, _p, 1);
}
static int show_tid_map(struct seq_file *m, void *_p)
{
return show_map(m, _p, 0);
}
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_maps_private *priv = m->private;
struct mm_struct *mm;
struct rb_node *p;
loff_t n = *pos;
/* pin the task and mm whilst we play with them */
priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
if (!priv->task)
return ERR_PTR(-ESRCH);
mm = mm_for_maps(priv->task);
if (!mm || IS_ERR(mm)) {
put_task_struct(priv->task);
priv->task = NULL;
return mm;
}
down_read(&mm->mmap_sem);
/* start from the Nth VMA */
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
if (n-- == 0)
return p;
return NULL;
}
static void m_stop(struct seq_file *m, void *_vml)
{
struct proc_maps_private *priv = m->private;
if (priv->task) {
struct mm_struct *mm = priv->task->mm;
up_read(&mm->mmap_sem);
mmput(mm);
put_task_struct(priv->task);
}
}
static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
{
struct rb_node *p = _p;
(*pos)++;
return p ? rb_next(p) : NULL;
}
static const struct seq_operations proc_pid_maps_ops = {
.start = m_start,
.next = m_next,
.stop = m_stop,
.show = show_pid_map
};
static const struct seq_operations proc_tid_maps_ops = {
.start = m_start,
.next = m_next,
.stop = m_stop,
.show = show_tid_map
};
static int maps_open(struct inode *inode, struct file *file,
const struct seq_operations *ops)
{
struct proc_maps_private *priv;
int ret = -ENOMEM;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (priv) {
priv->pid = proc_pid(inode);
ret = seq_open(file, ops);
if (!ret) {
struct seq_file *m = file->private_data;
m->private = priv;
} else {
kfree(priv);
}
}
return ret;
}
static int pid_maps_open(struct inode *inode, struct file *file)
{
return maps_open(inode, file, &proc_pid_maps_ops);
}
static int tid_maps_open(struct inode *inode, struct file *file)
{
return maps_open(inode, file, &proc_tid_maps_ops);
}
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
const struct file_operations proc_tid_maps_operations = {
.open = tid_maps_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};