kernel_optimize_test/include/linux/proc_fs.h
Alexey Dobriyan 2d3a4e3666 proc: fix ->open'less usage due to ->proc_fops flip
Typical PDE creation code looks like:

	pde = create_proc_entry("foo", 0, NULL);
	if (pde)
		pde->proc_fops = &foo_proc_fops;

Notice that PDE is first created, only then ->proc_fops is set up to
final value. This is a problem because right after creation
a) PDE is fully visible in /proc , and
b) ->proc_fops are proc_file_operations which do not have ->open callback. So, it's
   possible to ->read without ->open (see one class of oopses below).

The fix is new API called proc_create() which makes sure ->proc_fops are
set up before gluing PDE to main tree. Typical new code looks like:

	pde = proc_create("foo", 0, NULL, &foo_proc_fops);
	if (!pde)
		return -ENOMEM;

Fix most networking users for a start.

In the long run, create_proc_entry() for regular files will go.

BUG: unable to handle kernel NULL pointer dereference at virtual address 00000024
printing eip: c1188c1b *pdpt = 000000002929e001 *pde = 0000000000000000
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
last sysfs file: /sys/block/sda/sda1/dev
Modules linked in: foo af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom

Pid: 24679, comm: cat Not tainted (2.6.24-rc3-mm1 #2)
EIP: 0060:[<c1188c1b>] EFLAGS: 00210002 CPU: 0
EIP is at mutex_lock_nested+0x75/0x25d
EAX: 000006fe EBX: fffffffb ECX: 00001000 EDX: e9340570
ESI: 00000020 EDI: 00200246 EBP: e9340570 ESP: e8ea1ef8
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 24679, ti=E8EA1000 task=E9340570 task.ti=E8EA1000)
Stack: 00000000 c106f7ce e8ee05b4 00000000 00000001 458003d0 f6fb6f20 fffffffb
       00000000 c106f7aa 00001000 c106f7ce 08ae9000 f6db53f0 00000020 00200246
       00000000 00000002 00000000 00200246 00200246 e8ee05a0 fffffffb e8ee0550
Call Trace:
 [<c106f7ce>] seq_read+0x24/0x28a
 [<c106f7aa>] seq_read+0x0/0x28a
 [<c106f7ce>] seq_read+0x24/0x28a
 [<c106f7aa>] seq_read+0x0/0x28a
 [<c10818b8>] proc_reg_read+0x60/0x73
 [<c1081858>] proc_reg_read+0x0/0x73
 [<c105a34f>] vfs_read+0x6c/0x8b
 [<c105a6f3>] sys_read+0x3c/0x63
 [<c10025f2>] sysenter_past_esp+0x5f/0xa5
 [<c10697a7>] destroy_inode+0x24/0x33
 =======================
INFO: lockdep is turned off.
Code: 75 21 68 e1 1a 19 c1 68 87 00 00 00 68 b8 e8 1f c1 68 25 73 1f c1 e8 84 06 e9 ff e8 52 b8 e7 ff 83 c4 10 9c 5f fa e8 28 89 ea ff <f0> fe 4e 04 79 0a f3 90 80 7e 04 00 7e f8 eb f0 39 76 34 74 33
EIP: [<c1188c1b>] mutex_lock_nested+0x75/0x25d SS:ESP 0068:e8ea1ef8

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-02-08 09:22:24 -08:00

313 lines
9.2 KiB
C

#ifndef _LINUX_PROC_FS_H
#define _LINUX_PROC_FS_H
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/spinlock.h>
#include <linux/magic.h>
#include <asm/atomic.h>
struct net;
struct completion;
/*
* The proc filesystem constants/structures
*/
/*
* Offset of the first process in the /proc root directory..
*/
#define FIRST_PROCESS_ENTRY 256
/* Worst case buffer size needed for holding an integer. */
#define PROC_NUMBUF 13
/*
* We always define these enumerators
*/
enum {
PROC_ROOT_INO = 1,
};
/*
* This is not completely implemented yet. The idea is to
* create an in-memory tree (like the actual /proc filesystem
* tree) of these proc_dir_entries, so that we can dynamically
* add new files to /proc.
*
* The "next" pointer creates a linked list of one /proc directory,
* while parent/subdir create the directory structure (every
* /proc file has a parent, but "subdir" is NULL for all
* non-directory entries).
*
* "get_info" is called at "read", while "owner" is used to protect module
* from unloading while proc_dir_entry is in use
*/
typedef int (read_proc_t)(char *page, char **start, off_t off,
int count, int *eof, void *data);
typedef int (write_proc_t)(struct file *file, const char __user *buffer,
unsigned long count, void *data);
typedef int (get_info_t)(char *, char **, off_t, int);
typedef struct proc_dir_entry *(shadow_proc_t)(struct task_struct *task,
struct proc_dir_entry *pde);
struct proc_dir_entry {
unsigned int low_ino;
unsigned short namelen;
const char *name;
mode_t mode;
nlink_t nlink;
uid_t uid;
gid_t gid;
loff_t size;
const struct inode_operations *proc_iops;
/*
* NULL ->proc_fops means "PDE is going away RSN" or
* "PDE is just created". In either case, e.g. ->read_proc won't be
* called because it's too late or too early, respectively.
*
* If you're allocating ->proc_fops dynamically, save a pointer
* somewhere.
*/
const struct file_operations *proc_fops;
get_info_t *get_info;
struct module *owner;
struct proc_dir_entry *next, *parent, *subdir;
void *data;
read_proc_t *read_proc;
write_proc_t *write_proc;
atomic_t count; /* use count */
int pde_users; /* number of callers into module in progress */
spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
struct completion *pde_unload_completion;
shadow_proc_t *shadow_proc;
};
struct kcore_list {
struct kcore_list *next;
unsigned long addr;
size_t size;
};
struct vmcore {
struct list_head list;
unsigned long long paddr;
unsigned long long size;
loff_t offset;
};
#ifdef CONFIG_PROC_FS
extern struct proc_dir_entry proc_root;
extern struct proc_dir_entry *proc_root_fs;
extern struct proc_dir_entry *proc_bus;
extern struct proc_dir_entry *proc_root_driver;
extern struct proc_dir_entry *proc_root_kcore;
extern spinlock_t proc_subdir_lock;
extern void proc_root_init(void);
extern void proc_misc_init(void);
struct mm_struct;
void proc_flush_task(struct task_struct *task);
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
unsigned long task_vsize(struct mm_struct *);
int task_statm(struct mm_struct *, int *, int *, int *, int *);
void task_mem(struct seq_file *, struct mm_struct *);
void clear_refs_smap(struct mm_struct *mm);
struct proc_dir_entry *de_get(struct proc_dir_entry *de);
void de_put(struct proc_dir_entry *de);
extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
struct proc_dir_entry *parent);
struct proc_dir_entry *proc_create(const char *name, mode_t mode,
struct proc_dir_entry *parent,
const struct file_operations *proc_fops);
extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
extern struct vfsmount *proc_mnt;
struct pid_namespace;
extern int proc_fill_super(struct super_block *);
extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
/*
* These are generic /proc routines that use the internal
* "struct proc_dir_entry" tree to traverse the filesystem.
*
* The /proc root directory has extended versions to take care
* of the /proc/<pid> subdirectories.
*/
extern int proc_readdir(struct file *, void *, filldir_t);
extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
extern const struct file_operations proc_kcore_operations;
extern const struct file_operations proc_kmsg_operations;
extern const struct file_operations ppc_htab_operations;
extern int pid_ns_prepare_proc(struct pid_namespace *ns);
extern void pid_ns_release_proc(struct pid_namespace *ns);
/*
* proc_tty.c
*/
struct tty_driver;
extern void proc_tty_init(void);
extern void proc_tty_register_driver(struct tty_driver *driver);
extern void proc_tty_unregister_driver(struct tty_driver *driver);
/*
* proc_devtree.c
*/
#ifdef CONFIG_PROC_DEVICETREE
struct device_node;
struct property;
extern void proc_device_tree_init(void);
extern void proc_device_tree_add_node(struct device_node *, struct proc_dir_entry *);
extern void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop);
extern void proc_device_tree_remove_prop(struct proc_dir_entry *pde,
struct property *prop);
extern void proc_device_tree_update_prop(struct proc_dir_entry *pde,
struct property *newprop,
struct property *oldprop);
#endif /* CONFIG_PROC_DEVICETREE */
extern struct proc_dir_entry *proc_symlink(const char *,
struct proc_dir_entry *, const char *);
extern struct proc_dir_entry *proc_mkdir(const char *,struct proc_dir_entry *);
extern struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
struct proc_dir_entry *parent);
static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
mode_t mode, struct proc_dir_entry *base,
read_proc_t *read_proc, void * data)
{
struct proc_dir_entry *res=create_proc_entry(name,mode,base);
if (res) {
res->read_proc=read_proc;
res->data=data;
}
return res;
}
static inline struct proc_dir_entry *create_proc_info_entry(const char *name,
mode_t mode, struct proc_dir_entry *base, get_info_t *get_info)
{
struct proc_dir_entry *res=create_proc_entry(name,mode,base);
if (res) res->get_info=get_info;
return res;
}
extern struct proc_dir_entry *proc_net_fops_create(struct net *net,
const char *name, mode_t mode, const struct file_operations *fops);
extern void proc_net_remove(struct net *net, const char *name);
extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
struct proc_dir_entry *parent);
#else
#define proc_root_driver NULL
#define proc_bus NULL
#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
static inline void proc_net_remove(struct net *net, const char *name) {}
static inline void proc_flush_task(struct task_struct *task)
{
}
static inline struct proc_dir_entry *create_proc_entry(const char *name,
mode_t mode, struct proc_dir_entry *parent) { return NULL; }
static inline struct proc_dir_entry *proc_create(const char *name,
mode_t mode, struct proc_dir_entry *parent,
const struct file_operations *proc_fops)
{
return NULL;
}
#define remove_proc_entry(name, parent) do {} while (0)
static inline struct proc_dir_entry *proc_symlink(const char *name,
struct proc_dir_entry *parent,const char *dest) {return NULL;}
static inline struct proc_dir_entry *proc_mkdir(const char *name,
struct proc_dir_entry *parent) {return NULL;}
static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
mode_t mode, struct proc_dir_entry *base,
read_proc_t *read_proc, void * data) { return NULL; }
static inline struct proc_dir_entry *create_proc_info_entry(const char *name,
mode_t mode, struct proc_dir_entry *base, get_info_t *get_info)
{ return NULL; }
struct tty_driver;
static inline void proc_tty_register_driver(struct tty_driver *driver) {};
static inline void proc_tty_unregister_driver(struct tty_driver *driver) {};
extern struct proc_dir_entry proc_root;
static inline int pid_ns_prepare_proc(struct pid_namespace *ns)
{
return 0;
}
static inline void pid_ns_release_proc(struct pid_namespace *ns)
{
}
#endif /* CONFIG_PROC_FS */
#if !defined(CONFIG_PROC_KCORE)
static inline void kclist_add(struct kcore_list *new, void *addr, size_t size)
{
}
#else
extern void kclist_add(struct kcore_list *, void *, size_t);
#endif
union proc_op {
int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
int (*proc_read)(struct task_struct *task, char *page);
int (*proc_show)(struct seq_file *m,
struct pid_namespace *ns, struct pid *pid,
struct task_struct *task);
};
struct proc_inode {
struct pid *pid;
int fd;
union proc_op op;
struct proc_dir_entry *pde;
struct inode vfs_inode;
};
static inline struct proc_inode *PROC_I(const struct inode *inode)
{
return container_of(inode, struct proc_inode, vfs_inode);
}
static inline struct proc_dir_entry *PDE(const struct inode *inode)
{
return PROC_I(inode)->pde;
}
static inline struct net *PDE_NET(struct proc_dir_entry *pde)
{
return pde->parent->data;
}
struct net *get_proc_net(const struct inode *inode);
struct proc_maps_private {
struct pid *pid;
struct task_struct *task;
#ifdef CONFIG_MMU
struct vm_area_struct *tail_vma;
#endif
};
#endif /* _LINUX_PROC_FS_H */