kernel_optimize_test/net/9p/protocol.c
Aneesh Kumar K.V eeff66ef6e net/9p: Convert the in the 9p rpc call path to GFP_NOFS
Without this we can cause reclaim allocation in writepage.

[ 3433.448430] =================================
[ 3433.449117] [ INFO: inconsistent lock state ]
[ 3433.449117] 2.6.38-rc5+ #84
[ 3433.449117] ---------------------------------
[ 3433.449117] inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-R} usage.
[ 3433.449117] kswapd0/505 [HC0[0]:SC0[0]:HE1:SE1] takes:
[ 3433.449117]  (iprune_sem){+++++-}, at: [<ffffffff810ebbab>] shrink_icache_memory+0x45/0x2b1
[ 3433.449117] {RECLAIM_FS-ON-W} state was registered at:
[ 3433.449117]   [<ffffffff8107fe5f>] mark_held_locks+0x52/0x70
[ 3433.449117]   [<ffffffff8107ff02>] lockdep_trace_alloc+0x85/0x9f
[ 3433.449117]   [<ffffffff810d353d>] slab_pre_alloc_hook+0x18/0x3c
[ 3433.449117]   [<ffffffff810d3fd5>] kmem_cache_alloc+0x23/0xa2
[ 3433.449117]   [<ffffffff8127be77>] idr_pre_get+0x2d/0x6f
[ 3433.449117]   [<ffffffff815434eb>] p9_idpool_get+0x30/0xae
[ 3433.449117]   [<ffffffff81540123>] p9_client_rpc+0xd7/0x9b0
[ 3433.449117]   [<ffffffff815427b0>] p9_client_clunk+0x88/0xdb
[ 3433.449117]   [<ffffffff811d56e5>] v9fs_evict_inode+0x3c/0x48
[ 3433.449117]   [<ffffffff810eb511>] evict+0x1f/0x87
[ 3433.449117]   [<ffffffff810eb5c0>] dispose_list+0x47/0xe3
[ 3433.449117]   [<ffffffff810eb8da>] evict_inodes+0x138/0x14f
[ 3433.449117]   [<ffffffff810d90e2>] generic_shutdown_super+0x57/0xe8
[ 3433.449117]   [<ffffffff810d91e8>] kill_anon_super+0x11/0x50
[ 3433.449117]   [<ffffffff811d4951>] v9fs_kill_super+0x49/0xab
[ 3433.449117]   [<ffffffff810d926e>] deactivate_locked_super+0x21/0x46
[ 3433.449117]   [<ffffffff810d9e84>] deactivate_super+0x40/0x44
[ 3433.449117]   [<ffffffff810ef848>] mntput_no_expire+0x100/0x109
[ 3433.449117]   [<ffffffff810f0aeb>] sys_umount+0x2f1/0x31c
[ 3433.449117]   [<ffffffff8102c87b>] system_call_fastpath+0x16/0x1b
[ 3433.449117] irq event stamp: 192941
[ 3433.449117] hardirqs last  enabled at (192941): [<ffffffff81568dcf>] _raw_spin_unlock_irq+0x2b/0x30
[ 3433.449117] hardirqs last disabled at (192940): [<ffffffff810b5f97>] shrink_inactive_list+0x290/0x2f5
[ 3433.449117] softirqs last  enabled at (188470): [<ffffffff8105fd65>] __do_softirq+0x133/0x152
[ 3433.449117] softirqs last disabled at (188455): [<ffffffff8102d7cc>] call_softirq+0x1c/0x28
[ 3433.449117]
[ 3433.449117] other info that might help us debug this:
[ 3433.449117] 1 lock held by kswapd0/505:
[ 3433.449117]  #0:  (shrinker_rwsem){++++..}, at: [<ffffffff810b52e2>] shrink_slab+0x38/0x15f
[ 3433.449117]
[ 3433.449117] stack backtrace:
[ 3433.449117] Pid: 505, comm: kswapd0 Not tainted 2.6.38-rc5+ #84
[ 3433.449117] Call Trace:
[ 3433.449117]  [<ffffffff8107fbce>] ? valid_state+0x17e/0x191
[ 3433.449117]  [<ffffffff81036896>] ? save_stack_trace+0x28/0x45
[ 3433.449117]  [<ffffffff81080426>] ? check_usage_forwards+0x0/0x87
[ 3433.449117]  [<ffffffff8107fcf4>] ? mark_lock+0x113/0x22c
[ 3433.449117]  [<ffffffff8108105f>] ? __lock_acquire+0x37a/0xcf7
[ 3433.449117]  [<ffffffff8107fc0e>] ? mark_lock+0x2d/0x22c
[ 3433.449117]  [<ffffffff81081077>] ? __lock_acquire+0x392/0xcf7
[ 3433.449117]  [<ffffffff810b14d2>] ? determine_dirtyable_memory+0x15/0x28
[ 3433.449117]  [<ffffffff81081a33>] ? lock_acquire+0x57/0x6d
[ 3433.449117]  [<ffffffff810ebbab>] ? shrink_icache_memory+0x45/0x2b1
[ 3433.449117]  [<ffffffff81567d85>] ? down_read+0x47/0x5c
[ 3433.449117]  [<ffffffff810ebbab>] ? shrink_icache_memory+0x45/0x2b1
[ 3433.449117]  [<ffffffff810ebbab>] ? shrink_icache_memory+0x45/0x2b1
[ 3433.449117]  [<ffffffff810b5385>] ? shrink_slab+0xdb/0x15f
[ 3433.449117]  [<ffffffff810b69bc>] ? kswapd+0x574/0x96a
[ 3433.449117]  [<ffffffff810b6448>] ? kswapd+0x0/0x96a
[ 3433.449117]  [<ffffffff810714e2>] ? kthread+0x7d/0x85
[ 3433.449117]  [<ffffffff8102d6d4>] ? kernel_thread_helper+0x4/0x10
[ 3433.449117]  [<ffffffff81569200>] ? restore_args+0x0/0x30
[ 3433.449117]  [<ffffffff81071465>] ? kthread+0x0/0x85
[ 3433.449117]  [<ffffffff8102d6d0>] ? kernel_thread_helper+0x0/0x10

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
2011-03-22 15:43:35 -05:00

681 lines
14 KiB
C

/*
* net/9p/protocol.c
*
* 9P Protocol Support Code
*
* Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com>
*
* Base on code from Anthony Liguori <aliguori@us.ibm.com>
* Copyright (C) 2008 by IBM, Corp.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to:
* Free Software Foundation
* 51 Franklin Street, Fifth Floor
* Boston, MA 02111-1301 USA
*
*/
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
#include "protocol.h"
static int
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
#ifdef CONFIG_NET_9P_DEBUG
void
p9pdu_dump(int way, struct p9_fcall *pdu)
{
int i, n;
u8 *data = pdu->sdata;
int datalen = pdu->size;
char buf[255];
int buflen = 255;
i = n = 0;
if (datalen > (buflen-16))
datalen = buflen-16;
while (i < datalen) {
n += scnprintf(buf + n, buflen - n, "%02x ", data[i]);
if (i%4 == 3)
n += scnprintf(buf + n, buflen - n, " ");
if (i%32 == 31)
n += scnprintf(buf + n, buflen - n, "\n");
i++;
}
n += scnprintf(buf + n, buflen - n, "\n");
if (way)
P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf);
else
P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf);
}
#else
void
p9pdu_dump(int way, struct p9_fcall *pdu)
{
}
#endif
EXPORT_SYMBOL(p9pdu_dump);
void p9stat_free(struct p9_wstat *stbuf)
{
kfree(stbuf->name);
kfree(stbuf->uid);
kfree(stbuf->gid);
kfree(stbuf->muid);
kfree(stbuf->extension);
}
EXPORT_SYMBOL(p9stat_free);
static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
{
size_t len = min(pdu->size - pdu->offset, size);
memcpy(data, &pdu->sdata[pdu->offset], len);
pdu->offset += len;
return size - len;
}
static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
{
size_t len = min(pdu->capacity - pdu->size, size);
memcpy(&pdu->sdata[pdu->size], data, len);
pdu->size += len;
return size - len;
}
static size_t
pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
{
size_t len = min(pdu->capacity - pdu->size, size);
if (copy_from_user(&pdu->sdata[pdu->size], udata, len))
len = 0;
pdu->size += len;
return size - len;
}
static size_t
pdu_write_urw(struct p9_fcall *pdu, const char *kdata, const char __user *udata,
size_t size)
{
BUG_ON(pdu->size > P9_IOHDRSZ);
pdu->pubuf = (char __user *)udata;
pdu->pkbuf = (char *)kdata;
pdu->pbuf_size = size;
return 0;
}
static size_t
pdu_write_readdir(struct p9_fcall *pdu, const char *kdata, size_t size)
{
BUG_ON(pdu->size > P9_READDIRHDRSZ);
pdu->pkbuf = (char *)kdata;
pdu->pbuf_size = size;
return 0;
}
/*
b - int8_t
w - int16_t
d - int32_t
q - int64_t
s - string
S - stat
Q - qid
D - data blob (int32_t size followed by void *, results are not freed)
T - array of strings (int16_t count, followed by strings)
R - array of qids (int16_t count, followed by qids)
A - stat for 9p2000.L (p9_stat_dotl)
? - if optional = 1, continue parsing
*/
static int
p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_list ap)
{
const char *ptr;
int errcode = 0;
for (ptr = fmt; *ptr; ptr++) {
switch (*ptr) {
case 'b':{
int8_t *val = va_arg(ap, int8_t *);
if (pdu_read(pdu, val, sizeof(*val))) {
errcode = -EFAULT;
break;
}
}
break;
case 'w':{
int16_t *val = va_arg(ap, int16_t *);
__le16 le_val;
if (pdu_read(pdu, &le_val, sizeof(le_val))) {
errcode = -EFAULT;
break;
}
*val = le16_to_cpu(le_val);
}
break;
case 'd':{
int32_t *val = va_arg(ap, int32_t *);
__le32 le_val;
if (pdu_read(pdu, &le_val, sizeof(le_val))) {
errcode = -EFAULT;
break;
}
*val = le32_to_cpu(le_val);
}
break;
case 'q':{
int64_t *val = va_arg(ap, int64_t *);
__le64 le_val;
if (pdu_read(pdu, &le_val, sizeof(le_val))) {
errcode = -EFAULT;
break;
}
*val = le64_to_cpu(le_val);
}
break;
case 's':{
char **sptr = va_arg(ap, char **);
uint16_t len;
errcode = p9pdu_readf(pdu, proto_version,
"w", &len);
if (errcode)
break;
*sptr = kmalloc(len + 1, GFP_NOFS);
if (*sptr == NULL) {
errcode = -EFAULT;
break;
}
if (pdu_read(pdu, *sptr, len)) {
errcode = -EFAULT;
kfree(*sptr);
*sptr = NULL;
} else
(*sptr)[len] = 0;
}
break;
case 'Q':{
struct p9_qid *qid =
va_arg(ap, struct p9_qid *);
errcode = p9pdu_readf(pdu, proto_version, "bdq",
&qid->type, &qid->version,
&qid->path);
}
break;
case 'S':{
struct p9_wstat *stbuf =
va_arg(ap, struct p9_wstat *);
memset(stbuf, 0, sizeof(struct p9_wstat));
stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
-1;
errcode =
p9pdu_readf(pdu, proto_version,
"wwdQdddqssss?sddd",
&stbuf->size, &stbuf->type,
&stbuf->dev, &stbuf->qid,
&stbuf->mode, &stbuf->atime,
&stbuf->mtime, &stbuf->length,
&stbuf->name, &stbuf->uid,
&stbuf->gid, &stbuf->muid,
&stbuf->extension,
&stbuf->n_uid, &stbuf->n_gid,
&stbuf->n_muid);
if (errcode)
p9stat_free(stbuf);
}
break;
case 'D':{
uint32_t *count = va_arg(ap, uint32_t *);
void **data = va_arg(ap, void **);
errcode =
p9pdu_readf(pdu, proto_version, "d", count);
if (!errcode) {
*count =
min_t(uint32_t, *count,
pdu->size - pdu->offset);
*data = &pdu->sdata[pdu->offset];
}
}
break;
case 'T':{
int16_t *nwname = va_arg(ap, int16_t *);
char ***wnames = va_arg(ap, char ***);
errcode = p9pdu_readf(pdu, proto_version,
"w", nwname);
if (!errcode) {
*wnames =
kmalloc(sizeof(char *) * *nwname,
GFP_NOFS);
if (!*wnames)
errcode = -ENOMEM;
}
if (!errcode) {
int i;
for (i = 0; i < *nwname; i++) {
errcode =
p9pdu_readf(pdu,
proto_version,
"s",
&(*wnames)[i]);
if (errcode)
break;
}
}
if (errcode) {
if (*wnames) {
int i;
for (i = 0; i < *nwname; i++)
kfree((*wnames)[i]);
}
kfree(*wnames);
*wnames = NULL;
}
}
break;
case 'R':{
int16_t *nwqid = va_arg(ap, int16_t *);
struct p9_qid **wqids =
va_arg(ap, struct p9_qid **);
*wqids = NULL;
errcode =
p9pdu_readf(pdu, proto_version, "w", nwqid);
if (!errcode) {
*wqids =
kmalloc(*nwqid *
sizeof(struct p9_qid),
GFP_NOFS);
if (*wqids == NULL)
errcode = -ENOMEM;
}
if (!errcode) {
int i;
for (i = 0; i < *nwqid; i++) {
errcode =
p9pdu_readf(pdu,
proto_version,
"Q",
&(*wqids)[i]);
if (errcode)
break;
}
}
if (errcode) {
kfree(*wqids);
*wqids = NULL;
}
}
break;
case 'A': {
struct p9_stat_dotl *stbuf =
va_arg(ap, struct p9_stat_dotl *);
memset(stbuf, 0, sizeof(struct p9_stat_dotl));
errcode =
p9pdu_readf(pdu, proto_version,
"qQdddqqqqqqqqqqqqqqq",
&stbuf->st_result_mask,
&stbuf->qid,
&stbuf->st_mode,
&stbuf->st_uid, &stbuf->st_gid,
&stbuf->st_nlink,
&stbuf->st_rdev, &stbuf->st_size,
&stbuf->st_blksize, &stbuf->st_blocks,
&stbuf->st_atime_sec,
&stbuf->st_atime_nsec,
&stbuf->st_mtime_sec,
&stbuf->st_mtime_nsec,
&stbuf->st_ctime_sec,
&stbuf->st_ctime_nsec,
&stbuf->st_btime_sec,
&stbuf->st_btime_nsec,
&stbuf->st_gen,
&stbuf->st_data_version);
}
break;
case '?':
if ((proto_version != p9_proto_2000u) &&
(proto_version != p9_proto_2000L))
return 0;
break;
default:
BUG();
break;
}
if (errcode)
break;
}
return errcode;
}
int
p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_list ap)
{
const char *ptr;
int errcode = 0;
for (ptr = fmt; *ptr; ptr++) {
switch (*ptr) {
case 'b':{
int8_t val = va_arg(ap, int);
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 'w':{
__le16 val = cpu_to_le16(va_arg(ap, int));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 'd':{
__le32 val = cpu_to_le32(va_arg(ap, int32_t));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 'q':{
__le64 val = cpu_to_le64(va_arg(ap, int64_t));
if (pdu_write(pdu, &val, sizeof(val)))
errcode = -EFAULT;
}
break;
case 's':{
const char *sptr = va_arg(ap, const char *);
uint16_t len = 0;
if (sptr)
len = min_t(uint16_t, strlen(sptr),
USHRT_MAX);
errcode = p9pdu_writef(pdu, proto_version,
"w", len);
if (!errcode && pdu_write(pdu, sptr, len))
errcode = -EFAULT;
}
break;
case 'Q':{
const struct p9_qid *qid =
va_arg(ap, const struct p9_qid *);
errcode =
p9pdu_writef(pdu, proto_version, "bdq",
qid->type, qid->version,
qid->path);
} break;
case 'S':{
const struct p9_wstat *stbuf =
va_arg(ap, const struct p9_wstat *);
errcode =
p9pdu_writef(pdu, proto_version,
"wwdQdddqssss?sddd",
stbuf->size, stbuf->type,
stbuf->dev, &stbuf->qid,
stbuf->mode, stbuf->atime,
stbuf->mtime, stbuf->length,
stbuf->name, stbuf->uid,
stbuf->gid, stbuf->muid,
stbuf->extension, stbuf->n_uid,
stbuf->n_gid, stbuf->n_muid);
} break;
case 'D':{
uint32_t count = va_arg(ap, uint32_t);
const void *data = va_arg(ap, const void *);
errcode = p9pdu_writef(pdu, proto_version, "d",
count);
if (!errcode && pdu_write(pdu, data, count))
errcode = -EFAULT;
}
break;
case 'E':{
int32_t cnt = va_arg(ap, int32_t);
const char *k = va_arg(ap, const void *);
const char *u = va_arg(ap, const void *);
errcode = p9pdu_writef(pdu, proto_version, "d",
cnt);
if (!errcode && pdu_write_urw(pdu, k, u, cnt))
errcode = -EFAULT;
}
break;
case 'F':{
int32_t cnt = va_arg(ap, int32_t);
const char *k = va_arg(ap, const void *);
errcode = p9pdu_writef(pdu, proto_version, "d",
cnt);
if (!errcode && pdu_write_readdir(pdu, k, cnt))
errcode = -EFAULT;
}
break;
case 'U':{
int32_t count = va_arg(ap, int32_t);
const char __user *udata =
va_arg(ap, const void __user *);
errcode = p9pdu_writef(pdu, proto_version, "d",
count);
if (!errcode && pdu_write_u(pdu, udata, count))
errcode = -EFAULT;
}
break;
case 'T':{
int16_t nwname = va_arg(ap, int);
const char **wnames = va_arg(ap, const char **);
errcode = p9pdu_writef(pdu, proto_version, "w",
nwname);
if (!errcode) {
int i;
for (i = 0; i < nwname; i++) {
errcode =
p9pdu_writef(pdu,
proto_version,
"s",
wnames[i]);
if (errcode)
break;
}
}
}
break;
case 'R':{
int16_t nwqid = va_arg(ap, int);
struct p9_qid *wqids =
va_arg(ap, struct p9_qid *);
errcode = p9pdu_writef(pdu, proto_version, "w",
nwqid);
if (!errcode) {
int i;
for (i = 0; i < nwqid; i++) {
errcode =
p9pdu_writef(pdu,
proto_version,
"Q",
&wqids[i]);
if (errcode)
break;
}
}
}
break;
case 'I':{
struct p9_iattr_dotl *p9attr = va_arg(ap,
struct p9_iattr_dotl *);
errcode = p9pdu_writef(pdu, proto_version,
"ddddqqqqq",
p9attr->valid,
p9attr->mode,
p9attr->uid,
p9attr->gid,
p9attr->size,
p9attr->atime_sec,
p9attr->atime_nsec,
p9attr->mtime_sec,
p9attr->mtime_nsec);
}
break;
case '?':
if ((proto_version != p9_proto_2000u) &&
(proto_version != p9_proto_2000L))
return 0;
break;
default:
BUG();
break;
}
if (errcode)
break;
}
return errcode;
}
int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
{
va_list ap;
int ret;
va_start(ap, fmt);
ret = p9pdu_vreadf(pdu, proto_version, fmt, ap);
va_end(ap);
return ret;
}
static int
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)
{
va_list ap;
int ret;
va_start(ap, fmt);
ret = p9pdu_vwritef(pdu, proto_version, fmt, ap);
va_end(ap);
return ret;
}
int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version)
{
struct p9_fcall fake_pdu;
int ret;
fake_pdu.size = len;
fake_pdu.capacity = len;
fake_pdu.sdata = buf;
fake_pdu.offset = 0;
ret = p9pdu_readf(&fake_pdu, proto_version, "S", st);
if (ret) {
P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret);
p9pdu_dump(1, &fake_pdu);
}
return ret;
}
EXPORT_SYMBOL(p9stat_read);
int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)
{
pdu->id = type;
return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);
}
int p9pdu_finalize(struct p9_fcall *pdu)
{
int size = pdu->size;
int err;
pdu->size = 0;
err = p9pdu_writef(pdu, 0, "d", size);
pdu->size = size;
#ifdef CONFIG_NET_9P_DEBUG
if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT)
p9pdu_dump(0, pdu);
#endif
P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size,
pdu->id, pdu->tag);
return err;
}
void p9pdu_reset(struct p9_fcall *pdu)
{
pdu->offset = 0;
pdu->size = 0;
pdu->private = NULL;
pdu->pubuf = NULL;
pdu->pkbuf = NULL;
pdu->pbuf_size = 0;
}
int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
int proto_version)
{
struct p9_fcall fake_pdu;
int ret;
char *nameptr;
fake_pdu.size = len;
fake_pdu.capacity = len;
fake_pdu.sdata = buf;
fake_pdu.offset = 0;
ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid,
&dirent->d_off, &dirent->d_type, &nameptr);
if (ret) {
P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
p9pdu_dump(1, &fake_pdu);
goto out;
}
strcpy(dirent->d_name, nameptr);
out:
return fake_pdu.offset;
}
EXPORT_SYMBOL(p9dirent_read);