From 7c2bd9a39845bfb6d72ddb55ce737650271f6f96 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 30 Mar 2019 10:21:07 +0900 Subject: [PATCH 1/5] NFS: Forbid setting AF_INET6 to "struct sockaddr_in"->sin_family. syzbot is reporting uninitialized value at rpc_sockaddr2uaddr() [1]. This is because syzbot is setting AF_INET6 to "struct sockaddr_in"->sin_family (which is embedded into user-visible "struct nfs_mount_data" structure) despite nfs23_validate_mount_data() cannot pass sizeof(struct sockaddr_in6) bytes of AF_INET6 address to rpc_sockaddr2uaddr(). Since "struct nfs_mount_data" structure is user-visible, we can't change "struct nfs_mount_data" to use "struct sockaddr_storage". Therefore, assuming that everybody is using AF_INET family when passing address via "struct nfs_mount_data"->addr, reject if its sin_family is not AF_INET. [1] https://syzkaller.appspot.com/bug?id=599993614e7cbbf66bc2656a919ab2a95fb5d75c Reported-by: syzbot Signed-off-by: Tetsuo Handa Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 23790c7b2289..c27ac96a95bd 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2041,7 +2041,8 @@ static int nfs23_validate_mount_data(void *options, memcpy(sap, &data->addr, sizeof(data->addr)); args->nfs_server.addrlen = sizeof(data->addr); args->nfs_server.port = ntohs(data->addr.sin_port); - if (!nfs_verify_server_address(sap)) + if (sap->sa_family != AF_INET || + !nfs_verify_server_address(sap)) goto out_no_address; if (!(data->flags & NFS_MOUNT_TCP)) From 29e7ca715f2a0b6c0a99b1aec1b0956d1f271955 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 9 Apr 2019 10:44:16 -0400 Subject: [PATCH 2/5] NFS: Fix handling of reply page vector NFSv4 GETACL and FS_LOCATIONS requests stopped working in v5.1-rc. These two need the extra padding to be added directly to the reply length. Reported-by: Olga Kornievskaia Fixes: 02ef04e432ba ("NFS: Account for XDR pad of buf->pages") Signed-off-by: Chuck Lever Tested-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cfcabc33e24d..602446158bfb 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2589,7 +2589,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, ARRAY_SIZE(nfs4_acl_bitmap), &hdr); rpc_prepare_reply_pages(req, args->acl_pages, 0, - args->acl_len, replen); + args->acl_len, replen + 1); encode_nops(&hdr); } @@ -2811,7 +2811,7 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, } rpc_prepare_reply_pages(req, (struct page **)&args->page, 0, - PAGE_SIZE, replen); + PAGE_SIZE, replen + 1); encode_nops(&hdr); } From e1ede312f17e96a9c5cda9aaa1cdcf442c1a5da8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 9 Apr 2019 17:04:09 -0400 Subject: [PATCH 3/5] xprtrdma: Fix helper that drains the transport We want to drain only the RQ first. Otherwise the transport can deadlock on ->close if there are outstanding Send completions. Fixes: 6d2d0ee27c7a ("xprtrdma: Replace rpcrdma_receive_wq ... ") Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org # v5.0+ Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 89a63391d4d4..30cfc0efe699 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -90,7 +90,7 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) /* Flush Receives, then wait for deferred Reply work * to complete. */ - ib_drain_qp(ia->ri_id->qp); + ib_drain_rq(ia->ri_id->qp); drain_workqueue(buf->rb_completion_wq); /* Deferred Reply processing might have scheduled From 0769663b4f580566ef6cdf366f3073dbe8022c39 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 11 Apr 2019 14:34:18 -0400 Subject: [PATCH 4/5] NFSv4.1 fix incorrect return value in copy_file_range According to the NFSv4.2 spec if the input and output file is the same file, operation should fail with EINVAL. However, linux copy_file_range() system call has no such restrictions. Therefore, in such case let's return EOPNOTSUPP and allow VFS to fallback to doing do_splice_direct(). Also when copy_file_range is called on an NFSv4.0 or 4.1 mount (ie., a server that doesn't support COPY functionality), we also need to return EOPNOTSUPP and fallback to a regular copy. Fixes xfstest generic/075, generic/091, generic/112, generic/263 for all NFSv4.x versions. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 3 --- fs/nfs/nfs4file.c | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index ff6f85fb676b..5196bfa7894d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -329,9 +329,6 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, }; ssize_t err, err2; - if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY)) - return -EOPNOTSUPP; - src_lock = nfs_get_lock_context(nfs_file_open_context(src)); if (IS_ERR(src_lock)) return PTR_ERR(src_lock); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 45b2322e092d..00d17198ee12 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -133,8 +133,10 @@ static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, size_t count, unsigned int flags) { + if (!nfs_server_capable(file_inode(file_out), NFS_CAP_COPY)) + return -EOPNOTSUPP; if (file_inode(file_in) == file_inode(file_out)) - return -EINVAL; + return -EOPNOTSUPP; return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count); } From af6b61d7ef58099c82d854395a0e002be6bd036c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 11 Apr 2019 15:16:52 -0400 Subject: [PATCH 5/5] Revert "SUNRPC: Micro-optimise when the task is known not to be sleeping" This reverts commit 009a82f6437490c262584d65a14094a818bcb747. The ability to optimise here relies on compiler being able to optimise away tail calls to avoid stack overflows. Unfortunately, we are seeing reports of problems, so let's just revert. Reported-by: Daniel Mack Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 8 ------- net/sunrpc/clnt.c | 45 +++++++----------------------------- 2 files changed, 8 insertions(+), 45 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ec861cd0cfe8..52d41d0c1ae1 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -304,12 +304,4 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) } #endif /* CONFIG_SUNRPC_SWAP */ -static inline bool -rpc_task_need_resched(const struct rpc_task *task) -{ - if (RPC_IS_QUEUED(task) || task->tk_callback) - return true; - return false; -} - #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 187d10443a15..1d0395ef62c9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1540,7 +1540,6 @@ call_start(struct rpc_task *task) clnt->cl_stats->rpccnt++; task->tk_action = call_reserve; rpc_task_set_transport(task, clnt); - call_reserve(task); } /* @@ -1554,9 +1553,6 @@ call_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_reserve(task); - if (rpc_task_need_resched(task)) - return; - call_reserveresult(task); } static void call_retry_reserve(struct rpc_task *task); @@ -1579,7 +1575,6 @@ call_reserveresult(struct rpc_task *task) if (status >= 0) { if (task->tk_rqstp) { task->tk_action = call_refresh; - call_refresh(task); return; } @@ -1605,7 +1600,6 @@ call_reserveresult(struct rpc_task *task) /* fall through */ case -EAGAIN: /* woken up; retry */ task->tk_action = call_retry_reserve; - call_retry_reserve(task); return; case -EIO: /* probably a shutdown */ break; @@ -1628,9 +1622,6 @@ call_retry_reserve(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_reserveresult; xprt_retry_reserve(task); - if (rpc_task_need_resched(task)) - return; - call_reserveresult(task); } /* @@ -1645,9 +1636,6 @@ call_refresh(struct rpc_task *task) task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; rpcauth_refreshcred(task); - if (rpc_task_need_resched(task)) - return; - call_refreshresult(task); } /* @@ -1666,7 +1654,6 @@ call_refreshresult(struct rpc_task *task) case 0: if (rpcauth_uptodatecred(task)) { task->tk_action = call_allocate; - call_allocate(task); return; } /* Use rate-limiting and a max number of retries if refresh @@ -1685,7 +1672,6 @@ call_refreshresult(struct rpc_task *task) task->tk_cred_retry--; dprintk("RPC: %5u %s: retry refresh creds\n", task->tk_pid, __func__); - call_refresh(task); return; } dprintk("RPC: %5u %s: refresh creds failed with error %d\n", @@ -1711,10 +1697,8 @@ call_allocate(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_encode; - if (req->rq_buffer) { - call_encode(task); + if (req->rq_buffer) return; - } if (proc->p_proc != 0) { BUG_ON(proc->p_arglen == 0); @@ -1740,12 +1724,8 @@ call_allocate(struct rpc_task *task) status = xprt->ops->buf_alloc(task); xprt_inject_disconnect(xprt); - if (status == 0) { - if (rpc_task_need_resched(task)) - return; - call_encode(task); + if (status == 0) return; - } if (status != -ENOMEM) { rpc_exit(task, status); return; @@ -1828,8 +1808,12 @@ call_encode(struct rpc_task *task) xprt_request_enqueue_receive(task); xprt_request_enqueue_transmit(task); out: - task->tk_action = call_bind; - call_bind(task); + task->tk_action = call_transmit; + /* Check that the connection is OK */ + if (!xprt_bound(task->tk_xprt)) + task->tk_action = call_bind; + else if (!xprt_connected(task->tk_xprt)) + task->tk_action = call_connect; } /* @@ -1847,7 +1831,6 @@ rpc_task_handle_transmitted(struct rpc_task *task) { xprt_end_transmit(task); task->tk_action = call_transmit_status; - call_transmit_status(task); } /* @@ -1865,7 +1848,6 @@ call_bind(struct rpc_task *task) if (xprt_bound(xprt)) { task->tk_action = call_connect; - call_connect(task); return; } @@ -1896,7 +1878,6 @@ call_bind_status(struct rpc_task *task) dprint_status(task); task->tk_status = 0; task->tk_action = call_connect; - call_connect(task); return; } @@ -1981,7 +1962,6 @@ call_connect(struct rpc_task *task) if (xprt_connected(xprt)) { task->tk_action = call_transmit; - call_transmit(task); return; } @@ -2051,7 +2031,6 @@ call_connect_status(struct rpc_task *task) case 0: clnt->cl_stats->netreconn++; task->tk_action = call_transmit; - call_transmit(task); return; } rpc_exit(task, status); @@ -2087,9 +2066,6 @@ call_transmit(struct rpc_task *task) xprt_transmit(task); } xprt_end_transmit(task); - if (rpc_task_need_resched(task)) - return; - call_transmit_status(task); } /* @@ -2107,9 +2083,6 @@ call_transmit_status(struct rpc_task *task) if (rpc_task_transmitted(task)) { if (task->tk_status == 0) xprt_request_wait_receive(task); - if (rpc_task_need_resched(task)) - return; - call_status(task); return; } @@ -2170,7 +2143,6 @@ call_bc_encode(struct rpc_task *task) { xprt_request_enqueue_transmit(task); task->tk_action = call_bc_transmit; - call_bc_transmit(task); } /* @@ -2261,7 +2233,6 @@ call_status(struct rpc_task *task) status = task->tk_status; if (status >= 0) { task->tk_action = call_decode; - call_decode(task); return; }