From 74759e1693311a8d1441de836c4080c192374238 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 28 Jan 2020 20:45:54 -0800 Subject: [PATCH 01/15] MAINTAINERS: mptcp@ mailing list is moderated Note that mptcp@lists.01.org is moderated, like we note for other mailing lists. Signed-off-by: Randy Dunlap Cc: Mat Martineau Cc: Matthieu Baerts Cc: netdev@vger.kernel.org Cc: mptcp@lists.01.org Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index d51ce1a0a817..58e4eb554d0e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11641,7 +11641,7 @@ NETWORKING [MPTCP] M: Mat Martineau M: Matthieu Baerts L: netdev@vger.kernel.org -L: mptcp@lists.01.org +L: mptcp@lists.01.org (moderated for non-subscribers) W: https://github.com/multipath-tcp/mptcp_net-next/wiki B: https://github.com/multipath-tcp/mptcp_net-next/issues S: Maintained From f6f7d8cf55348751cc8a5f40c8f9835ee1752df4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 29 Jan 2020 10:39:23 +0100 Subject: [PATCH 02/15] mptcp: Fix build with PROC_FS disabled. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/mptcp/subflow.c: In function ‘mptcp_subflow_create_socket’: net/mptcp/subflow.c:624:25: error: ‘struct netns_core’ has no member named ‘sock_inuse’ Reported-by: Randy Dunlap Signed-off-by: David S. 
Miller --- net/mptcp/subflow.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1662e1178949..205dca1c30b7 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -621,7 +621,9 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) */ sf->sk->sk_net_refcnt = 1; get_net(net); +#ifdef CONFIG_PROC_FS this_cpu_add(*net->core.sock_inuse, 1); +#endif err = tcp_set_ulp(sf->sk, "mptcp"); release_sock(sf->sk); From 793da4bfba7b8a785d38662f57fbcb2252b6f904 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 28 Jan 2020 11:12:03 -0800 Subject: [PATCH 03/15] sch_choke: Use kvcalloc Convert the use of kvmalloc_array with __GFP_ZERO to the equivalent kvcalloc. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/sched/sch_choke.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index dba70377bbd9..a36974e9c601 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -377,7 +377,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, if (mask != q->tab_mask) { struct sk_buff **ntab; - ntab = kvmalloc_array((mask + 1), sizeof(struct sk_buff *), GFP_KERNEL | __GFP_ZERO); + ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); if (!ntab) return -ENOMEM; From 44efc78d0e464ce70b45b165c005f8bedc17952e Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 29 Jan 2020 12:50:53 +0100 Subject: [PATCH 04/15] net: mvneta: fix XDP support if sw bm is used as fallback In order to fix XDP support if sw buffer management is used as fallback for hw bm devices, define MVNETA_SKB_HEADROOM as the maximum of XDP_PACKET_HEADROOM and NET_SKB_PAD and let the hw align the IP header to a 4-byte boundary. 
Fix rx_offset_correction initialization if mvneta_bm_port_init fails in mvneta_resume routine Fixes: 0db51da7a8e9 ("net: mvneta: add basic XDP support") Tested-by: Sven Auhagen Signed-off-by: Lorenzo Bianconi Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 2dfbfdff45a8..037e054b01a2 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -324,8 +324,7 @@ ETH_HLEN + ETH_FCS_LEN, \ cache_line_size()) -#define MVNETA_SKB_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \ - NET_IP_ALIGN) +#define MVNETA_SKB_HEADROOM max(XDP_PACKET_HEADROOM, NET_SKB_PAD) #define MVNETA_SKB_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \ MVNETA_SKB_HEADROOM)) #define MVNETA_SKB_SIZE(len) (SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD) @@ -1167,6 +1166,7 @@ static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu) mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, 1 << pp->id); pp->bm_priv = NULL; + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; mvreg_write(pp, MVNETA_ACC_MODE, MVNETA_ACC_MODE_EXT1); netdev_info(pp->dev, "fail to update MTU, fall back to software BM\n"); } @@ -4948,7 +4948,6 @@ static int mvneta_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); pp->id = global_port_id++; - pp->rx_offset_correction = MVNETA_SKB_HEADROOM; /* Obtain access to BM resources if enabled and already initialized */ bm_node = of_parse_phandle(dn, "buffer-manager", 0); @@ -4973,6 +4972,10 @@ static int mvneta_probe(struct platform_device *pdev) } of_node_put(bm_node); + /* sw buffer management */ + if (!pp->bm_priv) + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; + err = mvneta_init(&pdev->dev, pp); if (err < 0) goto err_netdev; @@ -5130,6 +5133,7 @@ static int mvneta_resume(struct device *device) err = mvneta_bm_port_init(pdev, pp); if (err < 0) 
{ dev_info(&pdev->dev, "use SW buffer management\n"); + pp->rx_offset_correction = MVNETA_SKB_HEADROOM; pp->bm_priv = NULL; } } From c9fd9c5f4b935516ba8be31f2e7590caf0bc2c6f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 29 Jan 2020 15:54:43 +0100 Subject: [PATCH 05/15] mptcp: defer freeing of cached ext until last moment Access to msk->cached_ext is only legal if the msk is locked or all concurrent accesses are impossible. Furthermore, once we start to tear down, we must make sure nothing else can step in and allocate a new cached ext. So place this code in the destroy callback where it belongs. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 39fdca79ce90..f1b1160dbbb4 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -654,8 +654,6 @@ static void __mptcp_close(struct sock *sk, long timeout) __mptcp_close_ssk(sk, ssk, subflow, timeout); } - if (msk->cached_ext) - __skb_ext_put(msk->cached_ext); release_sock(sk); sk_common_release(sk); } @@ -776,6 +774,10 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, static void mptcp_destroy(struct sock *sk) { + struct mptcp_sock *msk = mptcp_sk(sk); + + if (msk->cached_ext) + __skb_ext_put(msk->cached_ext); } static int mptcp_setsockopt(struct sock *sk, int level, int optname, From 50e741bb3b15fd5410bac379bdb4bc795d62b45c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 29 Jan 2020 15:54:44 +0100 Subject: [PATCH 06/15] mptcp: fix panic on user pointer access It's not possible to call the kernel_(s|g)etsockopt functions here, the address points to user memory: General protection fault in user access. Non-canonical address? WARNING: CPU: 1 PID: 5352 at arch/x86/mm/extable.c:77 ex_handler_uaccess+0xba/0xe0 arch/x86/mm/extable.c:77 Kernel panic - not syncing: panic_on_warn set ... [..] 
Call Trace: fixup_exception+0x9d/0xcd arch/x86/mm/extable.c:178 general_protection+0x2d/0x40 arch/x86/entry/entry_64.S:1202 do_ip_getsockopt+0x1f6/0x1860 net/ipv4/ip_sockglue.c:1323 ip_getsockopt+0x87/0x1c0 net/ipv4/ip_sockglue.c:1561 tcp_getsockopt net/ipv4/tcp.c:3691 [inline] tcp_getsockopt+0x8c/0xd0 net/ipv4/tcp.c:3685 kernel_getsockopt+0x121/0x1f0 net/socket.c:3736 mptcp_getsockopt+0x69/0x90 net/mptcp/protocol.c:830 __sys_getsockopt+0x13a/0x220 net/socket.c:2175 We can call tcp_get/setsockopt functions instead. Doing so fixes crashing, but still leaves rtnl related lockdep splat: WARNING: possible circular locking dependency detected 5.5.0-rc6 #2 Not tainted ------------------------------------------------------ syz-executor.0/16334 is trying to acquire lock: ffffffff84f7a080 (rtnl_mutex){+.+.}, at: do_ip_setsockopt.isra.0+0x277/0x3820 net/ipv4/ip_sockglue.c:644 but task is already holding lock: ffff888116503b90 (sk_lock-AF_INET){+.+.}, at: lock_sock include/net/sock.h:1516 [inline] ffff888116503b90 (sk_lock-AF_INET){+.+.}, at: mptcp_setsockopt+0x28/0x90 net/mptcp/protocol.c:1284 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (sk_lock-AF_INET){+.+.}: lock_sock_nested+0xca/0x120 net/core/sock.c:2944 lock_sock include/net/sock.h:1516 [inline] do_ip_setsockopt.isra.0+0x281/0x3820 net/ipv4/ip_sockglue.c:645 ip_setsockopt+0x44/0xf0 net/ipv4/ip_sockglue.c:1248 udp_setsockopt+0x5d/0xa0 net/ipv4/udp.c:2639 __sys_setsockopt+0x152/0x240 net/socket.c:2130 __do_sys_setsockopt net/socket.c:2146 [inline] __se_sys_setsockopt net/socket.c:2143 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2143 do_syscall_64+0xbd/0x5b0 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (rtnl_mutex){+.+.}: check_prev_add kernel/locking/lockdep.c:2475 [inline] check_prevs_add kernel/locking/lockdep.c:2580 [inline] validate_chain kernel/locking/lockdep.c:2970 [inline] __lock_acquire+0x1fb2/0x4680 kernel/locking/lockdep.c:3954 lock_acquire+0x127/0x330 kernel/locking/lockdep.c:4484 __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x158/0x1340 kernel/locking/mutex.c:1103 do_ip_setsockopt.isra.0+0x277/0x3820 net/ipv4/ip_sockglue.c:644 ip_setsockopt+0x44/0xf0 net/ipv4/ip_sockglue.c:1248 tcp_setsockopt net/ipv4/tcp.c:3159 [inline] tcp_setsockopt+0x8c/0xd0 net/ipv4/tcp.c:3153 kernel_setsockopt+0x121/0x1f0 net/socket.c:3767 mptcp_setsockopt+0x69/0x90 net/mptcp/protocol.c:1288 __sys_setsockopt+0x152/0x240 net/socket.c:2130 __do_sys_setsockopt net/socket.c:2146 [inline] __se_sys_setsockopt net/socket.c:2143 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2143 do_syscall_64+0xbd/0x5b0 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sk_lock-AF_INET); lock(rtnl_mutex); lock(sk_lock-AF_INET); lock(rtnl_mutex); The lockdep complaint is because we hold mptcp socket lock when calling the sk_prot get/setsockopt handler, and those might need to acquire the rtnl mutex. 
Normally, order is: rtnl_lock(sk) -> lock_sock Whereas for mptcp the order is lock_sock(mptcp_sk) rtnl_lock -> lock_sock(subflow_sk) We can avoid this by releasing the mptcp socket lock early, but, as Paolo points out, we need to get/put the subflow socket refcount before doing so to avoid race with concurrent close(). Fixes: 717e79c867ca5 ("mptcp: Add setsockopt()/getsockopt() socket operations") Reported-by: Christoph Paasch Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f1b1160dbbb4..07ebff6396cd 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -781,15 +781,12 @@ static void mptcp_destroy(struct sock *sk) } static int mptcp_setsockopt(struct sock *sk, int level, int optname, - char __user *uoptval, unsigned int optlen) + char __user *optval, unsigned int optlen) { struct mptcp_sock *msk = mptcp_sk(sk); - char __kernel *optval; int ret = -EOPNOTSUPP; struct socket *ssock; - - /* will be treated as __user in tcp_setsockopt */ - optval = (char __kernel __force *)uoptval; + struct sock *ssk; pr_debug("msk=%p", msk); @@ -798,27 +795,28 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (!IS_ERR(ssock)) { - pr_debug("subflow=%p", ssock->sk); - ret = kernel_setsockopt(ssock, level, optname, optval, optlen); + if (IS_ERR(ssock)) { + release_sock(sk); + return ret; } + + ssk = ssock->sk; + sock_hold(ssk); release_sock(sk); + ret = tcp_setsockopt(ssk, level, optname, optval, optlen); + sock_put(ssk); + return ret; } static int mptcp_getsockopt(struct sock *sk, int level, int optname, - char __user *uoptval, int __user *uoption) + char __user *optval, int __user *option) { struct mptcp_sock *msk = mptcp_sk(sk); - char __kernel *optval; int ret = -EOPNOTSUPP; - int 
__kernel *option; struct socket *ssock; - - /* will be treated as __user in tcp_getsockopt */ - optval = (char __kernel __force *)uoptval; - option = (int __kernel __force *)uoption; + struct sock *ssk; pr_debug("msk=%p", msk); @@ -827,12 +825,18 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, */ lock_sock(sk); ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (!IS_ERR(ssock)) { - pr_debug("subflow=%p", ssock->sk); - ret = kernel_getsockopt(ssock, level, optname, optval, option); + if (IS_ERR(ssock)) { + release_sock(sk); + return ret; } + + ssk = ssock->sk; + sock_hold(ssk); release_sock(sk); + ret = tcp_getsockopt(ssk, level, optname, optval, option); + sock_put(ssk); + return ret; } From b2c5b614ca6ed460144788e7f9634569cc0c7b51 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 29 Jan 2020 15:54:45 +0100 Subject: [PATCH 07/15] mptcp: avoid a lockdep splat when mcast group was joined syzbot triggered following lockdep splat: ffffffff82d2cd40 (rtnl_mutex){+.+.}, at: ip_mc_drop_socket+0x52/0x180 but task is already holding lock: ffff8881187a2310 (sk_lock-AF_INET){+.+.}, at: mptcp_close+0x18/0x30 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (sk_lock-AF_INET){+.+.}: lock_acquire+0xee/0x230 lock_sock_nested+0x89/0xc0 do_ip_setsockopt.isra.0+0x335/0x22f0 ip_setsockopt+0x35/0x60 tcp_setsockopt+0x5d/0x90 __sys_setsockopt+0xf3/0x190 __x64_sys_setsockopt+0x61/0x70 do_syscall_64+0x72/0x300 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (rtnl_mutex){+.+.}: check_prevs_add+0x2b7/0x1210 __lock_acquire+0x10b6/0x1400 lock_acquire+0xee/0x230 __mutex_lock+0x120/0xc70 ip_mc_drop_socket+0x52/0x180 inet_release+0x36/0xe0 __sock_release+0xfd/0x130 __mptcp_close+0xa8/0x1f0 inet_release+0x7f/0xe0 __sock_release+0x69/0x130 sock_close+0x18/0x20 __fput+0x179/0x400 task_work_run+0xd5/0x110 do_exit+0x685/0x1510 do_group_exit+0x7e/0x170 __x64_sys_exit_group+0x28/0x30 do_syscall_64+0x72/0x300 entry_SYSCALL_64_after_hwframe+0x49/0xbe The trigger is: socket(AF_INET, SOCK_STREAM, 0x106 /* IPPROTO_MPTCP */) = 4 setsockopt(4, SOL_IP, MCAST_JOIN_GROUP, {gr_interface=7, gr_group={sa_family=AF_INET, sin_port=htons(20003), sin_addr=inet_addr("224.0.0.2")}}, 136) = 0 exit(0) Which results in a call to rtnl_lock while we are holding the parent mptcp socket lock via mptcp_close -> lock_sock(msk) -> inet_release -> ip_mc_drop_socket -> rtnl_lock(). From a lockdep point of view we thus have both 'rtnl_lock; lock_sock' and 'lock_sock; rtnl_lock'. Fix this by stealing the msk conn_list and doing the subflow close without holding the msk lock. Fixes: cec37a6e41aae7bf ("mptcp: Handle MP_CAPABLE options for outgoing connections") Reported-by: Christoph Paasch Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 07ebff6396cd..73c192d8c158 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -644,17 +644,21 @@ static void __mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); + LIST_HEAD(conn_list); mptcp_token_destroy(msk->token); inet_sk_state_store(sk, TCP_CLOSE); - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { + list_splice_init(&msk->conn_list, &conn_list); + + release_sock(sk); + + list_for_each_entry_safe(subflow, tmp, &conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); __mptcp_close_ssk(sk, ssk, subflow, timeout); } - release_sock(sk); sk_common_release(sk); } From ae2dd7164943e03644293af92802550d052632e6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 29 Jan 2020 15:54:46 +0100 Subject: [PATCH 08/15] mptcp: handle tcp fallback when using syn cookies We can't deal with syncookie mode yet, the syncookie rx path will create tcp reqsk, i.e. we get OOB access because we treat tcp reqsk as mptcp reqsk one: TCP: SYN flooding on port 20002. Sending cookies. BUG: KASAN: slab-out-of-bounds in subflow_syn_recv_sock+0x451/0x4d0 net/mptcp/subflow.c:191 Read of size 1 at addr ffff8881167bc148 by task syz-executor099/2120 subflow_syn_recv_sock+0x451/0x4d0 net/mptcp/subflow.c:191 tcp_get_cookie_sock+0xcf/0x520 net/ipv4/syncookies.c:209 cookie_v6_check+0x15a5/0x1e90 net/ipv6/syncookies.c:252 tcp_v6_cookie_check net/ipv6/tcp_ipv6.c:1123 [inline] [..] Bug can be reproduced via "sysctl net.ipv4.tcp_syncookies=2". Note that MPTCP should work with syncookies (4th ack would carry needed state), but it appears better to sort that out in -next so do tcp fallback for now. 
I removed the MPTCP ifdef for tcp_rsk "is_mptcp" member because if (IS_ENABLED()) is easier to read than "#ifdef IS_ENABLED()/#endif" pair. Cc: Eric Dumazet Fixes: cec37a6e41aae7bf ("mptcp: Handle MP_CAPABLE options for outgoing connections") Reported-by: Christoph Paasch Tested-by: Christoph Paasch Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 -- net/ipv4/syncookies.c | 4 ++++ net/ipv4/tcp_input.c | 3 +++ net/ipv6/syncookies.c | 3 +++ net/mptcp/subflow.c | 5 ++++- 5 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1cf73e6f85ca..3dc964010fef 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -148,9 +148,7 @@ struct tcp_request_sock { const struct tcp_request_sock_ops *af_specific; u64 snt_synack; /* first SYNACK sent time */ bool tfo_listener; -#if IS_ENABLED(CONFIG_MPTCP) bool is_mptcp; -#endif u32 txhash; u32 rcv_isn; u32 snt_isn; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 345b2b0ff618..9a4f6b16c9bc 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -349,6 +349,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) req->ts_recent = tcp_opt.saw_tstamp ? 
tcp_opt.rcv_tsval : 0; treq->snt_synack = 0; treq->tfo_listener = false; + + if (IS_ENABLED(CONFIG_MPTCP)) + treq->is_mptcp = 0; + if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e8b840a4767e..e325b4506e25 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6637,6 +6637,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, af_ops->init_req(req, sk, skb); + if (IS_ENABLED(CONFIG_MPTCP) && want_cookie) + tcp_rsk(req)->is_mptcp = 0; + if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 30915f6f31e3..13235a012388 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -178,6 +178,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->tfo_listener = false; + if (IS_ENABLED(CONFIG_MPTCP)) + treq->is_mptcp = 0; + if (security_inet_conn_request(sk, skb, req)) goto out_free; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 205dca1c30b7..c90c0e6ffb82 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -186,6 +186,9 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); + if (tcp_rsk(req)->is_mptcp == 0) + goto create_child; + /* if the sk is MP_CAPABLE, we try to fetch the client key */ subflow_req = mptcp_subflow_rsk(req); if (subflow_req->mp_capable) { @@ -769,7 +772,7 @@ static void subflow_ulp_clone(const struct request_sock *req, struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk); struct mptcp_subflow_context *new_ctx; - if (!subflow_req->mp_capable) { + if (!tcp_rsk(req)->is_mptcp || !subflow_req->mp_capable) { subflow_ulp_fallback(newsk, old_ctx); return; } From ccd1f27368e42a11117cf1bdb113cc640802f91e Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Wed, 29 Jan 2020 09:41:37 -0800 Subject: [PATCH 09/15] Revert "MAINTAINERS: mptcp@ 
mailing list is moderated" This reverts commit 74759e1693311a8d1441de836c4080c192374238. mptcp@lists.01.org accepts messages from non-subscribers. There was an invisible and unexpected server-wide rule limiting the number of recipients for subscribers and non-subscribers alike, and that has now been turned off for this list. Cc: Randy Dunlap Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 58e4eb554d0e..d51ce1a0a817 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11641,7 +11641,7 @@ NETWORKING [MPTCP] M: Mat Martineau M: Matthieu Baerts L: netdev@vger.kernel.org -L: mptcp@lists.01.org (moderated for non-subscribers) +L: mptcp@lists.01.org W: https://github.com/multipath-tcp/mptcp_net-next/wiki B: https://github.com/multipath-tcp/mptcp_net-next/issues S: Maintained From 8e1974a2a02bf5b77831c686f01761163ea0cd3e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 29 Jan 2020 19:01:17 +0100 Subject: [PATCH 10/15] mptcp: Fix incorrect IPV6 dependency check If CONFIG_MPTCP=y, CONFIG_MPTCP_IPV6=n, and CONFIG_IPV6=m: net/mptcp/protocol.o: In function `__mptcp_tcp_fallback': protocol.c:(.text+0x786): undefined reference to `inet6_stream_ops' Fix this by checking for CONFIG_MPTCP_IPV6 instead of CONFIG_IPV6, like is done in all other places in the mptcp code. Fixes: 8ab183deb26a3b79 ("mptcp: cope with later TCP fallback") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mat Martineau Signed-off-by: David S. 
Miller --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 73c192d8c158..3bccee455688 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -28,7 +28,7 @@ static void __mptcp_close(struct sock *sk, long timeout); static const struct proto_ops *tcp_proto_ops(struct sock *sk) { -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_MPTCP_IPV6) if (sk->sk_family == AF_INET6) return &inet6_stream_ops; #endif From 389b8fb3c4befcc42edbc6d40525c6cb7704daf8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 29 Jan 2020 19:02:24 +0100 Subject: [PATCH 11/15] mptcp: MPTCP_HMAC_TEST should depend on MPTCP As the MPTCP HMAC test is integrated into the MPTCP code, it can be built only when MPTCP is enabled. Hence when MPTCP is disabled, asking the user if the test code should be enabled is futile. Wrap the whole block of MPTCP-specific config options inside a check for MPTCP. While at it, drop the "default n" for MPTCP_HMAC_TEST, as that is the default anyway. Fixes: 65492c5a6ab5df50 ("mptcp: move from sha1 (v0) to sha256 (v1)") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 5db56d2218c5..49f6054e7f4e 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -10,17 +10,19 @@ config MPTCP uses the TCP protocol, and TCP options carry header information for MPTCP. +if MPTCP + config MPTCP_IPV6 bool "MPTCP: IPv6 support for Multipath TCP" - depends on MPTCP select IPV6 default y config MPTCP_HMAC_TEST bool "Tests for MPTCP HMAC implementation" - default n help This option enable boot time self-test for the HMAC implementation used by the MPTCP code Say N if you are unsure. 
+ +endif From d0208bf4da97f76237300afb83c097de25645de6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 29 Jan 2020 15:20:17 -0500 Subject: [PATCH 12/15] udp: document udp_rcv_segment special case for looped packets Commit 6cd021a58c18a ("udp: segment looped gso packets correctly") fixes an issue with rare udp gso multicast packets looped onto the receive path. The stable backport makes the narrowest change to target only these packets, when needed. As opposed to, say, expanding __udp_gso_segment, which is harder to reason to be free from unintended side-effects. But the resulting code is hardly self-describing. Document its purpose and rationale. Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/udp.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/net/udp.h b/include/net/udp.h index 4a180f2a13e3..e55d5f765807 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -476,6 +476,13 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, if (!inet_get_convert_csum(sk)) features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + /* UDP segmentation expects packets of type CHECKSUM_PARTIAL or + * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial + * packets in udp_gro_complete_segment. As does UDP GSO, verified by + * udp_send_skb. But when those packets are looped in dev_loopback_xmit + * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this + * specific case, where PARTIAL is both correct and required. + */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; From 72d62c4e422ed2380a2acd51747789b48146e4ee Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 28 Jan 2020 11:02:50 -0800 Subject: [PATCH 13/15] net: drop_monitor: Use kstrdup Convert the equivalent but rather odd uses of kmemdup with __GFP_ZERO to the more common kstrdup and avoid unnecessary zeroing of copied over memory. Signed-off-by: Joe Perches Signed-off-by: David S. 
Miller --- net/core/drop_monitor.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 536e032d95c8..ea46fc6aa883 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -802,16 +802,12 @@ net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata) if (!n_hw_metadata) return NULL; - trap_group_name = kmemdup(hw_metadata->trap_group_name, - strlen(hw_metadata->trap_group_name) + 1, - GFP_ATOMIC | __GFP_ZERO); + trap_group_name = kstrdup(hw_metadata->trap_group_name, GFP_ATOMIC); if (!trap_group_name) goto free_hw_metadata; n_hw_metadata->trap_group_name = trap_group_name; - trap_name = kmemdup(hw_metadata->trap_name, - strlen(hw_metadata->trap_name) + 1, - GFP_ATOMIC | __GFP_ZERO); + trap_name = kstrdup(hw_metadata->trap_name, GFP_ATOMIC); if (!trap_name) goto free_trap_group; n_hw_metadata->trap_name = trap_name; From 31484d56ca9511e62c08626db338d2aaafb3aa3f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Jan 2020 10:45:26 +0100 Subject: [PATCH 14/15] mptcp: Fix undefined mptcp_handle_ipv6_mapped for modular IPV6 If CONFIG_MPTCP=y, CONFIG_MPTCP_IPV6=n, and CONFIG_IPV6=m: ERROR: "mptcp_handle_ipv6_mapped" [net/ipv6/ipv6.ko] undefined! This does not happen if CONFIG_MPTCP_IPV6=y, as CONFIG_MPTCP_IPV6 selects CONFIG_IPV6, and thus forces CONFIG_IPV6 builtin. As exporting a symbol for an empty function would be a bit wasteful, fix this by providing a dummy version of mptcp_handle_ipv6_mapped() for the CONFIG_MPTCP_IPV6=n case. Rename mptcp_handle_ipv6_mapped() to mptcpv6_handle_mapped(), to make it clear this is a pure-IPV6 function, just like mptcpv6_init(). Fixes: cec37a6e41aae7bf ("mptcp: Handle MP_CAPABLE options for outgoing connections") Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. 
Miller --- include/net/mptcp.h | 9 +++------ net/ipv6/tcp_ipv6.c | 6 +++--- net/mptcp/subflow.c | 6 +++--- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 27627e2d1bc2..c971d25431ea 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -174,15 +174,12 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to, #endif /* CONFIG_MPTCP */ -void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped); - #if IS_ENABLED(CONFIG_MPTCP_IPV6) int mptcpv6_init(void); +void mptcpv6_handle_mapped(struct sock *sk, bool mapped); #elif IS_ENABLED(CONFIG_IPV6) -static inline int mptcpv6_init(void) -{ - return 0; -} +static inline int mptcpv6_init(void) { return 0; } +static inline void mptcpv6_handle_mapped(struct sock *sk, bool mapped) { } #endif #endif /* __NET_MPTCP_H */ diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 33a578a3eb3a..eaf09e6b7844 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -239,7 +239,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_af_ops = &ipv6_mapped; if (sk_is_mptcp(sk)) - mptcp_handle_ipv6_mapped(sk, true); + mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; #ifdef CONFIG_TCP_MD5SIG tp->af_specific = &tcp_sock_ipv6_mapped_specific; @@ -251,7 +251,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &ipv6_specific; if (sk_is_mptcp(sk)) - mptcp_handle_ipv6_mapped(sk, false); + mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; #ifdef CONFIG_TCP_MD5SIG tp->af_specific = &tcp_sock_ipv6_specific; @@ -1208,7 +1208,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; if (sk_is_mptcp(newsk)) - mptcp_handle_ipv6_mapped(newsk, true); + mptcpv6_handle_mapped(newsk, true); newsk->sk_backlog_rcv = tcp_v4_do_rcv; #ifdef CONFIG_TCP_MD5SIG 
newtp->af_specific = &tcp_sock_ipv6_mapped_specific; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c90c0e6ffb82..65122edf60aa 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -585,9 +585,9 @@ subflow_default_af_ops(struct sock *sk) return &subflow_specific; } -void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped) -{ #if IS_ENABLED(CONFIG_MPTCP_IPV6) +void mptcpv6_handle_mapped(struct sock *sk, bool mapped) +{ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock_af_ops *target; @@ -602,8 +602,8 @@ void mptcp_handle_ipv6_mapped(struct sock *sk, bool mapped) subflow->icsk_af_ops = icsk->icsk_af_ops; icsk->icsk_af_ops = target; -#endif } +#endif int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) { From 9fbf082f569980ddd7cab348e0a118678db0e47e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Jan 2020 14:59:49 +0200 Subject: [PATCH 15/15] net/core: Do not clear VF index for node/port GUIDs query VF numbers were assigned to node_guid and port_guid, but cleared right before such query calls were issued. As a result, the node/port GUIDs of VF index 0 were returned for all VFs. Fixes: 30aad41721e0 ("net/core: Add support for getting VF GUIDs") Reported-by: Adrian Chiris Signed-off-by: Leon Romanovsky Signed-off-by: David S. 
Miller --- net/core/rtnetlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cdad6ed532c4..09c44bf2e1d2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1242,6 +1242,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, return 0; memset(&vf_vlan_info, 0, sizeof(vf_vlan_info)); + memset(&node_guid, 0, sizeof(node_guid)); + memset(&port_guid, 0, sizeof(port_guid)); vf_mac.vf = vf_vlan.vf = @@ -1290,8 +1292,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; - memset(&node_guid, 0, sizeof(node_guid)); - memset(&port_guid, 0, sizeof(port_guid)); if (dev->netdev_ops->ndo_get_vf_guid && !dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid, &port_guid)) {