Merge branch 'tcp-Namespaceify-3-sysctls'

Eric Dumazet says:

====================
tcp: Namespaceify 3 sysctls

Move tcp_sack, tcp_window_scaling and tcp_timestamps
sysctls to network namespaces.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-06-08 10:53:30 -04:00
commit 06fcb3b69f
13 changed files with 86 additions and 73 deletions

View File

@ -3756,7 +3756,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
*/
memset(&tmp_opt, 0, sizeof(tmp_opt));
tcp_clear_options(&tmp_opt);
tcp_parse_options(skb, &tmp_opt, 0, NULL);
tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
memset(req, 0, sizeof(*req));

View File

@ -122,6 +122,9 @@ struct netns_ipv4 {
int sysctl_tcp_fin_timeout;
unsigned int sysctl_tcp_notsent_lowat;
int sysctl_tcp_tw_reuse;
int sysctl_tcp_sack;
int sysctl_tcp_window_scaling;
int sysctl_tcp_timestamps;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;

View File

@ -8,10 +8,11 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);
u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport);
u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr);
u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr);
u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport);
u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr);
u32 secure_tcpv6_ts_off(const struct net *net,
const __be32 *saddr, const __be32 *daddr);
u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
__be16 sport, __be16 dport);
u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,

View File

@ -237,9 +237,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
/* sysctl variables for tcp */
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
@ -427,7 +424,7 @@ void tcp_set_keepalive(struct sock *sk, int val);
void tcp_syn_ack_timeout(const struct request_sock *req);
int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
void tcp_parse_options(const struct sk_buff *skb,
void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab, struct tcp_fastopen_cookie *foc);
const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
@ -520,7 +517,8 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
u16 *mssp);
__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss);
u64 cookie_init_timestamp(struct request_sock *req);
bool cookie_timestamp_decode(struct tcp_options_received *opt);
bool cookie_timestamp_decode(const struct net *net,
struct tcp_options_received *opt);
bool cookie_ecn_ok(const struct tcp_options_received *opt,
const struct net *net, const struct dst_entry *dst);
@ -1870,7 +1868,7 @@ struct tcp_request_sock_ops {
struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
const struct request_sock *req);
u32 (*init_seq)(const struct sk_buff *skb);
u32 (*init_ts_off)(const struct sk_buff *skb);
u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl, struct request_sock *req,
struct tcp_fastopen_cookie *foc,

View File

@ -51,7 +51,8 @@ static u32 seq_scale(u32 seq)
#endif
#if IS_ENABLED(CONFIG_IPV6)
u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
u32 secure_tcpv6_ts_off(const struct net *net,
const __be32 *saddr, const __be32 *daddr)
{
const struct {
struct in6_addr saddr;
@ -61,7 +62,7 @@ u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
.daddr = *(struct in6_addr *)daddr,
};
if (sysctl_tcp_timestamps != 1)
if (net->ipv4.sysctl_tcp_timestamps != 1)
return 0;
ts_secret_init();
@ -113,9 +114,9 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
{
if (sysctl_tcp_timestamps != 1)
if (net->ipv4.sysctl_tcp_timestamps != 1)
return 0;
ts_secret_init();

View File

@ -232,7 +232,8 @@ EXPORT_SYMBOL(tcp_get_cookie_sock);
* return false if we decode a tcp option that is disabled
* on the host.
*/
bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt)
bool cookie_timestamp_decode(const struct net *net,
struct tcp_options_received *tcp_opt)
{
/* echoed timestamp, lowest bits contain options */
u32 options = tcp_opt->rcv_tsecr;
@ -242,12 +243,12 @@ bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt)
return true;
}
if (!sysctl_tcp_timestamps)
if (!net->ipv4.sysctl_tcp_timestamps)
return false;
tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
if (tcp_opt->sack_ok && !sysctl_tcp_sack)
if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
return false;
if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
@ -256,7 +257,7 @@ bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt)
tcp_opt->wscale_ok = 1;
tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
return sysctl_tcp_window_scaling != 0;
return net->ipv4.sysctl_tcp_window_scaling != 0;
}
EXPORT_SYMBOL(cookie_timestamp_decode);
@ -312,14 +313,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, 0, NULL);
tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
tsoff = secure_tcp_ts_off(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
tsoff = secure_tcp_ts_off(sock_net(sk),
ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr);
tcp_opt.rcv_tsecr -= tsoff;
}
if (!cookie_timestamp_decode(&tcp_opt))
if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
goto out;
ret = NULL;

View File

@ -364,27 +364,6 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
}
static struct ctl_table ipv4_table[] = {
{
.procname = "tcp_timestamps",
.data = &sysctl_tcp_timestamps,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_window_scaling",
.data = &sysctl_tcp_window_scaling,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_sack",
.data = &sysctl_tcp_sack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_retrans_collapse",
.data = &sysctl_tcp_retrans_collapse,
@ -1116,6 +1095,27 @@ static struct ctl_table ipv4_net_table[] = {
.extra2 = &one,
},
#endif
{
.procname = "tcp_sack",
.data = &init_net.ipv4.sysctl_tcp_sack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_window_scaling",
.data = &init_net.ipv4.sysctl_tcp_window_scaling,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{
.procname = "tcp_timestamps",
.data = &init_net.ipv4.sysctl_tcp_timestamps,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ }
};

View File

@ -76,9 +76,6 @@
#include <asm/unaligned.h>
#include <linux/errqueue.h>
int sysctl_tcp_timestamps __read_mostly = 1;
int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly;
int sysctl_tcp_max_reordering __read_mostly = 300;
int sysctl_tcp_dsack __read_mostly = 1;
@ -3724,7 +3721,8 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
* But, this can also be called on packets in the established flow when
* the fast version below fails.
*/
void tcp_parse_options(const struct sk_buff *skb,
void tcp_parse_options(const struct net *net,
const struct sk_buff *skb,
struct tcp_options_received *opt_rx, int estab,
struct tcp_fastopen_cookie *foc)
{
@ -3765,7 +3763,7 @@ void tcp_parse_options(const struct sk_buff *skb,
break;
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW && th->syn &&
!estab && sysctl_tcp_window_scaling) {
!estab && net->ipv4.sysctl_tcp_window_scaling) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > TCP_MAX_WSCALE) {
@ -3781,7 +3779,7 @@ void tcp_parse_options(const struct sk_buff *skb,
case TCPOPT_TIMESTAMP:
if ((opsize == TCPOLEN_TIMESTAMP) &&
((estab && opt_rx->tstamp_ok) ||
(!estab && sysctl_tcp_timestamps))) {
(!estab && net->ipv4.sysctl_tcp_timestamps))) {
opt_rx->saw_tstamp = 1;
opt_rx->rcv_tsval = get_unaligned_be32(ptr);
opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@ -3789,7 +3787,7 @@ void tcp_parse_options(const struct sk_buff *skb,
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
!estab && sysctl_tcp_sack) {
!estab && net->ipv4.sysctl_tcp_sack) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
@ -3858,7 +3856,8 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
/* Fast parse options. This hopes to only see timestamps.
* If it is wrong it falls back on tcp_parse_options().
*/
static bool tcp_fast_parse_options(const struct sk_buff *skb,
static bool tcp_fast_parse_options(const struct net *net,
const struct sk_buff *skb,
const struct tcphdr *th, struct tcp_sock *tp)
{
/* In the spirit of fast parsing, compare doff directly to constant
@ -3873,7 +3872,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
return true;
}
tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
@ -5234,7 +5233,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
bool rst_seq_match = false;
/* RFC1323: H1. Apply PAWS check first. */
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@ -5605,7 +5605,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
/* Get original SYNACK MSS value if user MSS sets mss_clamp */
tcp_clear_options(&opt);
opt.user_mss = opt.mss_clamp = 0;
tcp_parse_options(synack, &opt, 0, NULL);
tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
mss = opt.mss_clamp;
}
@ -5659,7 +5659,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
int saved_clamp = tp->rx_opt.mss_clamp;
bool fastopen_fail;
tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
@ -6332,7 +6332,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = af_ops->mss_clamp;
tmp_opt.user_mss = tp->rx_opt.user_mss;
tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
want_cookie ? NULL : &foc);
if (want_cookie && !tmp_opt.saw_tstamp)
tcp_clear_options(&tmp_opt);
@ -6350,7 +6351,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_free;
if (tmp_opt.tstamp_ok)
tcp_rsk(req)->ts_off = af_ops->init_ts_off(skb);
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
if (!want_cookie && !isn) {
/* Kill the following clause, if you dislike this way. */

View File

@ -102,10 +102,9 @@ static u32 tcp_v4_init_seq(const struct sk_buff *skb)
tcp_hdr(skb)->source);
}
static u32 tcp_v4_init_ts_off(const struct sk_buff *skb)
static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
return secure_tcp_ts_off(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr);
return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@ -242,7 +241,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
usin->sin_port);
tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr,
tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
inet->inet_saddr,
inet->inet_daddr);
}
@ -2465,6 +2465,9 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
return 0;
fail:

View File

@ -98,7 +98,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
tcp_parse_options(skb, &tmp_opt, 0, NULL);
tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
if (tmp_opt.rcv_tsecr)
@ -559,7 +559,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
tcp_parse_options(skb, &tmp_opt, 0, NULL);
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;

View File

@ -569,18 +569,18 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
if (likely(sysctl_tcp_timestamps && !*md5)) {
if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
if (likely(sysctl_tcp_window_scaling)) {
if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
if (likely(sysctl_tcp_sack)) {
if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
@ -3271,8 +3271,9 @@ static void tcp_connect_init(struct sock *sk)
/* We'll fix this up when we get a response from the other end.
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
tp->tcp_header_len = sizeof(struct tcphdr) +
(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
tp->tcp_header_len = sizeof(struct tcphdr);
if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
if (tp->af_specific->md5_lookup(sk, sk))
@ -3303,7 +3304,7 @@ static void tcp_connect_init(struct sock *sk)
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
sysctl_tcp_window_scaling,
sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
&rcv_wscale,
dst_metric(dst, RTAX_INITRWND));

View File

@ -162,15 +162,16 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, 0, NULL);
tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
tsoff = secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
tsoff = secure_tcpv6_ts_off(sock_net(sk),
ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32);
tcp_opt.rcv_tsecr -= tsoff;
}
if (!cookie_timestamp_decode(&tcp_opt))
if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
goto out;
ret = NULL;

View File

@ -109,9 +109,9 @@ static u32 tcp_v6_init_seq(const struct sk_buff *skb)
tcp_hdr(skb)->source);
}
static u32 tcp_v6_init_ts_off(const struct sk_buff *skb)
static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
return secure_tcpv6_ts_off(ipv6_hdr(skb)->daddr.s6_addr32,
return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32);
}
@ -292,7 +292,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport);
tp->tsoffset = secure_tcpv6_ts_off(np->saddr.s6_addr32,
tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32);
}