Merge branch 'inet_frag_kill_lru_list'
Nikolay Aleksandrov says:

====================
inet: frag: cleanup and update

The end goal of this patchset is to remove the LRU list and to move the
frag eviction to a work queue. It also does a couple of necessary cleanups
and fixes.

Brief patch descriptions:
Patches 1 - 3 inclusive: necessary clean ups
Patch 4 moves the eviction from the softirqs to a workqueue.
Patch 5 removes the nqueues counter which was protected by the LRU lock.
Patch 6 removes the, by now unused, LRU list.
Patch 7 moves the rebuild timer to the workqueue and schedules rebuilds
        only if we've hit the maximum queue length on some of the chains.
Patch 8 migrates the rwlock to a seqlock, since a rehash is usually a
        rare operation.
Patch 9 introduces an artificial global memory limit, based on the value
        of init_net's high_thresh, which is used to cap the high_thresh
        of the other namespaces. It also introduces some sane limits on
        the other tunables and makes it impossible to set
        low_thresh > high_thresh.

Here are some numbers from running netperf before and after the patchset.
Each test was run with the following settings: -I 95,5 -i 15,10

1. Bound test (-T 4,4)

1.1 Virtio, before the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.122.177 () port 0 AF_INET : +/-2.500% @ 95% conf. : cpu bind
Socket  Message  Elapsed      Messages                   CPU      Service
Size    Size     Time         Okay Errors   Throughput   Util     Demand
bytes   bytes    secs            #      #   10^6bits/sec % SS     us/KB

212992   64000   30.00      722177      0    12325.1     34.55    2.025
212992           30.00      368020           6280.9      34.05    0.752

1.2 Virtio, after the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.122.177 () port 0 AF_INET : +/-2.500% @ 95% conf. : cpu bind
Socket  Message  Elapsed      Messages                   CPU      Service
Size    Size     Time         Okay Errors   Throughput   Util     Demand
bytes   bytes    secs            #      #   10^6bits/sec % SS     us/KB

212992   64000   30.00      727030      0    12407.9     35.45    1.876
212992           30.00      505405           8625.5      34.92    0.693

2. Virtio, unbound test

2.1 Before the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.122.177 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      730008      0    12458.77
212992           30.00      416721           7112.02

2.2 After the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.122.177 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      731129      0    12477.89
212992           30.00      487707           8323.50

3. 10 gig, unbound tests

3.1 Before the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.133.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      417209      0     7120.33
212992           30.00      416740            7112.33

3.2 After the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
192.168.133.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      438009      0     7475.33
212992           30.00      437630            7468.87

With these options, each netperf ran between 10 and 15 iterations of 30
seconds each to reach the requested confidence level; the tests themselves
were run 3 times and were consistent.
Another set of tests that I ran were parallel stress tests, which consisted
of flooding the machine with fragmented packets from different sources with
the frag timeout set to 0 (so there are lots of timeouts) and low_thresh
set to 1 byte (so evictions happen all the time), and on top of that
running an endless namespace create/destroy loop whose network interfaces
and addresses got flooded (for the brief periods they were up), all in
parallel. This ran for an hour without any issues.
====================
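The centerpiece of patch 8 is the classic seqlock read/retry idiom: the
rebuild path bumps a sequence count around updates to the hash secret, and
lookups recompute their bucket whenever the count moved underneath them. A
minimal user-space rendering of that idiom (C11 atomics; the names and the
toy hash are illustrative, not the kernel's):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint seq;  /* even: stable; odd: rehash in progress */
static uint32_t rnd;     /* hash secret, protected by the seqlock */

static unsigned int read_begin(void)
{
        unsigned int s;

        /* wait out an in-flight writer, then snapshot the count */
        while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1u)
                ;
        return s;
}

static int read_retry(unsigned int s)
{
        /* did a rehash start or finish while we used rnd? */
        return atomic_load_explicit(&seq, memory_order_acquire) != s;
}

static unsigned int frag_bucket(uint32_t key)
{
        unsigned int s, bucket;

        do {
                s = read_begin();
                bucket = (key ^ rnd) & (1024 - 1); /* INETFRAGS_HASHSZ - 1 */
        } while (read_retry(s));

        return bucket;
}

static void rehash(uint32_t new_rnd)
{
        atomic_fetch_add_explicit(&seq, 1, memory_order_acq_rel); /* odd  */
        rnd = new_rnd;          /* chains get relinked under bucket locks */
        atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* even */
}

int main(void)
{
        rehash(0x5eadbeef);
        printf("bucket for id 42: %u\n", frag_bucket(42));
        return 0;
}

Readers never block the writer; they only pay a recompute when a rebuild
actually raced with them, which is why a seqlock suits a rare rehash far
better than the old global rwlock.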
commit 6ceed78664
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -101,19 +101,17 @@ ipfrag_high_thresh - INTEGER
 	Maximum memory used to reassemble IP fragments. When
 	ipfrag_high_thresh bytes of memory is allocated for this purpose,
 	the fragment handler will toss packets until ipfrag_low_thresh
-	is reached.
+	is reached. This also serves as a maximum limit to namespaces
+	different from the initial one.
 
 ipfrag_low_thresh - INTEGER
-	See ipfrag_high_thresh
+	Maximum memory used to reassemble IP fragments before the kernel
+	begins to remove incomplete fragment queues to free up resources.
+	The kernel still accepts new fragments for defragmentation.
 
 ipfrag_time - INTEGER
 	Time in seconds to keep an IP fragment in memory.
 
-ipfrag_secret_interval - INTEGER
-	Regeneration interval (in seconds) of the hash secret (or lifetime
-	for the hash secret) for IP fragments.
-	Default: 600
-
 ipfrag_max_dist - INTEGER
 	ipfrag_max_dist is a non-negative integer value which defines the
 	maximum "disorder" which is allowed among fragments which share a
@@ -1162,11 +1160,6 @@ ip6frag_low_thresh - INTEGER
 ip6frag_time - INTEGER
 	Time in seconds to keep an IPv6 fragment in memory.
 
-ip6frag_secret_interval - INTEGER
-	Regeneration interval (in seconds) of the hash secret (or lifetime
-	for the hash secret) for IPv6 fragments.
-	Default: 600
-
 conf/default/*:
 	Change the interface-specific default settings.
 
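The documentation change above encodes two invariants that the sysctl
wiring later in this diff enforces: low_thresh stays within
[0, high_thresh], and a namespace's high_thresh is capped by init_net's.
A stand-alone sketch of those checks (hypothetical names; in the kernel
the bounds live in ctl_table.extra1/extra2, and proc_dointvec_minmax
rejects out-of-range writes rather than clamping):

#include <stdio.h>

struct frag_limits {
        int low_thresh;
        int high_thresh;
};

/* Writing high_thresh: the floor is this netns' low_thresh and the
 * ceiling is init_net's high_thresh (the global cap from patch 9).
 * Out-of-range writes fail, mirroring proc_dointvec_minmax's -EINVAL.
 */
static int set_high_thresh(struct frag_limits *ns,
                           const struct frag_limits *init_ns, int val)
{
        if (val < ns->low_thresh || val > init_ns->high_thresh)
                return -1;
        ns->high_thresh = val;
        return 0;
}

/* Writing low_thresh: the floor is zero and the ceiling is this netns'
 * high_thresh, so low_thresh > high_thresh can never be configured.
 */
static int set_low_thresh(struct frag_limits *ns, int val)
{
        if (val < 0 || val > ns->high_thresh)
                return -1;
        ns->low_thresh = val;
        return 0;
}

int main(void)
{
        struct frag_limits init_ns = { 3 << 20, 4 << 20 };
        struct frag_limits ns = init_ns;

        /* raising a child namespace past init_net's cap is refused */
        printf("high := 8M -> %s\n",
               set_high_thresh(&ns, &init_ns, 8 << 20) ? "EINVAL" : "ok");
        /* low can move anywhere in [0, ns.high_thresh] */
        printf("low := 1 -> %s\n",
               set_low_thresh(&ns, 1) ? "EINVAL" : "ok");
        return 0;
}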
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -4,10 +4,6 @@
 #include <linux/percpu_counter.h>
 
 struct netns_frags {
-	int			nqueues;
-	struct list_head	lru_list;
-	spinlock_t		lru_lock;
-
 	/* The percpu_counter "mem" need to be cacheline aligned.
 	 *  mem.count must not share cacheline with other writers
 	 */
@@ -22,7 +18,6 @@ struct netns_frags {
 struct inet_frag_queue {
 	spinlock_t		lock;
 	struct timer_list	timer;      /* when will this queue expire? */
-	struct list_head	lru_list;   /* lru list member */
 	struct hlist_node	list;
 	atomic_t		refcnt;
 	struct sk_buff		*fragments; /* list of received fragments */
@@ -32,6 +27,7 @@ struct inet_frag_queue {
 	int			meat;
 	__u8			last_in;    /* first/last segment arrived? */
 
+#define INET_FRAG_EVICTED	8
 #define INET_FRAG_COMPLETE	4
 #define INET_FRAG_FIRST_IN	2
 #define INET_FRAG_LAST_IN	1
@@ -48,7 +44,7 @@ struct inet_frag_queue {
-#define INETFRAGS_HASHSZ	64
+#define INETFRAGS_HASHSZ	1024
 
 /* averaged:
  * max_depth = default ipfrag_high_thresh / INETFRAGS_HASHSZ /
  *	       rounded up (SKB_TRUELEN(0) + sizeof(struct ipq or
  *	       struct frag_queue))
  */
 #define INETFRAGS_MAXDEPTH	128
 
 struct inet_frag_bucket {
 	struct hlist_head	chain;
@@ -57,24 +53,27 @@ struct inet_frag_bucket {
 
 struct inet_frags {
 	struct inet_frag_bucket	hash[INETFRAGS_HASHSZ];
-	/* This rwlock is a global lock (seperate per IPv4, IPv6 and
-	 * netfilter). Important to keep this on a seperate cacheline.
-	 * Its primarily a rebuild protection rwlock.
-	 */
-	rwlock_t		lock ____cacheline_aligned_in_smp;
-	int			secret_interval;
-	struct timer_list	secret_timer;
+
+	struct work_struct	frags_work;
+	unsigned int		next_bucket;
+	unsigned long		last_rebuild_jiffies;
+	bool			rebuild;
 
 	/* The first call to hashfn is responsible to initialize
 	 * rnd. This is best done with net_get_random_once.
+	 *
+	 * rnd_seqlock is used to let hash insertion detect
+	 * when it needs to re-lookup the hash chain to use.
 	 */
 	u32			rnd;
+	seqlock_t		rnd_seqlock;
 	int			qsize;
 
-	unsigned int		(*hashfn)(struct inet_frag_queue *);
-	bool			(*match)(struct inet_frag_queue *q, void *arg);
+	unsigned int		(*hashfn)(const struct inet_frag_queue *);
+	bool			(*match)(const struct inet_frag_queue *q,
+					 const void *arg);
 	void			(*constructor)(struct inet_frag_queue *q,
-					       void *arg);
+					       const void *arg);
 	void			(*destructor)(struct inet_frag_queue *);
 	void			(*skb_free)(struct sk_buff *);
 	void			(*frag_expire)(unsigned long data);
@@ -87,19 +86,17 @@ void inet_frags_init_net(struct netns_frags *nf);
 void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
 
 void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
-void inet_frag_destroy(struct inet_frag_queue *q,
-				struct inet_frags *f, int *work);
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force);
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
-		struct inet_frags *f, void *key, unsigned int hash)
-	__releases(&f->lock);
+		struct inet_frags *f, void *key, unsigned int hash);
+
 void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
 				   const char *prefix);
 
 static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f)
 {
 	if (atomic_dec_and_test(&q->refcnt))
-		inet_frag_destroy(q, f, NULL);
+		inet_frag_destroy(q, f);
 }
 
 /* Memory Tracking Functions. */
@@ -131,9 +128,9 @@ static inline void init_frag_mem_limit(struct netns_frags *nf)
 	percpu_counter_init(&nf->mem, 0);
 }
 
-static inline int sum_frag_mem_limit(struct netns_frags *nf)
+static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
 {
-	int res;
+	unsigned int res;
 
 	local_bh_disable();
 	res = percpu_counter_sum_positive(&nf->mem);
@@ -142,31 +139,6 @@ static inline int sum_frag_mem_limit(struct netns_frags *nf)
 	return res;
 }
 
-static inline void inet_frag_lru_move(struct inet_frag_queue *q)
-{
-	spin_lock(&q->net->lru_lock);
-	if (!list_empty(&q->lru_list))
-		list_move_tail(&q->lru_list, &q->net->lru_list);
-	spin_unlock(&q->net->lru_lock);
-}
-
-static inline void inet_frag_lru_del(struct inet_frag_queue *q)
-{
-	spin_lock(&q->net->lru_lock);
-	list_del_init(&q->lru_list);
-	q->net->nqueues--;
-	spin_unlock(&q->net->lru_lock);
-}
-
-static inline void inet_frag_lru_add(struct netns_frags *nf,
-				     struct inet_frag_queue *q)
-{
-	spin_lock(&nf->lru_lock);
-	list_add_tail(&q->lru_list, &nf->lru_list);
-	q->net->nqueues++;
-	spin_unlock(&nf->lru_lock);
-}
-
 /* RFC 3168 support :
  * We want to check ECN values of all fragments, do detect invalid combinations.
  * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -495,7 +495,6 @@ static inline struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
 }
 #endif
 int ip_frag_mem(struct net *net);
-int ip_frag_nqueues(struct net *net);
 
 /*
  *	Functions provided by ip_forward.c
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -299,11 +299,6 @@ static inline bool ipv6_accept_ra(struct inet6_dev *idev)
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static inline int ip6_frag_nqueues(struct net *net)
-{
-	return net->ipv6.frags.nqueues;
-}
-
 static inline int ip6_frag_mem(struct net *net)
 {
 	return sum_frag_mem_limit(&net->ipv6.frags);
@@ -496,8 +491,8 @@ struct ip6_create_arg {
 	u8 ecn;
 };
 
-void ip6_frag_init(struct inet_frag_queue *q, void *a);
-bool ip6_frag_match(struct inet_frag_queue *q, void *a);
+void ip6_frag_init(struct inet_frag_queue *q, const void *a);
+bool ip6_frag_match(const struct inet_frag_queue *q, const void *a);
 
 /*
  *	Equivalent of ipv4 struct ip
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c
@@ -50,29 +50,25 @@ static unsigned int lowpan_hash_frag(__be16 tag, u16 d_size,
 				     const struct ieee802154_addr *saddr,
 				     const struct ieee802154_addr *daddr)
 {
-	u32 c;
-
 	net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
-	c = jhash_3words(ieee802154_addr_hash(saddr),
-			 ieee802154_addr_hash(daddr),
-			 (__force u32)(tag + (d_size << 16)),
-			 lowpan_frags.rnd);
-
-	return c & (INETFRAGS_HASHSZ - 1);
+	return jhash_3words(ieee802154_addr_hash(saddr),
+			    ieee802154_addr_hash(daddr),
+			    (__force u32)(tag + (d_size << 16)),
+			    lowpan_frags.rnd);
 }
 
-static unsigned int lowpan_hashfn(struct inet_frag_queue *q)
+static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
 {
-	struct lowpan_frag_queue *fq;
+	const struct lowpan_frag_queue *fq;
 
 	fq = container_of(q, struct lowpan_frag_queue, q);
 	return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
 }
 
-static bool lowpan_frag_match(struct inet_frag_queue *q, void *a)
+static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
 {
-	struct lowpan_frag_queue *fq;
-	struct lowpan_create_arg *arg = a;
+	const struct lowpan_frag_queue *fq;
+	const struct lowpan_create_arg *arg = a;
 
 	fq = container_of(q, struct lowpan_frag_queue, q);
 	return fq->tag == arg->tag && fq->d_size == arg->d_size &&
@@ -80,10 +76,10 @@ static bool lowpan_frag_match(struct inet_frag_queue *q, void *a)
 	       ieee802154_addr_equal(&fq->daddr, arg->dst);
 }
 
-static void lowpan_frag_init(struct inet_frag_queue *q, void *a)
+static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
 {
+	const struct lowpan_create_arg *arg = a;
 	struct lowpan_frag_queue *fq;
-	struct lowpan_create_arg *arg = a;
 
 	fq = container_of(q, struct lowpan_frag_queue, q);
 
@@ -128,7 +124,6 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
 	arg.src = src;
 	arg.dst = dst;
 
-	read_lock(&lowpan_frags.lock);
 	hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst);
 
 	q = inet_frag_find(&ieee802154_lowpan->frags,
@@ -223,7 +218,6 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
 		return res;
 	}
 
-	inet_frag_lru_move(&fq->q);
 	return -1;
 err:
 	kfree_skb(skb);
@@ -373,8 +367,6 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
 	if (frag_info->d_size > ieee802154_lowpan->max_dsize)
 		goto err;
 
-	inet_frag_evictor(&ieee802154_lowpan->frags, &lowpan_frags, false);
-
 	fq = fq_find(net, frag_info, &source, &dest);
 	if (fq != NULL) {
 		int ret;
@@ -394,20 +386,25 @@ int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
 EXPORT_SYMBOL(lowpan_frag_rcv);
 
 #ifdef CONFIG_SYSCTL
+static int zero;
+
 static struct ctl_table lowpan_frags_ns_ctl_table[] = {
 	{
 		.procname	= "6lowpanfrag_high_thresh",
 		.data		= &init_net.ieee802154_lowpan.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &init_net.ieee802154_lowpan.frags.low_thresh
 	},
 	{
 		.procname	= "6lowpanfrag_low_thresh",
 		.data		= &init_net.ieee802154_lowpan.frags.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &init_net.ieee802154_lowpan.frags.high_thresh
 	},
 	{
 		.procname	= "6lowpanfrag_time",
@@ -426,10 +423,12 @@ static struct ctl_table lowpan_frags_ns_ctl_table[] = {
 	{ }
 };
 
+/* secret interval has been deprecated */
+static int lowpan_frags_secret_interval_unused;
 static struct ctl_table lowpan_frags_ctl_table[] = {
 	{
 		.procname	= "6lowpanfrag_secret_interval",
-		.data		= &lowpan_frags.secret_interval,
+		.data		= &lowpan_frags_secret_interval_unused,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -452,7 +451,10 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
 		goto err_alloc;
 
 	table[0].data = &ieee802154_lowpan->frags.high_thresh;
+	table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
+	table[0].extra2 = &init_net.ieee802154_lowpan.frags.high_thresh;
 	table[1].data = &ieee802154_lowpan->frags.low_thresh;
+	table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
 	table[2].data = &ieee802154_lowpan->frags.timeout;
 	table[3].data = &ieee802154_lowpan->max_dsize;
 
@@ -569,7 +571,6 @@ int __init lowpan_net_frag_init(void)
 	lowpan_frags.qsize = sizeof(struct frag_queue);
 	lowpan_frags.match = lowpan_frag_match;
 	lowpan_frags.frag_expire = lowpan_frag_expire;
-	lowpan_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&lowpan_frags);
 
 	return ret;
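The sysctl changes in this file show the pattern repeated for IPv4, IPv6,
and nf_conntrack below: each per-namespace registration clones a template
table and repoints .data plus the minmax bounds at its own fields, so every
namespace gets init_net's high_thresh as its ceiling. A simplified,
self-contained sketch of that cloning (plain C stand-ins for struct
ctl_table and kmemdup; illustrative only):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ctl_entry {
        const char *procname;
        int *data;    /* value this entry reads/writes */
        int *extra1;  /* lower bound for the minmax handler */
        int *extra2;  /* upper bound for the minmax handler */
};

struct frags_ns { int high_thresh, low_thresh, timeout; };

static struct frags_ns init_ns = { 4 << 20, 3 << 20, 30 };

static struct ctl_entry template[3] = {
        { "frag_high_thresh", NULL, NULL, NULL },
        { "frag_low_thresh",  NULL, NULL, NULL }, /* extra1 = &zero in-kernel */
        { "frag_time",        NULL, NULL, NULL },
};

static struct ctl_entry *register_ns_table(struct frags_ns *ns)
{
        struct ctl_entry *t = malloc(sizeof(template));

        if (!t)
                return NULL;
        memcpy(t, template, sizeof(template));
        t[0].data   = &ns->high_thresh;
        t[0].extra1 = &ns->low_thresh;      /* high can't go below low  */
        t[0].extra2 = &init_ns.high_thresh; /* ...or above init_net cap */
        t[1].data   = &ns->low_thresh;
        t[1].extra2 = &ns->high_thresh;     /* low can't exceed high    */
        t[2].data   = &ns->timeout;
        return t;
}

int main(void)
{
        struct frags_ns ns = init_ns;
        struct ctl_entry *t = register_ns_table(&ns);

        printf("%s bounded by [%d, %d]\n", t[0].procname,
               *t[0].extra1, *t[0].extra2);
        free(t);
        return 0;
}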
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,6 +25,12 @@
 #include <net/inet_frag.h>
 #include <net/inet_ecn.h>
 
+#define INETFRAGS_EVICT_BUCKETS   128
+#define INETFRAGS_EVICT_MAX	  512
+
+/* don't rebuild inetfrag table with new secret more often than this */
+#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
+
 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
  * Value : 0xff if frame should be dropped.
  *         0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -46,24 +52,39 @@ const u8 ip_frag_ecn_table[16] = {
 };
 EXPORT_SYMBOL(ip_frag_ecn_table);
 
-static void inet_frag_secret_rebuild(unsigned long dummy)
+static unsigned int
+inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
+{
+	return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
+}
+
+static bool inet_frag_may_rebuild(struct inet_frags *f)
+{
+	return time_after(jiffies,
+	       f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
+}
+
+static void inet_frag_secret_rebuild(struct inet_frags *f)
 {
-	struct inet_frags *f = (struct inet_frags *)dummy;
-	unsigned long now = jiffies;
 	int i;
 
-	/* Per bucket lock NOT needed here, due to write lock protection */
-	write_lock(&f->lock);
+	write_seqlock_bh(&f->rnd_seqlock);
+
+	if (!inet_frag_may_rebuild(f))
+		goto out;
 
 	get_random_bytes(&f->rnd, sizeof(u32));
+
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
 		struct inet_frag_bucket *hb;
 		struct inet_frag_queue *q;
 		struct hlist_node *n;
 
 		hb = &f->hash[i];
+		spin_lock(&hb->chain_lock);
+
 		hlist_for_each_entry_safe(q, n, &hb->chain, list) {
-			unsigned int hval = f->hashfn(q);
+			unsigned int hval = inet_frag_hashfn(f, q);
 
 			if (hval != i) {
 				struct inet_frag_bucket *hb_dest;
@@ -72,76 +93,195 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
 
 				/* Relink to new hash chain. */
 				hb_dest = &f->hash[hval];
+
+				/* This is the only place where we take
+				 * another chain_lock while already holding
+				 * one.  As this will not run concurrently,
+				 * we cannot deadlock on hb_dest lock below, if its
+				 * already locked it will be released soon since
+				 * other caller cannot be waiting for hb lock
+				 * that we've taken above.
+				 */
+				spin_lock_nested(&hb_dest->chain_lock,
+						 SINGLE_DEPTH_NESTING);
 				hlist_add_head(&q->list, &hb_dest->chain);
+				spin_unlock(&hb_dest->chain_lock);
 			}
 		}
+		spin_unlock(&hb->chain_lock);
 	}
-	write_unlock(&f->lock);
 
-	mod_timer(&f->secret_timer, now + f->secret_interval);
+	f->rebuild = false;
+	f->last_rebuild_jiffies = jiffies;
+out:
+	write_sequnlock_bh(&f->rnd_seqlock);
+}
+
+static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
+{
+	return q->net->low_thresh == 0 ||
+	       frag_mem_limit(q->net) >= q->net->low_thresh;
+}
+
+static unsigned int
+inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
+{
+	struct inet_frag_queue *fq;
+	struct hlist_node *n;
+	unsigned int evicted = 0;
+	HLIST_HEAD(expired);
+
+evict_again:
+	spin_lock(&hb->chain_lock);
+
+	hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
+		if (!inet_fragq_should_evict(fq))
+			continue;
+
+		if (!del_timer(&fq->timer)) {
+			/* q expiring right now thus increment its refcount so
+			 * it won't be freed under us and wait until the timer
+			 * has finished executing then destroy it
+			 */
+			atomic_inc(&fq->refcnt);
+			spin_unlock(&hb->chain_lock);
+			del_timer_sync(&fq->timer);
+			WARN_ON(atomic_read(&fq->refcnt) != 1);
+			inet_frag_put(fq, f);
+			goto evict_again;
+		}
+
+		/* suppress xmit of (icmp) error packet */
+		fq->last_in &= ~INET_FRAG_FIRST_IN;
+		fq->last_in |= INET_FRAG_EVICTED;
+		hlist_del(&fq->list);
+		hlist_add_head(&fq->list, &expired);
+		++evicted;
+	}
+
+	spin_unlock(&hb->chain_lock);
+
+	hlist_for_each_entry_safe(fq, n, &expired, list)
+		f->frag_expire((unsigned long) fq);
+
+	return evicted;
+}
+
+static void inet_frag_worker(struct work_struct *work)
+{
+	unsigned int budget = INETFRAGS_EVICT_BUCKETS;
+	unsigned int i, evicted = 0;
+	struct inet_frags *f;
+
+	f = container_of(work, struct inet_frags, frags_work);
+
+	BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
+
+	local_bh_disable();
+
+	for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+		evicted += inet_evict_bucket(f, &f->hash[i]);
+		i = (i + 1) & (INETFRAGS_HASHSZ - 1);
+		if (evicted > INETFRAGS_EVICT_MAX)
+			break;
+	}
+
+	f->next_bucket = i;
+
+	local_bh_enable();
+
+	if (f->rebuild && inet_frag_may_rebuild(f))
+		inet_frag_secret_rebuild(f);
+}
+
+static void inet_frag_schedule_worker(struct inet_frags *f)
+{
+	if (unlikely(!work_pending(&f->frags_work)))
+		schedule_work(&f->frags_work);
 }
 
 void inet_frags_init(struct inet_frags *f)
 {
 	int i;
 
+	INIT_WORK(&f->frags_work, inet_frag_worker);
+
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
 		struct inet_frag_bucket *hb = &f->hash[i];
 
 		spin_lock_init(&hb->chain_lock);
 		INIT_HLIST_HEAD(&hb->chain);
 	}
-	rwlock_init(&f->lock);
 
-	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
-			(unsigned long)f);
-	f->secret_timer.expires = jiffies + f->secret_interval;
-	add_timer(&f->secret_timer);
+	seqlock_init(&f->rnd_seqlock);
+	f->last_rebuild_jiffies = 0;
 }
 EXPORT_SYMBOL(inet_frags_init);
 
 void inet_frags_init_net(struct netns_frags *nf)
 {
-	nf->nqueues = 0;
 	init_frag_mem_limit(nf);
-	INIT_LIST_HEAD(&nf->lru_list);
-	spin_lock_init(&nf->lru_lock);
 }
 EXPORT_SYMBOL(inet_frags_init_net);
 
 void inet_frags_fini(struct inet_frags *f)
 {
-	del_timer(&f->secret_timer);
+	cancel_work_sync(&f->frags_work);
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
 void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
 {
-	nf->low_thresh = 0;
+	unsigned int seq;
+	int i;
+
+	nf->low_thresh = 0;
 	local_bh_disable();
-	inet_frag_evictor(nf, f, true);
+
+evict_again:
+	seq = read_seqbegin(&f->rnd_seqlock);
+
+	for (i = 0; i < INETFRAGS_HASHSZ ; i++)
+		inet_evict_bucket(f, &f->hash[i]);
+
+	if (read_seqretry(&f->rnd_seqlock, seq))
+		goto evict_again;
+
 	local_bh_enable();
 
 	percpu_counter_destroy(&nf->mem);
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
 
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static struct inet_frag_bucket *
+get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
+__acquires(hb->chain_lock)
 {
 	struct inet_frag_bucket *hb;
-	unsigned int hash;
+	unsigned int seq, hash;
+
+ restart:
+	seq = read_seqbegin(&f->rnd_seqlock);
 
-	read_lock(&f->lock);
-	hash = f->hashfn(fq);
+	hash = inet_frag_hashfn(f, fq);
 	hb = &f->hash[hash];
 
 	spin_lock(&hb->chain_lock);
+	if (read_seqretry(&f->rnd_seqlock, seq)) {
+		spin_unlock(&hb->chain_lock);
+		goto restart;
+	}
+
+	return hb;
+}
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	struct inet_frag_bucket *hb;
+
+	hb = get_frag_bucket_locked(fq, f);
 	hlist_del(&fq->list);
 	spin_unlock(&hb->chain_lock);
-
-	read_unlock(&f->lock);
-	inet_frag_lru_del(fq);
 }
 
 void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -165,8 +305,7 @@ static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
 	kfree_skb(skb);
 }
 
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
-					int *work)
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
 {
 	struct sk_buff *fp;
 	struct netns_frags *nf;
@@ -186,86 +325,30 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 		fp = xp;
 	}
 	sum = sum_truesize + f->qsize;
-	if (work)
-		*work -= sum;
 	sub_frag_mem_limit(q, sum);
 
 	if (f->destructor)
 		f->destructor(q);
 	kfree(q);
-
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
-{
-	struct inet_frag_queue *q;
-	int work, evicted = 0;
-
-	if (!force) {
-		if (frag_mem_limit(nf) <= nf->high_thresh)
-			return 0;
-	}
-
-	work = frag_mem_limit(nf) - nf->low_thresh;
-	while (work > 0 || force) {
-		spin_lock(&nf->lru_lock);
-
-		if (list_empty(&nf->lru_list)) {
-			spin_unlock(&nf->lru_lock);
-			break;
-		}
-
-		q = list_first_entry(&nf->lru_list,
-				struct inet_frag_queue, lru_list);
-		atomic_inc(&q->refcnt);
-		/* Remove q from list to avoid several CPUs grabbing it */
-		list_del_init(&q->lru_list);
-
-		spin_unlock(&nf->lru_lock);
-
-		spin_lock(&q->lock);
-		if (!(q->last_in & INET_FRAG_COMPLETE))
-			inet_frag_kill(q, f);
-		spin_unlock(&q->lock);
-
-		if (atomic_dec_and_test(&q->refcnt))
-			inet_frag_destroy(q, f, &work);
-		evicted++;
-	}
-
-	return evicted;
-}
-EXPORT_SYMBOL(inet_frag_evictor);
-
 static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 		struct inet_frag_queue *qp_in, struct inet_frags *f,
 		void *arg)
 {
-	struct inet_frag_bucket *hb;
+	struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
 	struct inet_frag_queue *qp;
-	unsigned int hash;
-
-	read_lock(&f->lock); /* Protects against hash rebuild */
-	/*
-	 * While we stayed w/o the lock other CPU could update
-	 * the rnd seed, so we need to re-calculate the hash
-	 * chain. Fortunatelly the qp_in can be used to get one.
-	 */
-	hash = f->hashfn(qp_in);
-	hb = &f->hash[hash];
-	spin_lock(&hb->chain_lock);
 
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
-	 * such entry could be created on other cpu, while we
-	 * released the hash bucket lock.
+	 * such entry could have been created on other cpu before
+	 * we acquired hash bucket lock.
 	 */
 	hlist_for_each_entry(qp, &hb->chain, list) {
 		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
 			spin_unlock(&hb->chain_lock);
-			read_unlock(&f->lock);
 			qp_in->last_in |= INET_FRAG_COMPLETE;
 			inet_frag_put(qp_in, f);
 			return qp;
@@ -278,9 +361,8 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 
 	atomic_inc(&qp->refcnt);
 	hlist_add_head(&qp->list, &hb->chain);
-	inet_frag_lru_add(nf, qp);
+
 	spin_unlock(&hb->chain_lock);
-	read_unlock(&f->lock);
 
 	return qp;
 }
@@ -290,6 +372,11 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 {
 	struct inet_frag_queue *q;
 
+	if (frag_mem_limit(nf) > nf->high_thresh) {
+		inet_frag_schedule_worker(f);
+		return NULL;
+	}
+
 	q = kzalloc(f->qsize, GFP_ATOMIC);
 	if (q == NULL)
 		return NULL;
@@ -301,7 +388,6 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
-	INIT_LIST_HEAD(&q->lru_list);
 
 	return q;
 }
@@ -320,12 +406,15 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
 
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		struct inet_frags *f, void *key, unsigned int hash)
-	__releases(&f->lock)
 {
 	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *q;
 	int depth = 0;
 
+	if (frag_mem_limit(nf) > nf->low_thresh)
+		inet_frag_schedule_worker(f);
+
 	hash &= (INETFRAGS_HASHSZ - 1);
 	hb = &f->hash[hash];
 
 	spin_lock(&hb->chain_lock);
@@ -333,18 +422,22 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
 			spin_unlock(&hb->chain_lock);
-			read_unlock(&f->lock);
 			return q;
 		}
 		depth++;
 	}
 	spin_unlock(&hb->chain_lock);
-	read_unlock(&f->lock);
 
 	if (depth <= INETFRAGS_MAXDEPTH)
 		return inet_frag_create(nf, f, key);
-	else
-		return ERR_PTR(-ENOBUFS);
+
+	if (inet_frag_may_rebuild(f)) {
+		if (!f->rebuild)
+			f->rebuild = true;
+		inet_frag_schedule_worker(f);
+	}
+
+	return ERR_PTR(-ENOBUFS);
 }
 EXPORT_SYMBOL(inet_frag_find);
 
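One detail of inet_frag_worker() above that is easy to miss: eviction is
budgeted rather than exhaustive. Each run resumes at next_bucket, visits at
most INETFRAGS_EVICT_BUCKETS buckets, stops early once it has evicted more
than INETFRAGS_EVICT_MAX queues, and records where to resume. A toy model
of that walk (plain counters stand in for the real hlist walking under the
per-bucket chain locks):

#include <stdio.h>

#define HASHSZ        1024
#define EVICT_BUCKETS 128
#define EVICT_MAX     512

static unsigned int queues[HASHSZ]; /* queues pending in each bucket */
static unsigned int next_bucket;

static unsigned int evict_bucket(unsigned int i)
{
        unsigned int n = queues[i]; /* kill every queue over low_thresh */

        queues[i] = 0;
        return n;
}

static void frag_worker(void)
{
        unsigned int budget = EVICT_BUCKETS;
        unsigned int i, evicted = 0;

        for (i = next_bucket; budget; --budget) {
                evicted += evict_bucket(i);
                i = (i + 1) & (HASHSZ - 1); /* wrap around the table */
                if (evicted > EVICT_MAX)
                        break;
        }
        next_bucket = i; /* resume here on the next scheduled run */
        printf("evicted %u queues, resuming at bucket %u\n", evicted, i);
}

int main(void)
{
        queues[0] = 300;
        queues[1] = 400; /* crosses EVICT_MAX mid-walk */
        frag_worker();
        frag_worker();
        return 0;
}

Bounding each run keeps worst-case latency predictable under flood, while
the saved position guarantees every bucket is eventually visited.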
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -86,11 +86,6 @@ static inline u8 ip4_frag_ecn(u8 tos)
 
 static struct inet_frags ip4_frags;
 
-int ip_frag_nqueues(struct net *net)
-{
-	return net->ipv4.frags.nqueues;
-}
-
 int ip_frag_mem(struct net *net)
 {
 	return sum_frag_mem_limit(&net->ipv4.frags);
@@ -109,21 +104,21 @@ static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 	net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
 	return jhash_3words((__force u32)id << 16 | prot,
 			    (__force u32)saddr, (__force u32)daddr,
-			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
+			    ip4_frags.rnd);
 }
 
-static unsigned int ip4_hashfn(struct inet_frag_queue *q)
+static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
 {
-	struct ipq *ipq;
+	const struct ipq *ipq;
 
 	ipq = container_of(q, struct ipq, q);
 	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
-static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
+static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
 {
-	struct ipq *qp;
-	struct ip4_create_arg *arg = a;
+	const struct ipq *qp;
+	const struct ip4_create_arg *arg = a;
 
 	qp = container_of(q, struct ipq, q);
 	return qp->id == arg->iph->id &&
@@ -133,14 +128,14 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
 	       qp->user == arg->user;
 }
 
-static void ip4_frag_init(struct inet_frag_queue *q, void *a)
+static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
 	struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
 					       frags);
 	struct net *net = container_of(ipv4, struct net, ipv4);
 
-	struct ip4_create_arg *arg = a;
+	const struct ip4_create_arg *arg = a;
 
 	qp->protocol = arg->iph->protocol;
 	qp->id = arg->iph->id;
@@ -177,18 +172,6 @@ static void ipq_kill(struct ipq *ipq)
 	inet_frag_kill(&ipq->q, &ip4_frags);
 }
 
-/* Memory limiting on fragments.  Evictor trashes the oldest
- * fragment queue until we are back under the threshold.
- */
-static void ip_evictor(struct net *net)
-{
-	int evicted;
-
-	evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
-	if (evicted)
-		IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
-}
-
 /*
  * Oops, a fragment queue timed out.  Kill it and send an ICMP reply.
  */
@@ -207,7 +190,8 @@ static void ip_expire(unsigned long arg)
 
 	ipq_kill(qp);
 
-	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+	if (!(qp->q.last_in & INET_FRAG_EVICTED))
+		IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
 
 	if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
@@ -260,7 +244,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 	arg.iph = iph;
 	arg.user = user;
 
-	read_lock(&ip4_frags.lock);
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
 	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
@@ -505,7 +488,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	}
 
 	skb_dst_drop(skb);
-	inet_frag_lru_move(&qp->q);
 	return -EINPROGRESS;
 
 err:
@@ -655,9 +637,6 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
 
-	/* Start by cleaning up the memory. */
-	ip_evictor(net);
-
 	/* Lookup (or create) queue header */
 	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
 		int ret;
@@ -721,14 +700,17 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
 		.data		= &init_net.ipv4.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &init_net.ipv4.frags.low_thresh
 	},
 	{
 		.procname	= "ipfrag_low_thresh",
 		.data		= &init_net.ipv4.frags.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &init_net.ipv4.frags.high_thresh
 	},
 	{
 		.procname	= "ipfrag_time",
@@ -740,10 +722,12 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
 	{ }
 };
 
+/* secret interval has been deprecated */
+static int ip4_frags_secret_interval_unused;
 static struct ctl_table ip4_frags_ctl_table[] = {
 	{
 		.procname	= "ipfrag_secret_interval",
-		.data		= &ip4_frags.secret_interval,
+		.data		= &ip4_frags_secret_interval_unused,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -771,7 +755,10 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
 		goto err_alloc;
 
 	table[0].data = &net->ipv4.frags.high_thresh;
+	table[0].extra1 = &net->ipv4.frags.low_thresh;
+	table[0].extra2 = &init_net.ipv4.frags.high_thresh;
 	table[1].data = &net->ipv4.frags.low_thresh;
+	table[1].extra2 = &net->ipv4.frags.high_thresh;
 	table[2].data = &net->ipv4.frags.timeout;
 
 	/* Don't export sysctls to unprivileged users */
@@ -873,6 +860,5 @@ void __init ipfrag_init(void)
 	ip4_frags.qsize = sizeof(struct ipq);
 	ip4_frags.match = ip4_frag_match;
 	ip4_frags.frag_expire = ip_expire;
-	ip4_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&ip4_frags);
 }
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -52,6 +52,7 @@
 static int sockstat_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq->private;
+	unsigned int frag_mem;
 	int orphans, sockets;
 
 	local_bh_disable();
@@ -71,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
 		   sock_prot_inuse_get(net, &udplite_prot));
 	seq_printf(seq, "RAW: inuse %d\n",
 		   sock_prot_inuse_get(net, &raw_prot));
-	seq_printf(seq, "FRAG: inuse %d memory %d\n",
-		   ip_frag_nqueues(net), ip_frag_mem(net));
+	frag_mem = ip_frag_mem(net);
+	seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
 	return 0;
 }
 
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -63,6 +63,8 @@ struct nf_ct_frag6_skb_cb
 static struct inet_frags nf_frags;
 
 #ifdef CONFIG_SYSCTL
+static int zero;
+
 static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 	{
 		.procname	= "nf_conntrack_frag6_timeout",
@@ -76,14 +78,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
 		.data		= &init_net.nf_frag.frags.low_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &init_net.nf_frag.frags.high_thresh
 	},
 	{
 		.procname	= "nf_conntrack_frag6_high_thresh",
 		.data		= &init_net.nf_frag.frags.high_thresh,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &init_net.nf_frag.frags.low_thresh
 	},
 	{ }
 };
@@ -102,7 +107,10 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
 
 		table[0].data = &net->nf_frag.frags.timeout;
 		table[1].data = &net->nf_frag.frags.low_thresh;
+		table[1].extra2 = &net->nf_frag.frags.high_thresh;
 		table[2].data = &net->nf_frag.frags.high_thresh;
+		table[2].extra1 = &net->nf_frag.frags.low_thresh;
+		table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
 	}
 
 	hdr = register_net_sysctl(net, "net/netfilter", table);
@@ -147,16 +155,13 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
 static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
 				 const struct in6_addr *daddr)
 {
-	u32 c;
-
 	net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
-	c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
-			 (__force u32)id, nf_frags.rnd);
-	return c & (INETFRAGS_HASHSZ - 1);
+	return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+			    (__force u32)id, nf_frags.rnd);
 }
 
 
-static unsigned int nf_hashfn(struct inet_frag_queue *q)
+static unsigned int nf_hashfn(const struct inet_frag_queue *q)
 {
 	const struct frag_queue *nq;
 
@@ -196,7 +201,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 	arg.dst = dst;
 	arg.ecn = ecn;
 
-	read_lock_bh(&nf_frags.lock);
+	local_bh_disable();
 	hash = nf_hash_frag(id, src, dst);
 
 	q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
@@ -352,7 +357,6 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
 		fq->q.last_in |= INET_FRAG_FIRST_IN;
 	}
 
-	inet_frag_lru_move(&fq->q);
 	return 0;
 
 discard_fq:
@@ -597,10 +601,6 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 	hdr = ipv6_hdr(clone);
 	fhdr = (struct frag_hdr *)skb_transport_header(clone);
 
-	local_bh_disable();
-	inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
-	local_bh_enable();
-
 	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
 		     ip6_frag_ecn(hdr));
 	if (fq == NULL) {
@@ -677,7 +677,6 @@ int nf_ct_frag6_init(void)
 	nf_frags.qsize = sizeof(struct frag_queue);
 	nf_frags.match = ip6_frag_match;
 	nf_frags.frag_expire = nf_ct_frag6_expire;
-	nf_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&nf_frags);
 
 	ret = register_pernet_subsys(&nf_ct_net_ops);
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -33,6 +33,7 @@
 static int sockstat6_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq->private;
+	unsigned int frag_mem = ip6_frag_mem(net);
 
 	seq_printf(seq, "TCP6: inuse %d\n",
 		       sock_prot_inuse_get(net, &tcpv6_prot));
@@ -42,8 +43,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
 			sock_prot_inuse_get(net, &udplitev6_prot));
 	seq_printf(seq, "RAW6: inuse %d\n",
 		       sock_prot_inuse_get(net, &rawv6_prot));
-	seq_printf(seq, "FRAG6: inuse %d memory %d\n",
-		       ip6_frag_nqueues(net), ip6_frag_mem(net));
+	seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
 	return 0;
 }
 
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -85,27 +85,23 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
 				    const struct in6_addr *daddr)
 {
-	u32 c;
-
 	net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
-	c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
-			 (__force u32)id, ip6_frags.rnd);
-
-	return c & (INETFRAGS_HASHSZ - 1);
+	return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
+			    (__force u32)id, ip6_frags.rnd);
 }
 
-static unsigned int ip6_hashfn(struct inet_frag_queue *q)
+static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
 {
-	struct frag_queue *fq;
+	const struct frag_queue *fq;
 
 	fq = container_of(q, struct frag_queue, q);
 	return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
 }
 
-bool ip6_frag_match(struct inet_frag_queue *q, void *a)
+bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
 {
-	struct frag_queue *fq;
-	struct ip6_create_arg *arg = a;
+	const struct frag_queue *fq;
+	const struct ip6_create_arg *arg = a;
 
 	fq = container_of(q, struct frag_queue, q);
 	return fq->id == arg->id &&
@@ -115,10 +111,10 @@ bool ip6_frag_match(struct inet_frag_queue *q, void *a)
 }
 EXPORT_SYMBOL(ip6_frag_match);
 
-void ip6_frag_init(struct inet_frag_queue *q, void *a)
+void ip6_frag_init(struct inet_frag_queue *q, const void *a)
 {
 	struct frag_queue *fq = container_of(q, struct frag_queue, q);
-	struct ip6_create_arg *arg = a;
+	const struct ip6_create_arg *arg = a;
 
 	fq->id = arg->id;
 	fq->user = arg->user;
@@ -145,7 +141,9 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
 	if (!dev)
 		goto out_rcu_unlock;
 
-	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
+	if (!(fq->q.last_in & INET_FRAG_EVICTED))
+		IP6_INC_STATS_BH(net, __in6_dev_get(dev),
+				 IPSTATS_MIB_REASMTIMEOUT);
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
 
 	/* Don't send error if the first segment did not arrive. */
@@ -192,7 +190,6 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
 	arg.dst = dst;
 	arg.ecn = ecn;
 
-	read_lock(&ip6_frags.lock);
 	hash = inet6_hash_frag(id, src, dst);
 
 	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
@@ -353,7 +350,6 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	}
 
 	skb_dst_drop(skb);
-	inet_frag_lru_move(&fq->q);
 	return -1;
 
 discard_fq:
@@ -523,7 +519,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	struct frag_queue *fq;
 	const struct ipv6hdr *hdr = ipv6_hdr(skb);
 	struct net *net = dev_net(skb_dst(skb)->dev);
-	int evicted;
 
 	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
 		goto fail_hdr;
@@ -552,11 +547,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
-	evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false);
-	if (evicted)
-		IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
-				 IPSTATS_MIB_REASMFAILS, evicted);
-
 	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
 		     ip6_frag_ecn(hdr));
 	if (fq != NULL) {
@@ -588,20 +578,25 @@ static const struct inet6_protocol frag_protocol =
 };
 
 #ifdef CONFIG_SYSCTL
+static int zero;
+
 static struct ctl_table ip6_frags_ns_ctl_table[] = {
 	{
 		.procname	= "ip6frag_high_thresh",
 		.data		= &init_net.ipv6.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &init_net.ipv6.frags.low_thresh
 	},
 	{
 		.procname	= "ip6frag_low_thresh",
 		.data		= &init_net.ipv6.frags.low_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &init_net.ipv6.frags.high_thresh
 	},
 	{
 		.procname	= "ip6frag_time",
@@ -613,10 +608,12 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
 	{ }
 };
 
+/* secret interval has been deprecated */
+static int ip6_frags_secret_interval_unused;
 static struct ctl_table ip6_frags_ctl_table[] = {
 	{
 		.procname	= "ip6frag_secret_interval",
-		.data		= &ip6_frags.secret_interval,
+		.data		= &ip6_frags_secret_interval_unused,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_jiffies,
@@ -636,7 +633,10 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 		goto err_alloc;
 
 	table[0].data = &net->ipv6.frags.high_thresh;
+	table[0].extra1 = &net->ipv6.frags.low_thresh;
+	table[0].extra2 = &init_net.ipv6.frags.high_thresh;
 	table[1].data = &net->ipv6.frags.low_thresh;
+	table[1].extra2 = &net->ipv6.frags.high_thresh;
 	table[2].data = &net->ipv6.frags.timeout;
 
 	/* Don't export sysctls to unprivileged users */
@@ -746,7 +746,6 @@ int __init ipv6_frag_init(void)
 	ip6_frags.qsize = sizeof(struct frag_queue);
 	ip6_frags.match = ip6_frag_match;
 	ip6_frags.frag_expire = ip6_frag_expire;
-	ip6_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&ip6_frags);
 out:
 	return ret;