net: Introduce netns_bpf for BPF programs attached to netns

In order to:

 (1) attach more than one BPF program type to netns, or
 (2) support attaching BPF programs to netns with bpf_link, or
 (3) support multi-prog attach points for netns

we will need to keep more state per netns than the single pointer we
have now for the BPF flow dissector program.

Prepare for the above by extracting netns_bpf that is part of struct net,
for storing all state related to BPF programs attached to netns.

Turn flow dissector callbacks for querying/attaching/detaching a program
into generic ones that operate on netns_bpf. The next patch will move the
generic callbacks into their own module.

This is similar to how it is organized for cgroup with cgroup_bpf.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-3-jakub@cloudflare.com
This commit is contained in:
Jakub Sitnicki 2020-05-31 10:28:36 +02:00 committed by Alexei Starovoitov
parent 171526f6fe
commit a3fd7ceee0
6 changed files with 149 additions and 66 deletions

56
include/linux/bpf-netns.h Normal file
View File

@ -0,0 +1,56 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BPF_NETNS_H
#define _BPF_NETNS_H
#include <linux/mutex.h>
#include <uapi/linux/bpf.h>
/*
 * Attach points for BPF programs on a network namespace. Valid values
 * double as indices into per-netns program arrays.
 */
enum netns_bpf_attach_type {
	/* Sentinel for attach types that have no netns attach point */
	NETNS_BPF_INVALID = -1,
	NETNS_BPF_FLOW_DISSECTOR = 0,
	/* Count of valid attach points; must stay last */
	MAX_NETNS_BPF_ATTACH_TYPE
};
/*
 * Map a bpf_attach_type onto the matching netns attach point.
 *
 * Returns NETNS_BPF_INVALID (negative) when attach_type does not
 * correspond to any netns attach point, so callers can test "< 0".
 */
static inline enum netns_bpf_attach_type
to_netns_bpf_attach_type(enum bpf_attach_type attach_type)
{
	switch (attach_type) {
	case BPF_FLOW_DISSECTOR:
		return NETNS_BPF_FLOW_DISSECTOR;
	default:
		break;
	}

	return NETNS_BPF_INVALID;
}
/*
 * Protects updates to netns_bpf. Taken around attaching and detaching
 * programs; program lookups rely on RCU instead of this mutex.
 */
extern struct mutex netns_bpf_mutex;
/* Forward declarations: only pointers to these types are used below. */
union bpf_attr;
struct bpf_prog;
#ifdef CONFIG_NET
/*
 * Look up the program attached at attr->query.attach_type on the netns
 * referred to by attr->query.target_fd. Returns -EINVAL if query_flags
 * is non-zero or the attach type has no netns attach point.
 */
int netns_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
/*
 * Attach prog to the calling task's netns at attr->attach_type.
 * Returns -EINVAL if the attach type has no netns attach point.
 */
int netns_bpf_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog);
/*
 * Detach whatever program is attached at attr->attach_type on the calling
 * task's netns. Returns -EINVAL for an unsupported attach type and
 * -ENOENT when nothing is attached.
 */
int netns_bpf_prog_detach(const union bpf_attr *attr);
#else
/* Stub for builds without CONFIG_NET: querying is not supported. */
static inline int
netns_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr)
{
	return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_NET: attaching is not supported. */
static inline int
netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	return -EOPNOTSUPP;
}
/* Stub for builds without CONFIG_NET: detaching is not supported. */
static inline int
netns_bpf_prog_detach(const union bpf_attr *attr)
{
	return -EOPNOTSUPP;
}
#endif
#endif /* _BPF_NETNS_H */

View File

@ -1283,32 +1283,6 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
const struct flow_dissector_key *key, const struct flow_dissector_key *key,
unsigned int key_count); unsigned int key_count);
#ifdef CONFIG_NET
int skb_flow_dissector_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog);
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
#else
static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return -EOPNOTSUPP;
}
static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog)
{
return -EOPNOTSUPP;
}
static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
{
return -EOPNOTSUPP;
}
#endif
struct bpf_flow_dissector; struct bpf_flow_dissector;
bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
__be16 proto, int nhoff, int hlen, unsigned int flags); __be16 proto, int nhoff, int hlen, unsigned int flags);

View File

@ -33,6 +33,7 @@
#include <net/netns/mpls.h> #include <net/netns/mpls.h>
#include <net/netns/can.h> #include <net/netns/can.h>
#include <net/netns/xdp.h> #include <net/netns/xdp.h>
#include <net/netns/bpf.h>
#include <linux/ns_common.h> #include <linux/ns_common.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
@ -162,7 +163,8 @@ struct net {
#endif #endif
struct net_generic __rcu *gen; struct net_generic __rcu *gen;
struct bpf_prog __rcu *flow_dissector_prog; /* Used to store attached BPF programs */
struct netns_bpf bpf;
/* Note : following structs are cache line aligned */ /* Note : following structs are cache line aligned */
#ifdef CONFIG_XFRM #ifdef CONFIG_XFRM

17
include/net/netns/bpf.h Normal file
View File

@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* BPF programs attached to network namespace
*/
#ifndef __NETNS_BPF_H__
#define __NETNS_BPF_H__
#include <linux/bpf-netns.h>
struct bpf_prog;
/*
 * Per-netns BPF state: one RCU-managed program pointer for each
 * netns attach point, indexed by enum netns_bpf_attach_type.
 */
struct netns_bpf {
	struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE];
};
#endif /* __NETNS_BPF_H__ */

View File

@ -27,6 +27,7 @@
#include <uapi/linux/btf.h> #include <uapi/linux/btf.h>
#include <linux/bpf_lsm.h> #include <linux/bpf_lsm.h>
#include <linux/poll.h> #include <linux/poll.h>
#include <linux/bpf-netns.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@ -2868,7 +2869,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
ret = lirc_prog_attach(attr, prog); ret = lirc_prog_attach(attr, prog);
break; break;
case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_FLOW_DISSECTOR:
ret = skb_flow_dissector_bpf_prog_attach(attr, prog); ret = netns_bpf_prog_attach(attr, prog);
break; break;
case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SKB:
@ -2908,7 +2909,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_FLOW_DISSECTOR:
if (!capable(CAP_NET_ADMIN)) if (!capable(CAP_NET_ADMIN))
return -EPERM; return -EPERM;
return skb_flow_dissector_bpf_prog_detach(attr); return netns_bpf_prog_detach(attr);
case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_CGROUP_SOCK:
@ -2961,7 +2962,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_LIRC_MODE2: case BPF_LIRC_MODE2:
return lirc_prog_query(attr, uattr); return lirc_prog_query(attr, uattr);
case BPF_FLOW_DISSECTOR: case BPF_FLOW_DISSECTOR:
return skb_flow_dissector_prog_query(attr, uattr); return netns_bpf_prog_query(attr, uattr);
default: default:
return -EINVAL; return -EINVAL;
} }

View File

@ -31,8 +31,10 @@
#include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_labels.h>
#endif #endif
#include <linux/bpf-netns.h>
static DEFINE_MUTEX(flow_dissector_mutex); /* Protects updates to netns_bpf */
DEFINE_MUTEX(netns_bpf_mutex);
static void dissector_set_key(struct flow_dissector *flow_dissector, static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id) enum flow_dissector_key_id key_id)
@ -70,23 +72,28 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
} }
EXPORT_SYMBOL(skb_flow_dissector_init); EXPORT_SYMBOL(skb_flow_dissector_init);
int skb_flow_dissector_prog_query(const union bpf_attr *attr, int netns_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr) union bpf_attr __user *uattr)
{ {
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
u32 prog_id, prog_cnt = 0, flags = 0; u32 prog_id, prog_cnt = 0, flags = 0;
enum netns_bpf_attach_type type;
struct bpf_prog *attached; struct bpf_prog *attached;
struct net *net; struct net *net;
if (attr->query.query_flags) if (attr->query.query_flags)
return -EINVAL; return -EINVAL;
type = to_netns_bpf_attach_type(attr->query.attach_type);
if (type < 0)
return -EINVAL;
net = get_net_ns_by_fd(attr->query.target_fd); net = get_net_ns_by_fd(attr->query.target_fd);
if (IS_ERR(net)) if (IS_ERR(net))
return PTR_ERR(net); return PTR_ERR(net);
rcu_read_lock(); rcu_read_lock();
attached = rcu_dereference(net->flow_dissector_prog); attached = rcu_dereference(net->bpf.progs[type]);
if (attached) { if (attached) {
prog_cnt = 1; prog_cnt = 1;
prog_id = attached->aux->id; prog_id = attached->aux->id;
@ -112,6 +119,7 @@ int skb_flow_dissector_prog_query(const union bpf_attr *attr,
static int flow_dissector_bpf_prog_attach(struct net *net, static int flow_dissector_bpf_prog_attach(struct net *net,
struct bpf_prog *prog) struct bpf_prog *prog)
{ {
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
struct bpf_prog *attached; struct bpf_prog *attached;
if (net == &init_net) { if (net == &init_net) {
@ -125,74 +133,97 @@ static int flow_dissector_bpf_prog_attach(struct net *net,
for_each_net(ns) { for_each_net(ns) {
if (ns == &init_net) if (ns == &init_net)
continue; continue;
if (rcu_access_pointer(ns->flow_dissector_prog)) if (rcu_access_pointer(ns->bpf.progs[type]))
return -EEXIST; return -EEXIST;
} }
} else { } else {
/* Make sure root flow dissector is not attached /* Make sure root flow dissector is not attached
* when attaching to the non-root namespace. * when attaching to the non-root namespace.
*/ */
if (rcu_access_pointer(init_net.flow_dissector_prog)) if (rcu_access_pointer(init_net.bpf.progs[type]))
return -EEXIST; return -EEXIST;
} }
attached = rcu_dereference_protected(net->flow_dissector_prog, attached = rcu_dereference_protected(net->bpf.progs[type],
lockdep_is_held(&flow_dissector_mutex)); lockdep_is_held(&netns_bpf_mutex));
if (attached == prog) if (attached == prog)
/* The same program cannot be attached twice */ /* The same program cannot be attached twice */
return -EINVAL; return -EINVAL;
rcu_assign_pointer(net->flow_dissector_prog, prog); rcu_assign_pointer(net->bpf.progs[type], prog);
if (attached) if (attached)
bpf_prog_put(attached); bpf_prog_put(attached);
return 0; return 0;
} }
int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
struct bpf_prog *prog)
{ {
enum netns_bpf_attach_type type;
struct net *net;
int ret; int ret;
mutex_lock(&flow_dissector_mutex); type = to_netns_bpf_attach_type(attr->attach_type);
ret = flow_dissector_bpf_prog_attach(current->nsproxy->net_ns, prog); if (type < 0)
mutex_unlock(&flow_dissector_mutex); return -EINVAL;
net = current->nsproxy->net_ns;
mutex_lock(&netns_bpf_mutex);
switch (type) {
case NETNS_BPF_FLOW_DISSECTOR:
ret = flow_dissector_bpf_prog_attach(net, prog);
break;
default:
ret = -EINVAL;
break;
}
mutex_unlock(&netns_bpf_mutex);
return ret; return ret;
} }
static int flow_dissector_bpf_prog_detach(struct net *net) /* Must be called with netns_bpf_mutex held. */
static int __netns_bpf_prog_detach(struct net *net,
enum netns_bpf_attach_type type)
{ {
struct bpf_prog *attached; struct bpf_prog *attached;
mutex_lock(&flow_dissector_mutex); attached = rcu_dereference_protected(net->bpf.progs[type],
attached = rcu_dereference_protected(net->flow_dissector_prog, lockdep_is_held(&netns_bpf_mutex));
lockdep_is_held(&flow_dissector_mutex)); if (!attached)
if (!attached) {
mutex_unlock(&flow_dissector_mutex);
return -ENOENT; return -ENOENT;
} RCU_INIT_POINTER(net->bpf.progs[type], NULL);
RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
bpf_prog_put(attached); bpf_prog_put(attached);
mutex_unlock(&flow_dissector_mutex);
return 0; return 0;
} }
int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) int netns_bpf_prog_detach(const union bpf_attr *attr)
{ {
return flow_dissector_bpf_prog_detach(current->nsproxy->net_ns); enum netns_bpf_attach_type type;
int ret;
type = to_netns_bpf_attach_type(attr->attach_type);
if (type < 0)
return -EINVAL;
mutex_lock(&netns_bpf_mutex);
ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type);
mutex_unlock(&netns_bpf_mutex);
return ret;
} }
static void __net_exit flow_dissector_pernet_pre_exit(struct net *net) static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
{ {
/* We're not racing with attach/detach because there are no enum netns_bpf_attach_type type;
* references to netns left when pre_exit gets called.
*/ mutex_lock(&netns_bpf_mutex);
if (rcu_access_pointer(net->flow_dissector_prog)) for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
flow_dissector_bpf_prog_detach(net); __netns_bpf_prog_detach(net, type);
mutex_unlock(&netns_bpf_mutex);
} }
static struct pernet_operations flow_dissector_pernet_ops __net_initdata = { static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
.pre_exit = flow_dissector_pernet_pre_exit, .pre_exit = netns_bpf_pernet_pre_exit,
}; };
/** /**
@ -1044,11 +1075,13 @@ bool __skb_flow_dissect(const struct net *net,
WARN_ON_ONCE(!net); WARN_ON_ONCE(!net);
if (net) { if (net) {
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
rcu_read_lock(); rcu_read_lock();
attached = rcu_dereference(init_net.flow_dissector_prog); attached = rcu_dereference(init_net.bpf.progs[type]);
if (!attached) if (!attached)
attached = rcu_dereference(net->flow_dissector_prog); attached = rcu_dereference(net->bpf.progs[type]);
if (attached) { if (attached) {
struct bpf_flow_keys flow_keys; struct bpf_flow_keys flow_keys;
@ -1870,6 +1903,6 @@ static int __init init_default_flow_dissectors(void)
flow_keys_basic_dissector_keys, flow_keys_basic_dissector_keys,
ARRAY_SIZE(flow_keys_basic_dissector_keys)); ARRAY_SIZE(flow_keys_basic_dissector_keys));
return register_pernet_subsys(&flow_dissector_pernet_ops); return register_pernet_subsys(&netns_bpf_pernet_ops);
} }
core_initcall(init_default_flow_dissectors); core_initcall(init_default_flow_dissectors);