kernel_optimize_test/kernel/bpf/net_namespace.c

448 lines
10 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/filter.h>
#include <net/net_namespace.h>
/*
* Functions to manage BPF programs attached to netns
*/
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
struct bpf_netns_link {
struct bpf_link link;
enum bpf_attach_type type;
enum netns_bpf_attach_type netns_type;
/* We don't hold a ref to net in order to auto-detach the link
* when netns is going away. Instead we rely on pernet
* pre_exit callback to clear this pointer. Must be accessed
* with netns_bpf_mutex held.
*/
struct net *net;
struct list_head node; /* node in list of links attached to net */
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
};
/* Protects updates to netns_bpf */
DEFINE_MUTEX(netns_bpf_mutex);
/* Must be called with netns_bpf_mutex held. */
static void netns_bpf_run_array_detach(struct net *net,
enum netns_bpf_attach_type type)
{
struct bpf_prog_array *run_array;
run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
lockdep_is_held(&netns_bpf_mutex));
bpf_prog_array_free(run_array);
}
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
static void bpf_netns_link_release(struct bpf_link *link)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
enum netns_bpf_attach_type type = net_link->netns_type;
struct net *net;
mutex_lock(&netns_bpf_mutex);
bpf, netns: Fix use-after-free in pernet pre_exit callback Iterating over BPF links attached to network namespace in pre_exit hook is not safe, even if there is just one. Once link gets auto-detached, that is its back-pointer to net object is set to NULL, the link can be released and freed without waiting on netns_bpf_mutex, effectively causing the list element we are operating on to be freed. This leads to use-after-free when trying to access the next element on the list, as reported by KASAN. Bug can be triggered by destroying a network namespace, while also releasing a link attached to this network namespace. | ================================================================== | BUG: KASAN: use-after-free in netns_bpf_pernet_pre_exit+0xd9/0x130 | Read of size 8 at addr ffff888119e0d778 by task kworker/u8:2/177 | | CPU: 3 PID: 177 Comm: kworker/u8:2 Not tainted 5.8.0-rc1-00197-ga0c04c9d1008-dirty #776 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 | Workqueue: netns cleanup_net | Call Trace: | dump_stack+0x9e/0xe0 | print_address_description.constprop.0+0x3a/0x60 | ? netns_bpf_pernet_pre_exit+0xd9/0x130 | kasan_report.cold+0x1f/0x40 | ? netns_bpf_pernet_pre_exit+0xd9/0x130 | netns_bpf_pernet_pre_exit+0xd9/0x130 | cleanup_net+0x30b/0x5b0 | ? unregister_pernet_device+0x50/0x50 | ? rcu_read_lock_bh_held+0xb0/0xb0 | ? _raw_spin_unlock_irq+0x24/0x50 | process_one_work+0x4d1/0xa10 | ? lock_release+0x3e0/0x3e0 | ? pwq_dec_nr_in_flight+0x110/0x110 | ? rwlock_bug.part.0+0x60/0x60 | worker_thread+0x7a/0x5c0 | ? process_one_work+0xa10/0xa10 | kthread+0x1e3/0x240 | ? kthread_create_on_node+0xd0/0xd0 | ret_from_fork+0x1f/0x30 | | Allocated by task 280: | save_stack+0x1b/0x40 | __kasan_kmalloc.constprop.0+0xc2/0xd0 | netns_bpf_link_create+0xfe/0x650 | __do_sys_bpf+0x153a/0x2a50 | do_syscall_64+0x59/0x300 | entry_SYSCALL_64_after_hwframe+0x44/0xa9 | | Freed by task 198: | save_stack+0x1b/0x40 | __kasan_slab_free+0x12f/0x180 | kfree+0xed/0x350 | process_one_work+0x4d1/0xa10 | worker_thread+0x7a/0x5c0 | kthread+0x1e3/0x240 | ret_from_fork+0x1f/0x30 | | The buggy address belongs to the object at ffff888119e0d700 | which belongs to the cache kmalloc-192 of size 192 | The buggy address is located 120 bytes inside of | 192-byte region [ffff888119e0d700, ffff888119e0d7c0) | The buggy address belongs to the page: | page:ffffea0004678340 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 | flags: 0x2fffe0000000200(slab) | raw: 02fffe0000000200 ffffea00045ba8c0 0000000600000006 ffff88811a80ea80 | raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000 | page dumped because: kasan: bad access detected | | Memory state around the buggy address: | ffff888119e0d600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb | ffff888119e0d680: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc | >ffff888119e0d700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb | ^ | ffff888119e0d780: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc | ffff888119e0d800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb | ================================================================== Remove the "fast-path" for releasing a link that got auto-detached by a dying network namespace to fix it. This way as long as link is on the list and netns_bpf mutex is held, we have a guarantee that link memory can be accessed. An alternative way to fix this issue would be to safely iterate over the list of links and ensure there is no access to link object after detaching it. But, at the moment, optimizing synchronization overhead on link release without a workload in mind seems like an overkill. Fixes: ab53cad90eb1 ("bpf, netns: Keep a list of attached bpf_link's") Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20200630164541.1329993-1-jakub@cloudflare.com
2020-07-01 00:45:41 +08:00
/* We can race with cleanup_net, but if we see a non-NULL
* struct net pointer, pre_exit has not run yet and wait for
* netns_bpf_mutex.
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
*/
net = net_link->net;
if (!net)
goto out_unlock;
netns_bpf_run_array_detach(net, type);
list_del(&net_link->node);
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
out_unlock:
mutex_unlock(&netns_bpf_mutex);
}
static void bpf_netns_link_dealloc(struct bpf_link *link)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
kfree(net_link);
}
static int bpf_netns_link_update_prog(struct bpf_link *link,
struct bpf_prog *new_prog,
struct bpf_prog *old_prog)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
enum netns_bpf_attach_type type = net_link->netns_type;
struct bpf_prog_array *run_array;
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
struct net *net;
int ret = 0;
if (old_prog && old_prog != link->prog)
return -EPERM;
if (new_prog->type != link->prog->type)
return -EINVAL;
mutex_lock(&netns_bpf_mutex);
net = net_link->net;
if (!net || !check_net(net)) {
/* Link auto-detached or netns dying */
ret = -ENOLINK;
goto out_unlock;
}
run_array = rcu_dereference_protected(net->bpf.run_array[type],
lockdep_is_held(&netns_bpf_mutex));
WRITE_ONCE(run_array->items[0].prog, new_prog);
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
old_prog = xchg(&link->prog, new_prog);
bpf_prog_put(old_prog);
out_unlock:
mutex_unlock(&netns_bpf_mutex);
return ret;
}
static int bpf_netns_link_fill_info(const struct bpf_link *link,
struct bpf_link_info *info)
{
const struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
unsigned int inum = 0;
struct net *net;
mutex_lock(&netns_bpf_mutex);
net = net_link->net;
if (net && check_net(net))
inum = net->ns.inum;
mutex_unlock(&netns_bpf_mutex);
info->netns.netns_ino = inum;
info->netns.attach_type = net_link->type;
return 0;
}
static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
struct seq_file *seq)
{
struct bpf_link_info info = {};
bpf_netns_link_fill_info(link, &info);
seq_printf(seq,
"netns_ino:\t%u\n"
"attach_type:\t%u\n",
info.netns.netns_ino,
info.netns.attach_type);
}
static const struct bpf_link_ops bpf_netns_link_ops = {
.release = bpf_netns_link_release,
.dealloc = bpf_netns_link_dealloc,
.update_prog = bpf_netns_link_update_prog,
.fill_link_info = bpf_netns_link_fill_info,
.show_fdinfo = bpf_netns_link_show_fdinfo,
};
/* Must be called with netns_bpf_mutex held. */
static int __netns_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr,
struct net *net,
enum netns_bpf_attach_type type)
{
__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
struct bpf_prog_array *run_array;
u32 prog_cnt = 0, flags = 0;
run_array = rcu_dereference_protected(net->bpf.run_array[type],
lockdep_is_held(&netns_bpf_mutex));
if (run_array)
prog_cnt = bpf_prog_array_length(run_array);
if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
return -EFAULT;
if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
return -EFAULT;
if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
return 0;
return bpf_prog_array_copy_to_user(run_array, prog_ids,
attr->query.prog_cnt);
}
int netns_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
enum netns_bpf_attach_type type;
struct net *net;
int ret;
if (attr->query.query_flags)
return -EINVAL;
type = to_netns_bpf_attach_type(attr->query.attach_type);
if (type < 0)
return -EINVAL;
net = get_net_ns_by_fd(attr->query.target_fd);
if (IS_ERR(net))
return PTR_ERR(net);
mutex_lock(&netns_bpf_mutex);
ret = __netns_bpf_prog_query(attr, uattr, net, type);
mutex_unlock(&netns_bpf_mutex);
put_net(net);
return ret;
}
int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_prog_array *run_array;
enum netns_bpf_attach_type type;
struct bpf_prog *attached;
struct net *net;
int ret;
if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
return -EINVAL;
type = to_netns_bpf_attach_type(attr->attach_type);
if (type < 0)
return -EINVAL;
net = current->nsproxy->net_ns;
mutex_lock(&netns_bpf_mutex);
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
/* Attaching prog directly is not compatible with links */
if (!list_empty(&net->bpf.links[type])) {
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
ret = -EEXIST;
goto out_unlock;
}
switch (type) {
case NETNS_BPF_FLOW_DISSECTOR:
ret = flow_dissector_bpf_prog_attach_check(net, prog);
break;
default:
ret = -EINVAL;
break;
}
if (ret)
goto out_unlock;
attached = net->bpf.progs[type];
if (attached == prog) {
/* The same program cannot be attached twice */
ret = -EINVAL;
goto out_unlock;
}
run_array = rcu_dereference_protected(net->bpf.run_array[type],
lockdep_is_held(&netns_bpf_mutex));
if (run_array) {
WRITE_ONCE(run_array->items[0].prog, prog);
} else {
run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
if (!run_array) {
ret = -ENOMEM;
goto out_unlock;
}
run_array->items[0].prog = prog;
rcu_assign_pointer(net->bpf.run_array[type], run_array);
}
net->bpf.progs[type] = prog;
if (attached)
bpf_prog_put(attached);
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
out_unlock:
mutex_unlock(&netns_bpf_mutex);
return ret;
}
/* Must be called with netns_bpf_mutex held. */
static int __netns_bpf_prog_detach(struct net *net,
enum netns_bpf_attach_type type,
struct bpf_prog *old)
{
struct bpf_prog *attached;
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
/* Progs attached via links cannot be detached */
if (!list_empty(&net->bpf.links[type]))
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
return -EINVAL;
attached = net->bpf.progs[type];
if (!attached || attached != old)
return -ENOENT;
netns_bpf_run_array_detach(net, type);
net->bpf.progs[type] = NULL;
bpf_prog_put(attached);
return 0;
}
int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
enum netns_bpf_attach_type type;
struct bpf_prog *prog;
int ret;
if (attr->target_fd)
return -EINVAL;
type = to_netns_bpf_attach_type(attr->attach_type);
if (type < 0)
return -EINVAL;
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
if (IS_ERR(prog))
return PTR_ERR(prog);
mutex_lock(&netns_bpf_mutex);
ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
mutex_unlock(&netns_bpf_mutex);
bpf_prog_put(prog);
return ret;
}
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
enum netns_bpf_attach_type type)
{
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
struct bpf_prog_array *run_array;
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
int err;
mutex_lock(&netns_bpf_mutex);
/* Allow attaching only one prog or link for now */
if (!list_empty(&net->bpf.links[type])) {
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
err = -E2BIG;
goto out_unlock;
}
/* Links are not compatible with attaching prog directly */
if (net->bpf.progs[type]) {
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
err = -EEXIST;
goto out_unlock;
}
switch (type) {
case NETNS_BPF_FLOW_DISSECTOR:
err = flow_dissector_bpf_prog_attach_check(net, link->prog);
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
break;
default:
err = -EINVAL;
break;
}
if (err)
goto out_unlock;
run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
if (!run_array) {
err = -ENOMEM;
goto out_unlock;
}
run_array->items[0].prog = link->prog;
rcu_assign_pointer(net->bpf.run_array[type], run_array);
list_add_tail(&net_link->node, &net->bpf.links[type]);
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
out_unlock:
mutex_unlock(&netns_bpf_mutex);
return err;
}
int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
{
enum netns_bpf_attach_type netns_type;
struct bpf_link_primer link_primer;
struct bpf_netns_link *net_link;
enum bpf_attach_type type;
struct net *net;
int err;
if (attr->link_create.flags)
return -EINVAL;
type = attr->link_create.attach_type;
netns_type = to_netns_bpf_attach_type(type);
if (netns_type < 0)
return -EINVAL;
net = get_net_ns_by_fd(attr->link_create.target_fd);
if (IS_ERR(net))
return PTR_ERR(net);
net_link = kzalloc(sizeof(*net_link), GFP_USER);
if (!net_link) {
err = -ENOMEM;
goto out_put_net;
}
bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
&bpf_netns_link_ops, prog);
net_link->net = net;
net_link->type = type;
net_link->netns_type = netns_type;
err = bpf_link_prime(&net_link->link, &link_primer);
if (err) {
kfree(net_link);
goto out_put_net;
}
err = netns_bpf_link_attach(net, &net_link->link, netns_type);
if (err) {
bpf_link_cleanup(&link_primer);
goto out_put_net;
}
put_net(net);
return bpf_link_settle(&link_primer);
out_put_net:
put_net(net);
return err;
}
static int __net_init netns_bpf_pernet_init(struct net *net)
{
int type;
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
INIT_LIST_HEAD(&net->bpf.links[type]);
return 0;
}
static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
{
enum netns_bpf_attach_type type;
struct bpf_netns_link *net_link;
mutex_lock(&netns_bpf_mutex);
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
netns_bpf_run_array_detach(net, type);
list_for_each_entry(net_link, &net->bpf.links[type], node)
net_link->net = NULL; /* auto-detach link */
if (net->bpf.progs[type])
bpf_prog_put(net->bpf.progs[type]);
bpf: Add link-based BPF program attachment to network namespace Extend bpf() syscall subcommands that operate on bpf_link, that is LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to network namespaces (only flow dissector at the moment). Link-based and prog-based attachment can be used interchangeably, but only one can exist at a time. Attempts to attach a link when a prog is already attached directly, and the other way around, will be met with -EEXIST. Attempts to detach a program when link exists result in -EINVAL. Attachment of multiple links of same attach type to one netns is not supported with the intention to lift the restriction when a use-case presents itself. Because of that link create returns -E2BIG when trying to create another netns link, when one already exists. Link-based attachments to netns don't keep a netns alive by holding a ref to it. Instead links get auto-detached from netns when the latter is being destroyed, using a pernet pre_exit callback. When auto-detached, link lives in defunct state as long there are open FDs for it. -ENOLINK is returned if a user tries to update a defunct link. Because bpf_link to netns doesn't hold a ref to struct net, special care is taken when releasing, updating, or filling link info. The netns might be getting torn down when any of these link operations are in progress. That is why auto-detach and update/release/fill_info are synchronized by the same mutex. Also, link ops have to always check if auto-detach has not happened yet and if netns is still alive (refcnt > 0). Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
2020-05-31 16:28:38 +08:00
}
mutex_unlock(&netns_bpf_mutex);
}
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
.init = netns_bpf_pernet_init,
.pre_exit = netns_bpf_pernet_pre_exit,
};
static int __init netns_bpf_init(void)
{
return register_pernet_subsys(&netns_bpf_pernet_ops);
}
subsys_initcall(netns_bpf_init);