forked from luck/tmp_suning_uos_patched
bpf: Allocate ID for bpf_link
Generate ID for each bpf_link using IDR, similarly to bpf_map and bpf_prog. bpf_link creation, initialization, attachment, and exposing to user-space through FD and ID is a complicated multi-step process; abstract it away through bpf_link_primer and the bpf_link_prime(), bpf_link_settle(), and bpf_link_cleanup() internal API. They guarantee that until bpf_link is properly attached, user-space won't be able to access a partially-initialized bpf_link either from FD or ID. All this allows simplifying bpf_link attachment and error handling code. Signed-off-by: Andrii Nakryiko <andriin@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Link: https://lore.kernel.org/bpf/20200429001614.1544-3-andriin@fb.com
This commit is contained in:
parent
f9d041271c
commit
a3b80e1078
|
@ -1085,11 +1085,19 @@ int bpf_prog_new_fd(struct bpf_prog *prog);
|
||||||
|
|
||||||
struct bpf_link {
|
struct bpf_link {
|
||||||
atomic64_t refcnt;
|
atomic64_t refcnt;
|
||||||
|
u32 id;
|
||||||
const struct bpf_link_ops *ops;
|
const struct bpf_link_ops *ops;
|
||||||
struct bpf_prog *prog;
|
struct bpf_prog *prog;
|
||||||
struct work_struct work;
|
struct work_struct work;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct bpf_link_primer {
|
||||||
|
struct bpf_link *link;
|
||||||
|
struct file *file;
|
||||||
|
int fd;
|
||||||
|
u32 id;
|
||||||
|
};
|
||||||
|
|
||||||
struct bpf_link_ops {
|
struct bpf_link_ops {
|
||||||
void (*release)(struct bpf_link *link);
|
void (*release)(struct bpf_link *link);
|
||||||
void (*dealloc)(struct bpf_link *link);
|
void (*dealloc)(struct bpf_link *link);
|
||||||
|
@ -1097,10 +1105,11 @@ struct bpf_link_ops {
|
||||||
struct bpf_prog *old_prog);
|
struct bpf_prog *old_prog);
|
||||||
};
|
};
|
||||||
|
|
||||||
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
|
void bpf_link_init(struct bpf_link *link,
|
||||||
struct bpf_prog *prog);
|
const struct bpf_link_ops *ops, struct bpf_prog *prog);
|
||||||
void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
|
int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
|
||||||
int link_fd);
|
int bpf_link_settle(struct bpf_link_primer *primer);
|
||||||
|
void bpf_link_cleanup(struct bpf_link_primer *primer);
|
||||||
void bpf_link_inc(struct bpf_link *link);
|
void bpf_link_inc(struct bpf_link *link);
|
||||||
void bpf_link_put(struct bpf_link *link);
|
void bpf_link_put(struct bpf_link *link);
|
||||||
int bpf_link_new_fd(struct bpf_link *link);
|
int bpf_link_new_fd(struct bpf_link *link);
|
||||||
|
|
|
@ -523,6 +523,7 @@ union bpf_attr {
|
||||||
__u32 prog_id;
|
__u32 prog_id;
|
||||||
__u32 map_id;
|
__u32 map_id;
|
||||||
__u32 btf_id;
|
__u32 btf_id;
|
||||||
|
__u32 link_id;
|
||||||
};
|
};
|
||||||
__u32 next_id;
|
__u32 next_id;
|
||||||
__u32 open_flags;
|
__u32 open_flags;
|
||||||
|
|
|
@ -841,10 +841,10 @@ const struct bpf_link_ops bpf_cgroup_link_lops = {
|
||||||
|
|
||||||
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
|
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
|
||||||
{
|
{
|
||||||
|
struct bpf_link_primer link_primer;
|
||||||
struct bpf_cgroup_link *link;
|
struct bpf_cgroup_link *link;
|
||||||
struct file *link_file;
|
|
||||||
struct cgroup *cgrp;
|
struct cgroup *cgrp;
|
||||||
int err, link_fd;
|
int err;
|
||||||
|
|
||||||
if (attr->link_create.flags)
|
if (attr->link_create.flags)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -862,22 +862,20 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
|
||||||
link->cgroup = cgrp;
|
link->cgroup = cgrp;
|
||||||
link->type = attr->link_create.attach_type;
|
link->type = attr->link_create.attach_type;
|
||||||
|
|
||||||
link_file = bpf_link_new_file(&link->link, &link_fd);
|
err = bpf_link_prime(&link->link, &link_primer);
|
||||||
if (IS_ERR(link_file)) {
|
if (err) {
|
||||||
kfree(link);
|
kfree(link);
|
||||||
err = PTR_ERR(link_file);
|
|
||||||
goto out_put_cgroup;
|
goto out_put_cgroup;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
|
err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
|
||||||
BPF_F_ALLOW_MULTI);
|
BPF_F_ALLOW_MULTI);
|
||||||
if (err) {
|
if (err) {
|
||||||
bpf_link_cleanup(&link->link, link_file, link_fd);
|
bpf_link_cleanup(&link_primer);
|
||||||
goto out_put_cgroup;
|
goto out_put_cgroup;
|
||||||
}
|
}
|
||||||
|
|
||||||
fd_install(link_fd, link_file);
|
return bpf_link_settle(&link_primer);
|
||||||
return link_fd;
|
|
||||||
|
|
||||||
out_put_cgroup:
|
out_put_cgroup:
|
||||||
cgroup_put(cgrp);
|
cgroup_put(cgrp);
|
||||||
|
|
|
@ -42,6 +42,8 @@ static DEFINE_IDR(prog_idr);
|
||||||
static DEFINE_SPINLOCK(prog_idr_lock);
|
static DEFINE_SPINLOCK(prog_idr_lock);
|
||||||
static DEFINE_IDR(map_idr);
|
static DEFINE_IDR(map_idr);
|
||||||
static DEFINE_SPINLOCK(map_idr_lock);
|
static DEFINE_SPINLOCK(map_idr_lock);
|
||||||
|
static DEFINE_IDR(link_idr);
|
||||||
|
static DEFINE_SPINLOCK(link_idr_lock);
|
||||||
|
|
||||||
int sysctl_unprivileged_bpf_disabled __read_mostly;
|
int sysctl_unprivileged_bpf_disabled __read_mostly;
|
||||||
|
|
||||||
|
@ -2181,25 +2183,38 @@ static int bpf_obj_get(const union bpf_attr *attr)
|
||||||
attr->file_flags);
|
attr->file_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
|
void bpf_link_init(struct bpf_link *link,
|
||||||
struct bpf_prog *prog)
|
const struct bpf_link_ops *ops, struct bpf_prog *prog)
|
||||||
{
|
{
|
||||||
atomic64_set(&link->refcnt, 1);
|
atomic64_set(&link->refcnt, 1);
|
||||||
|
link->id = 0;
|
||||||
link->ops = ops;
|
link->ops = ops;
|
||||||
link->prog = prog;
|
link->prog = prog;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void bpf_link_free_id(int id)
|
||||||
|
{
|
||||||
|
if (!id)
|
||||||
|
return;
|
||||||
|
|
||||||
|
spin_lock_bh(&link_idr_lock);
|
||||||
|
idr_remove(&link_idr, id);
|
||||||
|
spin_unlock_bh(&link_idr_lock);
|
||||||
|
}
|
||||||
|
|
||||||
/* Clean up bpf_link and corresponding anon_inode file and FD. After
|
/* Clean up bpf_link and corresponding anon_inode file and FD. After
|
||||||
* anon_inode is created, bpf_link can't be just kfree()'d due to deferred
|
* anon_inode is created, bpf_link can't be just kfree()'d due to deferred
|
||||||
* anon_inode's release() call. This helper manages marking bpf_link as
|
* anon_inode's release() call. This helper marks bpf_link as
|
||||||
* defunct, releases anon_inode file and puts reserved FD.
|
* defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
|
||||||
|
* is not decremented, it's the responsibility of a calling code that failed
|
||||||
|
* to complete bpf_link initialization.
|
||||||
*/
|
*/
|
||||||
void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
|
void bpf_link_cleanup(struct bpf_link_primer *primer)
|
||||||
int link_fd)
|
|
||||||
{
|
{
|
||||||
link->prog = NULL;
|
primer->link->prog = NULL;
|
||||||
fput(link_file);
|
bpf_link_free_id(primer->id);
|
||||||
put_unused_fd(link_fd);
|
fput(primer->file);
|
||||||
|
put_unused_fd(primer->fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bpf_link_inc(struct bpf_link *link)
|
void bpf_link_inc(struct bpf_link *link)
|
||||||
|
@ -2210,6 +2225,7 @@ void bpf_link_inc(struct bpf_link *link)
|
||||||
/* bpf_link_free is guaranteed to be called from process context */
|
/* bpf_link_free is guaranteed to be called from process context */
|
||||||
static void bpf_link_free(struct bpf_link *link)
|
static void bpf_link_free(struct bpf_link *link)
|
||||||
{
|
{
|
||||||
|
bpf_link_free_id(link->id);
|
||||||
if (link->prog) {
|
if (link->prog) {
|
||||||
/* detach BPF program, clean up used resources */
|
/* detach BPF program, clean up used resources */
|
||||||
link->ops->release(link);
|
link->ops->release(link);
|
||||||
|
@ -2275,9 +2291,11 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
|
||||||
bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
|
bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
|
||||||
seq_printf(m,
|
seq_printf(m,
|
||||||
"link_type:\t%s\n"
|
"link_type:\t%s\n"
|
||||||
|
"link_id:\t%u\n"
|
||||||
"prog_tag:\t%s\n"
|
"prog_tag:\t%s\n"
|
||||||
"prog_id:\t%u\n",
|
"prog_id:\t%u\n",
|
||||||
link_type,
|
link_type,
|
||||||
|
link->id,
|
||||||
prog_tag,
|
prog_tag,
|
||||||
prog->aux->id);
|
prog->aux->id);
|
||||||
}
|
}
|
||||||
|
@ -2292,36 +2310,76 @@ static const struct file_operations bpf_link_fops = {
|
||||||
.write = bpf_dummy_write,
|
.write = bpf_dummy_write,
|
||||||
};
|
};
|
||||||
|
|
||||||
int bpf_link_new_fd(struct bpf_link *link)
|
static int bpf_link_alloc_id(struct bpf_link *link)
|
||||||
{
|
{
|
||||||
return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
|
int id;
|
||||||
|
|
||||||
|
idr_preload(GFP_KERNEL);
|
||||||
|
spin_lock_bh(&link_idr_lock);
|
||||||
|
id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
|
||||||
|
spin_unlock_bh(&link_idr_lock);
|
||||||
|
idr_preload_end();
|
||||||
|
|
||||||
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but
|
/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file,
|
||||||
* instead of immediately installing fd in fdtable, just reserve it and
|
* reserving unused FD and allocating ID from link_idr. This is to be paired
|
||||||
* return. Caller then need to either install it with fd_install(fd, file) or
|
* with bpf_link_settle() to install FD and ID and expose bpf_link to
|
||||||
* release with put_unused_fd(fd).
|
* user-space, if bpf_link is successfully attached. If not, bpf_link and
|
||||||
* This is useful for cases when bpf_link attachment/detachment are
|
* pre-allocated resources are to be freed with bpf_link_cleanup() call. All the
|
||||||
* complicated and expensive operations and should be delayed until all the fd
|
* transient state is passed around in struct bpf_link_primer.
|
||||||
* reservation and anon_inode creation succeeds.
|
* This is the preferred way to create and initialize bpf_link, especially when
|
||||||
|
* there are complicated and expensive operations in between creating bpf_link
|
||||||
|
* itself and attaching it to BPF hook. By using bpf_link_prime() and
|
||||||
|
* bpf_link_settle() kernel code using bpf_link doesn't have to perform
|
||||||
|
* expensive (and potentially failing) roll back operations in a rare case
|
||||||
|
* that file, FD, or ID can't be allocated.
|
||||||
*/
|
*/
|
||||||
struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd)
|
int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
|
||||||
{
|
{
|
||||||
struct file *file;
|
struct file *file;
|
||||||
int fd;
|
int fd, id;
|
||||||
|
|
||||||
fd = get_unused_fd_flags(O_CLOEXEC);
|
fd = get_unused_fd_flags(O_CLOEXEC);
|
||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
return ERR_PTR(fd);
|
return fd;
|
||||||
|
|
||||||
file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
|
file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
|
||||||
if (IS_ERR(file)) {
|
if (IS_ERR(file)) {
|
||||||
put_unused_fd(fd);
|
put_unused_fd(fd);
|
||||||
return file;
|
return PTR_ERR(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
*reserved_fd = fd;
|
id = bpf_link_alloc_id(link);
|
||||||
return file;
|
if (id < 0) {
|
||||||
|
put_unused_fd(fd);
|
||||||
|
fput(file);
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
primer->link = link;
|
||||||
|
primer->file = file;
|
||||||
|
primer->fd = fd;
|
||||||
|
primer->id = id;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bpf_link_settle(struct bpf_link_primer *primer)
|
||||||
|
{
|
||||||
|
/* make bpf_link fetchable by ID */
|
||||||
|
spin_lock_bh(&link_idr_lock);
|
||||||
|
primer->link->id = primer->id;
|
||||||
|
spin_unlock_bh(&link_idr_lock);
|
||||||
|
/* make bpf_link fetchable by FD */
|
||||||
|
fd_install(primer->fd, primer->file);
|
||||||
|
/* pass through installed FD */
|
||||||
|
return primer->fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bpf_link_new_fd(struct bpf_link *link)
|
||||||
|
{
|
||||||
|
return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
|
struct bpf_link *bpf_link_get_from_fd(u32 ufd)
|
||||||
|
@ -2367,9 +2425,9 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
|
||||||
|
|
||||||
static int bpf_tracing_prog_attach(struct bpf_prog *prog)
|
static int bpf_tracing_prog_attach(struct bpf_prog *prog)
|
||||||
{
|
{
|
||||||
|
struct bpf_link_primer link_primer;
|
||||||
struct bpf_tracing_link *link;
|
struct bpf_tracing_link *link;
|
||||||
struct file *link_file;
|
int err;
|
||||||
int link_fd, err;
|
|
||||||
|
|
||||||
switch (prog->type) {
|
switch (prog->type) {
|
||||||
case BPF_PROG_TYPE_TRACING:
|
case BPF_PROG_TYPE_TRACING:
|
||||||
|
@ -2404,22 +2462,19 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
|
||||||
}
|
}
|
||||||
bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
|
bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
|
||||||
|
|
||||||
link_file = bpf_link_new_file(&link->link, &link_fd);
|
err = bpf_link_prime(&link->link, &link_primer);
|
||||||
if (IS_ERR(link_file)) {
|
if (err) {
|
||||||
kfree(link);
|
kfree(link);
|
||||||
err = PTR_ERR(link_file);
|
|
||||||
goto out_put_prog;
|
goto out_put_prog;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = bpf_trampoline_link_prog(prog);
|
err = bpf_trampoline_link_prog(prog);
|
||||||
if (err) {
|
if (err) {
|
||||||
bpf_link_cleanup(&link->link, link_file, link_fd);
|
bpf_link_cleanup(&link_primer);
|
||||||
goto out_put_prog;
|
goto out_put_prog;
|
||||||
}
|
}
|
||||||
|
|
||||||
fd_install(link_fd, link_file);
|
return bpf_link_settle(&link_primer);
|
||||||
return link_fd;
|
|
||||||
|
|
||||||
out_put_prog:
|
out_put_prog:
|
||||||
bpf_prog_put(prog);
|
bpf_prog_put(prog);
|
||||||
return err;
|
return err;
|
||||||
|
@ -2447,7 +2502,7 @@ static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
|
||||||
kfree(raw_tp);
|
kfree(raw_tp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const struct bpf_link_ops bpf_raw_tp_lops = {
|
static const struct bpf_link_ops bpf_raw_tp_link_lops = {
|
||||||
.release = bpf_raw_tp_link_release,
|
.release = bpf_raw_tp_link_release,
|
||||||
.dealloc = bpf_raw_tp_link_dealloc,
|
.dealloc = bpf_raw_tp_link_dealloc,
|
||||||
};
|
};
|
||||||
|
@ -2456,13 +2511,13 @@ static const struct bpf_link_ops bpf_raw_tp_lops = {
|
||||||
|
|
||||||
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
||||||
{
|
{
|
||||||
|
struct bpf_link_primer link_primer;
|
||||||
struct bpf_raw_tp_link *link;
|
struct bpf_raw_tp_link *link;
|
||||||
struct bpf_raw_event_map *btp;
|
struct bpf_raw_event_map *btp;
|
||||||
struct file *link_file;
|
|
||||||
struct bpf_prog *prog;
|
struct bpf_prog *prog;
|
||||||
const char *tp_name;
|
const char *tp_name;
|
||||||
char buf[128];
|
char buf[128];
|
||||||
int link_fd, err;
|
int err;
|
||||||
|
|
||||||
if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
|
if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -2515,24 +2570,22 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
goto out_put_btp;
|
goto out_put_btp;
|
||||||
}
|
}
|
||||||
bpf_link_init(&link->link, &bpf_raw_tp_lops, prog);
|
bpf_link_init(&link->link, &bpf_raw_tp_link_lops, prog);
|
||||||
link->btp = btp;
|
link->btp = btp;
|
||||||
|
|
||||||
link_file = bpf_link_new_file(&link->link, &link_fd);
|
err = bpf_link_prime(&link->link, &link_primer);
|
||||||
if (IS_ERR(link_file)) {
|
if (err) {
|
||||||
kfree(link);
|
kfree(link);
|
||||||
err = PTR_ERR(link_file);
|
|
||||||
goto out_put_btp;
|
goto out_put_btp;
|
||||||
}
|
}
|
||||||
|
|
||||||
err = bpf_probe_register(link->btp, prog);
|
err = bpf_probe_register(link->btp, prog);
|
||||||
if (err) {
|
if (err) {
|
||||||
bpf_link_cleanup(&link->link, link_file, link_fd);
|
bpf_link_cleanup(&link_primer);
|
||||||
goto out_put_btp;
|
goto out_put_btp;
|
||||||
}
|
}
|
||||||
|
|
||||||
fd_install(link_fd, link_file);
|
return bpf_link_settle(&link_primer);
|
||||||
return link_fd;
|
|
||||||
|
|
||||||
out_put_btp:
|
out_put_btp:
|
||||||
bpf_put_raw_tracepoint(btp);
|
bpf_put_raw_tracepoint(btp);
|
||||||
|
@ -3464,7 +3517,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
|
||||||
if (file->f_op == &bpf_link_fops) {
|
if (file->f_op == &bpf_link_fops) {
|
||||||
struct bpf_link *link = file->private_data;
|
struct bpf_link *link = file->private_data;
|
||||||
|
|
||||||
if (link->ops == &bpf_raw_tp_lops) {
|
if (link->ops == &bpf_raw_tp_link_lops) {
|
||||||
struct bpf_raw_tp_link *raw_tp =
|
struct bpf_raw_tp_link *raw_tp =
|
||||||
container_of(link, struct bpf_raw_tp_link, link);
|
container_of(link, struct bpf_raw_tp_link, link);
|
||||||
struct bpf_raw_event_map *btp = raw_tp->btp;
|
struct bpf_raw_event_map *btp = raw_tp->btp;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user