kernel_optimize_test/samples/bpf/tcbpf1_kern.c
Alexei Starovoitov 27b29f6305 bpf: add bpf_redirect() helper
Existing bpf_clone_redirect() helper clones skb before redirecting
it to RX or TX of destination netdev.
Introduce bpf_redirect() helper that does that without cloning.

Benchmarked with two hosts using 10G ixgbe NICs.
One host is doing line rate pktgen.
Another host is configured as:
$ tc qdisc add dev $dev ingress
$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
   action bpf run object-file tcbpf1_kern.o section clone_redirect_xmit drop
so it receives the packet on $dev and immediately xmits it on $dev + 1
The section 'clone_redirect_xmit' in tcbpf1_kern.o file has the program
that does bpf_clone_redirect() and performance is 2.0 Mpps

$ tc filter add dev $dev root pref 10 u32 match u32 0 0 flowid 1:2 \
   action bpf run object-file tcbpf1_kern.o section redirect_xmit drop
which is using bpf_redirect() - 2.4 Mpps

and using cls_bpf with integrated actions as:
$ tc filter add dev $dev root pref 10 \
  bpf run object-file tcbpf1_kern.o section redirect_xmit integ_act classid 1
performance is 2.5 Mpps

To summarize:
u32+act_bpf using clone_redirect - 2.0 Mpps
u32+act_bpf using redirect - 2.4 Mpps
cls_bpf using redirect - 2.5 Mpps

For comparison linux bridge in this setup is doing 2.1 Mpps
and ixgbe rx + drop in ip_rcv - 7.8 Mpps

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-17 21:09:07 -07:00

90 lines
2.5 KiB
C

#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
#include "bpf_helpers.h"
/* compiler workaround */
#define _htonl __builtin_bswap32
static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
{
bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1);
}
#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
{
__u8 old_tos = load_byte(skb, TOS_OFF);
bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
}
#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check))
#define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr))
#define IS_PSEUDO 0x10
static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
{
__u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
}
#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
{
__u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
}
SEC("classifier")
int bpf_prog1(struct __sk_buff *skb)
{
__u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
long *value;
if (proto == IPPROTO_TCP) {
set_ip_tos(skb, 8);
set_tcp_ip_src(skb, 0xA010101);
set_tcp_dest_port(skb, 5001);
}
return 0;
}
SEC("redirect_xmit")
int _redirect_xmit(struct __sk_buff *skb)
{
return bpf_redirect(skb->ifindex + 1, 0);
}
SEC("redirect_recv")
int _redirect_recv(struct __sk_buff *skb)
{
return bpf_redirect(skb->ifindex + 1, 1);
}
SEC("clone_redirect_xmit")
int _clone_redirect_xmit(struct __sk_buff *skb)
{
bpf_clone_redirect(skb, skb->ifindex + 1, 0);
return TC_ACT_SHOT;
}
SEC("clone_redirect_recv")
int _clone_redirect_recv(struct __sk_buff *skb)
{
bpf_clone_redirect(skb, skb->ifindex + 1, 1);
return TC_ACT_SHOT;
}
char _license[] SEC("license") = "GPL";