forked from luck/tmp_suning_uos_patched
bpf, lpm: make longest_prefix_match() faster
At LPC 2018 in Vancouver, Vlad Dumitrescu mentioned that longest_prefix_match() has a high cost [1].

One reason for that cost is a loop handling one byte at a time.

We can handle more bytes at a time, if enough attention is paid to endianness.

I was able to remove ~55% of longest_prefix_match() CPU costs.

[1] https://linuxplumbersconf.org/event/2/contributions/88/attachments/76/87/lpc-bpf-2018-shaping.pdf

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Vlad Dumitrescu <vladum@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
parent
e4b0c94bd2
commit
8d75839b84
|
@ -168,20 +168,59 @@ static size_t longest_prefix_match(const struct lpm_trie *trie,
|
||||||
const struct lpm_trie_node *node,
|
const struct lpm_trie_node *node,
|
||||||
const struct bpf_lpm_trie_key *key)
|
const struct bpf_lpm_trie_key *key)
|
||||||
{
|
{
|
||||||
size_t prefixlen = 0;
|
u32 limit = min(node->prefixlen, key->prefixlen);
|
||||||
size_t i;
|
u32 prefixlen = 0, i = 0;
|
||||||
|
|
||||||
for (i = 0; i < trie->data_size; i++) {
|
BUILD_BUG_ON(offsetof(struct lpm_trie_node, data) % sizeof(u32));
|
||||||
size_t b;
|
BUILD_BUG_ON(offsetof(struct bpf_lpm_trie_key, data) % sizeof(u32));
|
||||||
|
|
||||||
b = 8 - fls(node->data[i] ^ key->data[i]);
|
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(CONFIG_64BIT)
|
||||||
prefixlen += b;
|
|
||||||
|
|
||||||
if (prefixlen >= node->prefixlen || prefixlen >= key->prefixlen)
|
/* data_size >= 16 has very small probability.
|
||||||
return min(node->prefixlen, key->prefixlen);
|
* We do not use a loop for optimal code generation.
|
||||||
|
*/
|
||||||
|
if (trie->data_size >= 8) {
|
||||||
|
u64 diff = be64_to_cpu(*(__be64 *)node->data ^
|
||||||
|
*(__be64 *)key->data);
|
||||||
|
|
||||||
if (b < 8)
|
prefixlen = 64 - fls64(diff);
|
||||||
break;
|
if (prefixlen >= limit)
|
||||||
|
return limit;
|
||||||
|
if (diff)
|
||||||
|
return prefixlen;
|
||||||
|
i = 8;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
while (trie->data_size >= i + 4) {
|
||||||
|
u32 diff = be32_to_cpu(*(__be32 *)&node->data[i] ^
|
||||||
|
*(__be32 *)&key->data[i]);
|
||||||
|
|
||||||
|
prefixlen += 32 - fls(diff);
|
||||||
|
if (prefixlen >= limit)
|
||||||
|
return limit;
|
||||||
|
if (diff)
|
||||||
|
return prefixlen;
|
||||||
|
i += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (trie->data_size >= i + 2) {
|
||||||
|
u16 diff = be16_to_cpu(*(__be16 *)&node->data[i] ^
|
||||||
|
*(__be16 *)&key->data[i]);
|
||||||
|
|
||||||
|
prefixlen += 16 - fls(diff);
|
||||||
|
if (prefixlen >= limit)
|
||||||
|
return limit;
|
||||||
|
if (diff)
|
||||||
|
return prefixlen;
|
||||||
|
i += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (trie->data_size >= i + 1) {
|
||||||
|
prefixlen += 8 - fls(node->data[i] ^ key->data[i]);
|
||||||
|
|
||||||
|
if (prefixlen >= limit)
|
||||||
|
return limit;
|
||||||
}
|
}
|
||||||
|
|
||||||
return prefixlen;
|
return prefixlen;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user