kernel_optimize_test/lib/chacha.c

118 lines
3.8 KiB
C
Raw Normal View History

/*
* The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
*
* Copyright (C) 2015 Martin Willi
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/bitops.h>
#include <linux/cryptohash.h>
#include <asm/unaligned.h>
#include <crypto/chacha.h>
static void chacha_permute(u32 *x, int nrounds)
{
int i;
/* whitelist the allowed round counts */
crypto: chacha - add XChaCha12 support Now that the generic implementation of ChaCha20 has been refactored to allow varying the number of rounds, add support for XChaCha12, which is the XSalsa construction applied to ChaCha12. ChaCha12 is one of the three ciphers specified by the original ChaCha paper (https://cr.yp.to/chacha/chacha-20080128.pdf: "ChaCha, a variant of Salsa20"), alongside ChaCha8 and ChaCha20. ChaCha12 is faster than ChaCha20 but has a lower, but still large, security margin. We need XChaCha12 support so that it can be used in the Adiantum encryption mode, which enables disk/file encryption on low-end mobile devices where AES-XTS is too slow as the CPUs lack AES instructions. We'd prefer XChaCha20 (the more popular variant), but it's too slow on some of our target devices, so at least in some cases we do need the XChaCha12-based version. In more detail, the problem is that Adiantum is still much slower than we're happy with, and encryption still has a quite noticeable effect on the feel of low-end devices. Users and vendors push back hard against encryption that degrades the user experience, which always risks encryption being disabled entirely. So we need to choose the fastest option that gives us a solid margin of security, and here that's XChaCha12. The best known attack on ChaCha breaks only 7 rounds and has 2^235 time complexity, so ChaCha12's security margin is still better than AES-256's. Much has been learned about cryptanalysis of ARX ciphers since Salsa20 was originally designed in 2005, and it now seems we can be comfortable with a smaller number of rounds. The eSTREAM project also suggests the 12-round version of Salsa20 as providing the best balance among the different variants: combining very good performance with a "comfortable margin of security". Note that it would be trivial to add vanilla ChaCha12 in addition to XChaCha12. However, it's unneeded for now and therefore is omitted. As discussed in the patch that introduced XChaCha20 support, I considered splitting the code into separate chacha-common, chacha20, xchacha20, and xchacha12 modules, so that these algorithms could be enabled/disabled independently. However, since nearly all the code is shared anyway, I ultimately decided there would have been little benefit to the added complexity. Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Martin Willi <martin@strongswan.org> Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2018-11-17 09:26:22 +08:00
WARN_ON_ONCE(nrounds != 20 && nrounds != 12);
for (i = 0; i < nrounds; i += 2) {
x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16);
x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16);
x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16);
x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16);
x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12);
x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12);
x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12);
x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12);
x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8);
x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8);
x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8);
x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8);
x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7);
x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7);
x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7);
x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7);
x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16);
x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16);
x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16);
x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16);
x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12);
x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12);
x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12);
x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12);
x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8);
x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8);
x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8);
x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8);
x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7);
x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7);
x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7);
x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7);
}
}
/**
* chacha_block - generate one keystream block and increment block counter
* @state: input state matrix (16 32-bit words)
* @stream: output keystream block (64 bytes)
crypto: chacha - add XChaCha12 support Now that the generic implementation of ChaCha20 has been refactored to allow varying the number of rounds, add support for XChaCha12, which is the XSalsa construction applied to ChaCha12. ChaCha12 is one of the three ciphers specified by the original ChaCha paper (https://cr.yp.to/chacha/chacha-20080128.pdf: "ChaCha, a variant of Salsa20"), alongside ChaCha8 and ChaCha20. ChaCha12 is faster than ChaCha20 but has a lower, but still large, security margin. We need XChaCha12 support so that it can be used in the Adiantum encryption mode, which enables disk/file encryption on low-end mobile devices where AES-XTS is too slow as the CPUs lack AES instructions. We'd prefer XChaCha20 (the more popular variant), but it's too slow on some of our target devices, so at least in some cases we do need the XChaCha12-based version. In more detail, the problem is that Adiantum is still much slower than we're happy with, and encryption still has a quite noticeable effect on the feel of low-end devices. Users and vendors push back hard against encryption that degrades the user experience, which always risks encryption being disabled entirely. So we need to choose the fastest option that gives us a solid margin of security, and here that's XChaCha12. The best known attack on ChaCha breaks only 7 rounds and has 2^235 time complexity, so ChaCha12's security margin is still better than AES-256's. Much has been learned about cryptanalysis of ARX ciphers since Salsa20 was originally designed in 2005, and it now seems we can be comfortable with a smaller number of rounds. The eSTREAM project also suggests the 12-round version of Salsa20 as providing the best balance among the different variants: combining very good performance with a "comfortable margin of security". Note that it would be trivial to add vanilla ChaCha12 in addition to XChaCha12. However, it's unneeded for now and therefore is omitted. As discussed in the patch that introduced XChaCha20 support, I considered splitting the code into separate chacha-common, chacha20, xchacha20, and xchacha12 modules, so that these algorithms could be enabled/disabled independently. However, since nearly all the code is shared anyway, I ultimately decided there would have been little benefit to the added complexity. Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Martin Willi <martin@strongswan.org> Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2018-11-17 09:26:22 +08:00
* @nrounds: number of rounds (20 or 12; 20 is recommended)
*
* This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
* The caller has already converted the endianness of the input. This function
* also handles incrementing the block counter in the input matrix.
*/
void chacha_block(u32 *state, u8 *stream, int nrounds)
{
u32 x[16];
int i;
memcpy(x, state, 64);
chacha_permute(x, nrounds);
for (i = 0; i < ARRAY_SIZE(x); i++)
put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
state[12]++;
}
EXPORT_SYMBOL(chacha_block);
/**
* hchacha_block - abbreviated ChaCha core, for XChaCha
* @in: input state matrix (16 32-bit words)
* @out: output (8 32-bit words)
crypto: chacha - add XChaCha12 support Now that the generic implementation of ChaCha20 has been refactored to allow varying the number of rounds, add support for XChaCha12, which is the XSalsa construction applied to ChaCha12. ChaCha12 is one of the three ciphers specified by the original ChaCha paper (https://cr.yp.to/chacha/chacha-20080128.pdf: "ChaCha, a variant of Salsa20"), alongside ChaCha8 and ChaCha20. ChaCha12 is faster than ChaCha20 but has a lower, but still large, security margin. We need XChaCha12 support so that it can be used in the Adiantum encryption mode, which enables disk/file encryption on low-end mobile devices where AES-XTS is too slow as the CPUs lack AES instructions. We'd prefer XChaCha20 (the more popular variant), but it's too slow on some of our target devices, so at least in some cases we do need the XChaCha12-based version. In more detail, the problem is that Adiantum is still much slower than we're happy with, and encryption still has a quite noticeable effect on the feel of low-end devices. Users and vendors push back hard against encryption that degrades the user experience, which always risks encryption being disabled entirely. So we need to choose the fastest option that gives us a solid margin of security, and here that's XChaCha12. The best known attack on ChaCha breaks only 7 rounds and has 2^235 time complexity, so ChaCha12's security margin is still better than AES-256's. Much has been learned about cryptanalysis of ARX ciphers since Salsa20 was originally designed in 2005, and it now seems we can be comfortable with a smaller number of rounds. The eSTREAM project also suggests the 12-round version of Salsa20 as providing the best balance among the different variants: combining very good performance with a "comfortable margin of security". Note that it would be trivial to add vanilla ChaCha12 in addition to XChaCha12. However, it's unneeded for now and therefore is omitted. As discussed in the patch that introduced XChaCha20 support, I considered splitting the code into separate chacha-common, chacha20, xchacha20, and xchacha12 modules, so that these algorithms could be enabled/disabled independently. However, since nearly all the code is shared anyway, I ultimately decided there would have been little benefit to the added complexity. Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Martin Willi <martin@strongswan.org> Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
2018-11-17 09:26:22 +08:00
* @nrounds: number of rounds (20 or 12; 20 is recommended)
*
* HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
* towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha
* skips the final addition of the initial state, and outputs only certain words
* of the state. It should not be used for streaming directly.
*/
void hchacha_block(const u32 *in, u32 *out, int nrounds)
{
u32 x[16];
memcpy(x, in, 64);
chacha_permute(x, nrounds);
memcpy(&out[0], &x[0], 16);
memcpy(&out[4], &x[12], 16);
}
EXPORT_SYMBOL(hchacha_block);