kernel_optimize_test/arch/x86/crypto/glue_helper-asm-avx.S
Thomas Gleixner c942fddf87 treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 157
Based on 3 normalized pattern(s):

  this program is free software you can redistribute it and or modify
  it under the terms of the gnu general public license as published by
  the free software foundation either version 2 of the license or at
  your option any later version this program is distributed in the
  hope that it will be useful but without any warranty without even
  the implied warranty of merchantability or fitness for a particular
  purpose see the gnu general public license for more details

  this program is free software you can redistribute it and or modify
  it under the terms of the gnu general public license as published by
  the free software foundation either version 2 of the license or at
  your option any later version [author] [kishon] [vijay] [abraham]
  [i] [kishon]@[ti] [com] this program is distributed in the hope that
  it will be useful but without any warranty without even the implied
  warranty of merchantability or fitness for a particular purpose see
  the gnu general public license for more details

  this program is free software you can redistribute it and or modify
  it under the terms of the gnu general public license as published by
  the free software foundation either version 2 of the license or at
  your option any later version [author] [graeme] [gregory]
  [gg]@[slimlogic] [co] [uk] [author] [kishon] [vijay] [abraham] [i]
  [kishon]@[ti] [com] [based] [on] [twl6030]_[usb] [c] [author] [hema]
  [hk] [hemahk]@[ti] [com] this program is distributed in the hope
  that it will be useful but without any warranty without even the
  implied warranty of merchantability or fitness for a particular
  purpose see the gnu general public license for more details

extracted by the scancode license scanner the SPDX license identifier

  GPL-2.0-or-later

has been chosen to replace the boilerplate/reference in 1105 file(s).

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Allison Randal <allison@lohutok.net>
Reviewed-by: Richard Fontana <rfontana@redhat.com>
Reviewed-by: Kate Stewart <kstewart@linuxfoundation.org>
Cc: linux-spdx@vger.kernel.org
Link: https://lkml.kernel.org/r/20190527070033.202006027@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-05-30 11:26:37 -07:00

141 lines
3.7 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Shared glue code for 128bit block ciphers, AVX assembler macros
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*/
/*
 * load_8way - read eight consecutive 16-byte blocks starting at src.
 *
 *   src:     base register of the input buffer
 *   x0..x7:  destination vector registers; xN receives the block found at
 *            byte offset N*16 from src
 *
 * The loads use vmovdqu, so src does not have to be 16-byte aligned.
 * Only x0..x7 are written.
 */
#define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
	vmovdqu	0*16(src), x0; \
	vmovdqu	1*16(src), x1; \
	vmovdqu	2*16(src), x2; \
	vmovdqu	3*16(src), x3; \
	vmovdqu	4*16(src), x4; \
	vmovdqu	5*16(src), x5; \
	vmovdqu	6*16(src), x6; \
	vmovdqu	7*16(src), x7;
/*
 * store_8way - write eight vector registers out as consecutive 16-byte blocks.
 *
 *   dst:     base register of the output buffer
 *   x0..x7:  source vector registers; xN is written to byte offset N*16
 *            from dst
 *
 * The stores use vmovdqu, so dst does not have to be 16-byte aligned.
 * No register is modified.
 */
#define store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
	vmovdqu	x0, 0*16(dst); \
	vmovdqu	x1, 1*16(dst); \
	vmovdqu	x2, 2*16(dst); \
	vmovdqu	x3, 3*16(dst); \
	vmovdqu	x4, 4*16(dst); \
	vmovdqu	x5, 5*16(dst); \
	vmovdqu	x6, 6*16(dst); \
	vmovdqu	x7, 7*16(dst);
/*
 * store_cbc_8way - chain eight blocks against the source buffer and store them.
 *
 *   src:     base register of the buffer supplying the chaining values
 *   dst:     base register of the output buffer
 *   x0..x7:  vector registers holding the eight blocks; x1..x7 are modified
 *
 * Block N (N = 1..7) is XORed with the source block at offset (N-1)*16,
 * i.e. with the block that precedes it in src.  x0 is stored as it is;
 * NOTE(review): the caller presumably applies the IV to x0 itself - confirm
 * at the call sites.
 *
 * Every read of src is issued before the first write to dst.
 */
#define store_cbc_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
	/* xN ^= src block N-1 */ \
	vpxor	0*16(src), x1, x1; \
	vpxor	1*16(src), x2, x2; \
	vpxor	2*16(src), x3, x3; \
	vpxor	3*16(src), x4, x4; \
	vpxor	4*16(src), x5, x5; \
	vpxor	5*16(src), x6, x6; \
	vpxor	6*16(src), x7, x7; \
	/* write all eight blocks out */ \
	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
/*
 * inc_le128 - add 1 to the 128-bit value in x, handled as two 64-bit lanes
 * (low qword, high qword) with a carry from the low lane into the high lane.
 *
 *   x:         vector register holding the value; updated in place
 *   minus_one: vector register expected to hold -1 in the low qword and 0 in
 *              the high qword (load_ctr_8way builds exactly this value)
 *   tmp:       scratch vector register; clobbered
 *
 * The per-line notes below assume minus_one has the layout described above.
 */
#define inc_le128(x, minus_one, tmp) \
/* tmp lane = all-ones where the x lane equals the minus_one lane; the low */ \
/* lane is therefore set iff the low qword of x is about to wrap to zero */ \
vpcmpeqq minus_one, x, tmp; \
/* low qword: x - (-1) = x + 1; high qword: x - 0 = unchanged */ \
vpsubq minus_one, x, x; \
/* move the low-lane compare result into the high lane, clear the low lane */ \
vpslldq $8, tmp, tmp; \
/* high qword: x - (-1) = x + 1 if the low qword wrapped, otherwise x - 0 */ \
vpsubq tmp, x, x;
/*
 * load_ctr_8way - derive eight successive counter blocks from the counter
 * stored at (iv) and write the counter back advanced by eight.
 *
 *   iv:       base register pointing at the 128-bit counter; it is read and
 *             written with vmovdqu, so it does not have to be aligned
 *   bswap:    byte-shuffle mask operand for vpshufb.  It is fetched with
 *             vmovdqa, so a memory operand has to be 16-byte aligned.
 *             NOTE(review): presumably a full byte reversal turning the
 *             little-endian working counter into big-endian blocks - the
 *             mask contents are not visible here, confirm at its definition
 *   x0..x7:   outputs; xN = shuffle(counter + N)
 *   t0..t2:   scratch vector registers; clobbered
 *
 * The counter is stepped with inc_le128 in its unshuffled form; only the
 * copies handed out in x0..x7 go through vpshufb.
 */
#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \
/* t0 = all-ones, then shifted right by 8 bytes: the constant inc_le128 needs */ \
vpcmpeqd t0, t0, t0; \
vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \
vmovdqa bswap, t1; \
\
/* load IV and byteswap: x7 keeps the running counter, x0 = shuffle(counter) */ \
vmovdqu (iv), x7; \
vpshufb t1, x7, x0; \
\
/* construct IVs: step the counter in x7, hand out a shuffled copy each time */ \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x1; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x2; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x3; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x4; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x5; \
inc_le128(x7, t0, t2); \
vpshufb t1, x7, x6; \
inc_le128(x7, t0, t2); \
/* x7 = counter + 7: keep an unshuffled copy in t2 before shuffling x7 */ \
vmovdqa x7, t2; \
vpshufb t1, x7, x7; \
/* the shuffle mask in t1 is no longer needed, so t1 serves as scratch here */ \
inc_le128(t2, t0, t1); \
/* write counter + 8 back for the next invocation */ \
vmovdqu t2, (iv);
/*
 * store_ctr_8way - XOR eight blocks with the source buffer and store them.
 *
 *   src:     base register of the buffer that is XORed in
 *   dst:     base register of the output buffer
 *   x0..x7:  vector registers holding the eight blocks; all are modified
 *
 * xN is XORed with the source block at offset N*16 and then written to the
 * same offset in dst.  Every read of src is issued before the first write
 * to dst.
 */
#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
	/* xN ^= src block N */ \
	vpxor	0*16(src), x0, x0; \
	vpxor	1*16(src), x1, x1; \
	vpxor	2*16(src), x2, x2; \
	vpxor	3*16(src), x3, x3; \
	vpxor	4*16(src), x4, x4; \
	vpxor	5*16(src), x5, x5; \
	vpxor	6*16(src), x6, x6; \
	vpxor	7*16(src), x7, x7; \
	/* write all eight blocks out */ \
	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
/*
 * gf128mul_x_ble - double the 128-bit value in iv (shift left by one bit)
 * and XOR in mask-selected correction terms derived from the top bit of
 * each 64-bit half.
 *
 *   iv:   vector register holding the value; updated in place
 *   mask: operand ANDed with the replicated top bits.
 *         NOTE(review): the constant is defined elsewhere; presumably its
 *         low qword holds the reduction byte 0x87 and its high qword holds
 *         1, which makes this a multiplication by x in GF(2^128) as used
 *         for XTS tweaks - confirm at the definition
 *   tmp:  scratch vector register; clobbered
 */
#define gf128mul_x_ble(iv, mask, tmp) \
/* each dword of tmp = 0 or -1, copying the sign bit of that dword of iv */ \
vpsrad $31, iv, tmp; \
/* double each 64-bit lane; the top bit of each lane is dropped here */ \
vpaddq iv, iv, iv; \
/* dwords of tmp become [3, 0, 1, 0] of the old tmp (lowest first): dword 0 */ \
/* now reflects old bit 127 of iv and dword 2 reflects old bit 63 */ \
vpshufd $0x13, tmp, tmp; \
/* keep only the bits of the replicated top bits that the mask selects */ \
vpand mask, tmp, tmp; \
/* fold the selected bits into the doubled value */ \
vpxor tmp, iv, iv;
/*
 * load_xts_8way - generate eight successive tweaks from the one stored at
 * (iv), XOR each with the matching source block, and park the tweaks in dst.
 *
 *   iv:       base register pointing at the current tweak; read and written
 *             with vmovdqu
 *   src:      base register of the input buffer
 *   dst:      base register of the output buffer; this macro fills its eight
 *             16-byte slots with the tweaks (store_xts_8way reads them back
 *             from there)
 *   x0..x7:   outputs; xN = source block N XOR tweak N
 *   tiv:      vector register holding the running tweak; clobbered
 *   t0, t1:   scratch vector registers; clobbered
 *   xts_gf128mul_and_shl1_mask:
 *             mask operand passed on to gf128mul_x_ble.  It is fetched with
 *             vmovdqa, so a memory operand has to be 16-byte aligned
 *
 * Tweak N+1 is obtained from tweak N with gf128mul_x_ble.  After the eighth
 * block one further step is taken and the result is written back to (iv).
 * Source block N is read before slot N of dst is overwritten.
 */
#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
t1, xts_gf128mul_and_shl1_mask) \
/* t0 = mask used by every gf128mul_x_ble step below */ \
vmovdqa xts_gf128mul_and_shl1_mask, t0; \
\
/* load IV: it is the tweak for block 0 */ \
vmovdqu (iv), tiv; \
vpxor (0*16)(src), tiv, x0; \
vmovdqu tiv, (0*16)(dst); \
\
/* construct and store IVs, also xor with source */ \
gf128mul_x_ble(tiv, t0, t1); \
vpxor (1*16)(src), tiv, x1; \
vmovdqu tiv, (1*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (2*16)(src), tiv, x2; \
vmovdqu tiv, (2*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (3*16)(src), tiv, x3; \
vmovdqu tiv, (3*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (4*16)(src), tiv, x4; \
vmovdqu tiv, (4*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (5*16)(src), tiv, x5; \
vmovdqu tiv, (5*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (6*16)(src), tiv, x6; \
vmovdqu tiv, (6*16)(dst); \
\
gf128mul_x_ble(tiv, t0, t1); \
vpxor (7*16)(src), tiv, x7; \
vmovdqu tiv, (7*16)(dst); \
\
/* one more step: the tweak for the block after these eight goes back to iv */ \
gf128mul_x_ble(tiv, t0, t1); \
vmovdqu tiv, (iv);
/*
 * store_xts_8way - XOR eight blocks with the values already held in dst and
 * store the results over them.
 *
 *   dst:     base register of the output buffer; on entry its eight 16-byte
 *            slots hold the values to XOR in (load_xts_8way leaves the
 *            per-block tweaks there)
 *   x0..x7:  vector registers holding the eight blocks; all are modified
 *
 * xN is XORed with slot N of dst and then written back to slot N.  All
 * eight slots are read before any of them is overwritten.
 */
#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
	/* xN ^= dst slot N */ \
	vpxor	0*16(dst), x0, x0; \
	vpxor	1*16(dst), x1, x1; \
	vpxor	2*16(dst), x2, x2; \
	vpxor	3*16(dst), x3, x3; \
	vpxor	4*16(dst), x4, x4; \
	vpxor	5*16(dst), x5, x5; \
	vpxor	6*16(dst), x6, x6; \
	vpxor	7*16(dst), x7, x7; \
	/* overwrite the slots with the final blocks */ \
	store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);