7#include <botan/internal/ghash.h>
15void GHASH::ghash_multiply_vperm(uint8_t x[16], const uint64_t HM[256], const uint8_t input_bytes[],
size_t blocks) {
16 const __m128i BSWAP_MASK = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
18 const __m128i* HM_mm =
reinterpret_cast<const __m128i*
>(HM);
20 __m128i
X = _mm_loadu_si128(
reinterpret_cast<__m128i*
>(x));
21 X = _mm_shuffle_epi8(
X, BSWAP_MASK);
23 const __m128i ones = _mm_set1_epi8(-1);
25 for(
size_t b = 0;
b != blocks; ++
b) {
26 __m128i M = _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(input_bytes) +
b);
27 M = _mm_shuffle_epi8(M, BSWAP_MASK);
29 X = _mm_xor_si128(
X, M);
31 __m128i
Z = _mm_setzero_si128();
33 for(
size_t i = 0; i != 64; i += 2) {
34 const __m128i HM0 = _mm_loadu_si128(HM_mm + 2 * i);
35 const __m128i HM1 = _mm_loadu_si128(HM_mm + 2 * i + 1);
36 const __m128i HM2 = _mm_loadu_si128(HM_mm + 2 * i + 2);
37 const __m128i HM3 = _mm_loadu_si128(HM_mm + 2 * i + 3);
39 const __m128i XMASK1 = _mm_add_epi64(_mm_srli_epi64(
X, 63), ones);
40 X = _mm_slli_epi64(
X, 1);
41 const __m128i XMASK2 = _mm_add_epi64(_mm_srli_epi64(
X, 63), ones);
42 X = _mm_slli_epi64(
X, 1);
44 Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpackhi_epi64(XMASK1, XMASK1), HM0));
45 Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpacklo_epi64(XMASK1, XMASK1), HM1));
46 Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpackhi_epi64(XMASK2, XMASK2), HM2));
47 Z = _mm_xor_si128(Z, _mm_andnot_si128(_mm_unpacklo_epi64(XMASK2, XMASK2), HM3));
50 X = _mm_shuffle_epi32(Z, _MM_SHUFFLE(1, 0, 3, 2));
53 X = _mm_shuffle_epi8(
X, BSWAP_MASK);
54 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(x),
X);
#define BOTAN_FUNC_ISA(isa)