Botan 3.11.0
Crypto and TLS for C&&
xts_avx512_clmul.cpp
Go to the documentation of this file.
1/*
2* (C) 2026 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/xts.h>
8
9#include <botan/assert.h>
10#include <botan/internal/isa_extn.h>
11#include <botan/internal/poly_dbl.h>
12#include <immintrin.h>
13
14namespace Botan {
15
16void BOTAN_FN_ISA_AVX512_CLMUL XTS_Mode::update_tweak_block_avx512_clmul(uint8_t tweak[], size_t BS, size_t N) {
17 BOTAN_ASSERT_NOMSG(N > 0);
18
19 if(BS == 16 && N % 8 == 0) {
20 constexpr uint64_t P128 = 0x87;
21 const __m512i poly = _mm512_set_epi64(0, P128, 0, P128, 0, P128, 0, P128);
22
23 /*
24 * We need to perform N doublings on each block.
25 *
26 * We can compute the carryless multiplication with any size. Here, curiously, the
27 * constraint is that AVX2/AVX512 don't include an equivalent of psrldq (aka
28 * _mm_srli_si128), which allows shifting 128-bit lanes by any number of bits.
29 * Instead only byte-wide lane shifts are available, so we can only raise to powers
30 * where N is a multiple of 8.
31 */
32 const size_t N_32 = N / 32;
33 const size_t N_8 = (N - N_32 * 32) / 8;
34
35 // Since we must anyway require N % 8 == 0, unrolling once is free and allows better ILP
36 for(size_t i = 0; i != N; i += 8) {
37 __m512i W0 = _mm512_loadu_si512(&tweak[i * BS]);
38 __m512i W1 = _mm512_loadu_si512(&tweak[(i + 4) * BS]);
39
40 for(size_t r = 0; r != N_32; ++r) {
41 // (W << 32) ^ compute_carry(W >> 96)
42 const auto C0 = _mm512_clmulepi64_epi128(_mm512_bsrli_epi128(W0, 12), poly, 0);
43 const auto C1 = _mm512_clmulepi64_epi128(_mm512_bsrli_epi128(W1, 12), poly, 0);
44 W0 = _mm512_xor_si512(_mm512_bslli_epi128(W0, 4), C0);
45 W1 = _mm512_xor_si512(_mm512_bslli_epi128(W1, 4), C1);
46 }
47
48 for(size_t r = 0; r != N_8; ++r) {
49 // (W << 8) ^ compute_carry(W >> 120)
50 const auto C0 = _mm512_clmulepi64_epi128(_mm512_bsrli_epi128(W0, 15), poly, 0);
51 const auto C1 = _mm512_clmulepi64_epi128(_mm512_bsrli_epi128(W1, 15), poly, 0);
52 W0 = _mm512_xor_si512(_mm512_bslli_epi128(W0, 1), C0);
53 W1 = _mm512_xor_si512(_mm512_bslli_epi128(W1, 1), C1);
54 }
55
56 _mm512_storeu_epi64(&tweak[i * BS], W0);
57 _mm512_storeu_epi64(&tweak[(i + 4) * BS], W1);
58 }
59 } else {
60 poly_double_n_le(tweak, &tweak[(N - 1) * BS], BS);
62 }
63}
64
65} // namespace Botan
#define BOTAN_ASSERT_NOMSG(expr)
Definition assert.h:75
const uint8_t * tweak() const
Definition xts.h:45
void poly_double_n_le(uint8_t out[], const uint8_t in[], size_t n)
Definition poly_dbl.cpp:100
void xts_compute_tweak_block(uint8_t tweak[], size_t BS, size_t blocks_in_tweak)
Definition poly_dbl.cpp:119