Botan 3.11.1
Crypto and TLS for C&&
argon2_avx512.cpp
Go to the documentation of this file.
1/**
2* (C) 2026 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/argon2.h>
8
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/simd_8x64.h>
11
12namespace Botan {
13
14namespace {
15
16BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX512 void blamka_G(SIMD_8x64& A, SIMD_8x64& B, SIMD_8x64& C, SIMD_8x64& D) {
17 A += B + SIMD_8x64::mul2_32(A, B);
18 D ^= A;
19 D = D.rotr<32>();
20
21 C += D + SIMD_8x64::mul2_32(C, D);
22 B ^= C;
23 B = B.rotr<24>();
24
25 A += B + SIMD_8x64::mul2_32(A, B);
26 D ^= A;
27 D = D.rotr<16>();
28
29 C += D + SIMD_8x64::mul2_32(C, D);
30 B ^= C;
31 B = B.rotr<63>();
32}
33
34BOTAN_FORCE_INLINE BOTAN_FN_ISA_AVX512 void blamka_R(SIMD_8x64& A, SIMD_8x64& B, SIMD_8x64& C, SIMD_8x64& D) {
35 blamka_G(A, B, C, D);
36
37 SIMD_8x64::twist(B, C, D);
38 blamka_G(A, B, C, D);
39 SIMD_8x64::untwist(B, C, D);
40}
41
42} // namespace
43
44BOTAN_FN_ISA_AVX512 void Argon2::blamka_avx512(uint64_t N[128], uint64_t T[128]) {
45 for(size_t i = 0; i != 8; i += 2) {
46 SIMD_8x64 A = SIMD_8x64::load_le4(
47 &N[16 * i + 4 * 0], &N[16 * i + 4 * 0 + 2], &N[16 * (i + 1) + 4 * 0], &N[16 * (i + 1) + 4 * 0 + 2]);
48 SIMD_8x64 B = SIMD_8x64::load_le4(
49 &N[16 * i + 4 * 1], &N[16 * i + 4 * 1 + 2], &N[16 * (i + 1) + 4 * 1], &N[16 * (i + 1) + 4 * 1 + 2]);
50 SIMD_8x64 C = SIMD_8x64::load_le4(
51 &N[16 * i + 4 * 2], &N[16 * i + 4 * 2 + 2], &N[16 * (i + 1) + 4 * 2], &N[16 * (i + 1) + 4 * 2 + 2]);
52 SIMD_8x64 D = SIMD_8x64::load_le4(
53 &N[16 * i + 4 * 3], &N[16 * i + 4 * 3 + 2], &N[16 * (i + 1) + 4 * 3], &N[16 * (i + 1) + 4 * 3 + 2]);
54
55 blamka_R(A, B, C, D);
56
57 A.store_le4(&T[16 * i + 4 * 0], &T[16 * i + 4 * 0 + 2], &T[16 * (i + 1) + 4 * 0], &T[16 * (i + 1) + 4 * 0 + 2]);
58 B.store_le4(&T[16 * i + 4 * 1], &T[16 * i + 4 * 1 + 2], &T[16 * (i + 1) + 4 * 1], &T[16 * (i + 1) + 4 * 1 + 2]);
59 C.store_le4(&T[16 * i + 4 * 2], &T[16 * i + 4 * 2 + 2], &T[16 * (i + 1) + 4 * 2], &T[16 * (i + 1) + 4 * 2 + 2]);
60 D.store_le4(&T[16 * i + 4 * 3], &T[16 * i + 4 * 3 + 2], &T[16 * (i + 1) + 4 * 3], &T[16 * (i + 1) + 4 * 3 + 2]);
61 }
62
63 for(size_t i = 0; i != 8; i += 2) {
64 SIMD_8x64 A = SIMD_8x64::load_le4(
65 &T[2 * i + 32 * 0], &T[2 * i + 32 * 0 + 16], &T[2 * (i + 1) + 32 * 0], &T[2 * (i + 1) + 32 * 0 + 16]);
66 SIMD_8x64 B = SIMD_8x64::load_le4(
67 &T[2 * i + 32 * 1], &T[2 * i + 32 * 1 + 16], &T[2 * (i + 1) + 32 * 1], &T[2 * (i + 1) + 32 * 1 + 16]);
68 SIMD_8x64 C = SIMD_8x64::load_le4(
69 &T[2 * i + 32 * 2], &T[2 * i + 32 * 2 + 16], &T[2 * (i + 1) + 32 * 2], &T[2 * (i + 1) + 32 * 2 + 16]);
70 SIMD_8x64 D = SIMD_8x64::load_le4(
71 &T[2 * i + 32 * 3], &T[2 * i + 32 * 3 + 16], &T[2 * (i + 1) + 32 * 3], &T[2 * (i + 1) + 32 * 3 + 16]);
72
73 blamka_R(A, B, C, D);
74
75 A.store_le4(&T[2 * i + 32 * 0], &T[2 * i + 32 * 0 + 16], &T[2 * (i + 1) + 32 * 0], &T[2 * (i + 1) + 32 * 0 + 16]);
76 B.store_le4(&T[2 * i + 32 * 1], &T[2 * i + 32 * 1 + 16], &T[2 * (i + 1) + 32 * 1], &T[2 * (i + 1) + 32 * 1 + 16]);
77 C.store_le4(&T[2 * i + 32 * 2], &T[2 * i + 32 * 2 + 16], &T[2 * (i + 1) + 32 * 2], &T[2 * (i + 1) + 32 * 2 + 16]);
78 D.store_le4(&T[2 * i + 32 * 3], &T[2 * i + 32 * 3 + 16], &T[2 * (i + 1) + 32 * 3], &T[2 * (i + 1) + 32 * 3 + 16]);
79 }
80
81 for(size_t i = 0; i != 128 / 8; ++i) {
82 SIMD_8x64 n = SIMD_8x64::load_le(&N[8 * i]);
83 n ^= SIMD_8x64::load_le(&T[8 * i]);
84 n.store_le(&N[8 * i]);
85 }
86}
87
88} // namespace Botan
static void BOTAN_FN_ISA_SIMD_8X64 untwist(SIMD_8x64 &B, SIMD_8x64 &C, SIMD_8x64 &D)
Definition simd_8x64.h:172
static BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 load_le4(const void *in0, const void *in1, const void *in2, const void *in3)
Definition simd_8x64.h:34
static void BOTAN_FN_ISA_SIMD_8X64 twist(SIMD_8x64 &B, SIMD_8x64 &C, SIMD_8x64 &D)
Definition simd_8x64.h:163
static BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 mul2_32(SIMD_8x64 x, SIMD_8x64 y)
Definition simd_8x64.h:157
static BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 load_le(const void *in)
Definition simd_8x64.h:53
#define BOTAN_FORCE_INLINE
Definition compiler.h:87