Botan 3.7.1
Crypto and TLS for C++
argon2_avx2.cpp
Go to the documentation of this file.
1/**
2* (C) 2023 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/argon2.h>
8
9#include <botan/compiler.h>
10#include <immintrin.h>
11
12namespace Botan {
13
14namespace {
15
16class SIMD_4x64 final {
17 public:
18 SIMD_4x64& operator=(const SIMD_4x64& other) = default;
19 SIMD_4x64(const SIMD_4x64& other) = default;
20
21 SIMD_4x64& operator=(SIMD_4x64&& other) = default;
22 SIMD_4x64(SIMD_4x64&& other) = default;
23
24 ~SIMD_4x64() = default;
25
26 // zero initialized
27 BOTAN_FUNC_ISA("avx2") SIMD_4x64() { m_simd = _mm256_setzero_si256(); }
28
29 // Load two halves at different addresses
30 static BOTAN_FUNC_ISA("avx2") SIMD_4x64 load_le2(const void* inl, const void* inh) {
31 return SIMD_4x64(
32 _mm256_loadu2_m128i(reinterpret_cast<const __m128i*>(inl), reinterpret_cast<const __m128i*>(inh)));
33 }
34
35 static BOTAN_FUNC_ISA("avx2") SIMD_4x64 load_le(const void* in) {
36 return SIMD_4x64(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
37 }
38
39 void store_le(uint64_t out[4]) const { this->store_le(reinterpret_cast<uint8_t*>(out)); }
40
41 BOTAN_FUNC_ISA("avx2") void store_le(uint8_t out[]) const {
42 _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_simd);
43 }
44
45 BOTAN_FUNC_ISA("avx2") void store_le2(void* outh, void* outl) {
46 _mm256_storeu2_m128i(reinterpret_cast<__m128i*>(outh), reinterpret_cast<__m128i*>(outl), m_simd);
47 }
48
49 SIMD_4x64 operator+(const SIMD_4x64& other) const {
50 SIMD_4x64 retval(*this);
51 retval += other;
52 return retval;
53 }
54
55 SIMD_4x64 operator^(const SIMD_4x64& other) const {
56 SIMD_4x64 retval(*this);
57 retval ^= other;
58 return retval;
59 }
60
61 BOTAN_FUNC_ISA("avx2") void operator+=(const SIMD_4x64& other) {
62 m_simd = _mm256_add_epi64(m_simd, other.m_simd);
63 }
64
65 BOTAN_FUNC_ISA("avx2") void operator^=(const SIMD_4x64& other) {
66 m_simd = _mm256_xor_si256(m_simd, other.m_simd);
67 }
68
69 template <size_t ROT>
70 BOTAN_FUNC_ISA("avx2")
71 SIMD_4x64 rotr() const
72 requires(ROT > 0 && ROT < 64)
73 {
74 if constexpr(ROT == 16) {
75 auto shuf_rot_16 =
76 _mm256_set_epi64x(0x09080f0e0d0c0b0a, 0x0100070605040302, 0x09080f0e0d0c0b0a, 0x0100070605040302);
77
78 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_16));
79 } else if constexpr(ROT == 24) {
80 auto shuf_rot_24 =
81 _mm256_set_epi64x(0x0a09080f0e0d0c0b, 0x0201000706050403, 0x0a09080f0e0d0c0b, 0x0201000706050403);
82
83 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_24));
84 } else if constexpr(ROT == 32) {
85 auto shuf_rot_32 =
86 _mm256_set_epi64x(0x0b0a09080f0e0d0c, 0x0302010007060504, 0x0b0a09080f0e0d0c, 0x0302010007060504);
87
88 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_32));
89 } else {
90 return SIMD_4x64(_mm256_or_si256(_mm256_srli_epi64(m_simd, static_cast<int>(ROT)),
91 _mm256_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
92 }
93 }
94
95 template <size_t ROT>
96 SIMD_4x64 rotl() const {
97 return this->rotr<64 - ROT>();
98 }
99
100 // Argon2 specific operation
101 static BOTAN_FUNC_ISA("avx2") SIMD_4x64 mul2_32(SIMD_4x64 x, SIMD_4x64 y) {
102 const __m256i m = _mm256_mul_epu32(x.m_simd, y.m_simd);
103 return SIMD_4x64(_mm256_add_epi64(m, m));
104 }
105
106 template <uint8_t CTRL>
107 static BOTAN_FUNC_ISA("avx2") SIMD_4x64 permute_4x64(SIMD_4x64 x) {
108 return SIMD_4x64(_mm256_permute4x64_epi64(x.m_simd, CTRL));
109 }
110
111 // Argon2 specific
112 static void twist(SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) {
113 B = SIMD_4x64::permute_4x64<0b00'11'10'01>(B);
114 C = SIMD_4x64::permute_4x64<0b01'00'11'10>(C);
115 D = SIMD_4x64::permute_4x64<0b10'01'00'11>(D);
116 }
117
118 // Argon2 specific
119 static void untwist(SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) {
120 B = SIMD_4x64::permute_4x64<0b10'01'00'11>(B);
121 C = SIMD_4x64::permute_4x64<0b01'00'11'10>(C);
122 D = SIMD_4x64::permute_4x64<0b00'11'10'01>(D);
123 }
124
125 explicit BOTAN_FUNC_ISA("avx2") SIMD_4x64(__m256i x) : m_simd(x) {}
126
127 private:
128 __m256i m_simd;
129};
130
131BOTAN_FORCE_INLINE void blamka_G(SIMD_4x64& A, SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) {
132 A += B + SIMD_4x64::mul2_32(A, B);
133 D ^= A;
134 D = D.rotr<32>();
135
136 C += D + SIMD_4x64::mul2_32(C, D);
137 B ^= C;
138 B = B.rotr<24>();
139
140 A += B + SIMD_4x64::mul2_32(A, B);
141 D ^= A;
142 D = D.rotr<16>();
143
144 C += D + SIMD_4x64::mul2_32(C, D);
145 B ^= C;
146 B = B.rotr<63>();
147}
148
149BOTAN_FORCE_INLINE void blamka_R(SIMD_4x64& A, SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) {
150 blamka_G(A, B, C, D);
151
152 SIMD_4x64::twist(B, C, D);
153 blamka_G(A, B, C, D);
154 SIMD_4x64::untwist(B, C, D);
155}
156
157} // namespace
158
159BOTAN_FUNC_ISA("avx2") void Argon2::blamka_avx2(uint64_t N[128], uint64_t T[128]) {
160 for(size_t i = 0; i != 8; ++i) {
161 SIMD_4x64 A = SIMD_4x64::load_le(&N[16 * i + 4 * 0]);
162 SIMD_4x64 B = SIMD_4x64::load_le(&N[16 * i + 4 * 1]);
163 SIMD_4x64 C = SIMD_4x64::load_le(&N[16 * i + 4 * 2]);
164 SIMD_4x64 D = SIMD_4x64::load_le(&N[16 * i + 4 * 3]);
165
166 blamka_R(A, B, C, D);
167
168 A.store_le(&T[16 * i + 4 * 0]);
169 B.store_le(&T[16 * i + 4 * 1]);
170 C.store_le(&T[16 * i + 4 * 2]);
171 D.store_le(&T[16 * i + 4 * 3]);
172 }
173
174 for(size_t i = 0; i != 8; ++i) {
175 SIMD_4x64 A = SIMD_4x64::load_le2(&T[2 * i + 32 * 0], &T[2 * i + 32 * 0 + 16]);
176 SIMD_4x64 B = SIMD_4x64::load_le2(&T[2 * i + 32 * 1], &T[2 * i + 32 * 1 + 16]);
177 SIMD_4x64 C = SIMD_4x64::load_le2(&T[2 * i + 32 * 2], &T[2 * i + 32 * 2 + 16]);
178 SIMD_4x64 D = SIMD_4x64::load_le2(&T[2 * i + 32 * 3], &T[2 * i + 32 * 3 + 16]);
179
180 blamka_R(A, B, C, D);
181
182 A.store_le2(&T[2 * i + 32 * 0], &T[2 * i + 32 * 0 + 16]);
183 B.store_le2(&T[2 * i + 32 * 1], &T[2 * i + 32 * 1 + 16]);
184 C.store_le2(&T[2 * i + 32 * 2], &T[2 * i + 32 * 2 + 16]);
185 D.store_le2(&T[2 * i + 32 * 3], &T[2 * i + 32 * 3 + 16]);
186 }
187
188 for(size_t i = 0; i != 128 / 8; ++i) {
189 SIMD_4x64 n0 = SIMD_4x64::load_le(&N[8 * i]);
190 SIMD_4x64 n1 = SIMD_4x64::load_le(&N[8 * i + 4]);
191 SIMD_4x64 t0 = SIMD_4x64::load_le(&T[8 * i]);
192 SIMD_4x64 t1 = SIMD_4x64::load_le(&T[8 * i + 4]);
193
194 n0 ^= t0;
195 n1 ^= t1;
196 n0.store_le(&N[8 * i]);
197 n1.store_le(&N[8 * i + 4]);
198 }
199}
200
201} // namespace Botan
static const void * inh
void * outl
int(* final)(unsigned char *, CTX *)
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:42
#define BOTAN_FORCE_INLINE
Definition compiler.h:71
FE_25519 T
Definition ge.cpp:34
constexpr T rotl(T input)
Definition rotate.h:21
OctetString operator^(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:109
OctetString operator+(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:99
constexpr T rotr(T input)
Definition rotate.h:33
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:764
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:521