Botan 3.0.0
Crypto and TLS for C++
argon2_avx2.cpp
Go to the documentation of this file.
/**
* (C) 2023 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/
6
7#include <botan/argon2.h>
8#include <immintrin.h>
9
10namespace Botan {
11
12namespace {
13
14class SIMD_4x64 final
15 {
16 public:
17 SIMD_4x64& operator=(const SIMD_4x64& other) = default;
18 SIMD_4x64(const SIMD_4x64& other) = default;
19
20 SIMD_4x64& operator=(SIMD_4x64&& other) = default;
21 SIMD_4x64(SIMD_4x64&& other) = default;
22
23 ~SIMD_4x64() = default;
24
25 BOTAN_FUNC_ISA("avx2")
26 SIMD_4x64() // zero initialized
27 {
28 m_simd = _mm256_setzero_si256();
29 }
30
31 // Load two halves at different addresses
32 static BOTAN_FUNC_ISA("avx2")
33 SIMD_4x64 load_le2(const void* inl, const void* inh)
34 {
35 return SIMD_4x64(_mm256_loadu2_m128i(
36 reinterpret_cast<const __m128i*>(inl),
37 reinterpret_cast<const __m128i*>(inh)));
38 }
39
40 static BOTAN_FUNC_ISA("avx2")
41 SIMD_4x64 load_le(const void* in)
42 {
43 return SIMD_4x64(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
44 }
45
46 void store_le(uint64_t out[4]) const
47 {
48 this->store_le(reinterpret_cast<uint8_t*>(out));
49 }
50
51 BOTAN_FUNC_ISA("avx2")
52 void store_le(uint8_t out[]) const
53 {
54 _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_simd);
55 }
56
57 BOTAN_FUNC_ISA("avx2")
58 void store_le2(void* outh, void* outl)
59 {
60 _mm256_storeu2_m128i(
61 reinterpret_cast<__m128i*>(outh),
62 reinterpret_cast<__m128i*>(outl),
63 m_simd);
64 }
65
66 SIMD_4x64 operator+(const SIMD_4x64& other) const
67 {
68 SIMD_4x64 retval(*this);
69 retval += other;
70 return retval;
71 }
72
73 SIMD_4x64 operator^(const SIMD_4x64& other) const
74 {
75 SIMD_4x64 retval(*this);
76 retval ^= other;
77 return retval;
78 }
79
80 BOTAN_FUNC_ISA("avx2")
81 void operator+=(const SIMD_4x64& other)
82 {
83 m_simd = _mm256_add_epi64(m_simd, other.m_simd);
84 }
85
86 BOTAN_FUNC_ISA("avx2")
87 void operator^=(const SIMD_4x64& other)
88 {
89 m_simd = _mm256_xor_si256(m_simd, other.m_simd);
90 }
91
92 template<size_t ROT>
93 BOTAN_FUNC_ISA("avx2")
94 SIMD_4x64 rotr() const
95 {
96 static_assert(ROT > 0 && ROT < 64, "Invalid rotation constant");
97
98 if constexpr(ROT == 16)
99 {
100 auto tab = _mm256_setr_epi8(
101 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9,
102 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
103 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, tab));
104 }
105 else if constexpr(ROT == 24)
106 {
107 auto tab = _mm256_setr_epi8(
108 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10,
109 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
110 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, tab));
111 }
112 else if constexpr(ROT == 32)
113 {
114 auto tab = _mm256_setr_epi8(
115 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11,
116 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
117 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, tab));
118 }
119 else
120 {
121 return SIMD_4x64(_mm256_or_si256(_mm256_srli_epi64(m_simd, static_cast<int>(ROT)),
122 _mm256_slli_epi64(m_simd, static_cast<int>(64-ROT))));
123 }
124 }
125
126 template<size_t ROT>
127 SIMD_4x64 rotl() const
128 {
129 return this->rotr<64-ROT>();
130 }
131
132 // Argon2 specific operation
133 static BOTAN_FUNC_ISA("avx2")
134 SIMD_4x64 mul2_32(SIMD_4x64 x, SIMD_4x64 y)
135 {
136 const __m256i m = _mm256_mul_epu32(x.m_simd, y.m_simd);
137 return SIMD_4x64(_mm256_add_epi64(m, m));
138 }
139
140 template<uint8_t CTRL>
141 static BOTAN_FUNC_ISA("avx2") SIMD_4x64 permute_4x64(SIMD_4x64 x)
142 {
143 return SIMD_4x64(_mm256_permute4x64_epi64(x.m_simd, CTRL));
144 }
145
146 // Argon2 specific
147 static void twist(
148 SIMD_4x64& B,
149 SIMD_4x64& C,
150 SIMD_4x64& D)
151 {
152 B = SIMD_4x64::permute_4x64<0b00'11'10'01>(B);
153 C = SIMD_4x64::permute_4x64<0b01'00'11'10>(C);
154 D = SIMD_4x64::permute_4x64<0b10'01'00'11>(D);
155 }
156
157 // Argon2 specific
158 static void untwist(
159 SIMD_4x64& B,
160 SIMD_4x64& C,
161 SIMD_4x64& D)
162 {
163 B = SIMD_4x64::permute_4x64<0b10'01'00'11>(B);
164 C = SIMD_4x64::permute_4x64<0b01'00'11'10>(C);
165 D = SIMD_4x64::permute_4x64<0b00'11'10'01>(D);
166 }
167
168 explicit BOTAN_FUNC_ISA("avx2") SIMD_4x64(__m256i x) : m_simd(x) {}
169 private:
170 __m256i m_simd;
171 };
172
173BOTAN_FORCE_INLINE void blamka_G(
174 SIMD_4x64& A,
175 SIMD_4x64& B,
176 SIMD_4x64& C,
177 SIMD_4x64& D)
178 {
179 A += B + SIMD_4x64::mul2_32(A, B);
180 D ^= A;
181 D = D.rotr<32>();
182
183 C += D + SIMD_4x64::mul2_32(C, D);
184 B ^= C;
185 B = B.rotr<24>();
186
187 A += B + SIMD_4x64::mul2_32(A, B);
188 D ^= A;
189 D = D.rotr<16>();
190
191 C += D + SIMD_4x64::mul2_32(C, D);
192 B ^= C;
193 B = B.rotr<63>();
194 }
195
/*
* One full BlaMka round: apply G to the columns, then rotate the
* lanes (twist) so the diagonals line up as columns, apply G again,
* and restore the original lane order (untwist).
*/
BOTAN_FORCE_INLINE void blamka_R(
   SIMD_4x64& A,
   SIMD_4x64& B,
   SIMD_4x64& C,
   SIMD_4x64& D)
   {
   blamka_G(A, B, C, D);

   // Bring the diagonals into column position, mix, then undo
   SIMD_4x64::twist(B, C, D);
   blamka_G(A, B, C, D);
   SIMD_4x64::untwist(B, C, D);
   }
208
209}
210
/*
* AVX2 implementation of the Argon2 BlaMka block permutation over a
* 1 KiB block of 128 64-bit words: round each of the 8 rows, then
* round the 8 column groups of the result in T, and finally XOR T
* back into N in place.
*/
BOTAN_FUNC_ISA("avx2")
void Argon2::blamka_avx2(uint64_t N[128], uint64_t T[128])
   {
   // Row-wise rounds: each row i is 16 consecutive words of N,
   // loaded as four 4-lane vectors; the result goes into T
   for(size_t i = 0; i != 8; ++i)
      {
      SIMD_4x64 A = SIMD_4x64::load_le(&N[16*i + 4*0]);
      SIMD_4x64 B = SIMD_4x64::load_le(&N[16*i + 4*1]);
      SIMD_4x64 C = SIMD_4x64::load_le(&N[16*i + 4*2]);
      SIMD_4x64 D = SIMD_4x64::load_le(&N[16*i + 4*3]);

      blamka_R(A, B, C, D);

      A.store_le(&T[16*i + 4*0]);
      B.store_le(&T[16*i + 4*1]);
      C.store_le(&T[16*i + 4*2]);
      D.store_le(&T[16*i + 4*3]);
      }

   // Column-wise rounds, in place on T: each vector gathers two
   // 2-word pairs that are 16 words apart, via the split load/store
   for(size_t i = 0; i != 8; ++i)
      {
      SIMD_4x64 A = SIMD_4x64::load_le2(&T[2*i + 32*0], &T[2*i + 32*0 + 16]);
      SIMD_4x64 B = SIMD_4x64::load_le2(&T[2*i + 32*1], &T[2*i + 32*1 + 16]);
      SIMD_4x64 C = SIMD_4x64::load_le2(&T[2*i + 32*2], &T[2*i + 32*2 + 16]);
      SIMD_4x64 D = SIMD_4x64::load_le2(&T[2*i + 32*3], &T[2*i + 32*3 + 16]);

      blamka_R(A, B, C, D);

      A.store_le2(&T[2*i + 32*0], &T[2*i + 32*0 + 16]);
      B.store_le2(&T[2*i + 32*1], &T[2*i + 32*1 + 16]);
      C.store_le2(&T[2*i + 32*2], &T[2*i + 32*2 + 16]);
      D.store_le2(&T[2*i + 32*3], &T[2*i + 32*3 + 16]);
      }

   // Feed-forward: N ^= T, 8 words at a time
   for(size_t i = 0; i != 128 / 8; ++i)
      {
      SIMD_4x64 n0 = SIMD_4x64::load_le(&N[8*i]);
      SIMD_4x64 n1 = SIMD_4x64::load_le(&N[8*i+4]);
      SIMD_4x64 t0 = SIMD_4x64::load_le(&T[8*i]);
      SIMD_4x64 t1 = SIMD_4x64::load_le(&T[8*i+4]);

      n0 ^= t0;
      n1 ^= t1;
      n0.store_le(&N[8*i]);
      n1.store_le(&N[8*i+4]);
      }
   }
257
258}
static const void * inh
Definition: argon2_avx2.cpp:34
static SIMD_4x64 y
int(* final)(unsigned char *, CTX *)
#define BOTAN_FUNC_ISA(isa)
Definition: compiler.h:92
#define BOTAN_FORCE_INLINE
Definition: compiler.h:167
FE_25519 T
Definition: ge.cpp:36
Definition: alg_id.cpp:12
constexpr void store_le(uint16_t in, uint8_t out[2])
Definition: loadstor.h:465
constexpr T rotr(T input)
Definition: rotate.h:33
OctetString operator^(const OctetString &k1, const OctetString &k2)
Definition: symkey.cpp:115
OctetString operator+(const OctetString &k1, const OctetString &k2)
Definition: symkey.cpp:104
constexpr T load_le(const uint8_t in[], size_t off)
Definition: loadstor.h:134
constexpr T rotl(T input)
Definition: rotate.h:21