Botan 3.6.0
Crypto and TLS for C++
argon2_avx2.cpp
Go to the documentation of this file.
1/**
2* (C) 2023 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/argon2.h>
8#include <immintrin.h>
9
10namespace Botan {
11
12namespace {
13
14class SIMD_4x64 final {
15 public:
16 SIMD_4x64& operator=(const SIMD_4x64& other) = default;
17 SIMD_4x64(const SIMD_4x64& other) = default;
18
19 SIMD_4x64& operator=(SIMD_4x64&& other) = default;
20 SIMD_4x64(SIMD_4x64&& other) = default;
21
22 ~SIMD_4x64() = default;
23
24 // zero initialized
25 BOTAN_FUNC_ISA("avx2") SIMD_4x64() { m_simd = _mm256_setzero_si256(); }
26
27 // Load two halves at different addresses
28 static BOTAN_FUNC_ISA("avx2") SIMD_4x64 load_le2(const void* inl, const void* inh) {
29 return SIMD_4x64(
30 _mm256_loadu2_m128i(reinterpret_cast<const __m128i*>(inl), reinterpret_cast<const __m128i*>(inh)));
31 }
32
33 static BOTAN_FUNC_ISA("avx2") SIMD_4x64 load_le(const void* in) {
34 return SIMD_4x64(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
35 }
36
37 void store_le(uint64_t out[4]) const { this->store_le(reinterpret_cast<uint8_t*>(out)); }
38
39 BOTAN_FUNC_ISA("avx2") void store_le(uint8_t out[]) const {
40 _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_simd);
41 }
42
43 BOTAN_FUNC_ISA("avx2") void store_le2(void* outh, void* outl) {
44 _mm256_storeu2_m128i(reinterpret_cast<__m128i*>(outh), reinterpret_cast<__m128i*>(outl), m_simd);
45 }
46
47 SIMD_4x64 operator+(const SIMD_4x64& other) const {
48 SIMD_4x64 retval(*this);
49 retval += other;
50 return retval;
51 }
52
53 SIMD_4x64 operator^(const SIMD_4x64& other) const {
54 SIMD_4x64 retval(*this);
55 retval ^= other;
56 return retval;
57 }
58
59 BOTAN_FUNC_ISA("avx2") void operator+=(const SIMD_4x64& other) {
60 m_simd = _mm256_add_epi64(m_simd, other.m_simd);
61 }
62
63 BOTAN_FUNC_ISA("avx2") void operator^=(const SIMD_4x64& other) {
64 m_simd = _mm256_xor_si256(m_simd, other.m_simd);
65 }
66
67 template <size_t ROT>
68 BOTAN_FUNC_ISA("avx2")
69 SIMD_4x64 rotr() const
70 requires(ROT > 0 && ROT < 64)
71 {
72 if constexpr(ROT == 16) {
73 auto shuf_rot_16 =
74 _mm256_set_epi64x(0x09080f0e0d0c0b0a, 0x0100070605040302, 0x09080f0e0d0c0b0a, 0x0100070605040302);
75
76 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_16));
77 } else if constexpr(ROT == 24) {
78 auto shuf_rot_24 =
79 _mm256_set_epi64x(0x0a09080f0e0d0c0b, 0x0201000706050403, 0x0a09080f0e0d0c0b, 0x0201000706050403);
80
81 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_24));
82 } else if constexpr(ROT == 32) {
83 auto shuf_rot_32 =
84 _mm256_set_epi64x(0x0b0a09080f0e0d0c, 0x0302010007060504, 0x0b0a09080f0e0d0c, 0x0302010007060504);
85
86 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_32));
87 } else {
88 return SIMD_4x64(_mm256_or_si256(_mm256_srli_epi64(m_simd, static_cast<int>(ROT)),
89 _mm256_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
90 }
91 }
92
93 template <size_t ROT>
94 SIMD_4x64 rotl() const {
95 return this->rotr<64 - ROT>();
96 }
97
98 // Argon2 specific operation
99 static BOTAN_FUNC_ISA("avx2") SIMD_4x64 mul2_32(SIMD_4x64 x, SIMD_4x64 y) {
100 const __m256i m = _mm256_mul_epu32(x.m_simd, y.m_simd);
101 return SIMD_4x64(_mm256_add_epi64(m, m));
102 }
103
104 template <uint8_t CTRL>
105 static BOTAN_FUNC_ISA("avx2") SIMD_4x64 permute_4x64(SIMD_4x64 x) {
106 return SIMD_4x64(_mm256_permute4x64_epi64(x.m_simd, CTRL));
107 }
108
109 // Argon2 specific
110 static void twist(SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) {
111 B = SIMD_4x64::permute_4x64<0b00'11'10'01>(B);
112 C = SIMD_4x64::permute_4x64<0b01'00'11'10>(C);
113 D = SIMD_4x64::permute_4x64<0b10'01'00'11>(D);
114 }
115
116 // Argon2 specific
117 static void untwist(SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) {
118 B = SIMD_4x64::permute_4x64<0b10'01'00'11>(B);
119 C = SIMD_4x64::permute_4x64<0b01'00'11'10>(C);
120 D = SIMD_4x64::permute_4x64<0b00'11'10'01>(D);
121 }
122
123 explicit BOTAN_FUNC_ISA("avx2") SIMD_4x64(__m256i x) : m_simd(x) {}
124
125 private:
126 __m256i m_simd;
127};
128
129BOTAN_FORCE_INLINE void blamka_G(SIMD_4x64& A, SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) {
130 A += B + SIMD_4x64::mul2_32(A, B);
131 D ^= A;
132 D = D.rotr<32>();
133
134 C += D + SIMD_4x64::mul2_32(C, D);
135 B ^= C;
136 B = B.rotr<24>();
137
138 A += B + SIMD_4x64::mul2_32(A, B);
139 D ^= A;
140 D = D.rotr<16>();
141
142 C += D + SIMD_4x64::mul2_32(C, D);
143 B ^= C;
144 B = B.rotr<63>();
145}
146
147BOTAN_FORCE_INLINE void blamka_R(SIMD_4x64& A, SIMD_4x64& B, SIMD_4x64& C, SIMD_4x64& D) {
148 blamka_G(A, B, C, D);
149
150 SIMD_4x64::twist(B, C, D);
151 blamka_G(A, B, C, D);
152 SIMD_4x64::untwist(B, C, D);
153}
154
155} // namespace
156
157BOTAN_FUNC_ISA("avx2") void Argon2::blamka_avx2(uint64_t N[128], uint64_t T[128]) {
158 for(size_t i = 0; i != 8; ++i) {
159 SIMD_4x64 A = SIMD_4x64::load_le(&N[16 * i + 4 * 0]);
160 SIMD_4x64 B = SIMD_4x64::load_le(&N[16 * i + 4 * 1]);
161 SIMD_4x64 C = SIMD_4x64::load_le(&N[16 * i + 4 * 2]);
162 SIMD_4x64 D = SIMD_4x64::load_le(&N[16 * i + 4 * 3]);
163
164 blamka_R(A, B, C, D);
165
166 A.store_le(&T[16 * i + 4 * 0]);
167 B.store_le(&T[16 * i + 4 * 1]);
168 C.store_le(&T[16 * i + 4 * 2]);
169 D.store_le(&T[16 * i + 4 * 3]);
170 }
171
172 for(size_t i = 0; i != 8; ++i) {
173 SIMD_4x64 A = SIMD_4x64::load_le2(&T[2 * i + 32 * 0], &T[2 * i + 32 * 0 + 16]);
174 SIMD_4x64 B = SIMD_4x64::load_le2(&T[2 * i + 32 * 1], &T[2 * i + 32 * 1 + 16]);
175 SIMD_4x64 C = SIMD_4x64::load_le2(&T[2 * i + 32 * 2], &T[2 * i + 32 * 2 + 16]);
176 SIMD_4x64 D = SIMD_4x64::load_le2(&T[2 * i + 32 * 3], &T[2 * i + 32 * 3 + 16]);
177
178 blamka_R(A, B, C, D);
179
180 A.store_le2(&T[2 * i + 32 * 0], &T[2 * i + 32 * 0 + 16]);
181 B.store_le2(&T[2 * i + 32 * 1], &T[2 * i + 32 * 1 + 16]);
182 C.store_le2(&T[2 * i + 32 * 2], &T[2 * i + 32 * 2 + 16]);
183 D.store_le2(&T[2 * i + 32 * 3], &T[2 * i + 32 * 3 + 16]);
184 }
185
186 for(size_t i = 0; i != 128 / 8; ++i) {
187 SIMD_4x64 n0 = SIMD_4x64::load_le(&N[8 * i]);
188 SIMD_4x64 n1 = SIMD_4x64::load_le(&N[8 * i + 4]);
189 SIMD_4x64 t0 = SIMD_4x64::load_le(&T[8 * i]);
190 SIMD_4x64 t1 = SIMD_4x64::load_le(&T[8 * i + 4]);
191
192 n0 ^= t0;
193 n1 ^= t1;
194 n0.store_le(&N[8 * i]);
195 n1.store_le(&N[8 * i + 4]);
196 }
197}
198
199} // namespace Botan
static const void * inh
void * outl
int(* final)(unsigned char *, CTX *)
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:92
#define BOTAN_FORCE_INLINE
Definition compiler.h:165
FE_25519 T
Definition ge.cpp:34
constexpr T rotl(T input)
Definition rotate.h:21
OctetString operator^(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:109
OctetString operator+(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:99
constexpr T rotr(T input)
Definition rotate.h:33
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:764
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:521