Botan 3.4.0
Crypto and TLS for C&&
argon2_ssse3.cpp
Go to the documentation of this file.
1/**
2* (C) 2022 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/argon2.h>
8#include <tmmintrin.h>
9
10namespace Botan {
11
12namespace {
13
14class SIMD_2x64 final {
15 public:
16 SIMD_2x64& operator=(const SIMD_2x64& other) = default;
17 SIMD_2x64(const SIMD_2x64& other) = default;
18
19 SIMD_2x64& operator=(SIMD_2x64&& other) = default;
20 SIMD_2x64(SIMD_2x64&& other) = default;
21
22 ~SIMD_2x64() = default;
23
24 SIMD_2x64() // zero initialized
25 {
26 m_simd = _mm_setzero_si128();
27 }
28
29 static SIMD_2x64 load_le(const void* in) {
30 return SIMD_2x64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
31 }
32
33 void store_le(uint64_t out[2]) const { this->store_le(reinterpret_cast<uint8_t*>(out)); }
34
35 void store_le(uint8_t out[]) const { _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_simd); }
36
37 SIMD_2x64 operator+(const SIMD_2x64& other) const {
38 SIMD_2x64 retval(*this);
39 retval += other;
40 return retval;
41 }
42
43 SIMD_2x64 operator^(const SIMD_2x64& other) const {
44 SIMD_2x64 retval(*this);
45 retval ^= other;
46 return retval;
47 }
48
49 void operator+=(const SIMD_2x64& other) { m_simd = _mm_add_epi64(m_simd, other.m_simd); }
50
51 void operator^=(const SIMD_2x64& other) { m_simd = _mm_xor_si128(m_simd, other.m_simd); }
52
53 template <size_t ROT>
54 BOTAN_FUNC_ISA("ssse3")
55 SIMD_2x64 rotr() const
56 requires(ROT > 0 && ROT < 64)
57 {
58 if constexpr(ROT == 16) {
59 auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
60 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
61 } else if constexpr(ROT == 24) {
62 auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
63 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
64 } else if constexpr(ROT == 32) {
65 auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
66 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
67 } else {
68 return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
69 _mm_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
70 }
71 }
72
73 template <size_t ROT>
74 SIMD_2x64 rotl() const {
75 return this->rotr<64 - ROT>();
76 }
77
78 // Argon2 specific operation
79 static SIMD_2x64 mul2_32(SIMD_2x64 x, SIMD_2x64 y) {
80 const __m128i m = _mm_mul_epu32(x.m_simd, y.m_simd);
81 return SIMD_2x64(_mm_add_epi64(m, m));
82 }
83
84 template <size_t T>
85 BOTAN_FUNC_ISA("ssse3")
86 static SIMD_2x64 alignr(SIMD_2x64 a, SIMD_2x64 b)
87 requires(T > 0 && T < 16)
88 {
89 return SIMD_2x64(_mm_alignr_epi8(a.m_simd, b.m_simd, T));
90 }
91
92 // Argon2 specific
93 static void twist(SIMD_2x64& B0, SIMD_2x64& B1, SIMD_2x64& C0, SIMD_2x64& C1, SIMD_2x64& D0, SIMD_2x64& D1) {
94 SIMD_2x64 T0, T1;
95
96 T0 = SIMD_2x64::alignr<8>(B1, B0);
97 T1 = SIMD_2x64::alignr<8>(B0, B1);
98 B0 = T0;
99 B1 = T1;
100
101 T0 = C0;
102 C0 = C1;
103 C1 = T0;
104
105 T0 = SIMD_2x64::alignr<8>(D0, D1);
106 T1 = SIMD_2x64::alignr<8>(D1, D0);
107 D0 = T0;
108 D1 = T1;
109 }
110
111 // Argon2 specific
112 static void untwist(SIMD_2x64& B0, SIMD_2x64& B1, SIMD_2x64& C0, SIMD_2x64& C1, SIMD_2x64& D0, SIMD_2x64& D1) {
113 SIMD_2x64 T0, T1;
114
115 T0 = SIMD_2x64::alignr<8>(B0, B1);
116 T1 = SIMD_2x64::alignr<8>(B1, B0);
117 B0 = T0;
118 B1 = T1;
119
120 T0 = C0;
121 C0 = C1;
122 C1 = T0;
123
124 T0 = SIMD_2x64::alignr<8>(D1, D0);
125 T1 = SIMD_2x64::alignr<8>(D0, D1);
126 D0 = T0;
127 D1 = T1;
128 }
129
130 explicit SIMD_2x64(__m128i x) : m_simd(x) {}
131
132 private:
133 __m128i m_simd;
134};
135
136BOTAN_FORCE_INLINE void blamka_G(SIMD_2x64& A0,
137 SIMD_2x64& A1,
138 SIMD_2x64& B0,
139 SIMD_2x64& B1,
140 SIMD_2x64& C0,
141 SIMD_2x64& C1,
142 SIMD_2x64& D0,
143 SIMD_2x64& D1) {
144 A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
145 A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
146 D0 ^= A0;
147 D1 ^= A1;
148 D0 = D0.rotr<32>();
149 D1 = D1.rotr<32>();
150
151 C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
152 C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
153 B0 ^= C0;
154 B1 ^= C1;
155 B0 = B0.rotr<24>();
156 B1 = B1.rotr<24>();
157
158 A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
159 A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
160 D0 ^= A0;
161 D1 ^= A1;
162 D0 = D0.rotr<16>();
163 D1 = D1.rotr<16>();
164
165 C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
166 C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
167 B0 ^= C0;
168 B1 ^= C1;
169 B0 = B0.rotr<63>();
170 B1 = B1.rotr<63>();
171}
172
173BOTAN_FORCE_INLINE void blamka_R(SIMD_2x64& A0,
174 SIMD_2x64& A1,
175 SIMD_2x64& B0,
176 SIMD_2x64& B1,
177 SIMD_2x64& C0,
178 SIMD_2x64& C1,
179 SIMD_2x64& D0,
180 SIMD_2x64& D1) {
181 blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);
182
183 SIMD_2x64::twist(B0, B1, C0, C1, D0, D1);
184 blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);
185 SIMD_2x64::untwist(B0, B1, C0, C1, D0, D1);
186}
187
188} // namespace
189
190void Argon2::blamka_ssse3(uint64_t N[128], uint64_t T[128]) {
191 for(size_t i = 0; i != 8; ++i) {
192 SIMD_2x64 Tv[8];
193 for(size_t j = 0; j != 4; ++j) {
194 Tv[2 * j] = SIMD_2x64::load_le(&N[16 * i + 4 * j]);
195 Tv[2 * j + 1] = SIMD_2x64::load_le(&N[16 * i + 4 * j + 2]);
196 }
197
198 blamka_R(Tv[0], Tv[1], Tv[2], Tv[3], Tv[4], Tv[5], Tv[6], Tv[7]);
199
200 for(size_t j = 0; j != 4; ++j) {
201 Tv[2 * j].store_le(&T[16 * i + 4 * j]);
202 Tv[2 * j + 1].store_le(&T[16 * i + 4 * j + 2]);
203 }
204 }
205
206 for(size_t i = 0; i != 8; ++i) {
207 SIMD_2x64 Tv[8];
208 for(size_t j = 0; j != 4; ++j) {
209 Tv[2 * j] = SIMD_2x64::load_le(&T[2 * i + 32 * j]);
210 Tv[2 * j + 1] = SIMD_2x64::load_le(&T[2 * i + 32 * j + 16]);
211 }
212
213 blamka_R(Tv[0], Tv[1], Tv[2], Tv[3], Tv[4], Tv[5], Tv[6], Tv[7]);
214
215 for(size_t j = 0; j != 4; ++j) {
216 Tv[2 * j].store_le(&T[2 * i + 32 * j]);
217 Tv[2 * j + 1].store_le(&T[2 * i + 32 * j + 16]);
218 }
219 }
220
221 for(size_t i = 0; i != 128 / 4; ++i) {
222 SIMD_2x64 n0 = SIMD_2x64::load_le(&N[4 * i]);
223 SIMD_2x64 n1 = SIMD_2x64::load_le(&N[4 * i + 2]);
224 SIMD_2x64 t0 = SIMD_2x64::load_le(&T[4 * i]);
225 SIMD_2x64 t1 = SIMD_2x64::load_le(&T[4 * i + 2]);
226
227 n0 ^= t0;
228 n1 ^= t1;
229 n0.store_le(&N[4 * i]);
230 n1.store_le(&N[4 * i + 2]);
231 }
232}
233
234} // namespace Botan
int(* final)(unsigned char *, CTX *)
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:92
#define BOTAN_FORCE_INLINE
Definition compiler.h:165
FE_25519 T
Definition ge.cpp:34
constexpr T rotl(T input)
Definition rotate.h:21
OctetString operator^(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:109
OctetString operator+(const OctetString &k1, const OctetString &k2)
Definition symkey.cpp:99
constexpr T rotr(T input)
Definition rotate.h:33
constexpr auto store_le(ParamTs &&... params)
Definition loadstor.h:702
std::vector< uint8_t, Alloc > & operator^=(std::vector< uint8_t, Alloc > &out, const std::vector< uint8_t, Alloc2 > &in)
Definition mem_ops.h:447
std::vector< T, Alloc > & operator+=(std::vector< T, Alloc > &out, const std::vector< T, Alloc2 > &in)
Definition secmem.h:80
constexpr auto load_le(ParamTs &&... params)
Definition loadstor.h:462