Botan 3.6.1
Crypto and TLS for C++
argon2_ssse3.cpp
/**
* (C) 2022 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/argon2.h>
#include <tmmintrin.h>

namespace Botan {

namespace {

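// Minimal wrapper around a single __m128i register, treated as two 64-bit
// lanes, providing just the operations needed by the Argon2 compression
// function below.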
class SIMD_2x64 final {
   public:
      SIMD_2x64& operator=(const SIMD_2x64& other) = default;
      SIMD_2x64(const SIMD_2x64& other) = default;

      SIMD_2x64& operator=(SIMD_2x64&& other) = default;
      SIMD_2x64(SIMD_2x64&& other) = default;

      ~SIMD_2x64() = default;

      // zero initialized
      SIMD_2x64() { m_simd = _mm_setzero_si128(); }

      static SIMD_2x64 load_le(const void* in) {
         return SIMD_2x64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
      }

      void store_le(uint64_t out[2]) const { this->store_le(reinterpret_cast<uint8_t*>(out)); }

      void store_le(uint8_t out[]) const { _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_simd); }

      SIMD_2x64 operator+(const SIMD_2x64& other) const {
         SIMD_2x64 retval(*this);
         retval += other;
         return retval;
      }

      SIMD_2x64 operator^(const SIMD_2x64& other) const {
         SIMD_2x64 retval(*this);
         retval ^= other;
         return retval;
      }

      void operator+=(const SIMD_2x64& other) { m_simd = _mm_add_epi64(m_simd, other.m_simd); }

      void operator^=(const SIMD_2x64& other) { m_simd = _mm_xor_si128(m_simd, other.m_simd); }

      template <size_t ROT>
      BOTAN_FUNC_ISA("ssse3")
      SIMD_2x64 rotr() const
         requires(ROT > 0 && ROT < 64)
      {
         // Rotations by a multiple of 8 bits permute whole bytes within each
         // 64-bit lane, so they can be done with a single SSSE3 byte shuffle;
         // other rotation amounts fall back to a shift/shift/or sequence.
         if constexpr(ROT == 16) {
            auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
         } else if constexpr(ROT == 24) {
            auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
         } else if constexpr(ROT == 32) {
            auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
         } else {
            return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
                                          _mm_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
         }
      }

      template <size_t ROT>
      SIMD_2x64 rotl() const {
         return this->rotr<64 - ROT>();
      }

      // Argon2 specific operation: 2 * (x mod 2^32) * (y mod 2^32) in each lane
      static SIMD_2x64 mul2_32(SIMD_2x64 x, SIMD_2x64 y) {
         const __m128i m = _mm_mul_epu32(x.m_simd, y.m_simd);
         return SIMD_2x64(_mm_add_epi64(m, m));
      }

      template <size_t T>
      BOTAN_FUNC_ISA("ssse3")
      static SIMD_2x64 alignr(SIMD_2x64 a, SIMD_2x64 b)
         requires(T > 0 && T < 16)
      {
         return SIMD_2x64(_mm_alignr_epi8(a.m_simd, b.m_simd, T));
      }

      // Argon2 specific
      static void twist(SIMD_2x64& B0, SIMD_2x64& B1, SIMD_2x64& C0, SIMD_2x64& C1, SIMD_2x64& D0, SIMD_2x64& D1) {
         SIMD_2x64 T0, T1;

         T0 = SIMD_2x64::alignr<8>(B1, B0);
         T1 = SIMD_2x64::alignr<8>(B0, B1);
         B0 = T0;
         B1 = T1;

         T0 = C0;
         C0 = C1;
         C1 = T0;

         T0 = SIMD_2x64::alignr<8>(D0, D1);
         T1 = SIMD_2x64::alignr<8>(D1, D0);
         D0 = T0;
         D1 = T1;
      }

      // Argon2 specific
      static void untwist(SIMD_2x64& B0, SIMD_2x64& B1, SIMD_2x64& C0, SIMD_2x64& C1, SIMD_2x64& D0, SIMD_2x64& D1) {
         SIMD_2x64 T0, T1;

         T0 = SIMD_2x64::alignr<8>(B0, B1);
         T1 = SIMD_2x64::alignr<8>(B1, B0);
         B0 = T0;
         B1 = T1;

         T0 = C0;
         C0 = C1;
         C1 = T0;

         T0 = SIMD_2x64::alignr<8>(D1, D0);
         T1 = SIMD_2x64::alignr<8>(D0, D1);
         D0 = T0;
         D1 = T1;
      }

      explicit SIMD_2x64(__m128i x) : m_simd(x) {}

   private:
      __m128i m_simd;
};

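// Vectorized BLAMKA mixing function G (RFC 9106), applied to four columns at
// once: the column values live in the register pairs (A0,A1), (B0,B1),
// (C0,C1) and (D0,D1). In each 64-bit lane this computes
//    a += b + 2*lo32(a)*lo32(b);  d = rotr(d ^ a, 32);
//    c += d + 2*lo32(c)*lo32(d);  b = rotr(b ^ c, 24);
// and then repeats the pattern with rotations of 16 and 63 bits.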
BOTAN_FORCE_INLINE void blamka_G(SIMD_2x64& A0,
                                 SIMD_2x64& A1,
                                 SIMD_2x64& B0,
                                 SIMD_2x64& B1,
                                 SIMD_2x64& C0,
                                 SIMD_2x64& C1,
                                 SIMD_2x64& D0,
                                 SIMD_2x64& D1) {
   A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
   A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
   D0 ^= A0;
   D1 ^= A1;
   D0 = D0.rotr<32>();
   D1 = D1.rotr<32>();

   C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
   C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
   B0 ^= C0;
   B1 ^= C1;
   B0 = B0.rotr<24>();
   B1 = B1.rotr<24>();

   A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
   A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
   D0 ^= A0;
   D1 ^= A1;
   D0 = D0.rotr<16>();
   D1 = D1.rotr<16>();

   C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
   C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
   B0 ^= C0;
   B1 ^= C1;
   B0 = B0.rotr<63>();
   B1 = B1.rotr<63>();
}

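// One round of the BLAKE2b-style permutation used by Argon2: G is applied
// column-wise, the B/C/D register pairs are rotated so that a second
// application of G acts on the diagonals (twist), and the rotation is then
// undone (untwist).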
BOTAN_FORCE_INLINE void blamka_R(SIMD_2x64& A0,
                                 SIMD_2x64& A1,
                                 SIMD_2x64& B0,
                                 SIMD_2x64& B1,
                                 SIMD_2x64& C0,
                                 SIMD_2x64& C1,
                                 SIMD_2x64& D0,
                                 SIMD_2x64& D1) {
   blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);

   SIMD_2x64::twist(B0, B1, C0, C1, D0, D1);
   blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);
   SIMD_2x64::untwist(B0, B1, C0, C1, D0, D1);
}

} // namespace

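// BLAMKA permutation over a 1024-byte block: N and T are each 128
// little-endian 64-bit words. The block is permuted into T in two passes of
// blamka_R, and T is then XORed back into N.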
void Argon2::blamka_ssse3(uint64_t N[128], uint64_t T[128]) {
   // First pass: apply blamka_R to each contiguous group of 16 words of N,
   // writing the result to T
   for(size_t i = 0; i != 8; ++i) {
      SIMD_2x64 Tv[8];
      for(size_t j = 0; j != 4; ++j) {
         Tv[2 * j] = SIMD_2x64::load_le(&N[16 * i + 4 * j]);
         Tv[2 * j + 1] = SIMD_2x64::load_le(&N[16 * i + 4 * j + 2]);
      }

      blamka_R(Tv[0], Tv[1], Tv[2], Tv[3], Tv[4], Tv[5], Tv[6], Tv[7]);

      for(size_t j = 0; j != 4; ++j) {
         Tv[2 * j].store_le(&T[16 * i + 4 * j]);
         Tv[2 * j + 1].store_le(&T[16 * i + 4 * j + 2]);
      }
   }

   // Second pass: apply blamka_R to groups of 16 words of T, taken as eight
   // adjacent pairs strided 16 words apart
   for(size_t i = 0; i != 8; ++i) {
      SIMD_2x64 Tv[8];
      for(size_t j = 0; j != 4; ++j) {
         Tv[2 * j] = SIMD_2x64::load_le(&T[2 * i + 32 * j]);
         Tv[2 * j + 1] = SIMD_2x64::load_le(&T[2 * i + 32 * j + 16]);
      }

      blamka_R(Tv[0], Tv[1], Tv[2], Tv[3], Tv[4], Tv[5], Tv[6], Tv[7]);

      for(size_t j = 0; j != 4; ++j) {
         Tv[2 * j].store_le(&T[2 * i + 32 * j]);
         Tv[2 * j + 1].store_le(&T[2 * i + 32 * j + 16]);
      }
   }

   // Finally XOR the permuted block T back into N
   for(size_t i = 0; i != 128 / 4; ++i) {
      SIMD_2x64 n0 = SIMD_2x64::load_le(&N[4 * i]);
      SIMD_2x64 n1 = SIMD_2x64::load_le(&N[4 * i + 2]);
      SIMD_2x64 t0 = SIMD_2x64::load_le(&T[4 * i]);
      SIMD_2x64 t1 = SIMD_2x64::load_le(&T[4 * i + 2]);

      n0 ^= t0;
      n1 ^= t1;
      n0.store_le(&N[4 * i]);
      n1.store_le(&N[4 * i + 2]);
   }
}

} // namespace Botan
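For reference, the arithmetic that blamka_G carries out in each 64-bit lane is the BLAMKA mixing function of Argon2 (the G_B function of RFC 9106). The standalone sketch below is illustrative only and not part of this file; the helper names rotr64, mul2_32_scalar and blamka_G_scalar are invented for the example. It shows the scalar equivalent of one blamka_G application to a single (a, b, c, d) column.

#include <cstdint>
#include <cstdio>

// Rotate a 64-bit word right by r bits (0 < r < 64)
static uint64_t rotr64(uint64_t x, unsigned r) {
   return (x >> r) | (x << (64 - r));
}

// 2 * (x mod 2^32) * (y mod 2^32), reduced mod 2^64; this is what
// SIMD_2x64::mul2_32 computes in each lane
static uint64_t mul2_32_scalar(uint64_t x, uint64_t y) {
   const uint64_t m = (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
   return m + m;
}

// Scalar equivalent of one blamka_G application to a single column
static void blamka_G_scalar(uint64_t& a, uint64_t& b, uint64_t& c, uint64_t& d) {
   a += b + mul2_32_scalar(a, b);
   d = rotr64(d ^ a, 32);
   c += d + mul2_32_scalar(c, d);
   b = rotr64(b ^ c, 24);
   a += b + mul2_32_scalar(a, b);
   d = rotr64(d ^ a, 16);
   c += d + mul2_32_scalar(c, d);
   b = rotr64(b ^ c, 63);
}

int main() {
   uint64_t a = 1, b = 2, c = 3, d = 4;
   blamka_G_scalar(a, b, c, d);
   std::printf("%016llx %016llx %016llx %016llx\n",
               static_cast<unsigned long long>(a),
               static_cast<unsigned long long>(b),
               static_cast<unsigned long long>(c),
               static_cast<unsigned long long>(d));
   return 0;
}

The vectorized blamka_G above performs this same sequence in parallel over the four columns held in the register pairs (A0, A1) through (D0, D1).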