Botan 3.11.1
Crypto and TLS for C++
simd_2x64.h
Go to the documentation of this file.
1/*
2* (C) 2022,2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#ifndef BOTAN_SIMD_2X64_H_
8#define BOTAN_SIMD_2X64_H_
9
10#include <botan/compiler.h>
11#include <botan/types.h>
12#include <botan/internal/isa_extn.h>
13#include <botan/internal/target_info.h>
14#include <span>
15
16// TODO: extend this to support NEON / AltiVec / LSX
17
18#if defined(BOTAN_TARGET_ARCH_SUPPORTS_SSSE3)
19 #include <emmintrin.h>
20 #include <tmmintrin.h>
21 #define BOTAN_SIMD_USE_SSSE3
22#elif defined(BOTAN_TARGET_ARCH_SUPPORTS_SIMD128)
23 #include <wasm_simd128.h>
24 #define BOTAN_SIMD_USE_SIMD128
25#endif
26
27namespace Botan {
28
29// NOLINTBEGIN(portability-simd-intrinsics)
30
31class SIMD_2x64 final {
32 public:
33#if defined(BOTAN_SIMD_USE_SSSE3)
34 using native_simd_type = __m128i;
35#elif defined(BOTAN_SIMD_USE_SIMD128)
36 using native_simd_type = v128_t;
37#endif
38
39 SIMD_2x64& operator=(const SIMD_2x64& other) = default;
40 SIMD_2x64(const SIMD_2x64& other) = default;
41
42 SIMD_2x64& operator=(SIMD_2x64&& other) = default;
43 SIMD_2x64(SIMD_2x64&& other) = default;
44
45 ~SIMD_2x64() = default;
46
47 // zero initialized
48 BOTAN_FN_ISA_SIMD_2X64 SIMD_2x64() :
49#if defined(BOTAN_SIMD_USE_SSSE3)
50 m_simd(_mm_setzero_si128())
51#elif defined(BOTAN_SIMD_USE_SIMD128)
52 m_simd(wasm_u64x2_const_splat(0))
53#endif
54 {
55 }
56
57 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 splat(uint64_t v) {
58#if defined(BOTAN_SIMD_USE_SSSE3)
59 return SIMD_2x64(_mm_set1_epi64x(v));
60#elif defined(BOTAN_SIMD_USE_SIMD128)
61 return SIMD_2x64(wasm_u64x2_splat(v));
62#endif
63 }
64
65 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 all_ones() {
66#if defined(BOTAN_SIMD_USE_SSSE3)
67 return SIMD_2x64(_mm_set1_epi8(-1));
68#elif defined(BOTAN_SIMD_USE_SIMD128)
69 return SIMD_2x64(wasm_i8x16_splat(0xFF));
70#endif
71 }
72
73 BOTAN_FN_ISA_SIMD_2X64 SIMD_2x64(uint64_t low, uint64_t high) :
74#if defined(BOTAN_SIMD_USE_SSSE3)
75 m_simd(_mm_set_epi64x(high, low))
76#elif defined(BOTAN_SIMD_USE_SIMD128)
77 m_simd(wasm_u64x2_make(low, high))
78#endif
79 {
80 }
81
82 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 load_le(const void* in) {
83#if defined(BOTAN_SIMD_USE_SSSE3)
84 return SIMD_2x64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
85#elif defined(BOTAN_SIMD_USE_SIMD128)
86 return SIMD_2x64(wasm_v128_load(in));
87#endif
88 }
89
90 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 load_be(const void* in) { return SIMD_2x64::load_le(in).bswap(); }
91
92 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 load_le(std::span<const uint8_t, 16> in) {
93 return SIMD_2x64::load_le(in.data());
94 }
95
96 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 load_be(std::span<const uint8_t, 16> in) {
97 return SIMD_2x64::load_be(in.data());
98 }
99
100 SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 bswap() const {
101#if defined(BOTAN_SIMD_USE_SSSE3)
102 const auto idx = _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
103 return SIMD_2x64(_mm_shuffle_epi8(m_simd, idx));
104#elif defined(BOTAN_SIMD_USE_SIMD128)
105 return SIMD_2x64(wasm_i8x16_shuffle(m_simd, m_simd, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8));
106#endif
107 }
108
109 SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 swap_lanes() const {
110#if defined(BOTAN_SIMD_USE_SSSE3)
111 return SIMD_2x64(_mm_shuffle_epi32(m_simd, _MM_SHUFFLE(1, 0, 3, 2)));
112#elif defined(BOTAN_SIMD_USE_SIMD128)
113 return SIMD_2x64(wasm_i64x2_shuffle(m_simd, m_simd, 1, 0));
114#endif
115 }
116
117 SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 reverse_all_bytes() const {
118#if defined(BOTAN_SIMD_USE_SSSE3)
119 const auto idx = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
120 return SIMD_2x64(_mm_shuffle_epi8(m_simd, idx));
121#elif defined(BOTAN_SIMD_USE_SIMD128)
122 return SIMD_2x64(wasm_i8x16_shuffle(m_simd, m_simd, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
123#endif
124 }
125
126 void BOTAN_FN_ISA_SIMD_2X64 store_le(uint64_t out[2]) const { this->store_le(reinterpret_cast<uint8_t*>(out)); }
127
128 void BOTAN_FN_ISA_SIMD_2X64 store_le(uint8_t out[]) const {
129#if defined(BOTAN_SIMD_USE_SSSE3)
130 _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_simd);
131#elif defined(BOTAN_SIMD_USE_SIMD128)
132 wasm_v128_store(out, m_simd);
133#endif
134 }
135
136 void BOTAN_FN_ISA_SIMD_2X64 store_be(uint64_t out[2]) const { this->store_be(reinterpret_cast<uint8_t*>(out)); }
137
138 void BOTAN_FN_ISA_SIMD_2X64 store_be(uint8_t out[]) const { bswap().store_le(out); }
139
140 void BOTAN_FN_ISA_SIMD_2X64 store_be(std::span<uint8_t, 16> out) const { this->store_be(out.data()); }
141
142 void BOTAN_FN_ISA_SIMD_2X64 store_le(std::span<uint8_t, 16> out) const { this->store_le(out.data()); }
143
144 SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 operator+(const SIMD_2x64& other) const {
145 SIMD_2x64 retval(*this);
146 retval += other;
147 return retval;
148 }
149
150 SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 operator^(const SIMD_2x64& other) const {
151 SIMD_2x64 retval(*this);
152 retval ^= other;
153 return retval;
154 }
155
156 void BOTAN_FN_ISA_SIMD_2X64 operator+=(const SIMD_2x64& other) {
157#if defined(BOTAN_SIMD_USE_SSSE3)
158 m_simd = _mm_add_epi64(m_simd, other.m_simd);
159#elif defined(BOTAN_SIMD_USE_SIMD128)
160 m_simd = wasm_i64x2_add(m_simd, other.m_simd);
161#endif
162 }
163
164 void BOTAN_FN_ISA_SIMD_2X64 operator^=(const SIMD_2x64& other) {
165#if defined(BOTAN_SIMD_USE_SSSE3)
166 m_simd = _mm_xor_si128(m_simd, other.m_simd);
167#elif defined(BOTAN_SIMD_USE_SIMD128)
168 m_simd = wasm_v128_xor(m_simd, other.m_simd);
169#endif
170 }
171
172 SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 andc(const SIMD_2x64& other) const noexcept {
173#if defined(BOTAN_SIMD_USE_SSSE3)
174 return SIMD_2x64(_mm_andnot_si128(m_simd, other.m_simd));
175#elif defined(BOTAN_SIMD_USE_SIMD128)
176 // SIMD128 is a & ~b
177 return SIMD_2x64(wasm_v128_andnot(other.m_simd, m_simd));
178#endif
179 }
180
181 template <size_t ROT>
182 BOTAN_FN_ISA_SIMD_2X64 SIMD_2x64 rotr() const
183 requires(ROT > 0 && ROT < 64)
184 {
185#if defined(BOTAN_SIMD_USE_SSSE3)
186 if constexpr(ROT == 8) {
187 auto tab = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15, 8);
188 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
189 } else if constexpr(ROT == 16) {
190 auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
191 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
192 } else if constexpr(ROT == 24) {
193 auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
194 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
195 } else if constexpr(ROT == 32) {
196 auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
197 return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
198 } else {
199 return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
200 _mm_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
201 }
202#elif defined(BOTAN_SIMD_USE_SIMD128)
203 if constexpr(ROT == 8) {
204 return SIMD_2x64(wasm_i8x16_shuffle(m_simd, m_simd, 1, 2, 3, 4, 5, 6, 7, 0, 9, 10, 11, 12, 13, 14, 15, 8));
205 } else if constexpr(ROT == 16) {
206 return SIMD_2x64(wasm_i8x16_shuffle(m_simd, m_simd, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9));
207 } else if constexpr(ROT == 24) {
208 return SIMD_2x64(wasm_i8x16_shuffle(m_simd, m_simd, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10));
209 } else if constexpr(ROT == 32) {
210 return SIMD_2x64(wasm_i8x16_shuffle(m_simd, m_simd, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11));
211 } else {
212 return SIMD_2x64(wasm_v128_or(wasm_u64x2_shr(m_simd, ROT), wasm_i64x2_shl(m_simd, 64 - ROT)));
213 }
214#endif
215 }
216
217 template <size_t ROT>
218 SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 rotl() const {
219 return this->rotr<64 - ROT>();
220 }
221
222 template <int SHIFT>
223 SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 shr() const noexcept {
224#if defined(BOTAN_SIMD_USE_SSSE3)
225 return SIMD_2x64(_mm_srli_epi64(m_simd, SHIFT));
226#elif defined(BOTAN_SIMD_USE_SIMD128)
227 return SIMD_2x64(wasm_u64x2_shr(m_simd, SHIFT));
228#endif
229 }
230
231 template <int SHIFT>
232 SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 shl() const noexcept {
233#if defined(BOTAN_SIMD_USE_SSSE3)
234 return SIMD_2x64(_mm_slli_epi64(m_simd, SHIFT));
235#elif defined(BOTAN_SIMD_USE_SIMD128)
236 return SIMD_2x64(wasm_i64x2_shl(m_simd, SHIFT));
237#endif
238 }
239
240 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 alignr8(const SIMD_2x64& a, const SIMD_2x64& b) {
241#if defined(BOTAN_SIMD_USE_SSSE3)
242 return SIMD_2x64(_mm_alignr_epi8(a.m_simd, b.m_simd, 8));
243#elif defined(BOTAN_SIMD_USE_SIMD128)
244 return SIMD_2x64(
245 wasm_i8x16_shuffle(b.m_simd, a.m_simd, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23));
246#endif
247 }
248
249 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 interleave_low(const SIMD_2x64& a, const SIMD_2x64& b) {
250#if defined(BOTAN_SIMD_USE_SSSE3)
251 return SIMD_2x64(_mm_unpacklo_epi64(a.m_simd, b.m_simd));
252#elif defined(BOTAN_SIMD_USE_SIMD128)
253 return SIMD_2x64(wasm_u64x2_extract_lane(a.m_simd, 0), wasm_u64x2_extract_lane(b.m_simd, 0));
254#endif
255 }
256
257 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 interleave_high(const SIMD_2x64& a, const SIMD_2x64& b) {
258#if defined(BOTAN_SIMD_USE_SSSE3)
259 return SIMD_2x64(_mm_unpackhi_epi64(a.m_simd, b.m_simd));
260#elif defined(BOTAN_SIMD_USE_SIMD128)
261 return SIMD_2x64(wasm_u64x2_extract_lane(a.m_simd, 1), wasm_u64x2_extract_lane(b.m_simd, 1));
262#endif
263 }
264
265 // Argon2 specific operation
266 static void BOTAN_FN_ISA_SIMD_2X64
268 auto T0 = SIMD_2x64::alignr8(B1, B0);
269 auto T1 = SIMD_2x64::alignr8(B0, B1);
270 B0 = T0;
271 B1 = T1;
272
273 T0 = C0;
274 C0 = C1;
275 C1 = T0;
276
277 T0 = SIMD_2x64::alignr8(D0, D1);
278 T1 = SIMD_2x64::alignr8(D1, D0);
279 D0 = T0;
280 D1 = T1;
281 }
282
283 // Argon2 specific operation
284 static void BOTAN_FN_ISA_SIMD_2X64
286 auto T0 = SIMD_2x64::alignr8(B0, B1);
287 auto T1 = SIMD_2x64::alignr8(B1, B0);
288 B0 = T0;
289 B1 = T1;
290
291 T0 = C0;
292 C0 = C1;
293 C1 = T0;
294
295 T0 = SIMD_2x64::alignr8(D1, D0);
296 T1 = SIMD_2x64::alignr8(D0, D1);
297 D0 = T0;
298 D1 = T1;
299 }
300
301 // Argon2 specific operation
302 static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 mul2_32(SIMD_2x64 x, SIMD_2x64 y) {
303#if defined(BOTAN_SIMD_USE_SSSE3)
304 const __m128i m = _mm_mul_epu32(x.m_simd, y.m_simd);
305 return SIMD_2x64(_mm_add_epi64(m, m));
306#elif defined(BOTAN_SIMD_USE_SIMD128)
307 const auto m = wasm_u64x2_extmul_low_u32x4(wasm_i32x4_shuffle(x.m_simd, x.m_simd, 0, 2, 0, 2),
308 wasm_i32x4_shuffle(y.m_simd, y.m_simd, 0, 2, 0, 2));
309
310 return SIMD_2x64(wasm_i64x2_add(m, m));
311#endif
312 }
313
314 native_simd_type BOTAN_FN_ISA_SIMD_2X64 raw() const noexcept { return m_simd; }
315
316 explicit BOTAN_FN_ISA_SIMD_2X64 SIMD_2x64(native_simd_type x) : m_simd(x) {}
317
318 private:
319 native_simd_type m_simd;
320};
321
322// NOLINTEND(portability-simd-intrinsics)
323
324} // namespace Botan
325
326#endif
SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 shr() const noexcept
Definition simd_2x64.h:223
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 load_le(std::span< const uint8_t, 16 > in)
Definition simd_2x64.h:92
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 mul2_32(SIMD_2x64 x, SIMD_2x64 y)
Definition simd_2x64.h:302
SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 operator^(const SIMD_2x64 &other) const
Definition simd_2x64.h:150
BOTAN_FN_ISA_SIMD_2X64 SIMD_2x64()
Definition simd_2x64.h:48
native_simd_type BOTAN_FN_ISA_SIMD_2X64 raw() const noexcept
Definition simd_2x64.h:314
void BOTAN_FN_ISA_SIMD_2X64 store_be(uint8_t out[]) const
Definition simd_2x64.h:138
SIMD_2x64 & operator=(const SIMD_2x64 &other)=default
SIMD_2x64(const SIMD_2x64 &other)=default
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 load_be(std::span< const uint8_t, 16 > in)
Definition simd_2x64.h:96
void BOTAN_FN_ISA_SIMD_2X64 store_le(uint64_t out[2]) const
Definition simd_2x64.h:126
SIMD_2x64(SIMD_2x64 &&other)=default
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 load_be(const void *in)
Definition simd_2x64.h:90
BOTAN_FN_ISA_SIMD_2X64 SIMD_2x64(uint64_t low, uint64_t high)
Definition simd_2x64.h:73
SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 reverse_all_bytes() const
Definition simd_2x64.h:117
SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 shl() const noexcept
Definition simd_2x64.h:232
void BOTAN_FN_ISA_SIMD_2X64 store_be(std::span< uint8_t, 16 > out) const
Definition simd_2x64.h:140
static void BOTAN_FN_ISA_SIMD_2X64 untwist(SIMD_2x64 &B0, SIMD_2x64 &B1, SIMD_2x64 &C0, SIMD_2x64 &C1, SIMD_2x64 &D0, SIMD_2x64 &D1)
Definition simd_2x64.h:285
BOTAN_FN_ISA_SIMD_2X64 SIMD_2x64(native_simd_type x)
Definition simd_2x64.h:316
~SIMD_2x64()=default
SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 swap_lanes() const
Definition simd_2x64.h:109
void BOTAN_FN_ISA_SIMD_2X64 store_be(uint64_t out[2]) const
Definition simd_2x64.h:136
static void BOTAN_FN_ISA_SIMD_2X64 twist(SIMD_2x64 &B0, SIMD_2x64 &B1, SIMD_2x64 &C0, SIMD_2x64 &C1, SIMD_2x64 &D0, SIMD_2x64 &D1)
Definition simd_2x64.h:267
SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 andc(const SIMD_2x64 &other) const noexcept
Definition simd_2x64.h:172
SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 operator+(const SIMD_2x64 &other) const
Definition simd_2x64.h:144
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 all_ones()
Definition simd_2x64.h:65
SIMD_2x64 & operator=(SIMD_2x64 &&other)=default
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 load_le(const void *in)
Definition simd_2x64.h:82
void BOTAN_FN_ISA_SIMD_2X64 store_le(std::span< uint8_t, 16 > out) const
Definition simd_2x64.h:142
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 interleave_high(const SIMD_2x64 &a, const SIMD_2x64 &b)
Definition simd_2x64.h:257
SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 rotl() const
Definition simd_2x64.h:218
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 interleave_low(const SIMD_2x64 &a, const SIMD_2x64 &b)
Definition simd_2x64.h:249
void BOTAN_FN_ISA_SIMD_2X64 operator+=(const SIMD_2x64 &other)
Definition simd_2x64.h:156
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 splat(uint64_t v)
Definition simd_2x64.h:57
static SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 alignr8(const SIMD_2x64 &a, const SIMD_2x64 &b)
Definition simd_2x64.h:240
SIMD_2x64 BOTAN_FN_ISA_SIMD_2X64 bswap() const
Definition simd_2x64.h:100
BOTAN_FN_ISA_SIMD_2X64 SIMD_2x64 rotr() const
Definition simd_2x64.h:182
void BOTAN_FN_ISA_SIMD_2X64 operator^=(const SIMD_2x64 &other)
Definition simd_2x64.h:164
void BOTAN_FN_ISA_SIMD_2X64 store_le(uint8_t out[]) const
Definition simd_2x64.h:128