Botan 3.11.0
Crypto and TLS for C++
simd_4x64.h
Go to the documentation of this file.
1/*
2* (C) 2022,2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#ifndef BOTAN_SIMD_4X64_H_
8#define BOTAN_SIMD_4X64_H_
9
10#include <botan/compiler.h>
11#include <botan/types.h>
12#include <botan/internal/isa_extn.h>
13#include <botan/internal/target_info.h>
14
15#if defined(BOTAN_TARGET_ARCH_SUPPORTS_AVX2)
16 #include <immintrin.h>
17#endif
18
19namespace Botan {
20
21// NOLINTBEGIN(portability-simd-intrinsics)
22
23class SIMD_4x64 final {
24 public:
25 SIMD_4x64& operator=(const SIMD_4x64& other) = default;
26 SIMD_4x64(const SIMD_4x64& other) = default;
27
28 SIMD_4x64& operator=(SIMD_4x64&& other) = default;
29 SIMD_4x64(SIMD_4x64&& other) = default;
30
31 ~SIMD_4x64() = default;
32
33 // zero initialized
34 BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64() : m_simd(_mm256_setzero_si256()) {}
35
36 // Load two halves at different addresses
37 static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le2(const void* lo, const void* hi) {
38 return SIMD_4x64(
39 _mm256_loadu2_m128i(reinterpret_cast<const __m128i*>(lo), reinterpret_cast<const __m128i*>(hi)));
40 }
41
42 static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be2(const void* lo, const void* hi) {
43 return SIMD_4x64::load_le2(lo, hi).bswap();
44 }
45
46 static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le(const void* in) {
47 return SIMD_4x64(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
48 }
49
50 static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be(const void* in) { return SIMD_4x64::load_le(in).bswap(); }
51
52 static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 broadcast_2x64(const uint64_t* in) {
53 return SIMD_4x64(_mm256_broadcastsi128_si256(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in))));
54 }
55
56 SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 bswap() const {
57 const auto idx = _mm256_set_epi8(
58 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
59
60 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, idx));
61 }
62
63 void BOTAN_FN_ISA_SIMD_4X64 store_le(uint64_t out[4]) const { this->store_le(reinterpret_cast<uint8_t*>(out)); }
64
65 BOTAN_FN_ISA_SIMD_4X64 void store_le(uint8_t out[]) const {
66 _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_simd);
67 }
68
69 BOTAN_FN_ISA_SIMD_4X64 void store_le2(void* outh, void* outl) {
70 _mm256_storeu2_m128i(reinterpret_cast<__m128i*>(outh), reinterpret_cast<__m128i*>(outl), m_simd);
71 }
72
73 BOTAN_FN_ISA_SIMD_4X64 void store_be(uint8_t out[]) const { bswap().store_le(out); }
74
75 SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator+(const SIMD_4x64& other) const {
76 SIMD_4x64 retval(*this);
77 retval += other;
78 return retval;
79 }
80
81 SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator^(const SIMD_4x64& other) const {
82 SIMD_4x64 retval(*this);
83 retval ^= other;
84 return retval;
85 }
86
87 SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator&(const SIMD_4x64& other) const {
88 SIMD_4x64 retval(*this);
89 retval &= other;
90 return retval;
91 }
92
93 SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator|(const SIMD_4x64& other) const {
94 SIMD_4x64 retval(*this);
95 retval |= other;
96 return retval;
97 }
98
99 BOTAN_FN_ISA_SIMD_4X64 void operator+=(const SIMD_4x64& other) {
100 m_simd = _mm256_add_epi64(m_simd, other.m_simd);
101 }
102
103 BOTAN_FN_ISA_SIMD_4X64 void operator^=(const SIMD_4x64& other) {
104 m_simd = _mm256_xor_si256(m_simd, other.m_simd);
105 }
106
107 BOTAN_FN_ISA_SIMD_4X64 void operator&=(const SIMD_4x64& other) {
108 m_simd = _mm256_and_si256(m_simd, other.m_simd);
109 }
110
111 BOTAN_FN_ISA_SIMD_4X64 void operator|=(const SIMD_4x64& other) { m_simd = _mm256_or_si256(m_simd, other.m_simd); }
112
113 template <size_t ROT>
114 BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 rotr() const
115 requires(ROT > 0 && ROT < 64)
116 {
117#if defined(__AVX512VL__)
118 return SIMD_4x64(_mm256_ror_epi64(m_simd, ROT));
119#else
120 if constexpr(ROT == 8) {
121 auto shuf_rot_8 =
122 _mm256_set_epi64x(0x080f0e0d0c0b0a09, 0x0007060504030201, 0x080f0e0d0c0b0a09, 0x0007060504030201);
123
124 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_8));
125 } else if constexpr(ROT == 16) {
126 auto shuf_rot_16 =
127 _mm256_set_epi64x(0x09080f0e0d0c0b0a, 0x0100070605040302, 0x09080f0e0d0c0b0a, 0x0100070605040302);
128
129 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_16));
130 } else if constexpr(ROT == 24) {
131 auto shuf_rot_24 =
132 _mm256_set_epi64x(0x0a09080f0e0d0c0b, 0x0201000706050403, 0x0a09080f0e0d0c0b, 0x0201000706050403);
133
134 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_24));
135 } else if constexpr(ROT == 32) {
136 auto shuf_rot_32 =
137 _mm256_set_epi64x(0x0b0a09080f0e0d0c, 0x0302010007060504, 0x0b0a09080f0e0d0c, 0x0302010007060504);
138
139 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_32));
140 } else {
141 return SIMD_4x64(_mm256_or_si256(_mm256_srli_epi64(m_simd, static_cast<int>(ROT)),
142 _mm256_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
143 }
144#endif
145 }
146
147 template <size_t ROT>
148 SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 rotl() const {
149 return this->rotr<64 - ROT>();
150 }
151
152 template <int SHIFT>
153 SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 shr() const noexcept {
154 return SIMD_4x64(_mm256_srli_epi64(m_simd, SHIFT));
155 }
156
157 template <int SHIFT>
158 SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 shl() const noexcept {
159 return SIMD_4x64(_mm256_slli_epi64(m_simd, SHIFT));
160 }
161
162 static SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 alignr8(const SIMD_4x64& a, const SIMD_4x64& b) {
163 return SIMD_4x64(_mm256_alignr_epi8(a.m_simd, b.m_simd, 8));
164 }
165
166 // Argon2 specific operation
167 static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 mul2_32(SIMD_4x64 x, SIMD_4x64 y) {
168 const __m256i m = _mm256_mul_epu32(x.m_simd, y.m_simd);
169 return SIMD_4x64(_mm256_add_epi64(m, m));
170 }
171
172 template <uint8_t CTRL>
173 static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 permute_4x64(SIMD_4x64 x) {
174 return SIMD_4x64(_mm256_permute4x64_epi64(x.m_simd, CTRL));
175 }
176
177 // Argon2 specific
183
184 // Argon2 specific
190
191 BOTAN_FN_ISA_SIMD_4X64
192 static SIMD_4x64 splat(uint64_t v) { return SIMD_4x64(_mm256_set1_epi64x(v)); }
193
194 __m256i BOTAN_FN_ISA_SIMD_4X64 raw() const noexcept { return m_simd; }
195
196 explicit BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64(__m256i x) : m_simd(x) {}
197
198 private:
199 __m256i m_simd;
200};
201
202// NOLINTEND(portability-simd-intrinsics)
203
204} // namespace Botan
205
206#endif
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 permute_4x64(SIMD_4x64 x)
Definition simd_4x64.h:173
void BOTAN_FN_ISA_SIMD_4X64 store_le(uint64_t out[4]) const
Definition simd_4x64.h:63
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator&(const SIMD_4x64 &other) const
Definition simd_4x64.h:87
static SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 alignr8(const SIMD_4x64 &a, const SIMD_4x64 &b)
Definition simd_4x64.h:162
BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64()
Definition simd_4x64.h:34
BOTAN_FN_ISA_SIMD_4X64 void operator&=(const SIMD_4x64 &other)
Definition simd_4x64.h:107
SIMD_4x64(SIMD_4x64 &&other)=default
SIMD_4x64(const SIMD_4x64 &other)=default
BOTAN_FN_ISA_SIMD_4X64 void operator^=(const SIMD_4x64 &other)
Definition simd_4x64.h:103
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 broadcast_2x64(const uint64_t *in)
Definition simd_4x64.h:52
SIMD_4x64 & operator=(SIMD_4x64 &&other)=default
BOTAN_FN_ISA_SIMD_4X64 void operator|=(const SIMD_4x64 &other)
Definition simd_4x64.h:111
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 shl() const noexcept
Definition simd_4x64.h:158
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be2(const void *lo, const void *hi)
Definition simd_4x64.h:42
BOTAN_FN_ISA_SIMD_4X64 void store_be(uint8_t out[]) const
Definition simd_4x64.h:73
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 bswap() const
Definition simd_4x64.h:56
BOTAN_FN_ISA_SIMD_4X64 void store_le2(void *outh, void *outl)
Definition simd_4x64.h:69
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be(const void *in)
Definition simd_4x64.h:50
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator+(const SIMD_4x64 &other) const
Definition simd_4x64.h:75
static void BOTAN_FN_ISA_SIMD_4X64 untwist(SIMD_4x64 &B, SIMD_4x64 &C, SIMD_4x64 &D)
Definition simd_4x64.h:185
BOTAN_FN_ISA_SIMD_4X64 void operator+=(const SIMD_4x64 &other)
Definition simd_4x64.h:99
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator|(const SIMD_4x64 &other) const
Definition simd_4x64.h:93
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le2(const void *lo, const void *hi)
Definition simd_4x64.h:37
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le(const void *in)
Definition simd_4x64.h:46
__m256i BOTAN_FN_ISA_SIMD_4X64 raw() const noexcept
Definition simd_4x64.h:194
SIMD_4x64 & operator=(const SIMD_4x64 &other)=default
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator^(const SIMD_4x64 &other) const
Definition simd_4x64.h:81
BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64(__m256i x)
Definition simd_4x64.h:196
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 mul2_32(SIMD_4x64 x, SIMD_4x64 y)
Definition simd_4x64.h:167
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 shr() const noexcept
Definition simd_4x64.h:153
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 rotl() const
Definition simd_4x64.h:148
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 splat(uint64_t v)
Definition simd_4x64.h:192
~SIMD_4x64()=default
BOTAN_FN_ISA_SIMD_4X64 void store_le(uint8_t out[]) const
Definition simd_4x64.h:65
static void BOTAN_FN_ISA_SIMD_4X64 twist(SIMD_4x64 &B, SIMD_4x64 &C, SIMD_4x64 &D)
Definition simd_4x64.h:178
BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 rotr() const
Definition simd_4x64.h:114