Botan 3.9.0
Crypto and TLS for C++
simd_4x64.h
Go to the documentation of this file.
1/*
2* (C) 2022,2025 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#ifndef BOTAN_SIMD_4X64_H_
8#define BOTAN_SIMD_4X64_H_
9
10#include <botan/compiler.h>
11#include <botan/types.h>
12#include <botan/internal/isa_extn.h>
13#include <botan/internal/target_info.h>
14
15#if defined(BOTAN_TARGET_ARCH_SUPPORTS_AVX2)
16 #include <immintrin.h>
17#endif
18
19namespace Botan {
20
21// NOLINTBEGIN(portability-simd-intrinsics)
22
23class SIMD_4x64 final {
24 public:
      // Rule of five: the only data member is a trivially copyable
      // __m256i, so the compiler-generated copy/move operations and
      // destructor are all correct.
      SIMD_4x64& operator=(const SIMD_4x64& other) = default;
      SIMD_4x64(const SIMD_4x64& other) = default;

      SIMD_4x64& operator=(SIMD_4x64&& other) = default;
      SIMD_4x64(SIMD_4x64&& other) = default;

      ~SIMD_4x64() = default;
32
      // zero initialized (all four 64-bit lanes set to zero)
      BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64() : m_simd(_mm256_setzero_si256()) {}
35
      // Load two halves at different addresses (little-endian bytes)
      //
      // NOTE(review): _mm256_loadu2_m128i(hi, lo) places its *second*
      // argument in the low 128-bit lane, so here `inh` fills lanes 0/1
      // and `inl` fills lanes 2/3 — the mirror of store_le2's argument
      // order. Presumably intentional; confirm against callers.
      static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le2(const void* inl, const void* inh) {
         return SIMD_4x64(
            _mm256_loadu2_m128i(reinterpret_cast<const __m128i*>(inl), reinterpret_cast<const __m128i*>(inh)));
      }
41
42 static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be2(const void* inl, const void* inh) {
43 return SIMD_4x64::load_le2(inl, inh).bswap();
44 }
45
      // Load 32 bytes from (possibly unaligned) memory, little-endian
      static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le(const void* in) {
         return SIMD_4x64(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
      }
49
50 static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be(const void* in) { return SIMD_4x64::load_le(in).bswap(); }
51
      // Reverse the byte order of each 64-bit lane
      SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 bswap() const {
         // _mm256_shuffle_epi8 permutes bytes within each 128-bit lane;
         // this index maps result byte i to source byte (7 - i) of the
         // same qword, i.e. it byte-reverses every 64-bit word.
         const auto idx = _mm256_set_epi8(
            8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);

         return SIMD_4x64(_mm256_shuffle_epi8(m_simd, idx));
      }
58
      // Store the four 64-bit lanes to out[0..3] (little-endian bytes)
      void store_le(uint64_t out[4]) const { this->store_le(reinterpret_cast<uint8_t*>(out)); }
60
      // Store all 32 bytes to (possibly unaligned) memory, little-endian
      BOTAN_FN_ISA_SIMD_4X64 void store_le(uint8_t out[]) const {
         _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_simd);
      }
64
65 BOTAN_FN_ISA_SIMD_4X64 void store_le2(void* outh, void* outl) {
66 _mm256_storeu2_m128i(reinterpret_cast<__m128i*>(outh), reinterpret_cast<__m128i*>(outl), m_simd);
67 }
68
69 SIMD_4x64 operator+(const SIMD_4x64& other) const {
70 SIMD_4x64 retval(*this);
71 retval += other;
72 return retval;
73 }
74
75 SIMD_4x64 operator^(const SIMD_4x64& other) const {
76 SIMD_4x64 retval(*this);
77 retval ^= other;
78 return retval;
79 }
80
      // Add each 64-bit lane of other into this (wrapping arithmetic)
      BOTAN_FN_ISA_SIMD_4X64 void operator+=(const SIMD_4x64& other) {
         m_simd = _mm256_add_epi64(m_simd, other.m_simd);
      }
84
      // XOR each lane of other into this
      BOTAN_FN_ISA_SIMD_4X64 void operator^=(const SIMD_4x64& other) {
         m_simd = _mm256_xor_si256(m_simd, other.m_simd);
      }
88
      // Rotate each 64-bit lane right by ROT bits
      template <size_t ROT>
      BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 rotr() const
         requires(ROT > 0 && ROT < 64)
      {
#if defined(__AVX512VL__)
         // AVX-512VL provides a native per-lane 64-bit rotate
         return SIMD_4x64(_mm256_ror_epi64(m_simd, ROT));
#else
         // Byte-multiple rotations can be done with a single byte shuffle:
         // each mask moves source byte (i + ROT/8) mod 8 of a qword into
         // result byte i (the same mask repeated for all four lanes).
         if constexpr(ROT == 8) {
            auto shuf_rot_8 =
               _mm256_set_epi64x(0x080f0e0d0c0b0a09, 0x0007060504030201, 0x080f0e0d0c0b0a09, 0x0007060504030201);

            return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_8));
         } else if constexpr(ROT == 16) {
            auto shuf_rot_16 =
               _mm256_set_epi64x(0x09080f0e0d0c0b0a, 0x0100070605040302, 0x09080f0e0d0c0b0a, 0x0100070605040302);

            return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_16));
         } else if constexpr(ROT == 24) {
            auto shuf_rot_24 =
               _mm256_set_epi64x(0x0a09080f0e0d0c0b, 0x0201000706050403, 0x0a09080f0e0d0c0b, 0x0201000706050403);

            return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_24));
         } else if constexpr(ROT == 32) {
            auto shuf_rot_32 =
               _mm256_set_epi64x(0x0b0a09080f0e0d0c, 0x0302010007060504, 0x0b0a09080f0e0d0c, 0x0302010007060504);

            return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_32));
         } else {
            // General case: (x >> ROT) | (x << (64 - ROT)) per lane
            return SIMD_4x64(_mm256_or_si256(_mm256_srli_epi64(m_simd, static_cast<int>(ROT)),
                                             _mm256_slli_epi64(m_simd, static_cast<int>(64 - ROT))));
         }
#endif
      }
122
123 template <size_t ROT>
124 SIMD_4x64 rotl() const {
125 return this->rotr<64 - ROT>();
126 }
127
      // Logical right shift of each 64-bit lane by SHIFT bits
      template <int SHIFT>
      SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 shr() const noexcept {
         return SIMD_4x64(_mm256_srli_epi64(m_simd, SHIFT));
      }
132
      // Per 128-bit lane: concatenate (a:b), shift right by 8 bytes, and
      // keep the low 16 bytes — so each result lane has b's high qword in
      // its low half and a's low qword in its high half.
      static SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 alignr8(const SIMD_4x64& a, const SIMD_4x64& b) {
         return SIMD_4x64(_mm256_alignr_epi8(a.m_simd, b.m_simd, 8));
      }
136
      // Argon2 specific operation: per 64-bit lane, compute
      // 2 * lo32(x) * lo32(y). _mm256_mul_epu32 multiplies the low 32
      // bits of each lane into a full 64-bit product; adding the product
      // to itself doubles it.
      static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 mul2_32(SIMD_4x64 x, SIMD_4x64 y) {
         const __m256i m = _mm256_mul_epu32(x.m_simd, y.m_simd);
         return SIMD_4x64(_mm256_add_epi64(m, m));
      }
142
      // Permute the four 64-bit lanes; CTRL packs four 2-bit source-lane
      // indices (same immediate encoding as _mm256_permute4x64_epi64)
      template <uint8_t CTRL>
      static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 permute_4x64(SIMD_4x64 x) {
         return SIMD_4x64(_mm256_permute4x64_epi64(x.m_simd, CTRL));
      }
147
148 // Argon2 specific
154
155 // Argon2 specific
161
      // Wrap a raw AVX2 register value
      explicit BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64(__m256i x) : m_simd(x) {}

   private:
      // The four 64-bit lanes, held in a single 256-bit register
      __m256i m_simd;
166};
167
168// NOLINTEND(portability-simd-intrinsics)
169
170} // namespace Botan
171
172#endif
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 permute_4x64(SIMD_4x64 x)
Definition simd_4x64.h:144
static SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 alignr8(const SIMD_4x64 &a, const SIMD_4x64 &b)
Definition simd_4x64.h:133
BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64()
Definition simd_4x64.h:34
SIMD_4x64 rotl() const
Definition simd_4x64.h:124
SIMD_4x64(SIMD_4x64 &&other)=default
SIMD_4x64(const SIMD_4x64 &other)=default
BOTAN_FN_ISA_SIMD_4X64 void operator^=(const SIMD_4x64 &other)
Definition simd_4x64.h:85
SIMD_4x64 & operator=(SIMD_4x64 &&other)=default
static void twist(SIMD_4x64 &B, SIMD_4x64 &C, SIMD_4x64 &D)
Definition simd_4x64.h:149
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 bswap() const
Definition simd_4x64.h:52
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be2(const void *inl, const void *inh)
Definition simd_4x64.h:42
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le2(const void *inl, const void *inh)
Definition simd_4x64.h:37
BOTAN_FN_ISA_SIMD_4X64 void store_le2(void *outh, void *outl)
Definition simd_4x64.h:65
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be(const void *in)
Definition simd_4x64.h:50
SIMD_4x64 operator+(const SIMD_4x64 &other) const
Definition simd_4x64.h:69
BOTAN_FN_ISA_SIMD_4X64 void operator+=(const SIMD_4x64 &other)
Definition simd_4x64.h:81
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le(const void *in)
Definition simd_4x64.h:46
SIMD_4x64 & operator=(const SIMD_4x64 &other)=default
BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64(__m256i x)
Definition simd_4x64.h:162
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 mul2_32(SIMD_4x64 x, SIMD_4x64 y)
Definition simd_4x64.h:138
void store_le(uint64_t out[4]) const
Definition simd_4x64.h:59
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 shr() const noexcept
Definition simd_4x64.h:129
static void untwist(SIMD_4x64 &B, SIMD_4x64 &C, SIMD_4x64 &D)
Definition simd_4x64.h:156
SIMD_4x64 operator^(const SIMD_4x64 &other) const
Definition simd_4x64.h:75
~SIMD_4x64()=default
BOTAN_FN_ISA_SIMD_4X64 void store_le(uint8_t out[]) const
Definition simd_4x64.h:61
BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 rotr() const
Definition simd_4x64.h:90