7#ifndef BOTAN_SIMD_4X64_H_
8#define BOTAN_SIMD_4X64_H_
10#include <botan/compiler.h>
11#include <botan/types.h>
12#include <botan/internal/isa_extn.h>
13#include <botan/internal/target_info.h>
15#if defined(BOTAN_TARGET_ARCH_SUPPORTS_AVX2)
16 #include <immintrin.h>
34 BOTAN_FN_ISA_SIMD_4X64
SIMD_4x64() : m_simd(_mm256_setzero_si256()) {}
39 _mm256_loadu2_m128i(
reinterpret_cast<const __m128i*
>(lo),
reinterpret_cast<const __m128i*
>(hi)));
47 return SIMD_4x64(_mm256_loadu_si256(
reinterpret_cast<const __m256i*
>(in)));
53 return SIMD_4x64(_mm256_broadcastsi128_si256(_mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in))));
57 const auto idx = _mm256_set_epi8(
58 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
60 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, idx));
63 void BOTAN_FN_ISA_SIMD_4X64
store_le(uint64_t out[4])
const { this->
store_le(
reinterpret_cast<uint8_t*
>(out)); }
/**
* Store the whole 256-bit register (m_simd) to memory starting at `out`.
*
* Uses _mm256_storeu_si256, the unaligned 256-bit store, so 32 bytes are written
* and `out` is not required to be 32-byte aligned.
*
* @param out destination buffer; must have room for 32 bytes
*/
65 BOTAN_FN_ISA_SIMD_4X64
void store_le(uint8_t out[])
const {
66 _mm256_storeu_si256(
reinterpret_cast<__m256i*
>(out), m_simd);
69 BOTAN_FN_ISA_SIMD_4X64
void store_le2(
void* outh,
void* outl) {
70 _mm256_storeu2_m128i(
reinterpret_cast<__m128i*
>(outh),
reinterpret_cast<__m128i*
>(outl), m_simd);
100 m_simd = _mm256_add_epi64(m_simd, other.m_simd);
104 m_simd = _mm256_xor_si256(m_simd, other.m_simd);
108 m_simd = _mm256_and_si256(m_simd, other.m_simd);
111 BOTAN_FN_ISA_SIMD_4X64
void operator|=(
const SIMD_4x64& other) { m_simd = _mm256_or_si256(m_simd, other.m_simd); }
113 template <
size_t ROT>
115 requires(ROT > 0 && ROT < 64)
117#if defined(__AVX512VL__)
118 return SIMD_4x64(_mm256_ror_epi64(m_simd, ROT));
120 if constexpr(ROT == 8) {
122 _mm256_set_epi64x(0x080f0e0d0c0b0a09, 0x0007060504030201, 0x080f0e0d0c0b0a09, 0x0007060504030201);
124 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_8));
125 }
else if constexpr(ROT == 16) {
127 _mm256_set_epi64x(0x09080f0e0d0c0b0a, 0x0100070605040302, 0x09080f0e0d0c0b0a, 0x0100070605040302);
129 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_16));
130 }
else if constexpr(ROT == 24) {
132 _mm256_set_epi64x(0x0a09080f0e0d0c0b, 0x0201000706050403, 0x0a09080f0e0d0c0b, 0x0201000706050403);
134 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_24));
135 }
else if constexpr(ROT == 32) {
137 _mm256_set_epi64x(0x0b0a09080f0e0d0c, 0x0302010007060504, 0x0b0a09080f0e0d0c, 0x0302010007060504);
139 return SIMD_4x64(_mm256_shuffle_epi8(m_simd, shuf_rot_32));
141 return SIMD_4x64(_mm256_or_si256(_mm256_srli_epi64(m_simd,
static_cast<int>(ROT)),
142 _mm256_slli_epi64(m_simd,
static_cast<int>(64 - ROT))));
147 template <
size_t ROT>
149 return this->
rotr<64 - ROT>();
154 return SIMD_4x64(_mm256_srli_epi64(m_simd, SHIFT));
159 return SIMD_4x64(_mm256_slli_epi64(m_simd, SHIFT));
163 return SIMD_4x64(_mm256_alignr_epi8(a.m_simd, b.m_simd, 8));
168 const __m256i m = _mm256_mul_epu32(x.m_simd, y.m_simd);
169 return SIMD_4x64(_mm256_add_epi64(m, m));
172 template <u
int8_t CTRL>
174 return SIMD_4x64(_mm256_permute4x64_epi64(x.m_simd, CTRL));
191 BOTAN_FN_ISA_SIMD_4X64
194 __m256i BOTAN_FN_ISA_SIMD_4X64
raw() const noexcept {
return m_simd; }
196 explicit BOTAN_FN_ISA_SIMD_4X64
SIMD_4x64(__m256i x) : m_simd(x) {}
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 permute_4x64(SIMD_4x64 x)
void BOTAN_FN_ISA_SIMD_4X64 store_le(uint64_t out[4]) const
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator&(const SIMD_4x64 &other) const
static SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 alignr8(const SIMD_4x64 &a, const SIMD_4x64 &b)
BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64()
BOTAN_FN_ISA_SIMD_4X64 void operator&=(const SIMD_4x64 &other)
SIMD_4x64(SIMD_4x64 &&other)=default
SIMD_4x64(const SIMD_4x64 &other)=default
BOTAN_FN_ISA_SIMD_4X64 void operator^=(const SIMD_4x64 &other)
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 broadcast_2x64(const uint64_t *in)
SIMD_4x64 & operator=(SIMD_4x64 &&other)=default
BOTAN_FN_ISA_SIMD_4X64 void operator|=(const SIMD_4x64 &other)
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 shl() const noexcept
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be2(const void *lo, const void *hi)
BOTAN_FN_ISA_SIMD_4X64 void store_be(uint8_t out[]) const
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 bswap() const
BOTAN_FN_ISA_SIMD_4X64 void store_le2(void *outh, void *outl)
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_be(const void *in)
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator+(const SIMD_4x64 &other) const
static void BOTAN_FN_ISA_SIMD_4X64 untwist(SIMD_4x64 &B, SIMD_4x64 &C, SIMD_4x64 &D)
BOTAN_FN_ISA_SIMD_4X64 void operator+=(const SIMD_4x64 &other)
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator|(const SIMD_4x64 &other) const
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le2(const void *lo, const void *hi)
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 load_le(const void *in)
__m256i BOTAN_FN_ISA_SIMD_4X64 raw() const noexcept
SIMD_4x64 & operator=(const SIMD_4x64 &other)=default
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 operator^(const SIMD_4x64 &other) const
BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64(__m256i x)
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 mul2_32(SIMD_4x64 x, SIMD_4x64 y)
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 shr() const noexcept
SIMD_4x64 BOTAN_FN_ISA_SIMD_4X64 rotl() const
static BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 splat(uint64_t v)
BOTAN_FN_ISA_SIMD_4X64 void store_le(uint8_t out[]) const
static void BOTAN_FN_ISA_SIMD_4X64 twist(SIMD_4x64 &B, SIMD_4x64 &C, SIMD_4x64 &D)
BOTAN_FN_ISA_SIMD_4X64 SIMD_4x64 rotr() const