7#ifndef BOTAN_SIMD_8X64_H_
8#define BOTAN_SIMD_8X64_H_
10#include <botan/compiler.h>
11#include <botan/types.h>
12#include <botan/internal/isa_extn.h>
13#include <botan/internal/target_info.h>
// Default constructor: initialises m_simd from _mm512_setzero_si512(),
// so a default-constructed vector has all 512 bits cleared.
31 BOTAN_FN_ISA_SIMD_8X64
SIMD_8x64() : m_simd(_mm512_setzero_si512()) {}
// Fragment of a function body; the enclosing signature is not visible in this
// excerpt (presumably load_le4 - TODO confirm).
//
// Assembles one 512-bit value from four unaligned 128-bit loads. The last
// argument of _mm512_inserti32x4 selects the destination 128-bit lane, so the
// data at in0 lands in lane 3 (the most significant 128 bits) and the data at
// in3 lands in lane 0 (the least significant 128 bits).
38 auto r = _mm512_setzero_si512();
39 r = _mm512_inserti32x4(r, _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in0)), 3);
40 r = _mm512_inserti32x4(r, _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in1)), 2);
41 r = _mm512_inserti32x4(r, _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in2)), 1);
42 r = _mm512_inserti32x4(r, _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in3)), 0);
// Fragment; enclosing signature not visible (presumably load_le - TODO confirm).
// Performs an unaligned 512-bit load from `in` and wraps the register in a
// SIMD_8x64 via the explicit __m512i constructor.
54 return SIMD_8x64(_mm512_loadu_si512(
reinterpret_cast<const __m512i*
>(in)));
// Fragment; enclosing signature not visible (presumably broadcast_2x64 - TODO confirm).
// Loads 128 bits (unaligned) from `in` and replicates that 128-bit block into
// all four 128-bit lanes of the 512-bit result.
59 return SIMD_8x64(_mm512_broadcast_i64x2(_mm_loadu_si128(
reinterpret_cast<const __m128i*
>(in))));
// Fragment; enclosing signature not visible (presumably bswap - TODO confirm).
//
// Byte-shuffle control for _mm512_shuffle_epi8, which shuffles independently
// within each 128-bit lane. _mm512_set_epi8 takes its arguments from the most
// significant byte down, so within every lane bytes 15..8 read source bytes
// 8..15 and bytes 7..0 read source bytes 0..7: the byte order of each 64-bit
// element is reversed while the elements themselves stay in place.
66 const auto idx = _mm512_set_epi8(
67 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7,
68 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7);
71 return SIMD_8x64(_mm512_shuffle_epi8(m_simd, idx));
74 void store_le(uint64_t out[8])
const { this->
store_le(
reinterpret_cast<uint8_t*
>(out)); }
// Store the full 512-bit register to `out` with an unaligned store
// (_mm512_storeu_si512), i.e. 64 bytes are written starting at `out`.
// No byte reordering is applied before the store.
// (The closing brace of this method is not present in this excerpt.)
76 BOTAN_FN_ISA_SIMD_8X64
void store_le(uint8_t out[])
const {
77 _mm512_storeu_si512(
reinterpret_cast<__m512i*
>(out), m_simd);
// Split the register into its four 128-bit lanes and write each one to a
// separate destination with an unaligned 128-bit store.
// Lane 3 (most significant 128 bits) goes to out0, lane 2 to out1, lane 1 to
// out2 and lane 0 (least significant 128 bits) to out3.
// NOTE(review): the visible body only reads m_simd, so this method looks like
// it could be declared const - confirm against callers before changing.
// (The closing brace of this method is not present in this excerpt.)
82 BOTAN_FN_ISA_SIMD_8X64
void store_le4(
void* out0,
void* out1,
void* out2,
void* out3) {
83 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out0), _mm512_extracti32x4_epi32(m_simd, 3));
84 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out1), _mm512_extracti32x4_epi32(m_simd, 2));
85 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out2), _mm512_extracti32x4_epi32(m_simd, 1));
86 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(out3), _mm512_extracti32x4_epi32(m_simd, 0));
// Bodies of three separate in-place operators; their signatures are not
// visible in this excerpt (presumably operator+=, operator^= and operator&=).
// Element-wise 64-bit addition of `other` into this vector.
114 m_simd = _mm512_add_epi64(m_simd, other.m_simd);
// Bitwise XOR of all 512 bits with `other`.
118 m_simd = _mm512_xor_si512(m_simd, other.m_simd);
// Bitwise AND of all 512 bits with `other`.
122 m_simd = _mm512_and_si512(m_simd, other.m_simd);
125 BOTAN_FN_ISA_SIMD_8X64
void operator|=(
const SIMD_8x64& other) { m_simd = _mm512_or_si512(m_simd, other.m_simd); }
// Fragment of a rotate-right template (presumably rotr; the function signature
// line is not visible in this excerpt).
// ROT is a compile-time rotation count, constrained to 1..63 by the
// requires-clause. Each 64-bit element is rotated right by ROT bits.
127 template <
size_t ROT>
129 requires(ROT > 0 && ROT < 64)
131 return SIMD_8x64(_mm512_ror_epi64(m_simd, ROT));
// Fragment of a rotate-left template (presumably rotl; the function signature
// line is not visible in this excerpt).
// A left rotation by ROT bits is expressed as a right rotation by 64 - ROT
// bits, delegating to rotr.
134 template <
size_t ROT>
136 return this->
rotr<64 - ROT>();
// Return statements of three separate functions; their signatures are not
// visible in this excerpt (presumably shr, shl and alignr8).
// Logical right shift of every 64-bit element by SHIFT bits
// (SHIFT is presumably a template parameter - TODO confirm).
141 return SIMD_8x64(_mm512_srli_epi64(m_simd, SHIFT));
// Left shift of every 64-bit element by SHIFT bits.
146 return SIMD_8x64(_mm512_slli_epi64(m_simd, SHIFT));
// Per 128-bit lane: concatenate a (high half) with b (low half), shift right
// by 8 bytes and keep the low 16 bytes. Each result lane therefore holds the
// upper 64 bits of b in its low half and the lower 64 bits of a in its high half.
150 return SIMD_8x64(_mm512_alignr_epi8(a.m_simd, b.m_simd, 8));
// Fragment; the declaration this attribute macro applies to is not visible in
// this excerpt (the statements below are presumably the body of mul2_32).
153 BOTAN_FN_ISA_SIMD_8X64
// _mm512_mul_epu32 multiplies the low unsigned 32 bits of each 64-bit element
// of x and y, producing a full 64-bit product per element.
158 const __m512i m = _mm512_mul_epu32(x.m_simd, y.m_simd);
// Adding the product to itself yields 2 * lo32(x) * lo32(y), wrapping modulo 2^64.
159 return SIMD_8x64(_mm512_add_epi64(m, m));
// Fragment; enclosing signature not visible (presumably twist - TODO confirm).
//
// _mm512_set_epi64 lists indices from element 7 down to element 0, and
// _mm512_permutexvar_epi64 sets result[i] = source[index[i]]. The 64-bit
// elements are permuted within each group of four (elements 0-3 and 4-7):
//   B: result[i] = B[(i + 1) mod 4]
//   C: result[i] = C[(i + 2) mod 4]
//   D: result[i] = D[(i + 3) mod 4]
// B, C and D are overwritten with the permuted values.
164 const auto b_perm = _mm512_set_epi64(4, 7, 6, 5, 0, 3, 2, 1);
165 const auto c_perm = _mm512_set_epi64(5, 4, 7, 6, 1, 0, 3, 2);
166 const auto d_perm = _mm512_set_epi64(6, 5, 4, 7, 2, 1, 0, 3);
167 B =
SIMD_8x64(_mm512_permutexvar_epi64(b_perm, B.m_simd));
168 C =
SIMD_8x64(_mm512_permutexvar_epi64(c_perm, C.m_simd));
169 D =
SIMD_8x64(_mm512_permutexvar_epi64(d_perm, D.m_simd));
// Fragment; enclosing signature not visible (presumably untwist - TODO confirm).
//
// Same index convention as above: _mm512_set_epi64 lists element 7 first and
// _mm512_permutexvar_epi64 sets result[i] = source[index[i]]. Within each
// group of four 64-bit elements (0-3 and 4-7):
//   B: result[i] = B[(i + 3) mod 4]
//   C: result[i] = C[(i + 2) mod 4]
//   D: result[i] = D[(i + 1) mod 4]
// B, C and D are overwritten with the permuted values.
173 const auto b_perm = _mm512_set_epi64(6, 5, 4, 7, 2, 1, 0, 3);
174 const auto c_perm = _mm512_set_epi64(5, 4, 7, 6, 1, 0, 3, 2);
175 const auto d_perm = _mm512_set_epi64(4, 7, 6, 5, 0, 3, 2, 1);
176 B =
SIMD_8x64(_mm512_permutexvar_epi64(b_perm, B.m_simd));
177 C =
SIMD_8x64(_mm512_permutexvar_epi64(c_perm, C.m_simd));
178 D =
SIMD_8x64(_mm512_permutexvar_epi64(d_perm, D.m_simd));
181 __m512i BOTAN_FN_ISA_SIMD_8X64
raw() const noexcept {
return m_simd; }
// Wrap an existing __m512i register value. Declared explicit, so a raw
// __m512i is never converted to SIMD_8x64 implicitly.
183 explicit BOTAN_FN_ISA_SIMD_8X64
SIMD_8x64(__m512i x) : m_simd(x) {}
BOTAN_FN_ISA_SIMD_8X64 void operator+=(const SIMD_8x64 &other)
BOTAN_FN_ISA_SIMD_8X64 void operator&=(const SIMD_8x64 &other)
static void BOTAN_FN_ISA_SIMD_8X64 untwist(SIMD_8x64 &B, SIMD_8x64 &C, SIMD_8x64 &D)
BOTAN_FN_ISA_SIMD_8X64 void store_le(uint8_t out[]) const
SIMD_8x64 BOTAN_FN_ISA_SIMD_8X64 bswap() const
static BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 load_le4(const void *in0, const void *in1, const void *in2, const void *in3)
static void BOTAN_FN_ISA_SIMD_8X64 twist(SIMD_8x64 &B, SIMD_8x64 &C, SIMD_8x64 &D)
SIMD_8x64(SIMD_8x64 &&other)=default
void store_le(uint64_t out[8]) const
static BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 mul2_32(SIMD_8x64 x, SIMD_8x64 y)
static BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 load_be4(const void *in0, const void *in1, const void *in2, const void *in3)
SIMD_8x64(const SIMD_8x64 &other)=default
SIMD_8x64 BOTAN_FN_ISA_SIMD_8X64 operator&(const SIMD_8x64 &other) const
BOTAN_FN_ISA_SIMD_8X64 void operator|=(const SIMD_8x64 &other)
static BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 load_le(const void *in)
SIMD_8x64 BOTAN_FN_ISA_SIMD_8X64 operator|(const SIMD_8x64 &other) const
static BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 splat(uint64_t v)
__m512i BOTAN_FN_ISA_SIMD_8X64 raw() const noexcept
static BOTAN_FN_ISA_AVX512 SIMD_8x64 broadcast_2x64(const uint64_t *in)
static SIMD_8x64 BOTAN_FN_ISA_SIMD_8X64 alignr8(const SIMD_8x64 &a, const SIMD_8x64 &b)
BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64()
SIMD_8x64 BOTAN_FN_ISA_SIMD_8X64 shl() const noexcept
BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 rotr() const
BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 rotl() const
BOTAN_FN_ISA_SIMD_8X64 void store_be(uint8_t out[]) const
BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64(__m512i x)
BOTAN_FN_ISA_SIMD_8X64 void store_le4(void *out0, void *out1, void *out2, void *out3)
SIMD_8x64 BOTAN_FN_ISA_SIMD_8X64 operator+(const SIMD_8x64 &other) const
BOTAN_FN_ISA_SIMD_8X64 void operator^=(const SIMD_8x64 &other)
SIMD_8x64 & operator=(SIMD_8x64 &&other)=default
static BOTAN_FN_ISA_SIMD_8X64 SIMD_8x64 load_be(const void *in)
SIMD_8x64 BOTAN_FN_ISA_SIMD_8X64 operator^(const SIMD_8x64 &other) const
SIMD_8x64 BOTAN_FN_ISA_SIMD_8X64 shr() const noexcept
SIMD_8x64 & operator=(const SIMD_8x64 &other)=default