Botan 3.5.0
Crypto and TLS for C++
zfec_sse2.cpp
Go to the documentation of this file.
1/*
2* (C) 2009,2010,2021 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/zfec.h>
8
9#include <botan/internal/simd_32.h>
10#include <immintrin.h>
11
12namespace Botan {
13
14namespace {
15
16inline SIMD_4x32 rshift_1_u8(SIMD_4x32 v) {
17 return SIMD_4x32(_mm_add_epi8(v.raw(), v.raw()));
18}
19
20inline SIMD_4x32 high_bit_set_u8(SIMD_4x32 v) {
21 return SIMD_4x32(_mm_cmpgt_epi8(_mm_setzero_si128(), v.raw()));
22}
23
24} // namespace
25
26BOTAN_FUNC_ISA("sse2") size_t ZFEC::addmul_sse2(uint8_t z[], const uint8_t x[], uint8_t y, size_t size) {
27 const SIMD_4x32 polynomial = SIMD_4x32::splat_u8(0x1D);
28
29 const size_t orig_size = size;
30
31 // unrolled out to cache line size
32 while(size >= 64) {
33 SIMD_4x32 x_1 = SIMD_4x32::load_le(x);
34 SIMD_4x32 x_2 = SIMD_4x32::load_le(x + 16);
35 SIMD_4x32 x_3 = SIMD_4x32::load_le(x + 32);
36 SIMD_4x32 x_4 = SIMD_4x32::load_le(x + 48);
37
38 SIMD_4x32 z_1 = SIMD_4x32::load_le(z);
39 SIMD_4x32 z_2 = SIMD_4x32::load_le(z + 16);
40 SIMD_4x32 z_3 = SIMD_4x32::load_le(z + 32);
41 SIMD_4x32 z_4 = SIMD_4x32::load_le(z + 48);
42
43 if(y & 0x01) {
44 z_1 ^= x_1;
45 z_2 ^= x_2;
46 z_3 ^= x_3;
47 z_4 ^= x_4;
48 }
49
50 for(size_t j = 1; j != 8; ++j) {
51 /*
52 * Each byte of each mask is either 0 or the polynomial 0x1D,
53 * depending on if the high bit of x_i is set or not.
54 */
55
56 const SIMD_4x32 mask_1(high_bit_set_u8(x_1));
57 const SIMD_4x32 mask_2(high_bit_set_u8(x_2));
58 const SIMD_4x32 mask_3(high_bit_set_u8(x_3));
59 const SIMD_4x32 mask_4(high_bit_set_u8(x_4));
60
61 // x <<= 1
62 x_1 = rshift_1_u8(x_1);
63 x_2 = rshift_1_u8(x_2);
64 x_3 = rshift_1_u8(x_3);
65 x_4 = rshift_1_u8(x_4);
66
67 x_1 ^= mask_1 & polynomial;
68 x_2 ^= mask_2 & polynomial;
69 x_3 ^= mask_3 & polynomial;
70 x_4 ^= mask_4 & polynomial;
71
72 if((y >> j) & 1) {
73 z_1 ^= x_1;
74 z_2 ^= x_2;
75 z_3 ^= x_3;
76 z_4 ^= x_4;
77 }
78 }
79
80 z_1.store_le(z);
81 z_2.store_le(z + 16);
82 z_3.store_le(z + 32);
83 z_4.store_le(z + 48);
84
85 x += 64;
86 z += 64;
87 size -= 64;
88 }
89
90 return orig_size - size;
91}
92
93} // namespace Botan
static SIMD_4x32 load_le(const void *in) noexcept
Definition simd_32.h:158
static SIMD_4x32 splat_u8(uint8_t B) noexcept
Definition simd_32.h:144
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:92