Botan 3.10.0
Crypto and TLS for C++
zfec_sse2.cpp
Go to the documentation of this file.
1/*
2* (C) 2009,2010,2021 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/zfec.h>
8
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/simd_4x32.h>
11#include <immintrin.h>
12
13namespace Botan {
14
15namespace {
16
17// NOLINTBEGIN(portability-simd-intrinsics)
18
19inline SIMD_4x32 rshift_1_u8(SIMD_4x32 v) {
20 return SIMD_4x32(_mm_add_epi8(v.raw(), v.raw()));
21}
22
23inline SIMD_4x32 high_bit_set_u8(SIMD_4x32 v) {
24 return SIMD_4x32(_mm_cmpgt_epi8(_mm_setzero_si128(), v.raw()));
25}
26
27// NOLINTEND(portability-simd-intrinsics)
28
29} // namespace
30
31BOTAN_FN_ISA_SSE2 size_t ZFEC::addmul_sse2(uint8_t z[], const uint8_t x[], uint8_t y, size_t size) {
32 const SIMD_4x32 polynomial = SIMD_4x32::splat_u8(0x1D);
33
34 const size_t orig_size = size;
35
36 // unrolled out to cache line size
37 while(size >= 64) {
38 SIMD_4x32 x_1 = SIMD_4x32::load_le(x);
39 SIMD_4x32 x_2 = SIMD_4x32::load_le(x + 16);
40 SIMD_4x32 x_3 = SIMD_4x32::load_le(x + 32);
41 SIMD_4x32 x_4 = SIMD_4x32::load_le(x + 48);
42
43 SIMD_4x32 z_1 = SIMD_4x32::load_le(z);
44 SIMD_4x32 z_2 = SIMD_4x32::load_le(z + 16);
45 SIMD_4x32 z_3 = SIMD_4x32::load_le(z + 32);
46 SIMD_4x32 z_4 = SIMD_4x32::load_le(z + 48);
47
48 if((y & 0x01) == 0x01) {
49 z_1 ^= x_1;
50 z_2 ^= x_2;
51 z_3 ^= x_3;
52 z_4 ^= x_4;
53 }
54
55 for(size_t j = 1; j != 8; ++j) {
56 /*
57 * Each byte of each mask is either 0 or the polynomial 0x1D,
58 * depending on if the high bit of x_i is set or not.
59 */
60
61 const SIMD_4x32 mask_1(high_bit_set_u8(x_1));
62 const SIMD_4x32 mask_2(high_bit_set_u8(x_2));
63 const SIMD_4x32 mask_3(high_bit_set_u8(x_3));
64 const SIMD_4x32 mask_4(high_bit_set_u8(x_4));
65
66 // x <<= 1
67 x_1 = rshift_1_u8(x_1);
68 x_2 = rshift_1_u8(x_2);
69 x_3 = rshift_1_u8(x_3);
70 x_4 = rshift_1_u8(x_4);
71
72 x_1 ^= mask_1 & polynomial;
73 x_2 ^= mask_2 & polynomial;
74 x_3 ^= mask_3 & polynomial;
75 x_4 ^= mask_4 & polynomial;
76
77 if(((y >> j) & 0x01) == 0x01) {
78 z_1 ^= x_1;
79 z_2 ^= x_2;
80 z_3 ^= x_3;
81 z_4 ^= x_4;
82 }
83 }
84
85 z_1.store_le(z);
86 z_2.store_le(z + 16);
87 z_3.store_le(z + 32);
88 z_4.store_le(z + 48);
89
90 x += 64;
91 z += 64;
92 size -= 64;
93 }
94
95 return orig_size - size;
96}
97
98} // namespace Botan
static SIMD_4x32 load_le(const void *in) noexcept
Definition simd_4x32.h:149
static SIMD_4x32 splat_u8(uint8_t B) noexcept
Definition simd_4x32.h:133