Botan 3.11.0
Crypto and TLS for C&&
zfec_sse2.cpp
Go to the documentation of this file.
1/*
2* (C) 2009,2010,2021 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/zfec.h>
8
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/simd_4x32.h>
11#include <emmintrin.h>
12
13// TODO(Botan4) remove this module - SSE2 only processors are basically nonexistent now
14
15namespace Botan {
16
17namespace {
18
19// NOLINTBEGIN(portability-simd-intrinsics)
20
21inline SIMD_4x32 BOTAN_FN_ISA_SSE2 rshift_1_u8(SIMD_4x32 v) {
22 return SIMD_4x32(_mm_add_epi8(v.raw(), v.raw()));
23}
24
25inline SIMD_4x32 BOTAN_FN_ISA_SSE2 high_bit_set_u8(SIMD_4x32 v) {
26 return SIMD_4x32(_mm_cmpgt_epi8(_mm_setzero_si128(), v.raw()));
27}
28
29// NOLINTEND(portability-simd-intrinsics)
30
31} // namespace
32
33BOTAN_FN_ISA_SSE2 size_t ZFEC::addmul_sse2(uint8_t z[], const uint8_t x[], uint8_t y, size_t size) {
34 const SIMD_4x32 polynomial = SIMD_4x32::splat_u8(0x1D);
35
36 const size_t orig_size = size;
37
38 // unrolled out to cache line size
39 while(size >= 64) {
40 SIMD_4x32 x_1 = SIMD_4x32::load_le(x);
41 SIMD_4x32 x_2 = SIMD_4x32::load_le(x + 16);
42 SIMD_4x32 x_3 = SIMD_4x32::load_le(x + 32);
43 SIMD_4x32 x_4 = SIMD_4x32::load_le(x + 48);
44
45 SIMD_4x32 z_1 = SIMD_4x32::load_le(z);
46 SIMD_4x32 z_2 = SIMD_4x32::load_le(z + 16);
47 SIMD_4x32 z_3 = SIMD_4x32::load_le(z + 32);
48 SIMD_4x32 z_4 = SIMD_4x32::load_le(z + 48);
49
50 if((y & 0x01) == 0x01) {
51 z_1 ^= x_1;
52 z_2 ^= x_2;
53 z_3 ^= x_3;
54 z_4 ^= x_4;
55 }
56
57 for(size_t j = 1; j != 8; ++j) {
58 /*
59 * Each byte of each mask is either 0 or the polynomial 0x1D,
60 * depending on if the high bit of x_i is set or not.
61 */
62
63 const SIMD_4x32 mask_1(high_bit_set_u8(x_1));
64 const SIMD_4x32 mask_2(high_bit_set_u8(x_2));
65 const SIMD_4x32 mask_3(high_bit_set_u8(x_3));
66 const SIMD_4x32 mask_4(high_bit_set_u8(x_4));
67
68 // x <<= 1
69 x_1 = rshift_1_u8(x_1);
70 x_2 = rshift_1_u8(x_2);
71 x_3 = rshift_1_u8(x_3);
72 x_4 = rshift_1_u8(x_4);
73
74 x_1 ^= mask_1 & polynomial;
75 x_2 ^= mask_2 & polynomial;
76 x_3 ^= mask_3 & polynomial;
77 x_4 ^= mask_4 & polynomial;
78
79 if(((y >> j) & 0x01) == 0x01) {
80 z_1 ^= x_1;
81 z_2 ^= x_2;
82 z_3 ^= x_3;
83 z_4 ^= x_4;
84 }
85 }
86
87 z_1.store_le(z);
88 z_2.store_le(z + 16);
89 z_3.store_le(z + 32);
90 z_4.store_le(z + 48);
91
92 x += 64;
93 z += 64;
94 size -= 64;
95 }
96
97 return orig_size - size;
98}
99
100} // namespace Botan
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(const void *in) noexcept
Definition simd_4x32.h:162
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 splat_u8(uint8_t B) noexcept
Definition simd_4x32.h:144