Botan 3.9.0
Crypto and TLS for C++
zfec_sse2.cpp
Go to the documentation of this file.
1/*
2* (C) 2009,2010,2021 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/zfec.h>
8
9#include <botan/internal/isa_extn.h>
10#include <botan/internal/simd_4x32.h>
11#include <immintrin.h>
12
13namespace Botan {
14
15namespace {
16
17inline SIMD_4x32 rshift_1_u8(SIMD_4x32 v) {
18 return SIMD_4x32(_mm_add_epi8(v.raw(), v.raw()));
19}
20
21inline SIMD_4x32 high_bit_set_u8(SIMD_4x32 v) {
22 return SIMD_4x32(_mm_cmpgt_epi8(_mm_setzero_si128(), v.raw()));
23}
24
25} // namespace
26
27BOTAN_FN_ISA_SSE2 size_t ZFEC::addmul_sse2(uint8_t z[], const uint8_t x[], uint8_t y, size_t size) {
28 const SIMD_4x32 polynomial = SIMD_4x32::splat_u8(0x1D);
29
30 const size_t orig_size = size;
31
32 // unrolled out to cache line size
33 while(size >= 64) {
34 SIMD_4x32 x_1 = SIMD_4x32::load_le(x);
35 SIMD_4x32 x_2 = SIMD_4x32::load_le(x + 16);
36 SIMD_4x32 x_3 = SIMD_4x32::load_le(x + 32);
37 SIMD_4x32 x_4 = SIMD_4x32::load_le(x + 48);
38
39 SIMD_4x32 z_1 = SIMD_4x32::load_le(z);
40 SIMD_4x32 z_2 = SIMD_4x32::load_le(z + 16);
41 SIMD_4x32 z_3 = SIMD_4x32::load_le(z + 32);
42 SIMD_4x32 z_4 = SIMD_4x32::load_le(z + 48);
43
44 if((y & 0x01) == 0x01) {
45 z_1 ^= x_1;
46 z_2 ^= x_2;
47 z_3 ^= x_3;
48 z_4 ^= x_4;
49 }
50
51 for(size_t j = 1; j != 8; ++j) {
52 /*
53 * Each byte of each mask is either 0 or the polynomial 0x1D,
54 * depending on if the high bit of x_i is set or not.
55 */
56
57 const SIMD_4x32 mask_1(high_bit_set_u8(x_1));
58 const SIMD_4x32 mask_2(high_bit_set_u8(x_2));
59 const SIMD_4x32 mask_3(high_bit_set_u8(x_3));
60 const SIMD_4x32 mask_4(high_bit_set_u8(x_4));
61
62 // x <<= 1
63 x_1 = rshift_1_u8(x_1);
64 x_2 = rshift_1_u8(x_2);
65 x_3 = rshift_1_u8(x_3);
66 x_4 = rshift_1_u8(x_4);
67
68 x_1 ^= mask_1 & polynomial;
69 x_2 ^= mask_2 & polynomial;
70 x_3 ^= mask_3 & polynomial;
71 x_4 ^= mask_4 & polynomial;
72
73 if(((y >> j) & 0x01) == 0x01) {
74 z_1 ^= x_1;
75 z_2 ^= x_2;
76 z_3 ^= x_3;
77 z_4 ^= x_4;
78 }
79 }
80
81 z_1.store_le(z);
82 z_2.store_le(z + 16);
83 z_3.store_le(z + 32);
84 z_4.store_le(z + 48);
85
86 x += 64;
87 z += 64;
88 size -= 64;
89 }
90
91 return orig_size - size;
92}
93
94} // namespace Botan
static SIMD_4x32 load_le(const void *in) noexcept
Definition simd_4x32.h:149
static SIMD_4x32 splat_u8(uint8_t B) noexcept
Definition simd_4x32.h:133