Botan 3.11.0
Crypto and TLS for C++
ghash_cpu.cpp
Go to the documentation of this file.
1/*
2* Hook for CLMUL/PMULL/VPMSUM
3* (C) 2013,2017,2019,2020 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/internal/ghash.h>
9
10#include <botan/internal/isa_extn.h>
11#include <botan/internal/polyval_fn.h>
12#include <botan/internal/simd_4x32.h>
13
14namespace Botan {
15
16namespace {
17
18inline SIMD_4x32 BOTAN_FN_ISA_CLMUL polyval_multiply_x4(const SIMD_4x32& H1,
19 const SIMD_4x32& H2,
20 const SIMD_4x32& H3,
21 const SIMD_4x32& H4,
22 const SIMD_4x32& X1,
23 const SIMD_4x32& X2,
24 const SIMD_4x32& X3,
25 const SIMD_4x32& X4) {
26 const SIMD_4x32 lo = (clmul<0x00>(H1, X1) ^ clmul<0x00>(H2, X2)) ^ (clmul<0x00>(H3, X3) ^ clmul<0x00>(H4, X4));
27 const SIMD_4x32 hi = (clmul<0x11>(H1, X1) ^ clmul<0x11>(H2, X2)) ^ (clmul<0x11>(H3, X3) ^ clmul<0x11>(H4, X4));
28
29 SIMD_4x32 mid;
30
31 mid ^= clmul<0x00>(H1 ^ H1.shift_elems_right<2>(), X1 ^ X1.shift_elems_right<2>());
32 mid ^= clmul<0x00>(H2 ^ H2.shift_elems_right<2>(), X2 ^ X2.shift_elems_right<2>());
33 mid ^= clmul<0x00>(H3 ^ H3.shift_elems_right<2>(), X3 ^ X3.shift_elems_right<2>());
34 mid ^= clmul<0x00>(H4 ^ H4.shift_elems_right<2>(), X4 ^ X4.shift_elems_right<2>());
35 mid ^= lo;
36 mid ^= hi;
37
38 return polyval_reduce(hi ^ mid.shift_elems_right<2>(), lo ^ mid.shift_elems_left<2>());
39}
40
41inline SIMD_4x32 BOTAN_FN_ISA_CLMUL polyval_multiply_x8(const SIMD_4x32& H1,
42 const SIMD_4x32& H2,
43 const SIMD_4x32& H3,
44 const SIMD_4x32& H4,
45 const SIMD_4x32& H5,
46 const SIMD_4x32& H6,
47 const SIMD_4x32& H7,
48 const SIMD_4x32& H8,
49 const SIMD_4x32& X1,
50 const SIMD_4x32& X2,
51 const SIMD_4x32& X3,
52 const SIMD_4x32& X4,
53 const SIMD_4x32& X5,
54 const SIMD_4x32& X6,
55 const SIMD_4x32& X7,
56 const SIMD_4x32& X8) {
57 const SIMD_4x32 lo = clmul<0x00>(H1, X1) ^ clmul<0x00>(H2, X2) ^ clmul<0x00>(H3, X3) ^ clmul<0x00>(H4, X4) ^
58 clmul<0x00>(H5, X5) ^ clmul<0x00>(H6, X6) ^ clmul<0x00>(H7, X7) ^ clmul<0x00>(H8, X8);
59
60 const SIMD_4x32 hi = clmul<0x11>(H1, X1) ^ clmul<0x11>(H2, X2) ^ clmul<0x11>(H3, X3) ^ clmul<0x11>(H4, X4) ^
61 clmul<0x11>(H5, X5) ^ clmul<0x11>(H6, X6) ^ clmul<0x11>(H7, X7) ^ clmul<0x11>(H8, X8);
62
63 SIMD_4x32 mid;
64
65 mid ^= clmul<0x00>(H1 ^ H1.shift_elems_right<2>(), X1 ^ X1.shift_elems_right<2>());
66 mid ^= clmul<0x00>(H2 ^ H2.shift_elems_right<2>(), X2 ^ X2.shift_elems_right<2>());
67 mid ^= clmul<0x00>(H3 ^ H3.shift_elems_right<2>(), X3 ^ X3.shift_elems_right<2>());
68 mid ^= clmul<0x00>(H4 ^ H4.shift_elems_right<2>(), X4 ^ X4.shift_elems_right<2>());
69 mid ^= clmul<0x00>(H5 ^ H5.shift_elems_right<2>(), X5 ^ X5.shift_elems_right<2>());
70 mid ^= clmul<0x00>(H6 ^ H6.shift_elems_right<2>(), X6 ^ X6.shift_elems_right<2>());
71 mid ^= clmul<0x00>(H7 ^ H7.shift_elems_right<2>(), X7 ^ X7.shift_elems_right<2>());
72 mid ^= clmul<0x00>(H8 ^ H8.shift_elems_right<2>(), X8 ^ X8.shift_elems_right<2>());
73 mid ^= lo;
74 mid ^= hi;
75
76 return polyval_reduce(hi ^ mid.shift_elems_right<2>(), lo ^ mid.shift_elems_left<2>());
77}
78
79} // namespace
80
81void BOTAN_FN_ISA_CLMUL GHASH::ghash_precompute_cpu(const uint8_t H_bytes[16], secure_vector<uint64_t>& H_pow) {
82 const SIMD_4x32 H1 = mulx_polyval(reverse_vector(SIMD_4x32::load_le(H_bytes)));
83 const SIMD_4x32 H2 = polyval_multiply(H1, H1);
84 const SIMD_4x32 H3 = polyval_multiply(H1, H2);
85 const SIMD_4x32 H4 = polyval_multiply(H2, H2);
86
87 H_pow.reserve(2 * 8);
88 H_pow.resize(2 * 4);
89 H1.store_le(&H_pow[0]); // NOLINT(*-container-data-pointer)
90 H2.store_le(&H_pow[2]);
91 H3.store_le(&H_pow[4]);
92 H4.store_le(&H_pow[6]);
93}
94
95void BOTAN_FN_ISA_CLMUL GHASH::ghash_multiply_cpu(uint8_t x[16],
97 const uint8_t input[],
98 size_t blocks) {
99 BOTAN_ASSERT_NOMSG(H_pow.size() == 2 * 4 || H_pow.size() == 2 * 8);
100
101 const SIMD_4x32 H1 = SIMD_4x32::load_le(&H_pow[0]); // NOLINT(*-container-data-pointer)
102
103 SIMD_4x32 a = reverse_vector(SIMD_4x32::load_le(x));
104
105 if(blocks >= 8) {
106 const SIMD_4x32 H2 = SIMD_4x32::load_le(&H_pow[2]);
107 const SIMD_4x32 H3 = SIMD_4x32::load_le(&H_pow[4]);
108 const SIMD_4x32 H4 = SIMD_4x32::load_le(&H_pow[6]);
109
110 if(H_pow.size() < 2 * 8) {
111 H_pow.resize(2 * 8);
112 const SIMD_4x32 H5 = polyval_multiply(H4, H1);
113 const SIMD_4x32 H6 = polyval_multiply(H4, H2);
114 const SIMD_4x32 H7 = polyval_multiply(H4, H3);
115 const SIMD_4x32 H8 = polyval_multiply(H4, H4);
116 H5.store_le(&H_pow[8]);
117 H6.store_le(&H_pow[10]);
118 H7.store_le(&H_pow[12]);
119 H8.store_le(&H_pow[14]);
120 }
121
122 const SIMD_4x32 H5 = SIMD_4x32::load_le(&H_pow[8]);
123 const SIMD_4x32 H6 = SIMD_4x32::load_le(&H_pow[10]);
124 const SIMD_4x32 H7 = SIMD_4x32::load_le(&H_pow[12]);
125 const SIMD_4x32 H8 = SIMD_4x32::load_le(&H_pow[14]);
126
127 while(blocks >= 8) {
128 const SIMD_4x32 m0 = reverse_vector(SIMD_4x32::load_le(input));
129 const SIMD_4x32 m1 = reverse_vector(SIMD_4x32::load_le(input + 16 * 1));
130 const SIMD_4x32 m2 = reverse_vector(SIMD_4x32::load_le(input + 16 * 2));
131 const SIMD_4x32 m3 = reverse_vector(SIMD_4x32::load_le(input + 16 * 3));
132 const SIMD_4x32 m4 = reverse_vector(SIMD_4x32::load_le(input + 16 * 4));
133 const SIMD_4x32 m5 = reverse_vector(SIMD_4x32::load_le(input + 16 * 5));
134 const SIMD_4x32 m6 = reverse_vector(SIMD_4x32::load_le(input + 16 * 6));
135 const SIMD_4x32 m7 = reverse_vector(SIMD_4x32::load_le(input + 16 * 7));
136
137 a = polyval_multiply_x8(H1, H2, H3, H4, H5, H6, H7, H8, m7, m6, m5, m4, m3, m2, m1, m0 ^ a);
138
139 input += 8 * 16;
140 blocks -= 8;
141 }
142 }
143
144 if(blocks >= 4) {
145 const SIMD_4x32 H2 = SIMD_4x32::load_le(&H_pow[2]);
146 const SIMD_4x32 H3 = SIMD_4x32::load_le(&H_pow[4]);
147 const SIMD_4x32 H4 = SIMD_4x32::load_le(&H_pow[6]);
148
149 while(blocks >= 4) {
150 const SIMD_4x32 m0 = reverse_vector(SIMD_4x32::load_le(input));
151 const SIMD_4x32 m1 = reverse_vector(SIMD_4x32::load_le(input + 16 * 1));
152 const SIMD_4x32 m2 = reverse_vector(SIMD_4x32::load_le(input + 16 * 2));
153 const SIMD_4x32 m3 = reverse_vector(SIMD_4x32::load_le(input + 16 * 3));
154
155 a ^= m0;
156 a = polyval_multiply_x4(H1, H2, H3, H4, m3, m2, m1, a);
157
158 input += 4 * 16;
159 blocks -= 4;
160 }
161 }
162
163 for(size_t i = 0; i != blocks; ++i) {
164 const SIMD_4x32 m = reverse_vector(SIMD_4x32::load_le(input + 16 * i));
165
166 a ^= m;
167 a = polyval_multiply(H1, a);
168 }
169
170 a = reverse_vector(a);
171 a.store_le(x);
172}
173
174} // namespace Botan
#define BOTAN_ASSERT_NOMSG(expr)
Definition assert.h:75
static SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 load_le(const void *in) noexcept
Definition simd_4x32.h:162
BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_SIMD_4X32 mulx_polyval(const SIMD_4x32 &h)
Definition polyval_fn.h:92
BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_CLMUL polyval_multiply(const SIMD_4x32 &H, const SIMD_4x32 &x)
Definition polyval_fn.h:128
BOTAN_FORCE_INLINE SIMD_4x32 BOTAN_FN_ISA_CLMUL polyval_reduce(const SIMD_4x32 &hi, const SIMD_4x32 &lo)
Definition polyval_fn.h:107
BOTAN_FORCE_INLINE BOTAN_FN_ISA_CLMUL SIMD_4x32 clmul(const SIMD_4x32 &H, const SIMD_4x32 &x)
Definition polyval_fn.h:31
BOTAN_FORCE_INLINE BOTAN_FN_ISA_SIMD_4X32 SIMD_4x32 reverse_vector(const SIMD_4x32 &in)
Definition polyval_fn.h:16
std::vector< T, secure_allocator< T > > secure_vector
Definition secmem.h:68