Botan 3.11.0
Crypto and TLS for C++
idea_sse2.cpp
Go to the documentation of this file.
1/*
2* IDEA in SSE2
3* (C) 2009 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/internal/idea.h>
9
10#include <botan/internal/ct_utils.h>
11#include <botan/internal/isa_extn.h>
12#include <emmintrin.h>
13
14namespace Botan {
15
16// NOLINTBEGIN(portability-simd-intrinsics) TODO add various helper fns
17
18namespace {
19
20BOTAN_FN_ISA_SSE2 inline __m128i mul(__m128i X, uint16_t K_16) {
21 const __m128i zeros = _mm_set1_epi16(0);
22 const __m128i ones = _mm_set1_epi16(1);
23
24 const __m128i K = _mm_set1_epi16(K_16);
25
26 const __m128i X_is_zero = _mm_cmpeq_epi16(X, zeros);
27 const __m128i K_is_zero = _mm_cmpeq_epi16(K, zeros);
28
29 const __m128i mul_lo = _mm_mullo_epi16(X, K);
30 const __m128i mul_hi = _mm_mulhi_epu16(X, K);
31
32 __m128i T = _mm_sub_epi16(mul_lo, mul_hi);
33
34 // Unsigned compare; cmp = 1 if mul_lo < mul_hi else 0
35 const __m128i subs = _mm_subs_epu16(mul_hi, mul_lo);
36 const __m128i cmp = _mm_min_epu8(_mm_or_si128(subs, _mm_srli_epi16(subs, 8)), ones);
37
38 T = _mm_add_epi16(T, cmp);
39
40 /* Selection: if X[i] is zero then assign 1-K
41 if K is zero then assign 1-X[i]
42
43 Could if() off value of K_16 for the second, but this gives a
44 constant time implementation which is a nice bonus.
45 */
46
47 T = _mm_or_si128(_mm_andnot_si128(X_is_zero, T), _mm_and_si128(_mm_sub_epi16(ones, K), X_is_zero));
48
49 T = _mm_or_si128(_mm_andnot_si128(K_is_zero, T), _mm_and_si128(_mm_sub_epi16(ones, X), K_is_zero));
50
51 return T;
52}
53
54/*
55* 4x8 matrix transpose
56*/
57BOTAN_FN_ISA_SSE2 void transpose_in(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) {
58 B0 = _mm_shuffle_epi32(B0, _MM_SHUFFLE(3, 1, 2, 0));
59 B1 = _mm_shuffle_epi32(B1, _MM_SHUFFLE(3, 1, 2, 0));
60 B2 = _mm_shuffle_epi32(B2, _MM_SHUFFLE(3, 1, 2, 0));
61 B3 = _mm_shuffle_epi32(B3, _MM_SHUFFLE(3, 1, 2, 0));
62
63 B0 = _mm_shufflelo_epi16(B0, _MM_SHUFFLE(3, 1, 2, 0));
64 B1 = _mm_shufflelo_epi16(B1, _MM_SHUFFLE(3, 1, 2, 0));
65 B2 = _mm_shufflelo_epi16(B2, _MM_SHUFFLE(3, 1, 2, 0));
66 B3 = _mm_shufflelo_epi16(B3, _MM_SHUFFLE(3, 1, 2, 0));
67
68 B0 = _mm_shufflehi_epi16(B0, _MM_SHUFFLE(3, 1, 2, 0));
69 B1 = _mm_shufflehi_epi16(B1, _MM_SHUFFLE(3, 1, 2, 0));
70 B2 = _mm_shufflehi_epi16(B2, _MM_SHUFFLE(3, 1, 2, 0));
71 B3 = _mm_shufflehi_epi16(B3, _MM_SHUFFLE(3, 1, 2, 0));
72
73 const __m128i T0 = _mm_unpacklo_epi32(B0, B1);
74 const __m128i T1 = _mm_unpackhi_epi32(B0, B1);
75 const __m128i T2 = _mm_unpacklo_epi32(B2, B3);
76 const __m128i T3 = _mm_unpackhi_epi32(B2, B3);
77
78 B0 = _mm_unpacklo_epi64(T0, T2);
79 B1 = _mm_unpackhi_epi64(T0, T2);
80 B2 = _mm_unpacklo_epi64(T1, T3);
81 B3 = _mm_unpackhi_epi64(T1, T3);
82}
83
84/*
85* 4x8 matrix transpose (reverse)
86*/
87BOTAN_FN_ISA_SSE2 void transpose_out(__m128i& B0, __m128i& B1, __m128i& B2, __m128i& B3) {
88 __m128i T0 = _mm_unpacklo_epi64(B0, B1);
89 __m128i T1 = _mm_unpacklo_epi64(B2, B3);
90 __m128i T2 = _mm_unpackhi_epi64(B0, B1);
91 __m128i T3 = _mm_unpackhi_epi64(B2, B3);
92
93 T0 = _mm_shuffle_epi32(T0, _MM_SHUFFLE(3, 1, 2, 0));
94 T1 = _mm_shuffle_epi32(T1, _MM_SHUFFLE(3, 1, 2, 0));
95 T2 = _mm_shuffle_epi32(T2, _MM_SHUFFLE(3, 1, 2, 0));
96 T3 = _mm_shuffle_epi32(T3, _MM_SHUFFLE(3, 1, 2, 0));
97
98 T0 = _mm_shufflehi_epi16(T0, _MM_SHUFFLE(3, 1, 2, 0));
99 T1 = _mm_shufflehi_epi16(T1, _MM_SHUFFLE(3, 1, 2, 0));
100 T2 = _mm_shufflehi_epi16(T2, _MM_SHUFFLE(3, 1, 2, 0));
101 T3 = _mm_shufflehi_epi16(T3, _MM_SHUFFLE(3, 1, 2, 0));
102
103 T0 = _mm_shufflelo_epi16(T0, _MM_SHUFFLE(3, 1, 2, 0));
104 T1 = _mm_shufflelo_epi16(T1, _MM_SHUFFLE(3, 1, 2, 0));
105 T2 = _mm_shufflelo_epi16(T2, _MM_SHUFFLE(3, 1, 2, 0));
106 T3 = _mm_shufflelo_epi16(T3, _MM_SHUFFLE(3, 1, 2, 0));
107
108 B0 = _mm_unpacklo_epi32(T0, T1);
109 B1 = _mm_unpackhi_epi32(T0, T1);
110 B2 = _mm_unpacklo_epi32(T2, T3);
111 B3 = _mm_unpackhi_epi32(T2, T3);
112}
113
114} // namespace
115
116/*
117* 8 wide IDEA encryption/decryption in SSE2
118*/
119BOTAN_FN_ISA_SSE2 void IDEA::sse2_idea_op_8(const uint8_t in[64], uint8_t out[64], const uint16_t EK[52]) {
120 CT::poison(in, 64);
121 CT::poison(out, 64);
122 CT::poison(EK, 52);
123
124 const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
125
126 __m128i B0 = _mm_loadu_si128(in_mm + 0);
127 __m128i B1 = _mm_loadu_si128(in_mm + 1);
128 __m128i B2 = _mm_loadu_si128(in_mm + 2);
129 __m128i B3 = _mm_loadu_si128(in_mm + 3);
130
131 transpose_in(B0, B1, B2, B3);
132
133 // byte swap
134 B0 = _mm_or_si128(_mm_slli_epi16(B0, 8), _mm_srli_epi16(B0, 8));
135 B1 = _mm_or_si128(_mm_slli_epi16(B1, 8), _mm_srli_epi16(B1, 8));
136 B2 = _mm_or_si128(_mm_slli_epi16(B2, 8), _mm_srli_epi16(B2, 8));
137 B3 = _mm_or_si128(_mm_slli_epi16(B3, 8), _mm_srli_epi16(B3, 8));
138
139 for(size_t i = 0; i != 8; ++i) {
140 B0 = mul(B0, EK[6 * i + 0]);
141 B1 = _mm_add_epi16(B1, _mm_set1_epi16(EK[6 * i + 1]));
142 B2 = _mm_add_epi16(B2, _mm_set1_epi16(EK[6 * i + 2]));
143 B3 = mul(B3, EK[6 * i + 3]);
144
145 const __m128i T0 = B2;
146 B2 = _mm_xor_si128(B2, B0);
147 B2 = mul(B2, EK[6 * i + 4]);
148
149 const __m128i T1 = B1;
150
151 B1 = _mm_xor_si128(B1, B3);
152 B1 = _mm_add_epi16(B1, B2);
153 B1 = mul(B1, EK[6 * i + 5]);
154
155 B2 = _mm_add_epi16(B2, B1);
156
157 B0 = _mm_xor_si128(B0, B1);
158 B1 = _mm_xor_si128(B1, T0);
159 B3 = _mm_xor_si128(B3, B2);
160 B2 = _mm_xor_si128(B2, T1);
161 }
162
163 B0 = mul(B0, EK[48]);
164 B1 = _mm_add_epi16(B1, _mm_set1_epi16(EK[50]));
165 B2 = _mm_add_epi16(B2, _mm_set1_epi16(EK[49]));
166 B3 = mul(B3, EK[51]);
167
168 // byte swap
169 B0 = _mm_or_si128(_mm_slli_epi16(B0, 8), _mm_srli_epi16(B0, 8));
170 B1 = _mm_or_si128(_mm_slli_epi16(B1, 8), _mm_srli_epi16(B1, 8));
171 B2 = _mm_or_si128(_mm_slli_epi16(B2, 8), _mm_srli_epi16(B2, 8));
172 B3 = _mm_or_si128(_mm_slli_epi16(B3, 8), _mm_srli_epi16(B3, 8));
173
174 transpose_out(B0, B2, B1, B3);
175
176 __m128i* out_mm = reinterpret_cast<__m128i*>(out);
177
178 _mm_storeu_si128(out_mm + 0, B0);
179 _mm_storeu_si128(out_mm + 1, B2);
180 _mm_storeu_si128(out_mm + 2, B1);
181 _mm_storeu_si128(out_mm + 3, B3);
182
183 CT::unpoison(in, 64);
184 CT::unpoison(out, 64);
185 CT::unpoison(EK, 52);
186}
187
188// NOLINTEND(portability-simd-intrinsics)
189
190} // namespace Botan
constexpr void unpoison(const T *p, size_t n)
Definition ct_utils.h:67
constexpr void poison(const T *p, size_t n)
Definition ct_utils.h:56