Botan 3.0.0
Crypto and TLS for C&&
serpent_avx2.cpp
Go to the documentation of this file.
/*
* (C) 2018 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/internal/serpent.h>
#include <botan/internal/simd_avx2.h>
#include <botan/internal/serpent_sbox.h>

11namespace Botan {
12
13#if defined(__GNUG__) && !defined(__clang__)
14
15// These macros are redundant with the versions in serpent_sbox.h
16// but unfortunately removing them seems to trigger a bug in GCC
17// when building in amalgamation mode
18
19#define transform(B0, B1, B2, B3) \
20 do { \
21 B0 = B0.rotl<13>(); \
22 B2 = B2.rotl<3>(); \
23 B1 ^= B0 ^ B2; \
24 B3 ^= B2 ^ B0.shl<3>(); \
25 B1 = B1.rotl<1>(); \
26 B3 = B3.rotl<7>(); \
27 B0 ^= B1 ^ B3; \
28 B2 ^= B3 ^ B1.shl<7>(); \
29 B0 = B0.rotl<5>(); \
30 B2 = B2.rotl<22>(); \
31 } while(0)
32
33#define i_transform(B0, B1, B2, B3) \
34 do { \
35 B2 = B2.rotr<22>(); \
36 B0 = B0.rotr<5>(); \
37 B2 ^= B3 ^ B1.shl<7>(); \
38 B0 ^= B1 ^ B3; \
39 B3 = B3.rotr<7>(); \
40 B1 = B1.rotr<1>(); \
41 B3 ^= B2 ^ B0.shl<3>(); \
42 B1 ^= B0 ^ B2; \
43 B2 = B2.rotr<3>(); \
44 B0 = B0.rotr<13>(); \
45 } while(0)
46
47#endif
48
50void Serpent::avx2_encrypt_8(const uint8_t in[128], uint8_t out[128]) const
51 {
52 using namespace Botan::Serpent_F;
53
54 SIMD_8x32::reset_registers();
55
56 SIMD_8x32 B0 = SIMD_8x32::load_le(in);
57 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32);
58 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64);
59 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96);
60
61 SIMD_8x32::transpose(B0, B1, B2, B3);
62
63 const Key_Inserter key_xor(m_round_key.data());
64
65 key_xor( 0,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
66 key_xor( 1,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
67 key_xor( 2,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
68 key_xor( 3,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
69 key_xor( 4,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
70 key_xor( 5,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
71 key_xor( 6,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
72 key_xor( 7,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
73
74 key_xor( 8,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
75 key_xor( 9,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
76 key_xor(10,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
77 key_xor(11,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
78 key_xor(12,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
79 key_xor(13,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
80 key_xor(14,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
81 key_xor(15,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
82
83 key_xor(16,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
84 key_xor(17,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
85 key_xor(18,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
86 key_xor(19,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
87 key_xor(20,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
88 key_xor(21,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
89 key_xor(22,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
90 key_xor(23,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); transform(B0,B1,B2,B3);
91
92 key_xor(24,B0,B1,B2,B3); SBoxE0(B0,B1,B2,B3); transform(B0,B1,B2,B3);
93 key_xor(25,B0,B1,B2,B3); SBoxE1(B0,B1,B2,B3); transform(B0,B1,B2,B3);
94 key_xor(26,B0,B1,B2,B3); SBoxE2(B0,B1,B2,B3); transform(B0,B1,B2,B3);
95 key_xor(27,B0,B1,B2,B3); SBoxE3(B0,B1,B2,B3); transform(B0,B1,B2,B3);
96 key_xor(28,B0,B1,B2,B3); SBoxE4(B0,B1,B2,B3); transform(B0,B1,B2,B3);
97 key_xor(29,B0,B1,B2,B3); SBoxE5(B0,B1,B2,B3); transform(B0,B1,B2,B3);
98 key_xor(30,B0,B1,B2,B3); SBoxE6(B0,B1,B2,B3); transform(B0,B1,B2,B3);
99 key_xor(31,B0,B1,B2,B3); SBoxE7(B0,B1,B2,B3); key_xor(32,B0,B1,B2,B3);
100
101 SIMD_8x32::transpose(B0, B1, B2, B3);
102 B0.store_le(out);
103 B1.store_le(out + 32);
104 B2.store_le(out + 64);
105 B3.store_le(out + 96);
106
107 SIMD_8x32::zero_registers();
108 }
109
111void Serpent::avx2_decrypt_8(const uint8_t in[128], uint8_t out[128]) const
112 {
113 using namespace Botan::Serpent_F;
114
115 SIMD_8x32::reset_registers();
116
117 SIMD_8x32 B0 = SIMD_8x32::load_le(in);
118 SIMD_8x32 B1 = SIMD_8x32::load_le(in + 32);
119 SIMD_8x32 B2 = SIMD_8x32::load_le(in + 64);
120 SIMD_8x32 B3 = SIMD_8x32::load_le(in + 96);
121
122 SIMD_8x32::transpose(B0, B1, B2, B3);
123
124 const Key_Inserter key_xor(m_round_key.data());
125
126 key_xor(32,B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(31,B0,B1,B2,B3);
127 i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(30,B0,B1,B2,B3);
128 i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(29,B0,B1,B2,B3);
129 i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(28,B0,B1,B2,B3);
130 i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(27,B0,B1,B2,B3);
131 i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(26,B0,B1,B2,B3);
132 i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(25,B0,B1,B2,B3);
133 i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(24,B0,B1,B2,B3);
134
135 i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(23,B0,B1,B2,B3);
136 i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(22,B0,B1,B2,B3);
137 i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(21,B0,B1,B2,B3);
138 i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(20,B0,B1,B2,B3);
139 i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(19,B0,B1,B2,B3);
140 i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(18,B0,B1,B2,B3);
141 i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor(17,B0,B1,B2,B3);
142 i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor(16,B0,B1,B2,B3);
143
144 i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor(15,B0,B1,B2,B3);
145 i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor(14,B0,B1,B2,B3);
146 i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor(13,B0,B1,B2,B3);
147 i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor(12,B0,B1,B2,B3);
148 i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor(11,B0,B1,B2,B3);
149 i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor(10,B0,B1,B2,B3);
150 i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 9,B0,B1,B2,B3);
151 i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 8,B0,B1,B2,B3);
152
153 i_transform(B0,B1,B2,B3); SBoxD7(B0,B1,B2,B3); key_xor( 7,B0,B1,B2,B3);
154 i_transform(B0,B1,B2,B3); SBoxD6(B0,B1,B2,B3); key_xor( 6,B0,B1,B2,B3);
155 i_transform(B0,B1,B2,B3); SBoxD5(B0,B1,B2,B3); key_xor( 5,B0,B1,B2,B3);
156 i_transform(B0,B1,B2,B3); SBoxD4(B0,B1,B2,B3); key_xor( 4,B0,B1,B2,B3);
157 i_transform(B0,B1,B2,B3); SBoxD3(B0,B1,B2,B3); key_xor( 3,B0,B1,B2,B3);
158 i_transform(B0,B1,B2,B3); SBoxD2(B0,B1,B2,B3); key_xor( 2,B0,B1,B2,B3);
159 i_transform(B0,B1,B2,B3); SBoxD1(B0,B1,B2,B3); key_xor( 1,B0,B1,B2,B3);
160 i_transform(B0,B1,B2,B3); SBoxD0(B0,B1,B2,B3); key_xor( 0,B0,B1,B2,B3);
161
162 SIMD_8x32::transpose(B0, B1, B2, B3);
163
164 B0.store_le(out);
165 B1.store_le(out + 32);
166 B2.store_le(out + 64);
167 B3.store_le(out + 96);
168
169 SIMD_8x32::zero_registers();
170 }
171
172#undef transform
173#undef i_transform
174
175}
BOTAN_FORCE_INLINE void transform(T &B0, T &B1, T &B2, T &B3)
Definition: serpent_sbox.h:457
BOTAN_FORCE_INLINE void SBoxD5(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:366
BOTAN_FORCE_INLINE void SBoxD4(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:339
BOTAN_FORCE_INLINE void SBoxE0(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:18
BOTAN_FORCE_INLINE void SBoxE1(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:44
BOTAN_FORCE_INLINE void SBoxE3(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:95
BOTAN_FORCE_INLINE void SBoxD0(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:231
BOTAN_FORCE_INLINE void SBoxE5(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:150
BOTAN_FORCE_INLINE void SBoxD6(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:395
BOTAN_FORCE_INLINE void SBoxD3(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:312
BOTAN_FORCE_INLINE void i_transform(T &B0, T &B1, T &B2, T &B3)
Definition: serpent_sbox.h:475
BOTAN_FORCE_INLINE void SBoxD2(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:286
BOTAN_FORCE_INLINE void SBoxE6(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:178
BOTAN_FORCE_INLINE void SBoxE2(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:71
BOTAN_FORCE_INLINE void SBoxE4(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:122
BOTAN_FORCE_INLINE void SBoxD1(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:257
BOTAN_FORCE_INLINE void SBoxE7(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:202
BOTAN_FORCE_INLINE void SBoxD7(T &a, T &b, T &c, T &d)
Definition: serpent_sbox.h:420
Definition: alg_id.cpp:12
#define BOTAN_AVX2_FN
Definition: simd_avx2.h:15