// NOTE(review): gapped extract - the leading "19"/"21" are residue of the original
// file's line numbering, and the line naming this function plus its whole body are
// not visible here.
// Function template parameterised on four compile-time size_t values R1..R4 that
// takes eight 64-bit words X0..X7 by non-const reference.
// Presumably this is e_round: the call sites in this file use e_round<a, b, c, d>
// with eight word arguments - TODO confirm against the full source.
19template <
size_t R1,
size_t R2,
size_t R3,
size_t R4>
21 uint64_t& X0, uint64_t& X1, uint64_t& X2, uint64_t& X3, uint64_t& X4, uint64_t& X5, uint64_t& X6, uint64_t& X7) {
// NOTE(review): gapped extract - the leading "36"/"38" are residue of the original
// file's line numbering, and the line naming this function plus its whole body are
// not visible here.
// Second function template with the same shape as the one before it: four
// compile-time size_t values R1..R4 and eight 64-bit words X0..X7 taken by
// non-const reference.
// Presumably this is d_round, the counterpart invoked as d_round<a, b, c, d> with
// eight word arguments elsewhere in this file - TODO confirm against the full source.
36template <
size_t R1,
size_t R2,
size_t R3,
size_t R4>
38 uint64_t& X0, uint64_t& X1, uint64_t& X2, uint64_t& X3, uint64_t& X4, uint64_t& X5, uint64_t& X6, uint64_t& X7) {
// Constructor: initialises the members m_K and m_T from the pointers K and T;
// nothing else is done here.
// The visible key-addition code indexes m_K with (R + i) % 9 and m_T with
// (R + j) % 3, so K must point at no fewer than 9 readable words and T at no fewer
// than 3.
// NOTE(review): m_K/m_T look like non-owning pointers, in which case the arrays must
// outlive this object - the member declarations are not visible, confirm.
// (The leading "55" is residue of the original file's line numbering.)
55 Key_Inserter(
const uint64_t* K,
const uint64_t* T) : m_K(K), m_T(T) {}
// Key addition for index R: each word Xi gains m_K[(R + i) % 9].
// X5 and X6 additionally gain m_T[R % 3] and m_T[(R + 1) % 3], and X7 additionally
// gains R itself.
// NOTE(review): presumably the body of e_add (called as key.e_add(R, X0..X7) in this
// file); the enclosing signature and the X0 statement fall in lines missing from this
// extract - confirm against the full source.
// Assumes the X words are uint64_t, so the additions wrap modulo 2^64 - TODO confirm.
67 X1 += m_K[(R + 1) % 9];
68 X2 += m_K[(R + 2) % 9];
69 X3 += m_K[(R + 3) % 9];
70 X4 += m_K[(R + 4) % 9];
// Words 5 and 6 also mix in the m_T array, indexed modulo 3.
71 X5 += m_K[(R + 5) % 9] + m_T[(R) % 3];
72 X6 += m_K[(R + 6) % 9] + m_T[(R + 1) % 3];
// Word 7 also mixes in the index R itself.
73 X7 += m_K[(R + 7) % 9] + R;
// Key subtraction for index R: each word Xi loses exactly the quantity that the
// matching "+=" statements in this file add for the same R, i.e. m_K[(R + i) % 9],
// plus the m_T terms for X5/X6 and plus R for X7.
// NOTE(review): presumably the body of d_add (called as key.d_add(R, X0..X7) in this
// file); the enclosing signature and the X0 statement fall in lines missing from this
// extract - confirm against the full source.
// Assumes the X words are uint64_t, so the subtractions wrap modulo 2^64 and undo the
// additions exactly - TODO confirm.
86 X1 -= m_K[(R + 1) % 9];
87 X2 -= m_K[(R + 2) % 9];
88 X3 -= m_K[(R + 3) % 9];
89 X4 -= m_K[(R + 4) % 9];
// The right-hand side is summed first, so the whole (m_K + m_T) term is subtracted.
90 X5 -= m_K[(R + 5) % 9] + m_T[(R) % 3];
91 X6 -= m_K[(R + 6) % 9] + m_T[(R + 1) % 3];
// Likewise the whole (m_K + R) term is subtracted from word 7.
92 X7 -= m_K[(R + 7) % 9] + R;
// Eight e_round calls in two groups of four, each group followed by a key addition:
// key.e_add(R1, ...) after the first group and key.e_add(R2, ...) after the second.
// R1 and R2 are compile-time size_t template parameters.
// NOTE(review): presumably this is e8_rounds (called as e8_rounds<a, b>(X0..X7, key)
// in this file); the name, most of the parameter list and the closing brace fall in
// lines missing from this extract - confirm against the full source.
// NOTE(review): the numeric template arguments look like the Threefish-512 rotation
// table - confirm against the cipher specification before touching any of them.
100template <
size_t R1,
size_t R2>
109 const Key_Inserter& key) {
// First group of four. The same eight words are passed each time but in a different
// order per call; the order of words is part of the algorithm, do not "tidy" it.
110 e_round<46, 36, 19, 37>(X0, X2, X4, X6, X1, X3, X5, X7);
111 e_round<33, 27, 14, 42>(X2, X4, X6, X0, X1, X7, X5, X3);
112 e_round<17, 49, 36, 39>(X4, X6, X0, X2, X1, X3, X5, X7);
113 e_round<44, 9, 54, 56>(X6, X0, X2, X4, X1, X7, X5, X3);
// Key addition with index R1; words go back to natural order X0..X7.
114 key.e_add(R1, X0, X1, X2, X3, X4, X5, X6, X7);
// Second group of four: same argument orderings as the first group, different constants.
116 e_round<39, 30, 34, 24>(X0, X2, X4, X6, X1, X3, X5, X7);
117 e_round<13, 50, 10, 17>(X2, X4, X6, X0, X1, X7, X5, X3);
118 e_round<25, 29, 39, 43>(X4, X6, X0, X2, X1, X3, X5, X7);
119 e_round<8, 35, 56, 22>(X6, X0, X2, X4, X1, X7, X5, X3);
// Key addition with index R2.
120 key.e_add(R2, X0, X1, X2, X3, X4, X5, X6, X7);
// Eight d_round calls in two groups of four, each group followed by a key
// subtraction: key.d_add(R1, ...) after the first group and key.d_add(R2, ...) after
// the second. R1 and R2 are compile-time size_t template parameters.
// The d_round calls use the same template constants and argument orderings as the
// e_round calls in this file, listed in exactly the reverse order.
// NOTE(review): presumably this is d8_rounds (called as d8_rounds<a, b>(X0..X7, key)
// in this file); the name, most of the parameter list and the closing brace fall in
// lines missing from this extract - confirm against the full source.
123template <
size_t R1,
size_t R2>
132 const Key_Inserter& key) {
// First group: constants/orderings of the forward path's second group, reversed.
133 d_round<8, 35, 56, 22>(X6, X0, X2, X4, X1, X7, X5, X3);
134 d_round<25, 29, 39, 43>(X4, X6, X0, X2, X1, X3, X5, X7);
135 d_round<13, 50, 10, 17>(X2, X4, X6, X0, X1, X7, X5, X3);
136 d_round<39, 30, 34, 24>(X0, X2, X4, X6, X1, X3, X5, X7);
// Key subtraction with index R1; words are passed in natural order X0..X7.
137 key.d_add(R1, X0, X1, X2, X3, X4, X5, X6, X7);
// Second group: constants/orderings of the forward path's first group, reversed.
139 d_round<44, 9, 54, 56>(X6, X0, X2, X4, X1, X7, X5, X3);
140 d_round<17, 49, 36, 39>(X4, X6, X0, X2, X1, X3, X5, X7);
141 d_round<33, 27, 14, 42>(X2, X4, X6, X0, X1, X7, X5, X3);
142 d_round<46, 36, 19, 37>(X0, X2, X4, X6, X1, X3, X5, X7);
// Key subtraction with index R2.
143 key.d_add(R2, X0, X1, X2, X3, X4, X5, X6, X7);
// Forward (e_*) per-block loop.
// NOTE(review): gapped extract - the enclosing function's signature, the code that
// fills X0..X7 for each block, the code that writes them back and all closing braces
// fall in lines missing here; `blocks` is defined outside the visible lines.
// Builds one Key_Inserter over m_K.data() / m_T.data() before the loop and reuses it
// for every block.
200 const Key_Inserter key(m_K.data(), m_T.data());
// One iteration per block, `blocks` iterations in total.
202 for(
size_t i = 0; i < blocks; ++i) {
// Initial key addition with index 0, before any e8_rounds call.
213 key.e_add(0, X0, X1, X2, X3, X4, X5, X6, X7);
// Nine e8_rounds calls whose template arguments run through the indices 1..18 in
// ascending order, two per call. Given the eight e_round calls visible per e8_rounds
// in this file, that is 72 e_round calls per block.
215 e8_rounds<1, 2>(X0, X1, X2, X3, X4, X5, X6, X7, key);
216 e8_rounds<3, 4>(X0, X1, X2, X3, X4, X5, X6, X7, key);
217 e8_rounds<5, 6>(X0, X1, X2, X3, X4, X5, X6, X7, key);
218 e8_rounds<7, 8>(X0, X1, X2, X3, X4, X5, X6, X7, key);
219 e8_rounds<9, 10>(X0, X1, X2, X3, X4, X5, X6, X7, key);
220 e8_rounds<11, 12>(X0, X1, X2, X3, X4, X5, X6, X7, key);
221 e8_rounds<13, 14>(X0, X1, X2, X3, X4, X5, X6, X7, key);
222 e8_rounds<15, 16>(X0, X1, X2, X3, X4, X5, X6, X7, key);
223 e8_rounds<17, 18>(X0, X1, X2, X3, X4, X5, X6, X7, key);
// Inverse (d_*) per-block loop; mirrors the forward loop in this file step for step.
// NOTE(review): gapped extract - the enclosing function's signature, the code that
// fills X0..X7 for each block, the code that writes them back and all closing braces
// fall in lines missing here; `blocks` is defined outside the visible lines.
// Builds one Key_Inserter over m_K.data() / m_T.data() before the loop and reuses it
// for every block.
234 const Key_Inserter key(m_K.data(), m_T.data());
// One iteration per block, `blocks` iterations in total.
236 for(
size_t i = 0; i < blocks; ++i) {
// Start by subtracting the key for index 18, the last index the forward loop adds.
247 key.d_add(18, X0, X1, X2, X3, X4, X5, X6, X7);
// Nine d8_rounds calls whose template arguments run through the indices 17..0 in
// descending order, two per call, so the final key subtraction uses index 0 - the
// index the forward loop adds first.
249 d8_rounds<17, 16>(X0, X1, X2, X3, X4, X5, X6, X7, key);
250 d8_rounds<15, 14>(X0, X1, X2, X3, X4, X5, X6, X7, key);
251 d8_rounds<13, 12>(X0, X1, X2, X3, X4, X5, X6, X7, key);
252 d8_rounds<11, 10>(X0, X1, X2, X3, X4, X5, X6, X7, key);
253 d8_rounds<9, 8>(X0, X1, X2, X3, X4, X5, X6, X7, key);
254 d8_rounds<7, 6>(X0, X1, X2, X3, X4, X5, X6, X7, key);
255 d8_rounds<5, 4>(X0, X1, X2, X3, X4, X5, X6, X7, key);
256 d8_rounds<3, 2>(X0, X1, X2, X3, X4, X5, X6, X7, key);
257 d8_rounds<1, 0>(X0, X1, X2, X3, X4, X5, X6, X7, key);