21 const auto bswap64 =
SIMD_4x32(0x04050607, 0x00010203, 0x0C0D0E0F, 0x08090A0B);
26 const auto bswap64 =
SIMD_4x32(0x04050607, 0x00010203, 0x0C0D0E0F, 0x08090A0B);
31 const uint32_t lo =
static_cast<uint32_t
>(v);
32 const uint32_t hi =
static_cast<uint32_t
>(v >> 32);
60 constexpr uint8_t pre_c = 0x45;
72 constexpr uint8_t post14_c = 0x6E;
84 constexpr uint8_t post2_c = 0xDC;
96 constexpr uint8_t post3_c = 0x37;
104 const auto mask_s2 =
SIMD_4x32(0xFF000000, 0x00FF0000, 0xFF000000, 0x00FF0000);
105 const auto mask_s3 =
SIMD_4x32(0x00FF0000, 0x0000FF00, 0x00FF0000, 0x0000FF00);
106 const auto mask_s4 =
SIMD_4x32(0x0000FF00, 0x000000FF, 0x0000FF00, 0x000000FF);
108 const auto pre123 = PRE123.affine_transform(x);
109 const auto pre4 = PRE4.affine_transform(x);
113 const auto s14 = POST14.affine_transform(sub);
114 const auto s2 = POST2.affine_transform(sub);
115 const auto s3 = POST3.affine_transform(sub);
121 const auto P1 =
SIMD_4x32(0x00000001, 0x00000001, 0x08080809, 0x08080809);
122 const auto P2 =
SIMD_4x32(0x01010202, 0x01010202, 0x09090A0A, 0x09090A0A);
123 const auto P3 =
SIMD_4x32(0x02030303, 0x02030303, 0x0A0B0B0B, 0x0A0B0B0B);
124 const auto P4 =
SIMD_4x32(0x06050404, 0x04040504, 0x0E0D0C0C, 0x0C0C0D0C);
125 const auto P5 =
SIMD_4x32(0x07060507, 0x05060605, 0x0F0E0D0F, 0x0D0E0E0D);
126 const auto P6 =
SIMD_4x32(0xFFFFFFFF, 0x07070706, 0xFFFFFFFF, 0x0F0F0F0E);
135 return (sxp1 ^ sxp2 ^ sxp3 ^ sxp4 ^ sxp5 ^ sxp6);
143 const uint32_t k1 =
static_cast<uint32_t
>(K >> 32);
144 const uint32_t k2 =
static_cast<uint32_t
>(K);
147 const auto shuf_hi =
SIMD_4x32(0x07060504, 0x07060504, 0x0F0E0D0C, 0x0F0E0D0C);
148 const auto shuf_lo =
SIMD_4x32(0x03020100, 0x03020100, 0x0B0A0908, 0x0B0A0908);
157 const auto mask_hi =
SIMD_4x32(0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF);
162 const uint32_t k1 =
static_cast<uint32_t
>(K >> 32);
163 const uint32_t k2 =
static_cast<uint32_t
>(K);
165 const auto shuf_hi =
SIMD_4x32(0x07060504, 0x07060504, 0x0F0E0D0C, 0x0F0E0D0C);
166 const auto shuf_lo =
SIMD_4x32(0x03020100, 0x03020100, 0x0B0A0908, 0x0B0A0908);
174 const auto mask_hi =
SIMD_4x32(0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF);
179 auto A = load_be64(in);
180 auto B = load_be64(in + 16);
181 const auto mask_upper =
SIMD_4x32(0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF);
188 const auto mask_upper =
SIMD_4x32(0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF);
192 store_be64(out + 16, B);
196 R ^= camellia_f(L ^ splat64(SK[0]));
197 L ^= camellia_f(R ^ splat64(SK[1]));
198 R ^= camellia_f(L ^ splat64(SK[2]));
199 L ^= camellia_f(R ^ splat64(SK[3]));
200 R ^= camellia_f(L ^ splat64(SK[4]));
201 L ^= camellia_f(R ^ splat64(SK[5]));
205 R ^= camellia_f(L ^ splat64(SK[5]));
206 L ^= camellia_f(R ^ splat64(SK[4]));
207 R ^= camellia_f(L ^ splat64(SK[3]));
208 L ^= camellia_f(R ^ splat64(SK[2]));
209 R ^= camellia_f(L ^ splat64(SK[1]));
210 L ^= camellia_f(R ^ splat64(SK[0]));
213BOTAN_FN_ISA_HWAES
void camellia_encrypt_x2_18r(
const uint8_t in[], uint8_t out[], std::span<const uint64_t> SK) {
216 load_and_deinterleave(in, L, R);
221 six_e_rounds(L, R, &SK[2]);
223 R = FLINV_2(R, SK[9]);
224 six_e_rounds(L, R, &SK[10]);
226 R = FLINV_2(R, SK[17]);
227 six_e_rounds(L, R, &SK[18]);
229 R ^= splat64(SK[24]);
230 L ^= splat64(SK[25]);
232 interleave_and_store(out, L, R);
235BOTAN_FN_ISA_HWAES
void camellia_decrypt_x2_18r(
const uint8_t in[], uint8_t out[], std::span<const uint64_t> SK) {
238 load_and_deinterleave(in, L, R);
240 R ^= splat64(SK[25]);
241 L ^= splat64(SK[24]);
243 six_d_rounds(L, R, &SK[18]);
245 R = FLINV_2(R, SK[16]);
246 six_d_rounds(L, R, &SK[10]);
248 R = FLINV_2(R, SK[8]);
249 six_d_rounds(L, R, &SK[2]);
254 interleave_and_store(out, L, R);
257BOTAN_FN_ISA_HWAES
void camellia_encrypt_x2_24r(
const uint8_t in[], uint8_t out[], std::span<const uint64_t> SK) {
260 load_and_deinterleave(in, L, R);
265 six_e_rounds(L, R, &SK[2]);
267 R = FLINV_2(R, SK[9]);
268 six_e_rounds(L, R, &SK[10]);
270 R = FLINV_2(R, SK[17]);
271 six_e_rounds(L, R, &SK[18]);
273 R = FLINV_2(R, SK[25]);
274 six_e_rounds(L, R, &SK[26]);
276 R ^= splat64(SK[32]);
277 L ^= splat64(SK[33]);
279 interleave_and_store(out, L, R);
282BOTAN_FN_ISA_HWAES
void camellia_decrypt_x2_24r(
const uint8_t in[], uint8_t out[], std::span<const uint64_t> SK) {
285 load_and_deinterleave(in, L, R);
287 R ^= splat64(SK[33]);
288 L ^= splat64(SK[32]);
290 six_d_rounds(L, R, &SK[26]);
292 R = FLINV_2(R, SK[24]);
293 six_d_rounds(L, R, &SK[18]);
295 R = FLINV_2(R, SK[16]);
296 six_d_rounds(L, R, &SK[10]);
298 R = FLINV_2(R, SK[8]);
299 six_d_rounds(L, R, &SK[2]);
304 interleave_and_store(out, L, R);