Botan 3.6.1
Crypto and TLS for C++
shacal2_arvm8.cpp
Go to the documentation of this file.
1/*
2* (C) 2020 Jack Lloyd
3*
4* Botan is released under the Simplified BSD License (see license.txt)
5*/
6
7#include <botan/internal/shacal2.h>
8#include <arm_neon.h>
9
10namespace Botan {
11
12/*
13Only encryption is supported since the inverse round function would
14require a different instruction
15*/
16BOTAN_FUNC_ISA("+crypto+sha2")
17void SHACAL2::armv8_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const {
18 const uint32_t* input32 = reinterpret_cast<const uint32_t*>(in);
19 uint32_t* output32 = reinterpret_cast<uint32_t*>(out);
20
21 while(blocks >= 2) {
22 uint32x4_t B0_0 = vld1q_u32(input32 + 0);
23 uint32x4_t B0_1 = vld1q_u32(input32 + 4);
24 uint32x4_t B1_0 = vld1q_u32(input32 + 8);
25 uint32x4_t B1_1 = vld1q_u32(input32 + 12);
26
27 B0_0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B0_0)));
28 B0_1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B0_1)));
29 B1_0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B1_0)));
30 B1_1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B1_1)));
31
32 for(size_t i = 0; i != 8; ++i) {
33 const auto RK0 = vld1q_u32(&m_RK[8 * i]);
34 const auto RK1 = vld1q_u32(&m_RK[8 * i + 4]);
35
36 const auto T0_0 = vsha256hq_u32(B0_0, B0_1, RK0);
37 const auto T0_1 = vsha256h2q_u32(B0_1, B0_0, RK0);
38 const auto T1_0 = vsha256hq_u32(B1_0, B1_1, RK0);
39 const auto T1_1 = vsha256h2q_u32(B1_1, B1_0, RK0);
40
41 B0_0 = vsha256hq_u32(T0_0, T0_1, RK1);
42 B0_1 = vsha256h2q_u32(T0_1, T0_0, RK1);
43 B1_0 = vsha256hq_u32(T1_0, T1_1, RK1);
44 B1_1 = vsha256h2q_u32(T1_1, T1_0, RK1);
45 }
46
47 B0_0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B0_0)));
48 B0_1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B0_1)));
49 B1_0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B1_0)));
50 B1_1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B1_1)));
51
52 vst1q_u32(&output32[0], B0_0);
53 vst1q_u32(&output32[4], B0_1);
54 vst1q_u32(&output32[8], B1_0);
55 vst1q_u32(&output32[12], B1_1);
56
57 blocks -= 2;
58 input32 += 16;
59 output32 += 16;
60 }
61
62 while(blocks > 0) {
63 uint32x4_t B0 = vld1q_u32(input32 + 0);
64 uint32x4_t B1 = vld1q_u32(input32 + 4);
65
66 B0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B0)));
67 B1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B1)));
68
69 for(size_t i = 0; i != 8; ++i) {
70 const auto RK0 = vld1q_u32(&m_RK[8 * i]);
71 const auto RK1 = vld1q_u32(&m_RK[8 * i + 4]);
72
73 const auto T0 = vsha256hq_u32(B0, B1, RK0);
74 const auto T1 = vsha256h2q_u32(B1, B0, RK0);
75
76 B0 = vsha256hq_u32(T0, T1, RK1);
77 B1 = vsha256h2q_u32(T1, T0, RK1);
78 }
79
80 B0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B0)));
81 B1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B1)));
82
83 vst1q_u32(&output32[0], B0);
84 vst1q_u32(&output32[4], B1);
85
86 blocks--;
87 input32 += 8;
88 output32 += 8;
89 }
90}
91
92} // namespace Botan
#define BOTAN_FUNC_ISA(isa)
Definition compiler.h:92