Botan  2.7.0
Crypto and TLS for C++11
shacal2_x86.cpp
Go to the documentation of this file.
1 /*
2 * SHACAL-2 using x86 SHA extensions
3 * (C) 2017 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #include <botan/shacal2.h>
9 #include <immintrin.h>
10 
11 namespace Botan {
12 
13 /*
14 Only encryption is supported since the inverse round function would
15 require a different instruction
16 */
17 
18 BOTAN_FUNC_ISA("sha,ssse3")
19 void SHACAL2::x86_encrypt_blocks(const uint8_t in[], uint8_t out[], size_t blocks) const
20  {
21  const __m128i MASK1 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7);
22  const __m128i MASK2 = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
23 
24  const __m128i* RK_mm = reinterpret_cast<const __m128i*>(m_RK.data());
25  const __m128i* in_mm = reinterpret_cast<const __m128i*>(in);
26  __m128i* out_mm = reinterpret_cast<__m128i*>(out);
27 
28  while(blocks >= 2)
29  {
30  __m128i B0_0 = _mm_loadu_si128(in_mm);
31  __m128i B0_1 = _mm_loadu_si128(in_mm+1);
32  __m128i B1_0 = _mm_loadu_si128(in_mm+2);
33  __m128i B1_1 = _mm_loadu_si128(in_mm+3);
34 
35  __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0_0, B0_1), MASK2);
36  B0_1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0_0, B0_1), MASK2);
37  B0_0 = TMP;
38 
39  TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B1_0, B1_1), MASK2);
40  B1_1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B1_0, B1_1), MASK2);
41  B1_0 = TMP;
42 
43  for(size_t i = 0; i != 8; ++i)
44  {
45  const __m128i RK0 = _mm_loadu_si128(RK_mm + 2*i);
46  const __m128i RK2 = _mm_loadu_si128(RK_mm + 2*i+1);
47  const __m128i RK1 = _mm_srli_si128(RK0, 8);
48  const __m128i RK3 = _mm_srli_si128(RK2, 8);
49 
50  B0_1 = _mm_sha256rnds2_epu32(B0_1, B0_0, RK0);
51  B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK0);
52 
53  B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK1);
54  B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK1);
55 
56  B0_1 = _mm_sha256rnds2_epu32(B0_1, B0_0, RK2);
57  B1_1 = _mm_sha256rnds2_epu32(B1_1, B1_0, RK2);
58 
59  B0_0 = _mm_sha256rnds2_epu32(B0_0, B0_1, RK3);
60  B1_0 = _mm_sha256rnds2_epu32(B1_0, B1_1, RK3);
61  }
62 
63  TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0_0, B0_1), MASK1);
64  B0_1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0_0, B0_1), MASK1);
65  B0_0 = TMP;
66 
67  TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B1_0, B1_1), MASK1);
68  B1_1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B1_0, B1_1), MASK1);
69  B1_0 = TMP;
70 
71  // Save state
72  _mm_storeu_si128(out_mm + 0, B0_0);
73  _mm_storeu_si128(out_mm + 1, B0_1);
74  _mm_storeu_si128(out_mm + 2, B1_0);
75  _mm_storeu_si128(out_mm + 3, B1_1);
76 
77  blocks -= 2;
78  in_mm += 4;
79  out_mm += 4;
80  }
81 
82  while(blocks)
83  {
84  __m128i B0 = _mm_loadu_si128(in_mm);
85  __m128i B1 = _mm_loadu_si128(in_mm+1);
86 
87  __m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK2);
88  B1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK2);
89  B0 = TMP;
90 
91  for(size_t i = 0; i != 8; ++i)
92  {
93  const __m128i RK0 = _mm_loadu_si128(RK_mm + 2*i);
94  const __m128i RK2 = _mm_loadu_si128(RK_mm + 2*i+1);
95  const __m128i RK1 = _mm_srli_si128(RK0, 8);
96  const __m128i RK3 = _mm_srli_si128(RK2, 8);
97 
98  B1 = _mm_sha256rnds2_epu32(B1, B0, RK0);
99  B0 = _mm_sha256rnds2_epu32(B0, B1, RK1);
100  B1 = _mm_sha256rnds2_epu32(B1, B0, RK2);
101  B0 = _mm_sha256rnds2_epu32(B0, B1, RK3);
102  }
103 
104  TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK1);
105  B1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK1);
106  B0 = TMP;
107 
108  // Save state
109  _mm_storeu_si128(out_mm, B0);
110  _mm_storeu_si128(out_mm + 1, B1);
111 
112  blocks--;
113  in_mm += 2;
114  out_mm += 2;
115  }
116  }
117 
118 }
#define BOTAN_FUNC_ISA(isa)
Definition: compiler.h:75
Definition: alg_id.cpp:13