Botan 3.6.1
Crypto and TLS for C++
sc_muladd.cpp
Go to the documentation of this file.
1/*
2* Ed25519
3* (C) 2017 Ribose Inc
4*
5* Based on the public domain code from SUPERCOP ref10 by
6* Peter Schwabe, Daniel J. Bernstein, Niels Duif, Tanja Lange, Bo-Yin Yang
7*
8* Botan is released under the Simplified BSD License (see license.txt)
9*/
10
11#include <botan/internal/ed25519_internal.h>
12
13namespace Botan {
14
15/*
16Input:
17 a[0]+256*a[1]+...+256^31*a[31] = a
18 b[0]+256*b[1]+...+256^31*b[31] = b
19 c[0]+256*c[1]+...+256^31*c[31] = c
20
21Output:
22 s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
23 where l = 2^252 + 27742317777372353535851937790883648493.
24*/
25
/*
* Compute s = (a*b + c) mod l, with
* l = 2^252 + 27742317777372353535851937790883648493.
*
* s: output; bytes s[0]..s[31] are written, least significant byte first.
* a, b, c: inputs, each read as a 32-byte value with the least significant
*          byte first.
*
* Every input byte is loaded into local limbs before any byte of s is
* written.
*
* Helpers load_3, load_4, carry, carry0 and redc_mul are defined elsewhere
* (presumably ed25519_internal.h); notes about them below are assumptions
* to confirm against their definitions.
*/
26void sc_muladd(uint8_t* s, const uint8_t* a, const uint8_t* b, const uint8_t* c) {
   // 0x1fffff == 2^21 - 1: keeps the low 21 bits of a limb.
27 const int32_t MASK = 0x1fffff;
28
   // Split a, b and c into twelve limbs each. Limb k starts at bit 21*k of
   // the input (byte offset * 8 + shift == 21*k for every line below).
   // Limbs 0..10 are masked to 21 bits; limb 11 is not masked and keeps
   // whatever high bits load_4 returns above bit 231.
   // NOTE(review): assumes load_3/load_4 read 3/4 bytes least significant
   // byte first - confirm.
29 const int64_t a0 = MASK & load_3(a);
30 const int64_t a1 = MASK & (load_4(a + 2) >> 5);
31 const int64_t a2 = MASK & (load_3(a + 5) >> 2);
32 const int64_t a3 = MASK & (load_4(a + 7) >> 7);
33 const int64_t a4 = MASK & (load_4(a + 10) >> 4);
34 const int64_t a5 = MASK & (load_3(a + 13) >> 1);
35 const int64_t a6 = MASK & (load_4(a + 15) >> 6);
36 const int64_t a7 = MASK & (load_3(a + 18) >> 3);
37 const int64_t a8 = MASK & load_3(a + 21);
38 const int64_t a9 = MASK & (load_4(a + 23) >> 5);
39 const int64_t a10 = MASK & (load_3(a + 26) >> 2);
40 const int64_t a11 = (load_4(a + 28) >> 7);
41 const int64_t b0 = MASK & load_3(b);
42 const int64_t b1 = MASK & (load_4(b + 2) >> 5);
43 const int64_t b2 = MASK & (load_3(b + 5) >> 2);
44 const int64_t b3 = MASK & (load_4(b + 7) >> 7);
45 const int64_t b4 = MASK & (load_4(b + 10) >> 4);
46 const int64_t b5 = MASK & (load_3(b + 13) >> 1);
47 const int64_t b6 = MASK & (load_4(b + 15) >> 6);
48 const int64_t b7 = MASK & (load_3(b + 18) >> 3);
49 const int64_t b8 = MASK & load_3(b + 21);
50 const int64_t b9 = MASK & (load_4(b + 23) >> 5);
51 const int64_t b10 = MASK & (load_3(b + 26) >> 2);
52 const int64_t b11 = (load_4(b + 28) >> 7);
53 const int64_t c0 = MASK & load_3(c);
54 const int64_t c1 = MASK & (load_4(c + 2) >> 5);
55 const int64_t c2 = MASK & (load_3(c + 5) >> 2);
56 const int64_t c3 = MASK & (load_4(c + 7) >> 7);
57 const int64_t c4 = MASK & (load_4(c + 10) >> 4);
58 const int64_t c5 = MASK & (load_3(c + 13) >> 1);
59 const int64_t c6 = MASK & (load_4(c + 15) >> 6);
60 const int64_t c7 = MASK & (load_3(c + 18) >> 3);
61 const int64_t c8 = MASK & load_3(c + 21);
62 const int64_t c9 = MASK & (load_4(c + 23) >> 5);
63 const int64_t c10 = MASK & (load_3(c + 26) >> 2);
64 const int64_t c11 = (load_4(c + 28) >> 7);
65
   // Schoolbook product plus addend: s_k is the sum of every a_i * b_j with
   // i + j == k, and limb c_k is added in for k = 0..11. This yields the
   // unreduced limbs s0..s22 of a*b + c.
66 int64_t s0 = c0 + a0 * b0;
67 int64_t s1 = c1 + a0 * b1 + a1 * b0;
68 int64_t s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
69 int64_t s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
70 int64_t s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
71 int64_t s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
72 int64_t s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
73 int64_t s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0;
74 int64_t s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + a6 * b2 + a7 * b1 + a8 * b0;
75 int64_t s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 + a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
76 int64_t s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + a6 * b4 + a7 * b3 + a8 * b2 +
77 a9 * b1 + a10 * b0;
78 int64_t s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + a6 * b5 + a7 * b4 + a8 * b3 +
79 a9 * b2 + a10 * b1 + a11 * b0;
80 int64_t s12 =
81 a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
82 int64_t s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + a9 * b4 + a10 * b3 + a11 * b2;
83 int64_t s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 + a10 * b4 + a11 * b3;
84 int64_t s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 + a11 * b4;
85 int64_t s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
86 int64_t s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
87 int64_t s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
88 int64_t s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
89 int64_t s20 = a9 * b11 + a10 * b10 + a11 * b9;
90 int64_t s21 = a10 * b11 + a11 * b10;
91 int64_t s22 = a11 * b11;
   // No product term reaches limb 23; it exists as the carry target of s22.
92 int64_t s23 = 0;
93
   // First carry pass over all 24 limbs, even-indexed sources first.
   // NOTE(review): carry<21>(x, y) presumably moves the part of x above
   // 21 bits into y - confirm against its definition.
94 carry<21>(s0, s1);
95 carry<21>(s2, s3);
96 carry<21>(s4, s5);
97 carry<21>(s6, s7);
98 carry<21>(s8, s9);
99 carry<21>(s10, s11);
100 carry<21>(s12, s13);
101 carry<21>(s14, s15);
102 carry<21>(s16, s17);
103 carry<21>(s18, s19);
104 carry<21>(s20, s21);
105 carry<21>(s22, s23);
106
   // ...then the odd-indexed sources.
107 carry<21>(s1, s2);
108 carry<21>(s3, s4);
109 carry<21>(s5, s6);
110 carry<21>(s7, s8);
111 carry<21>(s9, s10);
112 carry<21>(s11, s12);
113 carry<21>(s13, s14);
114 carry<21>(s15, s16);
115 carry<21>(s17, s18);
116 carry<21>(s19, s20);
117 carry<21>(s21, s22);
118
   // Fold the top six limbs s23..s18 (highest first) into s6..s16. Each call
   // passes limb X last and the six limbs X-12 .. X-7 before it.
   // NOTE(review): redc_mul presumably uses 2^252 == -(l - 2^252) (mod l) to
   // add a multiple of X into those six limbs and then clears X - confirm.
119 redc_mul(s11, s12, s13, s14, s15, s16, s23);
120 redc_mul(s10, s11, s12, s13, s14, s15, s22);
121 redc_mul(s9, s10, s11, s12, s13, s14, s21);
122 redc_mul(s8, s9, s10, s11, s12, s13, s20);
123 redc_mul(s7, s8, s9, s10, s11, s12, s19);
124 redc_mul(s6, s7, s8, s9, s10, s11, s18);
125
   // Re-normalise the limbs touched by the fold (s6..s16), spilling into s17.
126 carry<21>(s6, s7);
127 carry<21>(s8, s9);
128 carry<21>(s10, s11);
129 carry<21>(s12, s13);
130 carry<21>(s14, s15);
131 carry<21>(s16, s17);
132
133 carry<21>(s7, s8);
134 carry<21>(s9, s10);
135 carry<21>(s11, s12);
136 carry<21>(s13, s14);
137 carry<21>(s15, s16);
138
   // Fold the next six limbs s17..s12 (highest first) into s0..s10.
139 redc_mul(s5, s6, s7, s8, s9, s10, s17);
140 redc_mul(s4, s5, s6, s7, s8, s9, s16);
141 redc_mul(s3, s4, s5, s6, s7, s8, s15);
142 redc_mul(s2, s3, s4, s5, s6, s7, s14);
143 redc_mul(s1, s2, s3, s4, s5, s6, s13);
144 redc_mul(s0, s1, s2, s3, s4, s5, s12);
145
   // Carry across s0..s11; the final carry out of s11 lands in s12 again.
146 carry<21>(s0, s1);
147 carry<21>(s2, s3);
148 carry<21>(s4, s5);
149 carry<21>(s6, s7);
150 carry<21>(s8, s9);
151 carry<21>(s10, s11);
152
153 carry<21>(s1, s2);
154 carry<21>(s3, s4);
155 carry<21>(s5, s6);
156 carry<21>(s7, s8);
157 carry<21>(s9, s10);
158 carry<21>(s11, s12);
159
   // Fold that overflow limb s12 back into s0..s5.
160 redc_mul(s0, s1, s2, s3, s4, s5, s12);
161
   // Sequential carry chain from s0 up to s12, each step feeding the next.
   // NOTE(review): carry0 is a different helper from carry; presumably it
   // differs in how the carried value is rounded - confirm.
162 carry0<21>(s0, s1);
163 carry0<21>(s1, s2);
164 carry0<21>(s2, s3);
165 carry0<21>(s3, s4);
166 carry0<21>(s4, s5);
167 carry0<21>(s5, s6);
168 carry0<21>(s6, s7);
169 carry0<21>(s7, s8);
170 carry0<21>(s8, s9);
171 carry0<21>(s9, s10);
172 carry0<21>(s10, s11);
173 carry0<21>(s11, s12);
174
   // Fold s12 one last time.
175 redc_mul(s0, s1, s2, s3, s4, s5, s12);
176
   // Final sequential carry chain; it stops at s11, so s12 is not written
   // again and only s0..s11 are serialised below.
177 carry0<21>(s0, s1);
178 carry0<21>(s1, s2);
179 carry0<21>(s2, s3);
180 carry0<21>(s3, s4);
181 carry0<21>(s4, s5);
182 carry0<21>(s5, s6);
183 carry0<21>(s6, s7);
184 carry0<21>(s7, s8);
185 carry0<21>(s8, s9);
186 carry0<21>(s9, s10);
187 carry0<21>(s10, s11);
188
   // Pack limbs s0..s11 into 32 output bytes, limb k at bit offset 21*k
   // (the inverse of the input split). Each static_cast keeps the low 8 bits
   // of its operand; a byte that straddles two limbs ORs the top bits of the
   // lower limb with the shifted-up bottom bits of the next one.
189 s[0] = static_cast<uint8_t>(s0 >> 0);
190 s[1] = static_cast<uint8_t>(s0 >> 8);
191 s[2] = static_cast<uint8_t>((s0 >> 16) | (s1 << 5));
192 s[3] = static_cast<uint8_t>(s1 >> 3);
193 s[4] = static_cast<uint8_t>(s1 >> 11);
194 s[5] = static_cast<uint8_t>((s1 >> 19) | (s2 << 2));
195 s[6] = static_cast<uint8_t>(s2 >> 6);
196 s[7] = static_cast<uint8_t>((s2 >> 14) | (s3 << 7));
197 s[8] = static_cast<uint8_t>(s3 >> 1);
198 s[9] = static_cast<uint8_t>(s3 >> 9);
199 s[10] = static_cast<uint8_t>((s3 >> 17) | (s4 << 4));
200 s[11] = static_cast<uint8_t>(s4 >> 4);
201 s[12] = static_cast<uint8_t>(s4 >> 12);
202 s[13] = static_cast<uint8_t>((s4 >> 20) | (s5 << 1));
203 s[14] = static_cast<uint8_t>(s5 >> 7);
204 s[15] = static_cast<uint8_t>((s5 >> 15) | (s6 << 6));
205 s[16] = static_cast<uint8_t>(s6 >> 2);
206 s[17] = static_cast<uint8_t>(s6 >> 10);
207 s[18] = static_cast<uint8_t>((s6 >> 18) | (s7 << 3));
208 s[19] = static_cast<uint8_t>(s7 >> 5);
209 s[20] = static_cast<uint8_t>(s7 >> 13);
210 s[21] = static_cast<uint8_t>(s8 >> 0);
211 s[22] = static_cast<uint8_t>(s8 >> 8);
212 s[23] = static_cast<uint8_t>((s8 >> 16) | (s9 << 5));
213 s[24] = static_cast<uint8_t>(s9 >> 3);
214 s[25] = static_cast<uint8_t>(s9 >> 11);
215 s[26] = static_cast<uint8_t>((s9 >> 19) | (s10 << 2));
216 s[27] = static_cast<uint8_t>(s10 >> 6);
217 s[28] = static_cast<uint8_t>((s10 >> 14) | (s11 << 7));
218 s[29] = static_cast<uint8_t>(s11 >> 1);
219 s[30] = static_cast<uint8_t>(s11 >> 9);
220 s[31] = static_cast<uint8_t>(s11 >> 17);
221}
222
223} // namespace Botan
void redc_mul(int64_t &s1, int64_t &s2, int64_t &s3, int64_t &s4, int64_t &s5, int64_t &s6, int64_t &X)
uint64_t load_4(const uint8_t *in)
void carry0(int64_t &h0, int64_t &h1)
void carry(int64_t &h0, int64_t &h1)
const SIMD_8x32 & b
uint64_t load_3(const uint8_t in[3])
void sc_muladd(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *)
Definition sc_muladd.cpp:26