Raven Core  3.0.0
P2P Digital Currency
keccak.c
Go to the documentation of this file.
1 /* $Id: keccak.c 259 2011-07-19 22:11:27Z tp $ */
2 /*
3  * Keccak implementation.
4  *
5  * ==========================(LICENSE BEGIN)============================
6  *
7  * Copyright (c) 2007-2010 Projet RNRT SAPHIR
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the
11  * "Software"), to deal in the Software without restriction, including
12  * without limitation the rights to use, copy, modify, merge, publish,
13  * distribute, sublicense, and/or sell copies of the Software, and to
14  * permit persons to whom the Software is furnished to do so, subject to
15  * the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be
18  * included in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27  *
28  * ===========================(LICENSE END)=============================
29  *
30  * @author Thomas Pornin <thomas.pornin@cryptolog.com>
31  */
32 
33 #include <stddef.h>
34 #include <string.h>
35 
36 #include "sph_keccak.h"
37 
38 #ifdef __cplusplus
39 extern "C"{
40 #endif
41 
42 /*
43  * Parameters:
44  *
45  * SPH_KECCAK_64 use a 64-bit type
46  * SPH_KECCAK_UNROLL number of loops to unroll (0/undef for full unroll)
47  * SPH_KECCAK_INTERLEAVE use bit-interleaving (32-bit type only)
48  * SPH_KECCAK_NOCOPY do not copy the state into local variables
49  *
50  * If there is no usable 64-bit type, the code automatically switches
51  * back to the 32-bit implementation.
52  *
53  * Some tests on an Intel Core2 Q6600 (both 64-bit and 32-bit, 32 kB L1
54  * code cache), a PowerPC (G3, 32 kB L1 code cache), an ARM920T core
55  * (16 kB L1 code cache), and a small MIPS-compatible CPU (Broadcom BCM3302,
56  * 8 kB L1 code cache), seem to show that the following are optimal:
57  *
58  * -- x86, 64-bit: use the 64-bit implementation, unroll 8 rounds,
59  * do not copy the state; unrolling 2, 6 or all rounds also provides
60  * near-optimal performance.
61  * -- x86, 32-bit: use the 32-bit implementation, unroll 6 rounds,
62  * interleave, do not copy the state. Unrolling 1, 2, 4 or 8 rounds
63  * also provides near-optimal performance.
64  * -- PowerPC: use the 64-bit implementation, unroll 8 rounds,
65  * copy the state. Unrolling 4 or 6 rounds is near-optimal.
66  * -- ARM: use the 64-bit implementation, unroll 2 or 4 rounds,
67  * copy the state.
68  * -- MIPS: use the 64-bit implementation, unroll 2 rounds, copy
69  * the state. Unrolling only 1 round is also near-optimal.
70  *
71  * Also, interleaving does not always yield actual improvements when
72  * using a 32-bit implementation; in particular when the architecture
73  * does not offer a native rotation opcode (interleaving replaces one
74  * 64-bit rotation with two 32-bit rotations, which is a gain only if
75  * there is a native 32-bit rotation opcode and not a native 64-bit
76  * rotation opcode; also, interleaving implies a small overhead when
77  * processing input words).
78  *
79  * To sum up:
80  * -- when possible, use the 64-bit code
81  * -- exception: on 32-bit x86, use 32-bit code
82  * -- when using 32-bit code, use interleaving
83  * -- copy the state, except on x86
84  * -- unroll 8 rounds on "big" machines, 2 rounds on "small" machines
85  */
86 
/*
 * The library-wide SPH_SMALL_FOOTPRINT switch turns on the Keccak-specific
 * small-footprint setting, unless SPH_SMALL_FOOTPRINT_KECCAK was already
 * defined by the build.
 */
87 #if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_KECCAK
88 #define SPH_SMALL_FOOTPRINT_KECCAK 1
89 #endif
90 
91 /*
92  * By default, we select the 64-bit implementation if a 64-bit type
93  * is available, unless a 32-bit x86 is detected.
94  */
95 #if !defined SPH_KECCAK_64 && SPH_64 \
96  && !(defined __i386__ || SPH_I386_GCC || SPH_I386_MSVC)
97 #define SPH_KECCAK_64 1
98 #endif
99 
100 /*
101  * If using a 32-bit implementation, we prefer to interleave.
102  */
103 #if !SPH_KECCAK_64 && !defined SPH_KECCAK_INTERLEAVE
104 #define SPH_KECCAK_INTERLEAVE 1
105 #endif
106 
107 /*
108  * Unroll 8 rounds on big systems, 2 rounds on small systems.
109  */
110 #ifndef SPH_KECCAK_UNROLL
111 #if SPH_SMALL_FOOTPRINT_KECCAK
112 #define SPH_KECCAK_UNROLL 2
113 #else
114 #define SPH_KECCAK_UNROLL 8
115 #endif
116 #endif
117 
118 /*
119  * We do not want to copy the state to local variables on x86 (32-bit
120  * and 64-bit alike).
121  */
122 #ifndef SPH_KECCAK_NOCOPY
123 #if defined __i386__ || defined __x86_64 || SPH_I386_MSVC || SPH_I386_GCC
124 #define SPH_KECCAK_NOCOPY 1
125 #else
126 #define SPH_KECCAK_NOCOPY 0
127 #endif
128 #endif
129 
/*
 * MSVC only: silence warning C4146 ("unary minus operator applied to
 * unsigned type, result still unsigned") for this translation unit.
 * NOTE(review): the code that triggers it is not in this part of the
 * file -- confirm the suppression is still needed.
 */
130 #ifdef _MSC_VER
131 #pragma warning (disable: 4146)
132 #endif
133 
134 #if SPH_KECCAK_64
135 
/*
 * Table of 24 constants for the 64-bit implementation, one 64-bit word
 * per entry. The values are the 24 Keccak-f[1600] round constants in
 * round order.
 * NOTE(review): the code that indexes this table (presumably through
 * XOR64_IOTA) lies outside this part of the file.
 */
136 static const sph_u64 RC[] = {
137  SPH_C64(0x0000000000000001), SPH_C64(0x0000000000008082),
138  SPH_C64(0x800000000000808A), SPH_C64(0x8000000080008000),
139  SPH_C64(0x000000000000808B), SPH_C64(0x0000000080000001),
140  SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008009),
141  SPH_C64(0x000000000000008A), SPH_C64(0x0000000000000088),
142  SPH_C64(0x0000000080008009), SPH_C64(0x000000008000000A),
143  SPH_C64(0x000000008000808B), SPH_C64(0x800000000000008B),
144  SPH_C64(0x8000000000008089), SPH_C64(0x8000000000008003),
145  SPH_C64(0x8000000000008002), SPH_C64(0x8000000000000080),
146  SPH_C64(0x000000000000800A), SPH_C64(0x800000008000000A),
147  SPH_C64(0x8000000080008081), SPH_C64(0x8000000000008080),
148  SPH_C64(0x0000000080000001), SPH_C64(0x8000000080008008)
149 };
150 
151 #if SPH_KECCAK_NOCOPY
152 
/*
 * 64-bit, no-copy mode: the 25 state words are not local variables but
 * aliases for the context array. Word aXY maps to kc->u.wide[X + 5 * Y].
 * A pointer named `kc` must be in scope wherever these names are used.
 */
153 #define a00 (kc->u.wide[ 0])
154 #define a10 (kc->u.wide[ 1])
155 #define a20 (kc->u.wide[ 2])
156 #define a30 (kc->u.wide[ 3])
157 #define a40 (kc->u.wide[ 4])
158 #define a01 (kc->u.wide[ 5])
159 #define a11 (kc->u.wide[ 6])
160 #define a21 (kc->u.wide[ 7])
161 #define a31 (kc->u.wide[ 8])
162 #define a41 (kc->u.wide[ 9])
163 #define a02 (kc->u.wide[10])
164 #define a12 (kc->u.wide[11])
165 #define a22 (kc->u.wide[12])
166 #define a32 (kc->u.wide[13])
167 #define a42 (kc->u.wide[14])
168 #define a03 (kc->u.wide[15])
169 #define a13 (kc->u.wide[16])
170 #define a23 (kc->u.wide[17])
171 #define a33 (kc->u.wide[18])
172 #define a43 (kc->u.wide[19])
173 #define a04 (kc->u.wide[20])
174 #define a14 (kc->u.wide[21])
175 #define a24 (kc->u.wide[22])
176 #define a34 (kc->u.wide[23])
177 #define a44 (kc->u.wide[24])
178 
/*
 * 64-bit, no-copy mode: the state is accessed in place through `kc`, so
 * declaring, loading and storing local copies all expand to nothing.
 */
179 #define DECL_STATE
180 #define READ_STATE(sc)
181 #define WRITE_STATE(sc)
182 
/*
 * 64-bit, no-copy mode: XOR the first `size` bytes of `buf` into the
 * state, 8 bytes at a time. Each word is decoded with
 * sph_dec64le_aligned() and XORed into kc->u.wide[j / 8].
 *
 * Both `buf` and `kc` must be in scope at the expansion site. The loop
 * advances by 8 and reads a full word each time, so `size` is expected
 * to be a multiple of 8 (all uses visible here pass 72, 104, 136 or 144).
 */
183 #define INPUT_BUF(size) do { \
184  size_t j; \
185  for (j = 0; j < (size); j += 8) { \
186  kc->u.wide[j >> 3] ^= sph_dec64le_aligned(buf + j); \
187  } \
188  } while (0)
189 
/* Fixed-size variants; in this mode they share the generic loop above. */
190 #define INPUT_BUF144 INPUT_BUF(144)
191 #define INPUT_BUF136 INPUT_BUF(136)
192 #define INPUT_BUF104 INPUT_BUF(104)
193 #define INPUT_BUF72 INPUT_BUF(72)
194 
195 #else
196 
/*
 * 64-bit, copy mode: declare the 25 state words a00..a44 as local
 * variables. The expansion already ends with a semicolon.
 */
197 #define DECL_STATE \
198  sph_u64 a00, a01, a02, a03, a04; \
199  sph_u64 a10, a11, a12, a13, a14; \
200  sph_u64 a20, a21, a22, a23, a24; \
201  sph_u64 a30, a31, a32, a33, a34; \
202  sph_u64 a40, a41, a42, a43, a44;
203 
/*
 * 64-bit, copy mode: load the locals declared by DECL_STATE from
 * (state)->u.wide[0..24]. Local aXY receives element X + 5 * Y.
 */
204 #define READ_STATE(state) do { \
205  a00 = (state)->u.wide[ 0]; \
206  a10 = (state)->u.wide[ 1]; \
207  a20 = (state)->u.wide[ 2]; \
208  a30 = (state)->u.wide[ 3]; \
209  a40 = (state)->u.wide[ 4]; \
210  a01 = (state)->u.wide[ 5]; \
211  a11 = (state)->u.wide[ 6]; \
212  a21 = (state)->u.wide[ 7]; \
213  a31 = (state)->u.wide[ 8]; \
214  a41 = (state)->u.wide[ 9]; \
215  a02 = (state)->u.wide[10]; \
216  a12 = (state)->u.wide[11]; \
217  a22 = (state)->u.wide[12]; \
218  a32 = (state)->u.wide[13]; \
219  a42 = (state)->u.wide[14]; \
220  a03 = (state)->u.wide[15]; \
221  a13 = (state)->u.wide[16]; \
222  a23 = (state)->u.wide[17]; \
223  a33 = (state)->u.wide[18]; \
224  a43 = (state)->u.wide[19]; \
225  a04 = (state)->u.wide[20]; \
226  a14 = (state)->u.wide[21]; \
227  a24 = (state)->u.wide[22]; \
228  a34 = (state)->u.wide[23]; \
229  a44 = (state)->u.wide[24]; \
230  } while (0)
231 
/*
 * 64-bit, copy mode: store the 25 local state words back into
 * (state)->u.wide[0..24], using the same index mapping as READ_STATE.
 */
232 #define WRITE_STATE(state) do { \
233  (state)->u.wide[ 0] = a00; \
234  (state)->u.wide[ 1] = a10; \
235  (state)->u.wide[ 2] = a20; \
236  (state)->u.wide[ 3] = a30; \
237  (state)->u.wide[ 4] = a40; \
238  (state)->u.wide[ 5] = a01; \
239  (state)->u.wide[ 6] = a11; \
240  (state)->u.wide[ 7] = a21; \
241  (state)->u.wide[ 8] = a31; \
242  (state)->u.wide[ 9] = a41; \
243  (state)->u.wide[10] = a02; \
244  (state)->u.wide[11] = a12; \
245  (state)->u.wide[12] = a22; \
246  (state)->u.wide[13] = a32; \
247  (state)->u.wide[14] = a42; \
248  (state)->u.wide[15] = a03; \
249  (state)->u.wide[16] = a13; \
250  (state)->u.wide[17] = a23; \
251  (state)->u.wide[18] = a33; \
252  (state)->u.wide[19] = a43; \
253  (state)->u.wide[20] = a04; \
254  (state)->u.wide[21] = a14; \
255  (state)->u.wide[22] = a24; \
256  (state)->u.wide[23] = a34; \
257  (state)->u.wide[24] = a44; \
258  } while (0)
259 
/*
 * 64-bit, copy mode: XOR 144 bytes of `buf` (18 words, decoded with
 * sph_dec64le_aligned()) into local state words a00..a23, in state order.
 * `buf` and the DECL_STATE locals must be in scope.
 */
260 #define INPUT_BUF144 do { \
261  a00 ^= sph_dec64le_aligned(buf + 0); \
262  a10 ^= sph_dec64le_aligned(buf + 8); \
263  a20 ^= sph_dec64le_aligned(buf + 16); \
264  a30 ^= sph_dec64le_aligned(buf + 24); \
265  a40 ^= sph_dec64le_aligned(buf + 32); \
266  a01 ^= sph_dec64le_aligned(buf + 40); \
267  a11 ^= sph_dec64le_aligned(buf + 48); \
268  a21 ^= sph_dec64le_aligned(buf + 56); \
269  a31 ^= sph_dec64le_aligned(buf + 64); \
270  a41 ^= sph_dec64le_aligned(buf + 72); \
271  a02 ^= sph_dec64le_aligned(buf + 80); \
272  a12 ^= sph_dec64le_aligned(buf + 88); \
273  a22 ^= sph_dec64le_aligned(buf + 96); \
274  a32 ^= sph_dec64le_aligned(buf + 104); \
275  a42 ^= sph_dec64le_aligned(buf + 112); \
276  a03 ^= sph_dec64le_aligned(buf + 120); \
277  a13 ^= sph_dec64le_aligned(buf + 128); \
278  a23 ^= sph_dec64le_aligned(buf + 136); \
279  } while (0)
280 
/*
 * 64-bit, copy mode: XOR 136 bytes of `buf` (17 words) into local state
 * words a00..a13, in state order.
 */
281 #define INPUT_BUF136 do { \
282  a00 ^= sph_dec64le_aligned(buf + 0); \
283  a10 ^= sph_dec64le_aligned(buf + 8); \
284  a20 ^= sph_dec64le_aligned(buf + 16); \
285  a30 ^= sph_dec64le_aligned(buf + 24); \
286  a40 ^= sph_dec64le_aligned(buf + 32); \
287  a01 ^= sph_dec64le_aligned(buf + 40); \
288  a11 ^= sph_dec64le_aligned(buf + 48); \
289  a21 ^= sph_dec64le_aligned(buf + 56); \
290  a31 ^= sph_dec64le_aligned(buf + 64); \
291  a41 ^= sph_dec64le_aligned(buf + 72); \
292  a02 ^= sph_dec64le_aligned(buf + 80); \
293  a12 ^= sph_dec64le_aligned(buf + 88); \
294  a22 ^= sph_dec64le_aligned(buf + 96); \
295  a32 ^= sph_dec64le_aligned(buf + 104); \
296  a42 ^= sph_dec64le_aligned(buf + 112); \
297  a03 ^= sph_dec64le_aligned(buf + 120); \
298  a13 ^= sph_dec64le_aligned(buf + 128); \
299  } while (0)
300 
/*
 * 64-bit, copy mode: XOR 104 bytes of `buf` (13 words) into local state
 * words a00..a22, in state order.
 */
301 #define INPUT_BUF104 do { \
302  a00 ^= sph_dec64le_aligned(buf + 0); \
303  a10 ^= sph_dec64le_aligned(buf + 8); \
304  a20 ^= sph_dec64le_aligned(buf + 16); \
305  a30 ^= sph_dec64le_aligned(buf + 24); \
306  a40 ^= sph_dec64le_aligned(buf + 32); \
307  a01 ^= sph_dec64le_aligned(buf + 40); \
308  a11 ^= sph_dec64le_aligned(buf + 48); \
309  a21 ^= sph_dec64le_aligned(buf + 56); \
310  a31 ^= sph_dec64le_aligned(buf + 64); \
311  a41 ^= sph_dec64le_aligned(buf + 72); \
312  a02 ^= sph_dec64le_aligned(buf + 80); \
313  a12 ^= sph_dec64le_aligned(buf + 88); \
314  a22 ^= sph_dec64le_aligned(buf + 96); \
315  } while (0)
316 
/*
 * 64-bit, copy mode: XOR 72 bytes of `buf` (9 words) into local state
 * words a00..a31, in state order.
 */
317 #define INPUT_BUF72 do { \
318  a00 ^= sph_dec64le_aligned(buf + 0); \
319  a10 ^= sph_dec64le_aligned(buf + 8); \
320  a20 ^= sph_dec64le_aligned(buf + 16); \
321  a30 ^= sph_dec64le_aligned(buf + 24); \
322  a40 ^= sph_dec64le_aligned(buf + 32); \
323  a01 ^= sph_dec64le_aligned(buf + 40); \
324  a11 ^= sph_dec64le_aligned(buf + 48); \
325  a21 ^= sph_dec64le_aligned(buf + 56); \
326  a31 ^= sph_dec64le_aligned(buf + 64); \
327  } while (0)
328 
/*
 * 64-bit, copy mode: XOR `lim` bytes of `buf` into the local state words.
 * `lim` is only compared against 72, 104 and 136; any other value falls
 * through and absorbs the full 144 bytes. Each `break` leaves this
 * macro's own do/while(0), not a loop in the caller.
 */
329 #define INPUT_BUF(lim) do { \
330  a00 ^= sph_dec64le_aligned(buf + 0); \
331  a10 ^= sph_dec64le_aligned(buf + 8); \
332  a20 ^= sph_dec64le_aligned(buf + 16); \
333  a30 ^= sph_dec64le_aligned(buf + 24); \
334  a40 ^= sph_dec64le_aligned(buf + 32); \
335  a01 ^= sph_dec64le_aligned(buf + 40); \
336  a11 ^= sph_dec64le_aligned(buf + 48); \
337  a21 ^= sph_dec64le_aligned(buf + 56); \
338  a31 ^= sph_dec64le_aligned(buf + 64); \
339  if ((lim) == 72) \
340  break; \
341  a41 ^= sph_dec64le_aligned(buf + 72); \
342  a02 ^= sph_dec64le_aligned(buf + 80); \
343  a12 ^= sph_dec64le_aligned(buf + 88); \
344  a22 ^= sph_dec64le_aligned(buf + 96); \
345  if ((lim) == 104) \
346  break; \
347  a32 ^= sph_dec64le_aligned(buf + 104); \
348  a42 ^= sph_dec64le_aligned(buf + 112); \
349  a03 ^= sph_dec64le_aligned(buf + 120); \
350  a13 ^= sph_dec64le_aligned(buf + 128); \
351  if ((lim) == 136) \
352  break; \
353  a23 ^= sph_dec64le_aligned(buf + 136); \
354  } while (0)
355 
356 #endif
357 
/*
 * Lane operations for the 64-bit implementation. Each lane is a single
 * sph_u64, so every operation is one C expression.
 *
 * The arguments are pasted into the expansion without parentheses, so
 * callers should pass plain identifiers or already-parenthesized
 * expressions. XOR64_IOTA is the same operation as XOR64 in this mode.
 */
358 #define DECL64(x) sph_u64 x
359 #define MOV64(d, s) (d = s)
360 #define XOR64(d, a, b) (d = a ^ b)
361 #define AND64(d, a, b) (d = a & b)
362 #define OR64(d, a, b) (d = a | b)
363 #define NOT64(d, s) (d = SPH_T64(~s))
364 #define ROL64(d, v, n) (d = SPH_ROTL64(v, n))
365 #define XOR64_IOTA XOR64
366 
367 #else
368 
/*
 * Table of 24 constants for the 32-bit implementation. Each 64-bit
 * constant is stored as two 32-bit words, in the order { high, low }.
 *
 * -- Without interleaving, `high` and `low` are the upper and lower
 *    32 bits of the constant (the pairs match the 64-bit RC[] table).
 * -- With SPH_KECCAK_INTERLEAVE, the constants are stored already
 *    interleaved: `low` holds the even-indexed bits and `high` the
 *    odd-indexed bits, the same layout INTERLEAVE() produces.
 *
 * The member declaration (listing line 370) was missing from this
 * listing. Without it the struct has no members and none of the
 * two-element initializers below is valid C.
 * NOTE(review): the member names follow the upstream sphlib source;
 * confirm against the code that reads RC[] later in the file.
 */
369 static const struct {
370  sph_u32 high, low;
371 } RC[] = {
372 #if SPH_KECCAK_INTERLEAVE
373  { SPH_C32(0x00000000), SPH_C32(0x00000001) },
374  { SPH_C32(0x00000089), SPH_C32(0x00000000) },
375  { SPH_C32(0x8000008B), SPH_C32(0x00000000) },
376  { SPH_C32(0x80008080), SPH_C32(0x00000000) },
377  { SPH_C32(0x0000008B), SPH_C32(0x00000001) },
378  { SPH_C32(0x00008000), SPH_C32(0x00000001) },
379  { SPH_C32(0x80008088), SPH_C32(0x00000001) },
380  { SPH_C32(0x80000082), SPH_C32(0x00000001) },
381  { SPH_C32(0x0000000B), SPH_C32(0x00000000) },
382  { SPH_C32(0x0000000A), SPH_C32(0x00000000) },
383  { SPH_C32(0x00008082), SPH_C32(0x00000001) },
384  { SPH_C32(0x00008003), SPH_C32(0x00000000) },
385  { SPH_C32(0x0000808B), SPH_C32(0x00000001) },
386  { SPH_C32(0x8000000B), SPH_C32(0x00000001) },
387  { SPH_C32(0x8000008A), SPH_C32(0x00000001) },
388  { SPH_C32(0x80000081), SPH_C32(0x00000001) },
389  { SPH_C32(0x80000081), SPH_C32(0x00000000) },
390  { SPH_C32(0x80000008), SPH_C32(0x00000000) },
391  { SPH_C32(0x00000083), SPH_C32(0x00000000) },
392  { SPH_C32(0x80008003), SPH_C32(0x00000000) },
393  { SPH_C32(0x80008088), SPH_C32(0x00000001) },
394  { SPH_C32(0x80000088), SPH_C32(0x00000000) },
395  { SPH_C32(0x00008000), SPH_C32(0x00000001) },
396  { SPH_C32(0x80008082), SPH_C32(0x00000000) }
397 #else
398  { SPH_C32(0x00000000), SPH_C32(0x00000001) },
399  { SPH_C32(0x00000000), SPH_C32(0x00008082) },
400  { SPH_C32(0x80000000), SPH_C32(0x0000808A) },
401  { SPH_C32(0x80000000), SPH_C32(0x80008000) },
402  { SPH_C32(0x00000000), SPH_C32(0x0000808B) },
403  { SPH_C32(0x00000000), SPH_C32(0x80000001) },
404  { SPH_C32(0x80000000), SPH_C32(0x80008081) },
405  { SPH_C32(0x80000000), SPH_C32(0x00008009) },
406  { SPH_C32(0x00000000), SPH_C32(0x0000008A) },
407  { SPH_C32(0x00000000), SPH_C32(0x00000088) },
408  { SPH_C32(0x00000000), SPH_C32(0x80008009) },
409  { SPH_C32(0x00000000), SPH_C32(0x8000000A) },
410  { SPH_C32(0x00000000), SPH_C32(0x8000808B) },
411  { SPH_C32(0x80000000), SPH_C32(0x0000008B) },
412  { SPH_C32(0x80000000), SPH_C32(0x00008089) },
413  { SPH_C32(0x80000000), SPH_C32(0x00008003) },
414  { SPH_C32(0x80000000), SPH_C32(0x00008002) },
415  { SPH_C32(0x80000000), SPH_C32(0x00000080) },
416  { SPH_C32(0x00000000), SPH_C32(0x0000800A) },
417  { SPH_C32(0x80000000), SPH_C32(0x8000000A) },
418  { SPH_C32(0x80000000), SPH_C32(0x80008081) },
419  { SPH_C32(0x80000000), SPH_C32(0x00008080) },
420  { SPH_C32(0x00000000), SPH_C32(0x80000001) },
421  { SPH_C32(0x80000000), SPH_C32(0x80008008) }
422 #endif
423 };
424 
425 #if SPH_KECCAK_INTERLEAVE
426 
/*
 * Bit-interleave a 64-bit value held as two 32-bit words, in place.
 * On entry xl/xh are the low/high halves. On exit xl holds the
 * even-indexed bits of the 64-bit value and xh the odd-indexed bits.
 *
 * The four masked swap steps on each word move its even-indexed bits
 * into the low 16 bits and its odd-indexed bits into the high 16 bits.
 * The last step exchanges the high half of l with the low half of h.
 *
 * xl and xh must be modifiable lvalues and must not be named l, h or t,
 * because the macro declares locals with those names.
 */
427 #define INTERLEAVE(xl, xh) do { \
428  sph_u32 l, h, t; \
429  l = (xl); h = (xh); \
430  t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \
431  t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \
432  t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
433  t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
434  t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \
435  t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \
436  t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \
437  t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \
438  t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \
439  l ^= t; h ^= t >> 16; \
440  (xl) = l; (xh) = h; \
441  } while (0)
442 
/*
 * Inverse of INTERLEAVE: applies the same swap steps in the opposite
 * order, turning an (even bits, odd bits) pair back into the plain
 * (low half, high half) form, in place.
 *
 * xl and xh must be modifiable lvalues and must not be named l, h or t,
 * because the macro declares locals with those names.
 */
443 #define UNINTERLEAVE(xl, xh) do { \
444  sph_u32 l, h, t; \
445  l = (xl); h = (xh); \
446  t = (l ^ SPH_T32(h << 16)) & SPH_C32(0xFFFF0000); \
447  l ^= t; h ^= t >> 16; \
448  t = (l ^ (l >> 8)) & SPH_C32(0x0000FF00); l ^= t ^ (t << 8); \
449  t = (h ^ (h >> 8)) & SPH_C32(0x0000FF00); h ^= t ^ (t << 8); \
450  t = (l ^ (l >> 4)) & SPH_C32(0x00F000F0); l ^= t ^ (t << 4); \
451  t = (h ^ (h >> 4)) & SPH_C32(0x00F000F0); h ^= t ^ (t << 4); \
452  t = (l ^ (l >> 2)) & SPH_C32(0x0C0C0C0C); l ^= t ^ (t << 2); \
453  t = (h ^ (h >> 2)) & SPH_C32(0x0C0C0C0C); h ^= t ^ (t << 2); \
454  t = (l ^ (l >> 1)) & SPH_C32(0x22222222); l ^= t ^ (t << 1); \
455  t = (h ^ (h >> 1)) & SPH_C32(0x22222222); h ^= t ^ (t << 1); \
456  (xl) = l; (xh) = h; \
457  } while (0)
458 
459 #else
460 
/*
 * Interleaving disabled: both conversions expand to nothing, so words
 * stay in plain (low half, high half) form.
 */
461 #define INTERLEAVE(l, h)
462 #define UNINTERLEAVE(l, h)
463 
464 #endif
465 
466 #if SPH_KECCAK_NOCOPY
467 
/*
 * 32-bit, no-copy mode: each state word aXY is a pair of 32-bit halves
 * stored directly in the context. With i = X + 5 * Y, aXYl maps to
 * kc->u.narrow[2 * i] and aXYh to kc->u.narrow[2 * i + 1].
 * A pointer named `kc` must be in scope wherever these names are used.
 */
468 #define a00l (kc->u.narrow[2 * 0 + 0])
469 #define a00h (kc->u.narrow[2 * 0 + 1])
470 #define a10l (kc->u.narrow[2 * 1 + 0])
471 #define a10h (kc->u.narrow[2 * 1 + 1])
472 #define a20l (kc->u.narrow[2 * 2 + 0])
473 #define a20h (kc->u.narrow[2 * 2 + 1])
474 #define a30l (kc->u.narrow[2 * 3 + 0])
475 #define a30h (kc->u.narrow[2 * 3 + 1])
476 #define a40l (kc->u.narrow[2 * 4 + 0])
477 #define a40h (kc->u.narrow[2 * 4 + 1])
478 #define a01l (kc->u.narrow[2 * 5 + 0])
479 #define a01h (kc->u.narrow[2 * 5 + 1])
480 #define a11l (kc->u.narrow[2 * 6 + 0])
481 #define a11h (kc->u.narrow[2 * 6 + 1])
482 #define a21l (kc->u.narrow[2 * 7 + 0])
483 #define a21h (kc->u.narrow[2 * 7 + 1])
484 #define a31l (kc->u.narrow[2 * 8 + 0])
485 #define a31h (kc->u.narrow[2 * 8 + 1])
486 #define a41l (kc->u.narrow[2 * 9 + 0])
487 #define a41h (kc->u.narrow[2 * 9 + 1])
488 #define a02l (kc->u.narrow[2 * 10 + 0])
489 #define a02h (kc->u.narrow[2 * 10 + 1])
490 #define a12l (kc->u.narrow[2 * 11 + 0])
491 #define a12h (kc->u.narrow[2 * 11 + 1])
492 #define a22l (kc->u.narrow[2 * 12 + 0])
493 #define a22h (kc->u.narrow[2 * 12 + 1])
494 #define a32l (kc->u.narrow[2 * 13 + 0])
495 #define a32h (kc->u.narrow[2 * 13 + 1])
496 #define a42l (kc->u.narrow[2 * 14 + 0])
497 #define a42h (kc->u.narrow[2 * 14 + 1])
498 #define a03l (kc->u.narrow[2 * 15 + 0])
499 #define a03h (kc->u.narrow[2 * 15 + 1])
500 #define a13l (kc->u.narrow[2 * 16 + 0])
501 #define a13h (kc->u.narrow[2 * 16 + 1])
502 #define a23l (kc->u.narrow[2 * 17 + 0])
503 #define a23h (kc->u.narrow[2 * 17 + 1])
504 #define a33l (kc->u.narrow[2 * 18 + 0])
505 #define a33h (kc->u.narrow[2 * 18 + 1])
506 #define a43l (kc->u.narrow[2 * 19 + 0])
507 #define a43h (kc->u.narrow[2 * 19 + 1])
508 #define a04l (kc->u.narrow[2 * 20 + 0])
509 #define a04h (kc->u.narrow[2 * 20 + 1])
510 #define a14l (kc->u.narrow[2 * 21 + 0])
511 #define a14h (kc->u.narrow[2 * 21 + 1])
512 #define a24l (kc->u.narrow[2 * 22 + 0])
513 #define a24h (kc->u.narrow[2 * 22 + 1])
514 #define a34l (kc->u.narrow[2 * 23 + 0])
515 #define a34h (kc->u.narrow[2 * 23 + 1])
516 #define a44l (kc->u.narrow[2 * 24 + 0])
517 #define a44h (kc->u.narrow[2 * 24 + 1])
518 
/*
 * 32-bit, no-copy mode: the state is accessed in place through `kc`, so
 * declaring, loading and storing local copies all expand to nothing.
 */
519 #define DECL_STATE
520 #define READ_STATE(state)
521 #define WRITE_STATE(state)
522 
/*
 * 32-bit, no-copy mode: XOR the first `size` bytes of `buf` into the
 * state, 8 bytes at a time. Each 8-byte group is read as two 32-bit
 * words with sph_dec32le_aligned() and passed through INTERLEAVE (a
 * no-op when interleaving is disabled). The results are XORed into
 * kc->u.narrow[j / 4] and kc->u.narrow[j / 4 + 1].
 *
 * Both `buf` and `kc` must be in scope at the expansion site. `size` is
 * expected to be a multiple of 8 (all uses visible here pass 72, 104,
 * 136 or 144).
 */
523 #define INPUT_BUF(size) do { \
524  size_t j; \
525  for (j = 0; j < (size); j += 8) { \
526  sph_u32 tl, th; \
527  tl = sph_dec32le_aligned(buf + j + 0); \
528  th = sph_dec32le_aligned(buf + j + 4); \
529  INTERLEAVE(tl, th); \
530  kc->u.narrow[(j >> 2) + 0] ^= tl; \
531  kc->u.narrow[(j >> 2) + 1] ^= th; \
532  } \
533  } while (0)
534 
/* Fixed-size variants; in this mode they share the generic loop above. */
535 #define INPUT_BUF144 INPUT_BUF(144)
536 #define INPUT_BUF136 INPUT_BUF(136)
537 #define INPUT_BUF104 INPUT_BUF(104)
538 #define INPUT_BUF72 INPUT_BUF(72)
539 
540 #else
541 
/*
 * 32-bit, copy mode: declare the 25 state words as local variables, each
 * as a pair of 32-bit halves aXYl / aXYh. The expansion already ends
 * with a semicolon.
 */
542 #define DECL_STATE \
543  sph_u32 a00l, a00h, a01l, a01h, a02l, a02h, a03l, a03h, a04l, a04h; \
544  sph_u32 a10l, a10h, a11l, a11h, a12l, a12h, a13l, a13h, a14l, a14h; \
545  sph_u32 a20l, a20h, a21l, a21h, a22l, a22h, a23l, a23h, a24l, a24h; \
546  sph_u32 a30l, a30h, a31l, a31h, a32l, a32h, a33l, a33h, a34l, a34h; \
547  sph_u32 a40l, a40h, a41l, a41h, a42l, a42h, a43l, a43h, a44l, a44h;
548 
/*
 * 32-bit, copy mode: load the locals declared by DECL_STATE from
 * (state)->u.narrow[0..49]. With i = X + 5 * Y, aXYl receives element
 * 2 * i and aXYh element 2 * i + 1.
 */
549 #define READ_STATE(state) do { \
550  a00l = (state)->u.narrow[2 * 0 + 0]; \
551  a00h = (state)->u.narrow[2 * 0 + 1]; \
552  a10l = (state)->u.narrow[2 * 1 + 0]; \
553  a10h = (state)->u.narrow[2 * 1 + 1]; \
554  a20l = (state)->u.narrow[2 * 2 + 0]; \
555  a20h = (state)->u.narrow[2 * 2 + 1]; \
556  a30l = (state)->u.narrow[2 * 3 + 0]; \
557  a30h = (state)->u.narrow[2 * 3 + 1]; \
558  a40l = (state)->u.narrow[2 * 4 + 0]; \
559  a40h = (state)->u.narrow[2 * 4 + 1]; \
560  a01l = (state)->u.narrow[2 * 5 + 0]; \
561  a01h = (state)->u.narrow[2 * 5 + 1]; \
562  a11l = (state)->u.narrow[2 * 6 + 0]; \
563  a11h = (state)->u.narrow[2 * 6 + 1]; \
564  a21l = (state)->u.narrow[2 * 7 + 0]; \
565  a21h = (state)->u.narrow[2 * 7 + 1]; \
566  a31l = (state)->u.narrow[2 * 8 + 0]; \
567  a31h = (state)->u.narrow[2 * 8 + 1]; \
568  a41l = (state)->u.narrow[2 * 9 + 0]; \
569  a41h = (state)->u.narrow[2 * 9 + 1]; \
570  a02l = (state)->u.narrow[2 * 10 + 0]; \
571  a02h = (state)->u.narrow[2 * 10 + 1]; \
572  a12l = (state)->u.narrow[2 * 11 + 0]; \
573  a12h = (state)->u.narrow[2 * 11 + 1]; \
574  a22l = (state)->u.narrow[2 * 12 + 0]; \
575  a22h = (state)->u.narrow[2 * 12 + 1]; \
576  a32l = (state)->u.narrow[2 * 13 + 0]; \
577  a32h = (state)->u.narrow[2 * 13 + 1]; \
578  a42l = (state)->u.narrow[2 * 14 + 0]; \
579  a42h = (state)->u.narrow[2 * 14 + 1]; \
580  a03l = (state)->u.narrow[2 * 15 + 0]; \
581  a03h = (state)->u.narrow[2 * 15 + 1]; \
582  a13l = (state)->u.narrow[2 * 16 + 0]; \
583  a13h = (state)->u.narrow[2 * 16 + 1]; \
584  a23l = (state)->u.narrow[2 * 17 + 0]; \
585  a23h = (state)->u.narrow[2 * 17 + 1]; \
586  a33l = (state)->u.narrow[2 * 18 + 0]; \
587  a33h = (state)->u.narrow[2 * 18 + 1]; \
588  a43l = (state)->u.narrow[2 * 19 + 0]; \
589  a43h = (state)->u.narrow[2 * 19 + 1]; \
590  a04l = (state)->u.narrow[2 * 20 + 0]; \
591  a04h = (state)->u.narrow[2 * 20 + 1]; \
592  a14l = (state)->u.narrow[2 * 21 + 0]; \
593  a14h = (state)->u.narrow[2 * 21 + 1]; \
594  a24l = (state)->u.narrow[2 * 22 + 0]; \
595  a24h = (state)->u.narrow[2 * 22 + 1]; \
596  a34l = (state)->u.narrow[2 * 23 + 0]; \
597  a34h = (state)->u.narrow[2 * 23 + 1]; \
598  a44l = (state)->u.narrow[2 * 24 + 0]; \
599  a44h = (state)->u.narrow[2 * 24 + 1]; \
600  } while (0)
601 
/*
 * 32-bit, copy mode: store the 50 local half-words back into
 * (state)->u.narrow[0..49], using the same index mapping as READ_STATE.
 */
602 #define WRITE_STATE(state) do { \
603  (state)->u.narrow[2 * 0 + 0] = a00l; \
604  (state)->u.narrow[2 * 0 + 1] = a00h; \
605  (state)->u.narrow[2 * 1 + 0] = a10l; \
606  (state)->u.narrow[2 * 1 + 1] = a10h; \
607  (state)->u.narrow[2 * 2 + 0] = a20l; \
608  (state)->u.narrow[2 * 2 + 1] = a20h; \
609  (state)->u.narrow[2 * 3 + 0] = a30l; \
610  (state)->u.narrow[2 * 3 + 1] = a30h; \
611  (state)->u.narrow[2 * 4 + 0] = a40l; \
612  (state)->u.narrow[2 * 4 + 1] = a40h; \
613  (state)->u.narrow[2 * 5 + 0] = a01l; \
614  (state)->u.narrow[2 * 5 + 1] = a01h; \
615  (state)->u.narrow[2 * 6 + 0] = a11l; \
616  (state)->u.narrow[2 * 6 + 1] = a11h; \
617  (state)->u.narrow[2 * 7 + 0] = a21l; \
618  (state)->u.narrow[2 * 7 + 1] = a21h; \
619  (state)->u.narrow[2 * 8 + 0] = a31l; \
620  (state)->u.narrow[2 * 8 + 1] = a31h; \
621  (state)->u.narrow[2 * 9 + 0] = a41l; \
622  (state)->u.narrow[2 * 9 + 1] = a41h; \
623  (state)->u.narrow[2 * 10 + 0] = a02l; \
624  (state)->u.narrow[2 * 10 + 1] = a02h; \
625  (state)->u.narrow[2 * 11 + 0] = a12l; \
626  (state)->u.narrow[2 * 11 + 1] = a12h; \
627  (state)->u.narrow[2 * 12 + 0] = a22l; \
628  (state)->u.narrow[2 * 12 + 1] = a22h; \
629  (state)->u.narrow[2 * 13 + 0] = a32l; \
630  (state)->u.narrow[2 * 13 + 1] = a32h; \
631  (state)->u.narrow[2 * 14 + 0] = a42l; \
632  (state)->u.narrow[2 * 14 + 1] = a42h; \
633  (state)->u.narrow[2 * 15 + 0] = a03l; \
634  (state)->u.narrow[2 * 15 + 1] = a03h; \
635  (state)->u.narrow[2 * 16 + 0] = a13l; \
636  (state)->u.narrow[2 * 16 + 1] = a13h; \
637  (state)->u.narrow[2 * 17 + 0] = a23l; \
638  (state)->u.narrow[2 * 17 + 1] = a23h; \
639  (state)->u.narrow[2 * 18 + 0] = a33l; \
640  (state)->u.narrow[2 * 18 + 1] = a33h; \
641  (state)->u.narrow[2 * 19 + 0] = a43l; \
642  (state)->u.narrow[2 * 19 + 1] = a43h; \
643  (state)->u.narrow[2 * 20 + 0] = a04l; \
644  (state)->u.narrow[2 * 20 + 1] = a04h; \
645  (state)->u.narrow[2 * 21 + 0] = a14l; \
646  (state)->u.narrow[2 * 21 + 1] = a14h; \
647  (state)->u.narrow[2 * 22 + 0] = a24l; \
648  (state)->u.narrow[2 * 22 + 1] = a24h; \
649  (state)->u.narrow[2 * 23 + 0] = a34l; \
650  (state)->u.narrow[2 * 23 + 1] = a34h; \
651  (state)->u.narrow[2 * 24 + 0] = a44l; \
652  (state)->u.narrow[2 * 24 + 1] = a44h; \
653  } while (0)
654 
/*
 * 32-bit, copy mode: absorb one 8-byte input word into state word `d`.
 * Reads two 32-bit words at buf + off and buf + off + 4 with
 * sph_dec32le_aligned(), passes them through INTERLEAVE, and XORs them
 * into d##l and d##h.
 *
 * Despite the name, this XORs into `d` rather than overwriting it.
 * `d` must be a bare state-word name (e.g. a00) because it is
 * token-pasted with `l` and `h`.
 */
655 #define READ64(d, off) do { \
656  sph_u32 tl, th; \
657  tl = sph_dec32le_aligned(buf + (off)); \
658  th = sph_dec32le_aligned(buf + (off) + 4); \
659  INTERLEAVE(tl, th); \
660  d ## l ^= tl; \
661  d ## h ^= th; \
662  } while (0)
663 
/*
 * 32-bit, copy mode: absorb 144 bytes of `buf` (18 words) into state
 * words a00..a23, in state order, via READ64.
 */
664 #define INPUT_BUF144 do { \
665  READ64(a00, 0); \
666  READ64(a10, 8); \
667  READ64(a20, 16); \
668  READ64(a30, 24); \
669  READ64(a40, 32); \
670  READ64(a01, 40); \
671  READ64(a11, 48); \
672  READ64(a21, 56); \
673  READ64(a31, 64); \
674  READ64(a41, 72); \
675  READ64(a02, 80); \
676  READ64(a12, 88); \
677  READ64(a22, 96); \
678  READ64(a32, 104); \
679  READ64(a42, 112); \
680  READ64(a03, 120); \
681  READ64(a13, 128); \
682  READ64(a23, 136); \
683  } while (0)
684 
/*
 * 32-bit, copy mode: absorb 136 bytes of `buf` (17 words) into state
 * words a00..a13, in state order, via READ64.
 */
685 #define INPUT_BUF136 do { \
686  READ64(a00, 0); \
687  READ64(a10, 8); \
688  READ64(a20, 16); \
689  READ64(a30, 24); \
690  READ64(a40, 32); \
691  READ64(a01, 40); \
692  READ64(a11, 48); \
693  READ64(a21, 56); \
694  READ64(a31, 64); \
695  READ64(a41, 72); \
696  READ64(a02, 80); \
697  READ64(a12, 88); \
698  READ64(a22, 96); \
699  READ64(a32, 104); \
700  READ64(a42, 112); \
701  READ64(a03, 120); \
702  READ64(a13, 128); \
703  } while (0)
704 
/*
 * 32-bit, copy mode: absorb 104 bytes of `buf` (13 words) into state
 * words a00..a22, in state order, via READ64.
 */
705 #define INPUT_BUF104 do { \
706  READ64(a00, 0); \
707  READ64(a10, 8); \
708  READ64(a20, 16); \
709  READ64(a30, 24); \
710  READ64(a40, 32); \
711  READ64(a01, 40); \
712  READ64(a11, 48); \
713  READ64(a21, 56); \
714  READ64(a31, 64); \
715  READ64(a41, 72); \
716  READ64(a02, 80); \
717  READ64(a12, 88); \
718  READ64(a22, 96); \
719  } while (0)
720 
/*
 * 32-bit, copy mode: absorb 72 bytes of `buf` (9 words) into state
 * words a00..a31, in state order, via READ64.
 */
721 #define INPUT_BUF72 do { \
722  READ64(a00, 0); \
723  READ64(a10, 8); \
724  READ64(a20, 16); \
725  READ64(a30, 24); \
726  READ64(a40, 32); \
727  READ64(a01, 40); \
728  READ64(a11, 48); \
729  READ64(a21, 56); \
730  READ64(a31, 64); \
731  } while (0)
732 
/*
 * 32-bit, copy mode: absorb `lim` bytes of `buf` into the local state
 * words via READ64. `lim` is only compared against 72, 104 and 136; any
 * other value falls through and absorbs the full 144 bytes. Each `break`
 * leaves this macro's own do/while(0), not a loop in the caller.
 */
733 #define INPUT_BUF(lim) do { \
734  READ64(a00, 0); \
735  READ64(a10, 8); \
736  READ64(a20, 16); \
737  READ64(a30, 24); \
738  READ64(a40, 32); \
739  READ64(a01, 40); \
740  READ64(a11, 48); \
741  READ64(a21, 56); \
742  READ64(a31, 64); \
743  if ((lim) == 72) \
744  break; \
745  READ64(a41, 72); \
746  READ64(a02, 80); \
747  READ64(a12, 88); \
748  READ64(a22, 96); \
749  if ((lim) == 104) \
750  break; \
751  READ64(a32, 104); \
752  READ64(a42, 112); \
753  READ64(a03, 120); \
754  READ64(a13, 128); \
755  if ((lim) == 136) \
756  break; \
757  READ64(a23, 136); \
758  } while (0)
759 
760 #endif
761 
/*
 * Lane operations for the 32-bit implementation. A 64-bit lane `x` is
 * the pair of 32-bit variables x##l and x##h, so every argument must be
 * a bare identifier that can be token-pasted.
 *
 * DECL64 declares the two halves as sph_u32. They only ever hold 32-bit
 * values (NOT64 and the rotations mask with SPH_T32), and declaring them
 * as sph_u64 made the 32-bit path depend on a 64-bit type, which defeats
 * the fallback for platforms without one.
 *
 * ROL64 dispatches on the rotation count by token pasting
 * (ROL64_ ## n), so `n` must be a literal decimal count with a matching
 * ROL64_<n> macro.
 */
762 #define DECL64(x) sph_u32 x ## l, x ## h
763 #define MOV64(d, s) (d ## l = s ## l, d ## h = s ## h)
764 #define XOR64(d, a, b) (d ## l = a ## l ^ b ## l, d ## h = a ## h ^ b ## h)
765 #define AND64(d, a, b) (d ## l = a ## l & b ## l, d ## h = a ## h & b ## h)
766 #define OR64(d, a, b) (d ## l = a ## l | b ## l, d ## h = a ## h | b ## h)
767 #define NOT64(d, s) (d ## l = SPH_T32(~s ## l), d ## h = SPH_T32(~s ## h))
768 #define ROL64(d, v, n) ROL64_ ## n(d, v)
769 
770 #if SPH_KECCAK_INTERLEAVE
771 
/*
 * Interleaved rotations. A lane is split into x##l (even-indexed bits)
 * and x##h (odd-indexed bits), so a 64-bit left rotation becomes two
 * 32-bit rotations. For an odd count the two halves also swap roles.
 * The `tmp` local keeps each macro correct when d and v are the same
 * lane.
 */

/* Rotate left by 1: new l = h rotated by 1, new h = old l. */
772 #define ROL64_odd1(d, v) do { \
773  sph_u32 tmp; \
774  tmp = v ## l; \
775  d ## l = SPH_T32(v ## h << 1) | (v ## h >> 31); \
776  d ## h = tmp; \
777  } while (0)
778 
/* Rotate left by 63: new l = old h, new h = l rotated by 31. */
779 #define ROL64_odd63(d, v) do { \
780  sph_u32 tmp; \
781  tmp = SPH_T32(v ## l << 31) | (v ## l >> 1); \
782  d ## l = v ## h; \
783  d ## h = tmp; \
784  } while (0)
785 
/*
 * Rotate left by the odd count 2 * n - 1: new l = h rotated by n,
 * new h = l rotated by n - 1. Valid for n in 2..31; n = 1 and n = 32
 * would need a shift by 32 and are served by ROL64_odd1 / ROL64_odd63.
 */
786 #define ROL64_odd(d, v, n) do { \
787  sph_u32 tmp; \
788  tmp = SPH_T32(v ## l << (n - 1)) | (v ## l >> (33 - n)); \
789  d ## l = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \
790  d ## h = tmp; \
791  } while (0)
792 
/*
 * Rotate left by the even count 2 * n: both halves are rotated by n
 * independently. Valid for n in 1..31.
 */
793 #define ROL64_even(d, v, n) do { \
794  d ## l = SPH_T32(v ## l << n) | (v ## l >> (32 - n)); \
795  d ## h = SPH_T32(v ## h << n) | (v ## h >> (32 - n)); \
796  } while (0)
797 
/*
 * Interleaved mode: dispatch table used by ROL64(d, v, n), one macro per
 * rotation count 0..63. Even counts 2k map to ROL64_even(d, v, k), odd
 * counts 2k + 1 map to ROL64_odd(d, v, k + 1), and counts 1 and 63 use
 * the dedicated macros.
 *
 * ROL64_0 expands to nothing here, so it does not copy v into d. The
 * non-interleaved variant expands to the expression 0 instead.
 */
798 #define ROL64_0(d, v)
799 #define ROL64_1(d, v) ROL64_odd1(d, v)
800 #define ROL64_2(d, v) ROL64_even(d, v, 1)
801 #define ROL64_3(d, v) ROL64_odd( d, v, 2)
802 #define ROL64_4(d, v) ROL64_even(d, v, 2)
803 #define ROL64_5(d, v) ROL64_odd( d, v, 3)
804 #define ROL64_6(d, v) ROL64_even(d, v, 3)
805 #define ROL64_7(d, v) ROL64_odd( d, v, 4)
806 #define ROL64_8(d, v) ROL64_even(d, v, 4)
807 #define ROL64_9(d, v) ROL64_odd( d, v, 5)
808 #define ROL64_10(d, v) ROL64_even(d, v, 5)
809 #define ROL64_11(d, v) ROL64_odd( d, v, 6)
810 #define ROL64_12(d, v) ROL64_even(d, v, 6)
811 #define ROL64_13(d, v) ROL64_odd( d, v, 7)
812 #define ROL64_14(d, v) ROL64_even(d, v, 7)
813 #define ROL64_15(d, v) ROL64_odd( d, v, 8)
814 #define ROL64_16(d, v) ROL64_even(d, v, 8)
815 #define ROL64_17(d, v) ROL64_odd( d, v, 9)
816 #define ROL64_18(d, v) ROL64_even(d, v, 9)
817 #define ROL64_19(d, v) ROL64_odd( d, v, 10)
818 #define ROL64_20(d, v) ROL64_even(d, v, 10)
819 #define ROL64_21(d, v) ROL64_odd( d, v, 11)
820 #define ROL64_22(d, v) ROL64_even(d, v, 11)
821 #define ROL64_23(d, v) ROL64_odd( d, v, 12)
822 #define ROL64_24(d, v) ROL64_even(d, v, 12)
823 #define ROL64_25(d, v) ROL64_odd( d, v, 13)
824 #define ROL64_26(d, v) ROL64_even(d, v, 13)
825 #define ROL64_27(d, v) ROL64_odd( d, v, 14)
826 #define ROL64_28(d, v) ROL64_even(d, v, 14)
827 #define ROL64_29(d, v) ROL64_odd( d, v, 15)
828 #define ROL64_30(d, v) ROL64_even(d, v, 15)
829 #define ROL64_31(d, v) ROL64_odd( d, v, 16)
830 #define ROL64_32(d, v) ROL64_even(d, v, 16)
831 #define ROL64_33(d, v) ROL64_odd( d, v, 17)
832 #define ROL64_34(d, v) ROL64_even(d, v, 17)
833 #define ROL64_35(d, v) ROL64_odd( d, v, 18)
834 #define ROL64_36(d, v) ROL64_even(d, v, 18)
835 #define ROL64_37(d, v) ROL64_odd( d, v, 19)
836 #define ROL64_38(d, v) ROL64_even(d, v, 19)
837 #define ROL64_39(d, v) ROL64_odd( d, v, 20)
838 #define ROL64_40(d, v) ROL64_even(d, v, 20)
839 #define ROL64_41(d, v) ROL64_odd( d, v, 21)
840 #define ROL64_42(d, v) ROL64_even(d, v, 21)
841 #define ROL64_43(d, v) ROL64_odd( d, v, 22)
842 #define ROL64_44(d, v) ROL64_even(d, v, 22)
843 #define ROL64_45(d, v) ROL64_odd( d, v, 23)
844 #define ROL64_46(d, v) ROL64_even(d, v, 23)
845 #define ROL64_47(d, v) ROL64_odd( d, v, 24)
846 #define ROL64_48(d, v) ROL64_even(d, v, 24)
847 #define ROL64_49(d, v) ROL64_odd( d, v, 25)
848 #define ROL64_50(d, v) ROL64_even(d, v, 25)
849 #define ROL64_51(d, v) ROL64_odd( d, v, 26)
850 #define ROL64_52(d, v) ROL64_even(d, v, 26)
851 #define ROL64_53(d, v) ROL64_odd( d, v, 27)
852 #define ROL64_54(d, v) ROL64_even(d, v, 27)
853 #define ROL64_55(d, v) ROL64_odd( d, v, 28)
854 #define ROL64_56(d, v) ROL64_even(d, v, 28)
855 #define ROL64_57(d, v) ROL64_odd( d, v, 29)
856 #define ROL64_58(d, v) ROL64_even(d, v, 29)
857 #define ROL64_59(d, v) ROL64_odd( d, v, 30)
858 #define ROL64_60(d, v) ROL64_even(d, v, 30)
859 #define ROL64_61(d, v) ROL64_odd( d, v, 31)
860 #define ROL64_62(d, v) ROL64_even(d, v, 31)
861 #define ROL64_63(d, v) ROL64_odd63(d, v)
862 
863 #else
864 
/*
 * ROL64_small(d, v, n): rotate left by n a 64-bit value stored as two
 * 32-bit words named <v>l and <v>h, writing the result to <d>l and <d>h.
 * Each output word takes the shifted word of the same name (truncated by
 * SPH_T32) OR-ed with the top n bits of the other word. The new low word
 * is staged in tmp so that d and v may name the same variable. The
 * tables in this file only use n in 1..31.
 */
865 #define ROL64_small(d, v, n) do { \
866  sph_u32 tmp; \
867  tmp = SPH_T32(v ## l << n) | (v ## h >> (32 - n)); \
868  d ## h = SPH_T32(v ## h << n) | (v ## l >> (32 - n)); \
869  d ## l = tmp; \
870  } while (0)
871 
/*
 * Rotation counts 0..31 for the two-word representation. Counts 1..31
 * forward to ROL64_small with the same count. ROL64_0 expands to the
 * constant 0 instead of touching its arguments; RHO keeps its
 * rotate-by-0 call commented out.
 */
872 #define ROL64_0(d, v) 0
873 #define ROL64_1(d, v) ROL64_small(d, v, 1)
874 #define ROL64_2(d, v) ROL64_small(d, v, 2)
875 #define ROL64_3(d, v) ROL64_small(d, v, 3)
876 #define ROL64_4(d, v) ROL64_small(d, v, 4)
877 #define ROL64_5(d, v) ROL64_small(d, v, 5)
878 #define ROL64_6(d, v) ROL64_small(d, v, 6)
879 #define ROL64_7(d, v) ROL64_small(d, v, 7)
880 #define ROL64_8(d, v) ROL64_small(d, v, 8)
881 #define ROL64_9(d, v) ROL64_small(d, v, 9)
882 #define ROL64_10(d, v) ROL64_small(d, v, 10)
883 #define ROL64_11(d, v) ROL64_small(d, v, 11)
884 #define ROL64_12(d, v) ROL64_small(d, v, 12)
885 #define ROL64_13(d, v) ROL64_small(d, v, 13)
886 #define ROL64_14(d, v) ROL64_small(d, v, 14)
887 #define ROL64_15(d, v) ROL64_small(d, v, 15)
888 #define ROL64_16(d, v) ROL64_small(d, v, 16)
889 #define ROL64_17(d, v) ROL64_small(d, v, 17)
890 #define ROL64_18(d, v) ROL64_small(d, v, 18)
891 #define ROL64_19(d, v) ROL64_small(d, v, 19)
892 #define ROL64_20(d, v) ROL64_small(d, v, 20)
893 #define ROL64_21(d, v) ROL64_small(d, v, 21)
894 #define ROL64_22(d, v) ROL64_small(d, v, 22)
895 #define ROL64_23(d, v) ROL64_small(d, v, 23)
896 #define ROL64_24(d, v) ROL64_small(d, v, 24)
897 #define ROL64_25(d, v) ROL64_small(d, v, 25)
898 #define ROL64_26(d, v) ROL64_small(d, v, 26)
899 #define ROL64_27(d, v) ROL64_small(d, v, 27)
900 #define ROL64_28(d, v) ROL64_small(d, v, 28)
901 #define ROL64_29(d, v) ROL64_small(d, v, 29)
902 #define ROL64_30(d, v) ROL64_small(d, v, 30)
903 #define ROL64_31(d, v) ROL64_small(d, v, 31)
904 
/*
 * ROL64_32(d, v): rotation by exactly 32 bits, i.e. the two 32-bit words
 * trade places. tmp holds the old low word so d and v may be the same
 * variable.
 */
905 #define ROL64_32(d, v) do { \
906  sph_u32 tmp; \
907  tmp = v ## l; \
908  d ## l = v ## h; \
909  d ## h = tmp; \
910  } while (0)
911 
/*
 * ROL64_big(d, v, n): rotation by n + 32 bits. v is first rotated by n
 * into the temporary pair trl/trh with ROL64_small, then the two words
 * are stored swapped (low into <d>h, high into <d>l), which adds the
 * extra 32 bits.
 */
912 #define ROL64_big(d, v, n) do { \
913  sph_u32 trl, trh; \
914  ROL64_small(tr, v, n); \
915  d ## h = trl; \
916  d ## l = trh; \
917  } while (0)
918 
/*
 * Rotation counts 33..63 for the two-word representation: count c
 * forwards to ROL64_big with c - 32.
 */
919 #define ROL64_33(d, v) ROL64_big(d, v, 1)
920 #define ROL64_34(d, v) ROL64_big(d, v, 2)
921 #define ROL64_35(d, v) ROL64_big(d, v, 3)
922 #define ROL64_36(d, v) ROL64_big(d, v, 4)
923 #define ROL64_37(d, v) ROL64_big(d, v, 5)
924 #define ROL64_38(d, v) ROL64_big(d, v, 6)
925 #define ROL64_39(d, v) ROL64_big(d, v, 7)
926 #define ROL64_40(d, v) ROL64_big(d, v, 8)
927 #define ROL64_41(d, v) ROL64_big(d, v, 9)
928 #define ROL64_42(d, v) ROL64_big(d, v, 10)
929 #define ROL64_43(d, v) ROL64_big(d, v, 11)
930 #define ROL64_44(d, v) ROL64_big(d, v, 12)
931 #define ROL64_45(d, v) ROL64_big(d, v, 13)
932 #define ROL64_46(d, v) ROL64_big(d, v, 14)
933 #define ROL64_47(d, v) ROL64_big(d, v, 15)
934 #define ROL64_48(d, v) ROL64_big(d, v, 16)
935 #define ROL64_49(d, v) ROL64_big(d, v, 17)
936 #define ROL64_50(d, v) ROL64_big(d, v, 18)
937 #define ROL64_51(d, v) ROL64_big(d, v, 19)
938 #define ROL64_52(d, v) ROL64_big(d, v, 20)
939 #define ROL64_53(d, v) ROL64_big(d, v, 21)
940 #define ROL64_54(d, v) ROL64_big(d, v, 22)
941 #define ROL64_55(d, v) ROL64_big(d, v, 23)
942 #define ROL64_56(d, v) ROL64_big(d, v, 24)
943 #define ROL64_57(d, v) ROL64_big(d, v, 25)
944 #define ROL64_58(d, v) ROL64_big(d, v, 26)
945 #define ROL64_59(d, v) ROL64_big(d, v, 27)
946 #define ROL64_60(d, v) ROL64_big(d, v, 28)
947 #define ROL64_61(d, v) ROL64_big(d, v, 29)
948 #define ROL64_62(d, v) ROL64_big(d, v, 30)
949 #define ROL64_63(d, v) ROL64_big(d, v, 31)
950 
951 #endif
952 
/*
 * XOR64_IOTA(d, s, k): XOR the two-word value s with the constant k
 * (fields k.low and k.high) and store the result in d. Expands to a
 * single parenthesised comma expression.
 */
953 #define XOR64_IOTA(d, s, k) \
954  (d ## l = s ## l ^ k.low, d ## h = s ## h ^ k.high)
955 
956 #endif
957 
/*
 * TH_ELT(t, c0..c4, d0..d4): compute
 *     t = (c0 ^ c1 ^ c2 ^ c3 ^ c4) ^ ROL64(d0 ^ d1 ^ d2 ^ d3 ^ d4, 1)
 * using the scratch values tt0..tt3. The d-parity is rotated by one bit
 * before the c values are folded in.
 */
958 #define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) do { \
959  DECL64(tt0); \
960  DECL64(tt1); \
961  DECL64(tt2); \
962  DECL64(tt3); \
963  XOR64(tt0, d0, d1); \
964  XOR64(tt1, d2, d3); \
965  XOR64(tt0, tt0, d4); \
966  XOR64(tt0, tt0, tt1); \
967  ROL64(tt0, tt0, 1); \
968  XOR64(tt2, c0, c1); \
969  XOR64(tt3, c2, c3); \
970  XOR64(tt0, tt0, c4); \
971  XOR64(tt2, tt2, tt3); \
972  XOR64(t, tt0, tt2); \
973  } while (0)
974 
/*
 * THETA(b00..b44): theta step over 25 values named b<x><y>. For every
 * group x (the five values b<x>0..b<x>4) a mask t<x> is built by TH_ELT
 * from the parity of group x-1 and the one-bit-rotated parity of group
 * x+1 (indices modulo 5). All five masks are computed from the unmodified
 * inputs first; only afterwards is t<x> XOR-ed into each member of
 * group x.
 */
975 #define THETA(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
976  b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
977  b40, b41, b42, b43, b44) \
978  do { \
979  DECL64(t0); \
980  DECL64(t1); \
981  DECL64(t2); \
982  DECL64(t3); \
983  DECL64(t4); \
984  TH_ELT(t0, b40, b41, b42, b43, b44, b10, b11, b12, b13, b14); \
985  TH_ELT(t1, b00, b01, b02, b03, b04, b20, b21, b22, b23, b24); \
986  TH_ELT(t2, b10, b11, b12, b13, b14, b30, b31, b32, b33, b34); \
987  TH_ELT(t3, b20, b21, b22, b23, b24, b40, b41, b42, b43, b44); \
988  TH_ELT(t4, b30, b31, b32, b33, b34, b00, b01, b02, b03, b04); \
989  XOR64(b00, b00, t0); \
990  XOR64(b01, b01, t0); \
991  XOR64(b02, b02, t0); \
992  XOR64(b03, b03, t0); \
993  XOR64(b04, b04, t0); \
994  XOR64(b10, b10, t1); \
995  XOR64(b11, b11, t1); \
996  XOR64(b12, b12, t1); \
997  XOR64(b13, b13, t1); \
998  XOR64(b14, b14, t1); \
999  XOR64(b20, b20, t2); \
1000  XOR64(b21, b21, t2); \
1001  XOR64(b22, b22, t2); \
1002  XOR64(b23, b23, t2); \
1003  XOR64(b24, b24, t2); \
1004  XOR64(b30, b30, t3); \
1005  XOR64(b31, b31, t3); \
1006  XOR64(b32, b32, t3); \
1007  XOR64(b33, b33, t3); \
1008  XOR64(b34, b34, t3); \
1009  XOR64(b40, b40, t4); \
1010  XOR64(b41, b41, t4); \
1011  XOR64(b42, b42, t4); \
1012  XOR64(b43, b43, t4); \
1013  XOR64(b44, b44, t4); \
1014  } while (0)
1015 
/*
 * RHO(b00..b44): rotate each of the 25 values in place by its own fixed
 * bit count. The count for b00 is 0, so that call is left commented out.
 */
1016 #define RHO(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
1017  b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
1018  b40, b41, b42, b43, b44) \
1019  do { \
1020  /* ROL64(b00, b00, 0); */ \
1021  ROL64(b01, b01, 36); \
1022  ROL64(b02, b02, 3); \
1023  ROL64(b03, b03, 41); \
1024  ROL64(b04, b04, 18); \
1025  ROL64(b10, b10, 1); \
1026  ROL64(b11, b11, 44); \
1027  ROL64(b12, b12, 10); \
1028  ROL64(b13, b13, 45); \
1029  ROL64(b14, b14, 2); \
1030  ROL64(b20, b20, 62); \
1031  ROL64(b21, b21, 6); \
1032  ROL64(b22, b22, 43); \
1033  ROL64(b23, b23, 15); \
1034  ROL64(b24, b24, 61); \
1035  ROL64(b30, b30, 28); \
1036  ROL64(b31, b31, 55); \
1037  ROL64(b32, b32, 25); \
1038  ROL64(b33, b33, 21); \
1039  ROL64(b34, b34, 56); \
1040  ROL64(b40, b40, 27); \
1041  ROL64(b41, b41, 20); \
1042  ROL64(b42, b42, 39); \
1043  ROL64(b43, b43, 8); \
1044  ROL64(b44, b44, 14); \
1045  } while (0)
1046 
1047 /*
1048  * The KHI macro integrates the "lane complement" optimization. On input,
1049  * some words are complemented:
1050  * a00 a01 a02 a04 a13 a20 a21 a22 a30 a33 a34 a43
1051  * On output, the following words are complemented:
1052  * a04 a10 a20 a22 a23 a31
1053  *
1054  * The (implicit) permutation and the theta expansion will bring back
1055  * the input mask for the next round.
1056  */
1057 
/* KHI_XO(d, a, b, c): d = a ^ (b | c), using the scratch value kt. */
1058 #define KHI_XO(d, a, b, c) do { \
1059  DECL64(kt); \
1060  OR64(kt, b, c); \
1061  XOR64(d, a, kt); \
1062  } while (0)
1063 
/* KHI_XA(d, a, b, c): d = a ^ (b & c), using the scratch value kt. */
1064 #define KHI_XA(d, a, b, c) do { \
1065  DECL64(kt); \
1066  AND64(kt, b, c); \
1067  XOR64(d, a, kt); \
1068  } while (0)
1069 
/*
 * KHI(b00..b44): khi step, done in five independent stanzas, one per
 * second index y. Each stanza reads b0y..b4y, derives five new values
 * c0..c4 with KHI_XO / KHI_XA (bnn holds the complement of one input
 * where a stanza needs it), and only then copies c0..c4 back, so every
 * combination sees the values from before the stanza. The per-position
 * choice between the OR and AND forms and the placement of bnn differ
 * from stanza to stanza; the preceding comment attributes this to the
 * "lane complement" optimization.
 */
1070 #define KHI(b00, b01, b02, b03, b04, b10, b11, b12, b13, b14, \
1071  b20, b21, b22, b23, b24, b30, b31, b32, b33, b34, \
1072  b40, b41, b42, b43, b44) \
1073  do { \
1074  DECL64(c0); \
1075  DECL64(c1); \
1076  DECL64(c2); \
1077  DECL64(c3); \
1078  DECL64(c4); \
1079  DECL64(bnn); \
1080  NOT64(bnn, b20); \
1081  KHI_XO(c0, b00, b10, b20); \
1082  KHI_XO(c1, b10, bnn, b30); \
1083  KHI_XA(c2, b20, b30, b40); \
1084  KHI_XO(c3, b30, b40, b00); \
1085  KHI_XA(c4, b40, b00, b10); \
1086  MOV64(b00, c0); \
1087  MOV64(b10, c1); \
1088  MOV64(b20, c2); \
1089  MOV64(b30, c3); \
1090  MOV64(b40, c4); \
1091  NOT64(bnn, b41); \
1092  KHI_XO(c0, b01, b11, b21); \
1093  KHI_XA(c1, b11, b21, b31); \
1094  KHI_XO(c2, b21, b31, bnn); \
1095  KHI_XO(c3, b31, b41, b01); \
1096  KHI_XA(c4, b41, b01, b11); \
1097  MOV64(b01, c0); \
1098  MOV64(b11, c1); \
1099  MOV64(b21, c2); \
1100  MOV64(b31, c3); \
1101  MOV64(b41, c4); \
1102  NOT64(bnn, b32); \
1103  KHI_XO(c0, b02, b12, b22); \
1104  KHI_XA(c1, b12, b22, b32); \
1105  KHI_XA(c2, b22, bnn, b42); \
1106  KHI_XO(c3, bnn, b42, b02); \
1107  KHI_XA(c4, b42, b02, b12); \
1108  MOV64(b02, c0); \
1109  MOV64(b12, c1); \
1110  MOV64(b22, c2); \
1111  MOV64(b32, c3); \
1112  MOV64(b42, c4); \
1113  NOT64(bnn, b33); \
1114  KHI_XA(c0, b03, b13, b23); \
1115  KHI_XO(c1, b13, b23, b33); \
1116  KHI_XO(c2, b23, bnn, b43); \
1117  KHI_XA(c3, bnn, b43, b03); \
1118  KHI_XO(c4, b43, b03, b13); \
1119  MOV64(b03, c0); \
1120  MOV64(b13, c1); \
1121  MOV64(b23, c2); \
1122  MOV64(b33, c3); \
1123  MOV64(b43, c4); \
1124  NOT64(bnn, b14); \
1125  KHI_XA(c0, b04, bnn, b24); \
1126  KHI_XO(c1, bnn, b24, b34); \
1127  KHI_XA(c2, b24, b34, b44); \
1128  KHI_XO(c3, b34, b44, b04); \
1129  KHI_XA(c4, b44, b04, b14); \
1130  MOV64(b04, c0); \
1131  MOV64(b14, c1); \
1132  MOV64(b24, c2); \
1133  MOV64(b34, c3); \
1134  MOV64(b44, c4); \
1135  } while (0)
1136 
/* IOTA(r): XOR the round constant r into the state variable a00. */
1137 #define IOTA(r) XOR64_IOTA(a00, a00, r)
1138 
/*
 * P0..P23: 24 argument lists, each naming the 25 state variables
 * a00..a44 in a different order; a00 is always first. KF_ELT pastes a
 * round index onto "P" to choose the ordering handed to THETA and RHO
 * (P##r) and to KHI (P##s).
 * NOTE(review): presumably P<i+1> is P<i> with the round's lane
 * permutation applied, so that consecutive unrolled rounds need no data
 * movement -- confirm against the Keccak specification.
 */
1139 #define P0 a00, a01, a02, a03, a04, a10, a11, a12, a13, a14, a20, a21, \
1140  a22, a23, a24, a30, a31, a32, a33, a34, a40, a41, a42, a43, a44
1141 #define P1 a00, a30, a10, a40, a20, a11, a41, a21, a01, a31, a22, a02, \
1142  a32, a12, a42, a33, a13, a43, a23, a03, a44, a24, a04, a34, a14
1143 #define P2 a00, a33, a11, a44, a22, a41, a24, a02, a30, a13, a32, a10, \
1144  a43, a21, a04, a23, a01, a34, a12, a40, a14, a42, a20, a03, a31
1145 #define P3 a00, a23, a41, a14, a32, a24, a42, a10, a33, a01, a43, a11, \
1146  a34, a02, a20, a12, a30, a03, a21, a44, a31, a04, a22, a40, a13
1147 #define P4 a00, a12, a24, a31, a43, a42, a04, a11, a23, a30, a34, a41, \
1148  a03, a10, a22, a21, a33, a40, a02, a14, a13, a20, a32, a44, a01
1149 #define P5 a00, a21, a42, a13, a34, a04, a20, a41, a12, a33, a03, a24, \
1150  a40, a11, a32, a02, a23, a44, a10, a31, a01, a22, a43, a14, a30
1151 #define P6 a00, a02, a04, a01, a03, a20, a22, a24, a21, a23, a40, a42, \
1152  a44, a41, a43, a10, a12, a14, a11, a13, a30, a32, a34, a31, a33
1153 #define P7 a00, a10, a20, a30, a40, a22, a32, a42, a02, a12, a44, a04, \
1154  a14, a24, a34, a11, a21, a31, a41, a01, a33, a43, a03, a13, a23
1155 #define P8 a00, a11, a22, a33, a44, a32, a43, a04, a10, a21, a14, a20, \
1156  a31, a42, a03, a41, a02, a13, a24, a30, a23, a34, a40, a01, a12
1157 #define P9 a00, a41, a32, a23, a14, a43, a34, a20, a11, a02, a31, a22, \
1158  a13, a04, a40, a24, a10, a01, a42, a33, a12, a03, a44, a30, a21
1159 #define P10 a00, a24, a43, a12, a31, a34, a03, a22, a41, a10, a13, a32, \
1160  a01, a20, a44, a42, a11, a30, a04, a23, a21, a40, a14, a33, a02
1161 #define P11 a00, a42, a34, a21, a13, a03, a40, a32, a24, a11, a01, a43, \
1162  a30, a22, a14, a04, a41, a33, a20, a12, a02, a44, a31, a23, a10
1163 #define P12 a00, a04, a03, a02, a01, a40, a44, a43, a42, a41, a30, a34, \
1164  a33, a32, a31, a20, a24, a23, a22, a21, a10, a14, a13, a12, a11
1165 #define P13 a00, a20, a40, a10, a30, a44, a14, a34, a04, a24, a33, a03, \
1166  a23, a43, a13, a22, a42, a12, a32, a02, a11, a31, a01, a21, a41
1167 #define P14 a00, a22, a44, a11, a33, a14, a31, a03, a20, a42, a23, a40, \
1168  a12, a34, a01, a32, a04, a21, a43, a10, a41, a13, a30, a02, a24
1169 #define P15 a00, a32, a14, a41, a23, a31, a13, a40, a22, a04, a12, a44, \
1170  a21, a03, a30, a43, a20, a02, a34, a11, a24, a01, a33, a10, a42
1171 #define P16 a00, a43, a31, a24, a12, a13, a01, a44, a32, a20, a21, a14, \
1172  a02, a40, a33, a34, a22, a10, a03, a41, a42, a30, a23, a11, a04
1173 #define P17 a00, a34, a13, a42, a21, a01, a30, a14, a43, a22, a02, a31, \
1174  a10, a44, a23, a03, a32, a11, a40, a24, a04, a33, a12, a41, a20
1175 #define P18 a00, a03, a01, a04, a02, a30, a33, a31, a34, a32, a10, a13, \
1176  a11, a14, a12, a40, a43, a41, a44, a42, a20, a23, a21, a24, a22
1177 #define P19 a00, a40, a30, a20, a10, a33, a23, a13, a03, a43, a11, a01, \
1178  a41, a31, a21, a44, a34, a24, a14, a04, a22, a12, a02, a42, a32
1179 #define P20 a00, a44, a33, a22, a11, a23, a12, a01, a40, a34, a41, a30, \
1180  a24, a13, a02, a14, a03, a42, a31, a20, a32, a21, a10, a04, a43
1181 #define P21 a00, a14, a23, a32, a41, a12, a21, a30, a44, a03, a24, a33, \
1182  a42, a01, a10, a31, a40, a04, a13, a22, a43, a02, a11, a20, a34
1183 #define P22 a00, a31, a12, a43, a24, a21, a02, a33, a14, a40, a42, a23, \
1184  a04, a30, a11, a13, a44, a20, a01, a32, a34, a10, a41, a22, a03
1185 #define P23 a00, a13, a21, a34, a42, a02, a10, a23, a31, a44, a04, a12, \
1186  a20, a33, a41, a01, a14, a22, a30, a43, a03, a11, a24, a32, a40
1187 
/*
 * P1_TO_P0: move the values of the 24 state variables other than a00
 * along one 24-element cycle (a01 <- a30 <- a33 <- ... <- a13 <- old
 * a01), with t holding the first value. a00 is not touched. Invoked
 * after one KF_ELT(0, 1, ...) round in the UNROLL == 1 loop.
 * NOTE(review): presumably this restores the P0 naming after a round
 * that left the state in P1 order -- confirm.
 */
1188 #define P1_TO_P0 do { \
1189  DECL64(t); \
1190  MOV64(t, a01); \
1191  MOV64(a01, a30); \
1192  MOV64(a30, a33); \
1193  MOV64(a33, a23); \
1194  MOV64(a23, a12); \
1195  MOV64(a12, a21); \
1196  MOV64(a21, a02); \
1197  MOV64(a02, a10); \
1198  MOV64(a10, a11); \
1199  MOV64(a11, a41); \
1200  MOV64(a41, a24); \
1201  MOV64(a24, a42); \
1202  MOV64(a42, a04); \
1203  MOV64(a04, a20); \
1204  MOV64(a20, a22); \
1205  MOV64(a22, a32); \
1206  MOV64(a32, a43); \
1207  MOV64(a43, a34); \
1208  MOV64(a34, a03); \
1209  MOV64(a03, a40); \
1210  MOV64(a40, a44); \
1211  MOV64(a44, a14); \
1212  MOV64(a14, a31); \
1213  MOV64(a31, a13); \
1214  MOV64(a13, t); \
1215  } while (0)
1216 
/*
 * P2_TO_P0: move state values along two independent 12-element cycles
 * (one starting at a01, one at a10), reusing t as the saved first value
 * of each cycle. a00 is not touched. Invoked after two rounds in the
 * UNROLL == 2 loop.
 * NOTE(review): presumably restores the P0 naming from P2 order --
 * confirm.
 */
1217 #define P2_TO_P0 do { \
1218  DECL64(t); \
1219  MOV64(t, a01); \
1220  MOV64(a01, a33); \
1221  MOV64(a33, a12); \
1222  MOV64(a12, a02); \
1223  MOV64(a02, a11); \
1224  MOV64(a11, a24); \
1225  MOV64(a24, a04); \
1226  MOV64(a04, a22); \
1227  MOV64(a22, a43); \
1228  MOV64(a43, a03); \
1229  MOV64(a03, a44); \
1230  MOV64(a44, a31); \
1231  MOV64(a31, t); \
1232  MOV64(t, a10); \
1233  MOV64(a10, a41); \
1234  MOV64(a41, a42); \
1235  MOV64(a42, a20); \
1236  MOV64(a20, a32); \
1237  MOV64(a32, a34); \
1238  MOV64(a34, a40); \
1239  MOV64(a40, a14); \
1240  MOV64(a14, a13); \
1241  MOV64(a13, a30); \
1242  MOV64(a30, a23); \
1243  MOV64(a23, a21); \
1244  MOV64(a21, t); \
1245  } while (0)
1246 
/*
 * P4_TO_P0: move state values along four independent 6-element cycles
 * (starting at a01, a02, a10 and a14), reusing t as the saved first
 * value of each cycle. a00 is not touched. Invoked after four rounds in
 * the UNROLL == 4 loop.
 * NOTE(review): presumably restores the P0 naming from P4 order --
 * confirm.
 */
1247 #define P4_TO_P0 do { \
1248  DECL64(t); \
1249  MOV64(t, a01); \
1250  MOV64(a01, a12); \
1251  MOV64(a12, a11); \
1252  MOV64(a11, a04); \
1253  MOV64(a04, a43); \
1254  MOV64(a43, a44); \
1255  MOV64(a44, t); \
1256  MOV64(t, a02); \
1257  MOV64(a02, a24); \
1258  MOV64(a24, a22); \
1259  MOV64(a22, a03); \
1260  MOV64(a03, a31); \
1261  MOV64(a31, a33); \
1262  MOV64(a33, t); \
1263  MOV64(t, a10); \
1264  MOV64(a10, a42); \
1265  MOV64(a42, a32); \
1266  MOV64(a32, a40); \
1267  MOV64(a40, a13); \
1268  MOV64(a13, a23); \
1269  MOV64(a23, t); \
1270  MOV64(t, a14); \
1271  MOV64(a14, a30); \
1272  MOV64(a30, a21); \
1273  MOV64(a21, a41); \
1274  MOV64(a41, a20); \
1275  MOV64(a20, a34); \
1276  MOV64(a34, t); \
1277  } while (0)
1278 
/*
 * P6_TO_P0: move state values along six independent 4-element cycles
 * (starting at a01, a10, a11, a12, a13 and a14), reusing t as the saved
 * first value of each cycle. a00 is not touched. Invoked after six
 * rounds in the UNROLL == 6 loop.
 * NOTE(review): presumably restores the P0 naming from P6 order --
 * confirm.
 */
1279 #define P6_TO_P0 do { \
1280  DECL64(t); \
1281  MOV64(t, a01); \
1282  MOV64(a01, a02); \
1283  MOV64(a02, a04); \
1284  MOV64(a04, a03); \
1285  MOV64(a03, t); \
1286  MOV64(t, a10); \
1287  MOV64(a10, a20); \
1288  MOV64(a20, a40); \
1289  MOV64(a40, a30); \
1290  MOV64(a30, t); \
1291  MOV64(t, a11); \
1292  MOV64(a11, a22); \
1293  MOV64(a22, a44); \
1294  MOV64(a44, a33); \
1295  MOV64(a33, t); \
1296  MOV64(t, a12); \
1297  MOV64(a12, a24); \
1298  MOV64(a24, a43); \
1299  MOV64(a43, a31); \
1300  MOV64(a31, t); \
1301  MOV64(t, a13); \
1302  MOV64(a13, a21); \
1303  MOV64(a21, a42); \
1304  MOV64(a42, a34); \
1305  MOV64(a34, t); \
1306  MOV64(t, a14); \
1307  MOV64(a14, a23); \
1308  MOV64(a23, a41); \
1309  MOV64(a41, a32); \
1310  MOV64(a32, t); \
1311  } while (0)
1312 
/*
 * P8_TO_P0: move state values along eight independent 3-element cycles,
 * reusing t as the saved first value of each cycle. a00 is not touched.
 * Invoked after eight rounds in the UNROLL == 8 loop.
 * NOTE(review): presumably restores the P0 naming from P8 order --
 * confirm.
 */
1313 #define P8_TO_P0 do { \
1314  DECL64(t); \
1315  MOV64(t, a01); \
1316  MOV64(a01, a11); \
1317  MOV64(a11, a43); \
1318  MOV64(a43, t); \
1319  MOV64(t, a02); \
1320  MOV64(a02, a22); \
1321  MOV64(a22, a31); \
1322  MOV64(a31, t); \
1323  MOV64(t, a03); \
1324  MOV64(a03, a33); \
1325  MOV64(a33, a24); \
1326  MOV64(a24, t); \
1327  MOV64(t, a04); \
1328  MOV64(a04, a44); \
1329  MOV64(a44, a12); \
1330  MOV64(a12, t); \
1331  MOV64(t, a10); \
1332  MOV64(a10, a32); \
1333  MOV64(a32, a13); \
1334  MOV64(a13, t); \
1335  MOV64(t, a14); \
1336  MOV64(a14, a21); \
1337  MOV64(a21, a20); \
1338  MOV64(a20, t); \
1339  MOV64(t, a23); \
1340  MOV64(a23, a42); \
1341  MOV64(a42, a40); \
1342  MOV64(a40, t); \
1343  MOV64(t, a30); \
1344  MOV64(a30, a41); \
1345  MOV64(a41, a34); \
1346  MOV64(a34, t); \
1347  } while (0)
1348 
/*
 * P12_TO_P0: exchange twelve pairs of state variables (a01/a04, a02/a03,
 * a10/a40, ...), using t as the swap temporary. a00 is not touched.
 * Invoked after twelve rounds in the UNROLL == 12 loop.
 * NOTE(review): presumably restores the P0 naming from P12 order --
 * confirm.
 */
1349 #define P12_TO_P0 do { \
1350  DECL64(t); \
1351  MOV64(t, a01); \
1352  MOV64(a01, a04); \
1353  MOV64(a04, t); \
1354  MOV64(t, a02); \
1355  MOV64(a02, a03); \
1356  MOV64(a03, t); \
1357  MOV64(t, a10); \
1358  MOV64(a10, a40); \
1359  MOV64(a40, t); \
1360  MOV64(t, a11); \
1361  MOV64(a11, a44); \
1362  MOV64(a44, t); \
1363  MOV64(t, a12); \
1364  MOV64(a12, a43); \
1365  MOV64(a43, t); \
1366  MOV64(t, a13); \
1367  MOV64(a13, a42); \
1368  MOV64(a42, t); \
1369  MOV64(t, a14); \
1370  MOV64(a14, a41); \
1371  MOV64(a41, t); \
1372  MOV64(t, a20); \
1373  MOV64(a20, a30); \
1374  MOV64(a30, t); \
1375  MOV64(t, a21); \
1376  MOV64(a21, a34); \
1377  MOV64(a34, t); \
1378  MOV64(t, a22); \
1379  MOV64(a22, a33); \
1380  MOV64(a33, t); \
1381  MOV64(t, a23); \
1382  MOV64(a23, a32); \
1383  MOV64(a32, t); \
1384  MOV64(t, a24); \
1385  MOV64(a24, a31); \
1386  MOV64(a31, t); \
1387  } while (0)
1388 
/*
 * KF_ELT(r, s, k): one round. THETA and RHO are applied with the
 * variable ordering P<r>, KHI with the ordering P<s>, then IOTA XORs in
 * the constant k.
 *
 * THETA/RHO/KHI are followed by LPAR rather than a literal "(" so that
 * they are not treated as macro invocations until P<r>/P<s> has been
 * replaced by its 25 names; the invocation is then recognised on a later
 * rescan (this relies on the round being expanded as a macro argument,
 * see DO()).
 */
1389 #define LPAR (
1390 #define RPAR )
1391 
1392 #define KF_ELT(r, s, k) do { \
1393  THETA LPAR P ## r RPAR; \
1394  RHO LPAR P ## r RPAR; \
1395  KHI LPAR P ## s RPAR; \
1396  IOTA(k); \
1397  } while (0)
1398 
/*
 * DO(x) is the identity macro. KECCAK_F_1600 passes KECCAK_F_1600_
 * through it so that the body is fully expanded as an argument and then
 * rescanned, which is what lets the deferred "THETA LPAR ... RPAR" forms
 * inside KF_ELT become real macro calls.
 */
1399 #define DO(x) x
1400 
1401 #define KECCAK_F_1600 DO(KECCAK_F_1600_)
1402 
/*
 * KECCAK_F_1600_: the 24 rounds, shaped by SPH_KECCAK_UNROLL.
 *
 *   1, 2, 4, 6, 8, 12  a loop over j with that many KF_ELT rounds per
 *                      iteration (constants RC[j + i]), each iteration
 *                      ending with the matching P<n>_TO_P0 macro;
 *   0                  all 24 rounds written out with constants
 *                      RC[0]..RC[23]; the last round is KF_ELT(23, 0, ..)
 *                      and no P<n>_TO_P0 step is needed;
 *   anything else      compilation stops with #error.
 */
1403 #if SPH_KECCAK_UNROLL == 1
1404 
1405 #define KECCAK_F_1600_ do { \
1406  int j; \
1407  for (j = 0; j < 24; j ++) { \
1408  KF_ELT( 0, 1, RC[j + 0]); \
1409  P1_TO_P0; \
1410  } \
1411  } while (0)
1412 
1413 #elif SPH_KECCAK_UNROLL == 2
1414 
1415 #define KECCAK_F_1600_ do { \
1416  int j; \
1417  for (j = 0; j < 24; j += 2) { \
1418  KF_ELT( 0, 1, RC[j + 0]); \
1419  KF_ELT( 1, 2, RC[j + 1]); \
1420  P2_TO_P0; \
1421  } \
1422  } while (0)
1423 
1424 #elif SPH_KECCAK_UNROLL == 4
1425 
1426 #define KECCAK_F_1600_ do { \
1427  int j; \
1428  for (j = 0; j < 24; j += 4) { \
1429  KF_ELT( 0, 1, RC[j + 0]); \
1430  KF_ELT( 1, 2, RC[j + 1]); \
1431  KF_ELT( 2, 3, RC[j + 2]); \
1432  KF_ELT( 3, 4, RC[j + 3]); \
1433  P4_TO_P0; \
1434  } \
1435  } while (0)
1436 
1437 #elif SPH_KECCAK_UNROLL == 6
1438 
1439 #define KECCAK_F_1600_ do { \
1440  int j; \
1441  for (j = 0; j < 24; j += 6) { \
1442  KF_ELT( 0, 1, RC[j + 0]); \
1443  KF_ELT( 1, 2, RC[j + 1]); \
1444  KF_ELT( 2, 3, RC[j + 2]); \
1445  KF_ELT( 3, 4, RC[j + 3]); \
1446  KF_ELT( 4, 5, RC[j + 4]); \
1447  KF_ELT( 5, 6, RC[j + 5]); \
1448  P6_TO_P0; \
1449  } \
1450  } while (0)
1451 
1452 #elif SPH_KECCAK_UNROLL == 8
1453 
1454 #define KECCAK_F_1600_ do { \
1455  int j; \
1456  for (j = 0; j < 24; j += 8) { \
1457  KF_ELT( 0, 1, RC[j + 0]); \
1458  KF_ELT( 1, 2, RC[j + 1]); \
1459  KF_ELT( 2, 3, RC[j + 2]); \
1460  KF_ELT( 3, 4, RC[j + 3]); \
1461  KF_ELT( 4, 5, RC[j + 4]); \
1462  KF_ELT( 5, 6, RC[j + 5]); \
1463  KF_ELT( 6, 7, RC[j + 6]); \
1464  KF_ELT( 7, 8, RC[j + 7]); \
1465  P8_TO_P0; \
1466  } \
1467  } while (0)
1468 
1469 #elif SPH_KECCAK_UNROLL == 12
1470 
1471 #define KECCAK_F_1600_ do { \
1472  int j; \
1473  for (j = 0; j < 24; j += 12) { \
1474  KF_ELT( 0, 1, RC[j + 0]); \
1475  KF_ELT( 1, 2, RC[j + 1]); \
1476  KF_ELT( 2, 3, RC[j + 2]); \
1477  KF_ELT( 3, 4, RC[j + 3]); \
1478  KF_ELT( 4, 5, RC[j + 4]); \
1479  KF_ELT( 5, 6, RC[j + 5]); \
1480  KF_ELT( 6, 7, RC[j + 6]); \
1481  KF_ELT( 7, 8, RC[j + 7]); \
1482  KF_ELT( 8, 9, RC[j + 8]); \
1483  KF_ELT( 9, 10, RC[j + 9]); \
1484  KF_ELT(10, 11, RC[j + 10]); \
1485  KF_ELT(11, 12, RC[j + 11]); \
1486  P12_TO_P0; \
1487  } \
1488  } while (0)
1489 
1490 #elif SPH_KECCAK_UNROLL == 0
1491 
1492 #define KECCAK_F_1600_ do { \
1493  KF_ELT( 0, 1, RC[ 0]); \
1494  KF_ELT( 1, 2, RC[ 1]); \
1495  KF_ELT( 2, 3, RC[ 2]); \
1496  KF_ELT( 3, 4, RC[ 3]); \
1497  KF_ELT( 4, 5, RC[ 4]); \
1498  KF_ELT( 5, 6, RC[ 5]); \
1499  KF_ELT( 6, 7, RC[ 6]); \
1500  KF_ELT( 7, 8, RC[ 7]); \
1501  KF_ELT( 8, 9, RC[ 8]); \
1502  KF_ELT( 9, 10, RC[ 9]); \
1503  KF_ELT(10, 11, RC[10]); \
1504  KF_ELT(11, 12, RC[11]); \
1505  KF_ELT(12, 13, RC[12]); \
1506  KF_ELT(13, 14, RC[13]); \
1507  KF_ELT(14, 15, RC[14]); \
1508  KF_ELT(15, 16, RC[15]); \
1509  KF_ELT(16, 17, RC[16]); \
1510  KF_ELT(17, 18, RC[17]); \
1511  KF_ELT(18, 19, RC[18]); \
1512  KF_ELT(19, 20, RC[19]); \
1513  KF_ELT(20, 21, RC[20]); \
1514  KF_ELT(21, 22, RC[21]); \
1515  KF_ELT(22, 23, RC[22]); \
1516  KF_ELT(23, 0, RC[23]); \
1517  } while (0)
1518 
1519 #else
1520 
1521 #error Unimplemented unroll count for Keccak.
1522 
1523 #endif
1524 
1525 static void
1526 keccak_init(sph_keccak_context *kc, unsigned out_size)
1527 {
1528  int i;
1529 
1530 #if SPH_KECCAK_64
1531  for (i = 0; i < 25; i ++)
1532  kc->u.wide[i] = 0;
1533  /*
1534  * Initialization for the "lane complement".
1535  */
1536  kc->u.wide[ 1] = SPH_C64(0xFFFFFFFFFFFFFFFF);
1537  kc->u.wide[ 2] = SPH_C64(0xFFFFFFFFFFFFFFFF);
1538  kc->u.wide[ 8] = SPH_C64(0xFFFFFFFFFFFFFFFF);
1539  kc->u.wide[12] = SPH_C64(0xFFFFFFFFFFFFFFFF);
1540  kc->u.wide[17] = SPH_C64(0xFFFFFFFFFFFFFFFF);
1541  kc->u.wide[20] = SPH_C64(0xFFFFFFFFFFFFFFFF);
1542 #else
1543 
1544  for (i = 0; i < 50; i ++)
1545  kc->u.narrow[i] = 0;
1546  /*
1547  * Initialization for the "lane complement".
1548  * Note: since we set to all-one full 64-bit words,
1549  * interleaving (if applicable) is a no-op.
1550  */
1551  kc->u.narrow[ 2] = SPH_C32(0xFFFFFFFF);
1552  kc->u.narrow[ 3] = SPH_C32(0xFFFFFFFF);
1553  kc->u.narrow[ 4] = SPH_C32(0xFFFFFFFF);
1554  kc->u.narrow[ 5] = SPH_C32(0xFFFFFFFF);
1555  kc->u.narrow[16] = SPH_C32(0xFFFFFFFF);
1556  kc->u.narrow[17] = SPH_C32(0xFFFFFFFF);
1557  kc->u.narrow[24] = SPH_C32(0xFFFFFFFF);
1558  kc->u.narrow[25] = SPH_C32(0xFFFFFFFF);
1559  kc->u.narrow[34] = SPH_C32(0xFFFFFFFF);
1560  kc->u.narrow[35] = SPH_C32(0xFFFFFFFF);
1561  kc->u.narrow[40] = SPH_C32(0xFFFFFFFF);
1562  kc->u.narrow[41] = SPH_C32(0xFFFFFFFF);
1563 #endif
1564  kc->ptr = 0;
1565  kc->lim = 200 - (out_size >> 2);
1566 }
1567 
1568 static void
1569 keccak_core(sph_keccak_context *kc, const void *data, size_t len, size_t lim)
1570 {
1571  unsigned char *buf;
1572  size_t ptr;
1573  DECL_STATE
1574 
1575  buf = kc->buf;
1576  ptr = kc->ptr;
1577 
1578  if (len < (lim - ptr)) {
1579  memcpy(buf + ptr, data, len);
1580  kc->ptr = ptr + len;
1581  return;
1582  }
1583 
1584  READ_STATE(kc);
1585  while (len > 0) {
1586  size_t clen;
1587 
1588  clen = (lim - ptr);
1589  if (clen > len)
1590  clen = len;
1591  memcpy(buf + ptr, data, clen);
1592  ptr += clen;
1593  data = (const unsigned char *)data + clen;
1594  len -= clen;
1595  if (ptr == lim) {
1596  INPUT_BUF(lim);
1597  KECCAK_F_1600;
1598  ptr = 0;
1599  }
1600  }
1601  WRITE_STATE(kc);
1602  kc->ptr = ptr;
1603 }
1604 
/*
 * DEFCLOSE(d, lim) defines the static function
 *     keccak_close<d>(kc, ub, n, dst)
 * which finishes a computation: d is the digest length in bytes, lim the
 * block length in bytes. One variant exists for the 64-bit state layout
 * (u.wide) and one for the 32-bit layout (u.narrow).
 *
 * Steps performed by the generated function:
 *  - eb = (0x100 | (ub & 0xFF)) >> (8 - n): the top n bits of ub moved to
 *    the low end, followed by a single 1 bit at position n. The shift
 *    count 8 - n assumes n is in 0..7.
 *  - The padding is built in u.tmp so that it ends on a block boundary
 *    with a final byte carrying 0x80. If exactly one byte is free in the
 *    current block: with n == 7 the 1 bit of eb already uses bit 7, so
 *    eb is followed by a whole extra block of zeros ending in 0x80;
 *    otherwise the single byte eb | 0x80 is enough. In all other cases
 *    eb is followed by zeros up to the last byte of the block, which is
 *    0x80.
 *  - The padding goes through keccak_core(), the complemented lanes are
 *    inverted back, (32-bit variant only) each word pair is passed to
 *    UNINTERLEAVE, and the leading state words are encoded little-endian
 *    into u.tmp, from which d bytes are copied to dst.
 *  - The context is re-initialised with keccak_init(kc, d * 8).
 */
1605 #if SPH_KECCAK_64
1606 
1607 #define DEFCLOSE(d, lim) \
1608  static void keccak_close ## d( \
1609  sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \
1610  { \
1611  unsigned eb; \
1612  union { \
1613  unsigned char tmp[lim + 1]; \
1614  sph_u64 dummy; /* for alignment */ \
1615  } u; \
1616  size_t j; \
1617  \
1618  eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
1619  if (kc->ptr == (lim - 1)) { \
1620  if (n == 7) { \
1621  u.tmp[0] = eb; \
1622  memset(u.tmp + 1, 0, lim - 1); \
1623  u.tmp[lim] = 0x80; \
1624  j = 1 + lim; \
1625  } else { \
1626  u.tmp[0] = eb | 0x80; \
1627  j = 1; \
1628  } \
1629  } else { \
1630  j = lim - kc->ptr; \
1631  u.tmp[0] = eb; \
1632  memset(u.tmp + 1, 0, j - 2); \
1633  u.tmp[j - 1] = 0x80; \
1634  } \
1635  keccak_core(kc, u.tmp, j, lim); \
1636  /* Finalize the "lane complement" */ \
1637  kc->u.wide[ 1] = ~kc->u.wide[ 1]; \
1638  kc->u.wide[ 2] = ~kc->u.wide[ 2]; \
1639  kc->u.wide[ 8] = ~kc->u.wide[ 8]; \
1640  kc->u.wide[12] = ~kc->u.wide[12]; \
1641  kc->u.wide[17] = ~kc->u.wide[17]; \
1642  kc->u.wide[20] = ~kc->u.wide[20]; \
1643  for (j = 0; j < d; j += 8) \
1644  sph_enc64le_aligned(u.tmp + j, kc->u.wide[j >> 3]); \
1645  memcpy(dst, u.tmp, d); \
1646  keccak_init(kc, (unsigned)d << 3); \
1647  } \
1648 
1649 #else
1650 
1651 #define DEFCLOSE(d, lim) \
1652  static void keccak_close ## d( \
1653  sph_keccak_context *kc, unsigned ub, unsigned n, void *dst) \
1654  { \
1655  unsigned eb; \
1656  union { \
1657  unsigned char tmp[lim + 1]; \
1658  sph_u64 dummy; /* for alignment */ \
1659  } u; \
1660  size_t j; \
1661  \
1662  eb = (0x100 | (ub & 0xFF)) >> (8 - n); \
1663  if (kc->ptr == (lim - 1)) { \
1664  if (n == 7) { \
1665  u.tmp[0] = eb; \
1666  memset(u.tmp + 1, 0, lim - 1); \
1667  u.tmp[lim] = 0x80; \
1668  j = 1 + lim; \
1669  } else { \
1670  u.tmp[0] = eb | 0x80; \
1671  j = 1; \
1672  } \
1673  } else { \
1674  j = lim - kc->ptr; \
1675  u.tmp[0] = eb; \
1676  memset(u.tmp + 1, 0, j - 2); \
1677  u.tmp[j - 1] = 0x80; \
1678  } \
1679  keccak_core(kc, u.tmp, j, lim); \
1680  /* Finalize the "lane complement" */ \
1681  kc->u.narrow[ 2] = ~kc->u.narrow[ 2]; \
1682  kc->u.narrow[ 3] = ~kc->u.narrow[ 3]; \
1683  kc->u.narrow[ 4] = ~kc->u.narrow[ 4]; \
1684  kc->u.narrow[ 5] = ~kc->u.narrow[ 5]; \
1685  kc->u.narrow[16] = ~kc->u.narrow[16]; \
1686  kc->u.narrow[17] = ~kc->u.narrow[17]; \
1687  kc->u.narrow[24] = ~kc->u.narrow[24]; \
1688  kc->u.narrow[25] = ~kc->u.narrow[25]; \
1689  kc->u.narrow[34] = ~kc->u.narrow[34]; \
1690  kc->u.narrow[35] = ~kc->u.narrow[35]; \
1691  kc->u.narrow[40] = ~kc->u.narrow[40]; \
1692  kc->u.narrow[41] = ~kc->u.narrow[41]; \
1693  /* un-interleave */ \
1694  for (j = 0; j < 50; j += 2) \
1695  UNINTERLEAVE(kc->u.narrow[j], kc->u.narrow[j + 1]); \
1696  for (j = 0; j < d; j += 4) \
1697  sph_enc32le_aligned(u.tmp + j, kc->u.narrow[j >> 2]); \
1698  memcpy(dst, u.tmp, d); \
1699  keccak_init(kc, (unsigned)d << 3); \
1700  } \
1701 
1702 #endif
1703 
/*
 * Instantiate keccak_close28, keccak_close32, keccak_close48 and
 * keccak_close64: digest length in bytes paired with the block length
 * (144, 136, 104, 72) that the matching update function passes to
 * keccak_core().
 */
1704 DEFCLOSE(28, 144)
1705 DEFCLOSE(32, 136)
1706 DEFCLOSE(48, 104)
1707 DEFCLOSE(64, 72)
1708 
/*
 * Initialize a Keccak-224 context (public API, see sph_keccak.h).
 *
 * cc  pointer to the context to initialize; handed to keccak_init()
 *     together with the output size of 224 bits
 */
void
sph_keccak224_init(void *cc)
{
	keccak_init(cc, 224);
}
1715 
1716 /* see sph_keccak.h */
1717 void
1718 sph_keccak224(void *cc, const void *data, size_t len)
1719 {
1720  keccak_core(cc, data, len, 144);
1721 }
1722 
1723 /* see sph_keccak.h */
1724 void
1725 sph_keccak224_close(void *cc, void *dst)
1726 {
1727  sph_keccak224_addbits_and_close(cc, 0, 0, dst);
1728 }
1729 
1730 /* see sph_keccak.h */
1731 void
1732 sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1733 {
1734  keccak_close28(cc, ub, n, dst);
1735 }
1736 
/*
 * Initialize a Keccak-256 context (public API, see sph_keccak.h).
 *
 * cc  pointer to the context to initialize; handed to keccak_init()
 *     together with the output size of 256 bits
 */
void
sph_keccak256_init(void *cc)
{
	keccak_init(cc, 256);
}
1743 
1744 /* see sph_keccak.h */
1745 void
1746 sph_keccak256(void *cc, const void *data, size_t len)
1747 {
1748  keccak_core(cc, data, len, 136);
1749 }
1750 
1751 /* see sph_keccak.h */
1752 void
1753 sph_keccak256_close(void *cc, void *dst)
1754 {
1755  sph_keccak256_addbits_and_close(cc, 0, 0, dst);
1756 }
1757 
1758 /* see sph_keccak.h */
1759 void
1760 sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1761 {
1762  keccak_close32(cc, ub, n, dst);
1763 }
1764 
/*
 * Initialize a Keccak-384 context (public API, see sph_keccak.h).
 *
 * cc  pointer to the context to initialize; handed to keccak_init()
 *     together with the output size of 384 bits
 */
void
sph_keccak384_init(void *cc)
{
	keccak_init(cc, 384);
}
1771 
1772 /* see sph_keccak.h */
1773 void
1774 sph_keccak384(void *cc, const void *data, size_t len)
1775 {
1776  keccak_core(cc, data, len, 104);
1777 }
1778 
1779 /* see sph_keccak.h */
1780 void
1781 sph_keccak384_close(void *cc, void *dst)
1782 {
1783  sph_keccak384_addbits_and_close(cc, 0, 0, dst);
1784 }
1785 
1786 /* see sph_keccak.h */
1787 void
1788 sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1789 {
1790  keccak_close48(cc, ub, n, dst);
1791 }
1792 
/*
 * Initialize a Keccak-512 context (public API, see sph_keccak.h).
 *
 * cc  pointer to the context to initialize; handed to keccak_init()
 *     together with the output size of 512 bits
 */
void
sph_keccak512_init(void *cc)
{
	keccak_init(cc, 512);
}
1799 
1800 /* see sph_keccak.h */
1801 void
1802 sph_keccak512(void *cc, const void *data, size_t len)
1803 {
1804  keccak_core(cc, data, len, 72);
1805 }
1806 
1807 /* see sph_keccak.h */
1808 void
1809 sph_keccak512_close(void *cc, void *dst)
1810 {
1811  sph_keccak512_addbits_and_close(cc, 0, 0, dst);
1812 }
1813 
1814 /* see sph_keccak.h */
1815 void
1816 sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1817 {
1818  keccak_close64(cc, ub, n, dst);
1819 }
1820 
1821 
1822 #ifdef __cplusplus
1823 }
1824 #endif
void sph_keccak224_init(void *cc)
Initialize a Keccak-224 context.
Definition: keccak.c:1711
#define KECCAK_F_1600
Definition: keccak.c:1401
void sph_keccak384_close(void *cc, void *dst)
Terminate the current Keccak-384 computation and output the result into the provided buffer...
Definition: keccak.c:1781
void sph_keccak384_init(void *cc)
Initialize a Keccak-384 context.
Definition: keccak.c:1767
void sph_keccak256_init(void *cc)
Initialize a Keccak-256 context.
Definition: keccak.c:1739
#define READ_STATE(state)
Definition: keccak.c:549
void sph_keccak224(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: keccak.c:1718
sph_u32 high
Definition: keccak.c:370
void sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer.
Definition: keccak.c:1760
sph_u32 narrow[50]
Definition: sph_keccak.h:84
void sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer.
Definition: keccak.c:1788
union sph_keccak_context::@9 u
#define SPH_C32(x)
Definition: sph_types.h:873
#define WRITE_STATE(state)
Definition: keccak.c:602
#define DEFCLOSE(d, lim)
Definition: keccak.c:1651
void sph_keccak512_init(void *cc)
Initialize a Keccak-512 context.
Definition: keccak.c:1795
void sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer.
Definition: keccak.c:1816
void * memcpy(void *a, const void *b, size_t c)
void sph_keccak512(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: keccak.c:1802
sph_u32 low
Definition: keccak.c:370
unsigned long sph_u32
Definition: sph_types.h:870
unsigned char buf[144]
Definition: sph_keccak.h:78
void sph_keccak256_close(void *cc, void *dst)
Terminate the current Keccak-256 computation and output the result into the provided buffer...
Definition: keccak.c:1753
#define INPUT_BUF(lim)
Definition: keccak.c:733
void sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer.
Definition: keccak.c:1732
#define DECL_STATE
Definition: keccak.c:542
void sph_keccak512_close(void *cc, void *dst)
Terminate the current Keccak-512 computation and output the result into the provided buffer...
Definition: keccak.c:1809
void sph_keccak384(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: keccak.c:1774
This structure is a context for Keccak computations: it contains the intermediate values and some data from the last entered block.
Definition: sph_keccak.h:76
Keccak interface.
void sph_keccak256(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: keccak.c:1746
void sph_keccak224_close(void *cc, void *dst)
Terminate the current Keccak-224 computation and output the result into the provided buffer...
Definition: keccak.c:1725