/*
 * Raven Core 3.0.0
 * P2P Digital Currency
 * luffa.c
 */
/* $Id: luffa.c 219 2010-06-08 17:24:41Z tp $ */
/*
 * Luffa implementation.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2007-2010  Projet RNRT SAPHIR
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
 */
32 
33 #include <stddef.h>
34 #include <string.h>
35 #include <limits.h>
36 
37 #include "sph_luffa.h"
38 
39 #ifdef __cplusplus
40 extern "C"{
41 #endif
42 
43 #if SPH_64_TRUE && !defined SPH_LUFFA_PARALLEL
44 #define SPH_LUFFA_PARALLEL 1
45 #endif
46 
47 #ifdef _MSC_VER
48 #pragma warning (disable: 4146)
49 #endif
50 
51 static const sph_u32 V_INIT[5][8] = {
52  {
53  SPH_C32(0x6d251e69), SPH_C32(0x44b051e0),
54  SPH_C32(0x4eaa6fb4), SPH_C32(0xdbf78465),
55  SPH_C32(0x6e292011), SPH_C32(0x90152df4),
56  SPH_C32(0xee058139), SPH_C32(0xdef610bb)
57  }, {
58  SPH_C32(0xc3b44b95), SPH_C32(0xd9d2f256),
59  SPH_C32(0x70eee9a0), SPH_C32(0xde099fa3),
60  SPH_C32(0x5d9b0557), SPH_C32(0x8fc944b3),
61  SPH_C32(0xcf1ccf0e), SPH_C32(0x746cd581)
62  }, {
63  SPH_C32(0xf7efc89d), SPH_C32(0x5dba5781),
64  SPH_C32(0x04016ce5), SPH_C32(0xad659c05),
65  SPH_C32(0x0306194f), SPH_C32(0x666d1836),
66  SPH_C32(0x24aa230a), SPH_C32(0x8b264ae7)
67  }, {
68  SPH_C32(0x858075d5), SPH_C32(0x36d79cce),
69  SPH_C32(0xe571f7d7), SPH_C32(0x204b1f67),
70  SPH_C32(0x35870c6a), SPH_C32(0x57e9e923),
71  SPH_C32(0x14bcb808), SPH_C32(0x7cde72ce)
72  }, {
73  SPH_C32(0x6c68e9be), SPH_C32(0x5ec41e22),
74  SPH_C32(0xc825b7c7), SPH_C32(0xaffb4363),
75  SPH_C32(0xf5df3999), SPH_C32(0x0fc688f1),
76  SPH_C32(0xb07224cc), SPH_C32(0x03e86cea)
77  }
78 };
79 
80 static const sph_u32 RC00[8] = {
81  SPH_C32(0x303994a6), SPH_C32(0xc0e65299),
82  SPH_C32(0x6cc33a12), SPH_C32(0xdc56983e),
83  SPH_C32(0x1e00108f), SPH_C32(0x7800423d),
84  SPH_C32(0x8f5b7882), SPH_C32(0x96e1db12)
85 };
86 
87 static const sph_u32 RC04[8] = {
88  SPH_C32(0xe0337818), SPH_C32(0x441ba90d),
89  SPH_C32(0x7f34d442), SPH_C32(0x9389217f),
90  SPH_C32(0xe5a8bce6), SPH_C32(0x5274baf4),
91  SPH_C32(0x26889ba7), SPH_C32(0x9a226e9d)
92 };
93 
94 static const sph_u32 RC10[8] = {
95  SPH_C32(0xb6de10ed), SPH_C32(0x70f47aae),
96  SPH_C32(0x0707a3d4), SPH_C32(0x1c1e8f51),
97  SPH_C32(0x707a3d45), SPH_C32(0xaeb28562),
98  SPH_C32(0xbaca1589), SPH_C32(0x40a46f3e)
99 };
100 
101 static const sph_u32 RC14[8] = {
102  SPH_C32(0x01685f3d), SPH_C32(0x05a17cf4),
103  SPH_C32(0xbd09caca), SPH_C32(0xf4272b28),
104  SPH_C32(0x144ae5cc), SPH_C32(0xfaa7ae2b),
105  SPH_C32(0x2e48f1c1), SPH_C32(0xb923c704)
106 };
107 
#if SPH_LUFFA_PARALLEL

/*
 * Packed round constants for rows 0 and 1, used by the 64-bit code paths:
 *   RCW010[r] = ((sph_u64)RC10[r] << 32) | RC00[r]
 *   RCW014[r] = ((sph_u64)RC14[r] << 32) | RC04[r]
 * i.e. the row-0 constant sits in the low half and the row-1 constant in
 * the high half, matching how the P macros pack V0k / V1k into Wk.
 */
static const sph_u64 RCW010[8] = {
	SPH_C64(0xb6de10ed303994a6), SPH_C64(0x70f47aaec0e65299),
	SPH_C64(0x0707a3d46cc33a12), SPH_C64(0x1c1e8f51dc56983e),
	SPH_C64(0x707a3d451e00108f), SPH_C64(0xaeb285627800423d),
	SPH_C64(0xbaca15898f5b7882), SPH_C64(0x40a46f3e96e1db12)
};

static const sph_u64 RCW014[8] = {
	SPH_C64(0x01685f3de0337818), SPH_C64(0x05a17cf4441ba90d),
	SPH_C64(0xbd09caca7f34d442), SPH_C64(0xf4272b289389217f),
	SPH_C64(0x144ae5cce5a8bce6), SPH_C64(0xfaa7ae2b5274baf4),
	SPH_C64(0x2e48f1c126889ba7), SPH_C64(0xb923c7049a226e9d)
};

#endif
125 
126 static const sph_u32 RC20[8] = {
127  SPH_C32(0xfc20d9d2), SPH_C32(0x34552e25),
128  SPH_C32(0x7ad8818f), SPH_C32(0x8438764a),
129  SPH_C32(0xbb6de032), SPH_C32(0xedb780c8),
130  SPH_C32(0xd9847356), SPH_C32(0xa2c78434)
131 };
132 
133 static const sph_u32 RC24[8] = {
134  SPH_C32(0xe25e72c1), SPH_C32(0xe623bb72),
135  SPH_C32(0x5c58a4a4), SPH_C32(0x1e38e2e7),
136  SPH_C32(0x78e38b9d), SPH_C32(0x27586719),
137  SPH_C32(0x36eda57f), SPH_C32(0x703aace7)
138 };
139 
140 static const sph_u32 RC30[8] = {
141  SPH_C32(0xb213afa5), SPH_C32(0xc84ebe95),
142  SPH_C32(0x4e608a22), SPH_C32(0x56d858fe),
143  SPH_C32(0x343b138f), SPH_C32(0xd0ec4e3d),
144  SPH_C32(0x2ceb4882), SPH_C32(0xb3ad2208)
145 };
146 
147 static const sph_u32 RC34[8] = {
148  SPH_C32(0xe028c9bf), SPH_C32(0x44756f91),
149  SPH_C32(0x7e8fce32), SPH_C32(0x956548be),
150  SPH_C32(0xfe191be2), SPH_C32(0x3cb226e5),
151  SPH_C32(0x5944a28e), SPH_C32(0xa1c4c355)
152 };
153 
#if SPH_LUFFA_PARALLEL

/*
 * Packed round constants for rows 2 and 3, used by the 64-bit code paths:
 *   RCW230[r] = ((sph_u64)RC30[r] << 32) | RC20[r]
 *   RCW234[r] = ((sph_u64)RC34[r] << 32) | RC24[r]
 * Row 2 occupies the low half and row 3 the high half of each entry.
 */
static const sph_u64 RCW230[8] = {
	SPH_C64(0xb213afa5fc20d9d2), SPH_C64(0xc84ebe9534552e25),
	SPH_C64(0x4e608a227ad8818f), SPH_C64(0x56d858fe8438764a),
	SPH_C64(0x343b138fbb6de032), SPH_C64(0xd0ec4e3dedb780c8),
	SPH_C64(0x2ceb4882d9847356), SPH_C64(0xb3ad2208a2c78434)
};

static const sph_u64 RCW234[8] = {
	SPH_C64(0xe028c9bfe25e72c1), SPH_C64(0x44756f91e623bb72),
	SPH_C64(0x7e8fce325c58a4a4), SPH_C64(0x956548be1e38e2e7),
	SPH_C64(0xfe191be278e38b9d), SPH_C64(0x3cb226e527586719),
	SPH_C64(0x5944a28e36eda57f), SPH_C64(0xa1c4c355703aace7)
};

#endif
172 
173 static const sph_u32 RC40[8] = {
174  SPH_C32(0xf0d2e9e3), SPH_C32(0xac11d7fa),
175  SPH_C32(0x1bcb66f2), SPH_C32(0x6f2d9bc9),
176  SPH_C32(0x78602649), SPH_C32(0x8edae952),
177  SPH_C32(0x3b6ba548), SPH_C32(0xedae9520)
178 };
179 
180 static const sph_u32 RC44[8] = {
181  SPH_C32(0x5090d577), SPH_C32(0x2d1925ab),
182  SPH_C32(0xb46496ac), SPH_C32(0xd1925ab0),
183  SPH_C32(0x29131ab6), SPH_C32(0x0fc053c3),
184  SPH_C32(0x3f014f0c), SPH_C32(0xfc053c31)
185 };
186 
/*
 * DECL_TMP8(w): declare eight sph_u32 locals named w0 .. w7.
 * The expansion already ends with a semicolon, so it is used without one
 * (e.g. "DECL_TMP8(M)") at the start of a block.
 */
#define DECL_TMP8(w) \
	sph_u32 w ## 0, w ## 1, w ## 2, w ## 3, w ## 4, w ## 5, w ## 6, w ## 7;
189 
/*
 * M2(d, s): transform the eight-word vector s0..s7 into d0..d7:
 *
 *   d0 = s7         d4 = s3 ^ s7
 *   d1 = s0 ^ s7    d5 = s4
 *   d2 = s1         d6 = s5
 *   d3 = s2 ^ s7    d7 = s6
 *
 * s7 is saved first and the words are written from index 7 downwards, so
 * the macro is safe when d and s name the same vector (M2(a, a)).
 * The expansion declares a local called "tmp"; neither prefix may be "tmp".
 */
#define M2(d, s)   do { \
		sph_u32 tmp = s ## 7; \
		d ## 7 = s ## 6; \
		d ## 6 = s ## 5; \
		d ## 5 = s ## 4; \
		d ## 4 = s ## 3 ^ tmp; \
		d ## 3 = s ## 2 ^ tmp; \
		d ## 2 = s ## 1; \
		d ## 1 = s ## 0 ^ tmp; \
		d ## 0 = tmp; \
	} while (0)
201 
/*
 * XOR(d, s1, s2): word-wise XOR of two eight-word vectors,
 * d<k> = s1<k> ^ s2<k> for k = 0..7.
 * Each destination word depends only on the source words of the same
 * index, so d may be the same vector as s1 and/or s2.
 */
#define XOR(d, s1, s2)   do { \
		d ## 0 = s1 ## 0 ^ s2 ## 0; \
		d ## 1 = s1 ## 1 ^ s2 ## 1; \
		d ## 2 = s1 ## 2 ^ s2 ## 2; \
		d ## 3 = s1 ## 3 ^ s2 ## 3; \
		d ## 4 = s1 ## 4 ^ s2 ## 4; \
		d ## 5 = s1 ## 5 ^ s2 ## 5; \
		d ## 6 = s1 ## 6 ^ s2 ## 6; \
		d ## 7 = s1 ## 7 ^ s2 ## 7; \
	} while (0)
212 
/*
 * SUB_CRUMB_GEN(a0, a1, a2, a3, width): bitsliced 4-bit substitution on
 * four words of the given width (32 or 64), updated in place.
 *
 * At every bit position, the four input bits form the nibble
 * x = a0 | a1 << 1 | a2 << 2 | a3 << 3, and the four output bits form
 * S[x] with S = { 13, 14, 0, 1, 5, 10, 7, 6, 11, 3, 9, 12, 15, 8, 2, 4 }.
 *
 * The arguments must be distinct lvalues without side effects (each is
 * evaluated several times) and must not be named "tmp". SPH_T<width> is
 * applied after each bitwise NOT to truncate the result to <width> bits.
 *
 * The 32-bit SUB_CRUMB is the same statement sequence in both build
 * modes, so it is defined once here through the generic macro.
 */
#define SUB_CRUMB_GEN(a0, a1, a2, a3, width)   do { \
		sph_u ## width tmp; \
		tmp = (a0); \
		(a0) |= (a1); \
		(a2) ^= (a3); \
		(a1) = SPH_T ## width(~(a1)); \
		(a0) ^= (a3); \
		(a3) &= tmp; \
		(a1) ^= (a3); \
		(a3) ^= (a2); \
		(a2) &= (a0); \
		(a0) = SPH_T ## width(~(a0)); \
		(a2) ^= (a1); \
		(a1) |= (a3); \
		tmp ^= (a1); \
		(a3) ^= (a2); \
		(a2) &= (a1); \
		(a1) ^= (a0); \
		(a0) = tmp; \
	} while (0)

#define SUB_CRUMB(a0, a1, a2, a3)    SUB_CRUMB_GEN(a0, a1, a2, a3, 32)

#if SPH_LUFFA_PARALLEL

/* 64-bit variant: substitutes two packed 32-bit rows at once. */
#define SUB_CRUMBW(a0, a1, a2, a3)   SUB_CRUMB_GEN(a0, a1, a2, a3, 64)

/*
 * Disabled alternative: MIX_WORDW computed directly on 64-bit values,
 * with ROL32W rotating both 32-bit halves of x left by n independently.
 * Kept for reference; the active MIX_WORDW is the definition that follows.
 */
#if 0

#define ROL32W(x, n)   SPH_T64( \
                        (((x) << (n)) \
                        & ~((SPH_C64(0xFFFFFFFF) >> (32 - (n))) << 32)) \
                        | (((x) >> (32 - (n))) \
                        & ~((SPH_C64(0xFFFFFFFF) >> (n)) << (n))))

#define MIX_WORDW(u, v)   do { \
		(v) ^= (u); \
		(u) = ROL32W((u), 2) ^ (v); \
		(v) = ROL32W((v), 14) ^ (u); \
		(u) = ROL32W((u), 10) ^ (v); \
		(v) = ROL32W((v), 1); \
	} while (0)

#endif

/*
 * MIX_WORDW(u, v): apply the MIX_WORD sequence (rotations by 2, 14, 10
 * and 1) separately to the low halves and to the high halves of the
 * 64-bit lvalues u and v. The halves are split into 32-bit temporaries,
 * mixed with SPH_ROTL32, then recombined.
 */
#define MIX_WORDW(u, v)   do { \
		sph_u32 ul, uh, vl, vh; \
		(v) ^= (u); \
		ul = SPH_T32((sph_u32)(u)); \
		uh = SPH_T32((sph_u32)((u) >> 32)); \
		vl = SPH_T32((sph_u32)(v)); \
		vh = SPH_T32((sph_u32)((v) >> 32)); \
		ul = SPH_ROTL32(ul, 2) ^ vl; \
		vl = SPH_ROTL32(vl, 14) ^ ul; \
		ul = SPH_ROTL32(ul, 10) ^ vl; \
		vl = SPH_ROTL32(vl, 1); \
		uh = SPH_ROTL32(uh, 2) ^ vh; \
		vh = SPH_ROTL32(vh, 14) ^ uh; \
		uh = SPH_ROTL32(uh, 10) ^ vh; \
		vh = SPH_ROTL32(vh, 1); \
		(u) = (sph_u64)ul | ((sph_u64)uh << 32); \
		(v) = (sph_u64)vl | ((sph_u64)vh << 32); \
	} while (0)

#endif
301 
/*
 * MIX_WORD(u, v): in-place mixing of two 32-bit lvalues:
 *
 *   v ^= u;
 *   u = rotl(u, 2)  ^ v;
 *   v = rotl(v, 14) ^ u;
 *   u = rotl(u, 10) ^ v;
 *   v = rotl(v, 1);
 *
 * Both arguments are evaluated several times, so they must be plain
 * lvalues without side effects.
 */
#define MIX_WORD(u, v)   do { \
		(v) ^= (u); \
		(u) = SPH_ROTL32((u), 2) ^ (v); \
		(v) = SPH_ROTL32((v), 14) ^ (u); \
		(u) = SPH_ROTL32((u), 10) ^ (v); \
		(v) = SPH_ROTL32((v), 1); \
	} while (0)
309 
/*
 * Local-variable view of a three-row state.
 *
 * DECL_STATE3        declares the 24 locals V00..V27 (Vjk = row j, word k).
 *                    The expansion ends with a semicolon; use it without one.
 * READ_STATE3(state) loads Vjk from (state)->V[j][k] for j = 0..2, k = 0..7.
 * WRITE_STATE3(state) stores the locals back into (state)->V[j][k].
 *
 * "state" is a pointer expression and is evaluated once per word.
 */
#define DECL_STATE3 \
	sph_u32 V00, V01, V02, V03, V04, V05, V06, V07; \
	sph_u32 V10, V11, V12, V13, V14, V15, V16, V17; \
	sph_u32 V20, V21, V22, V23, V24, V25, V26, V27;

#define READ_STATE3(state)   do { \
		V00 = (state)->V[0][0]; \
		V01 = (state)->V[0][1]; \
		V02 = (state)->V[0][2]; \
		V03 = (state)->V[0][3]; \
		V04 = (state)->V[0][4]; \
		V05 = (state)->V[0][5]; \
		V06 = (state)->V[0][6]; \
		V07 = (state)->V[0][7]; \
		V10 = (state)->V[1][0]; \
		V11 = (state)->V[1][1]; \
		V12 = (state)->V[1][2]; \
		V13 = (state)->V[1][3]; \
		V14 = (state)->V[1][4]; \
		V15 = (state)->V[1][5]; \
		V16 = (state)->V[1][6]; \
		V17 = (state)->V[1][7]; \
		V20 = (state)->V[2][0]; \
		V21 = (state)->V[2][1]; \
		V22 = (state)->V[2][2]; \
		V23 = (state)->V[2][3]; \
		V24 = (state)->V[2][4]; \
		V25 = (state)->V[2][5]; \
		V26 = (state)->V[2][6]; \
		V27 = (state)->V[2][7]; \
	} while (0)

#define WRITE_STATE3(state)   do { \
		(state)->V[0][0] = V00; \
		(state)->V[0][1] = V01; \
		(state)->V[0][2] = V02; \
		(state)->V[0][3] = V03; \
		(state)->V[0][4] = V04; \
		(state)->V[0][5] = V05; \
		(state)->V[0][6] = V06; \
		(state)->V[0][7] = V07; \
		(state)->V[1][0] = V10; \
		(state)->V[1][1] = V11; \
		(state)->V[1][2] = V12; \
		(state)->V[1][3] = V13; \
		(state)->V[1][4] = V14; \
		(state)->V[1][5] = V15; \
		(state)->V[1][6] = V16; \
		(state)->V[1][7] = V17; \
		(state)->V[2][0] = V20; \
		(state)->V[2][1] = V21; \
		(state)->V[2][2] = V22; \
		(state)->V[2][3] = V23; \
		(state)->V[2][4] = V24; \
		(state)->V[2][5] = V25; \
		(state)->V[2][6] = V26; \
		(state)->V[2][7] = V27; \
	} while (0)
368 
/*
 * MI3: mix one input block into the three-row state held in V00..V27.
 *
 * Expects in scope: the locals from DECL_STATE3 and a byte pointer "buf"
 * from which eight 32-bit words are decoded with sph_dec32be_aligned()
 * at offsets 0, 4, ..., 28 (so 32 bytes are read).
 *
 * With a = M2(V0 ^ V1 ^ V2) and M the decoded block:
 *   V0 = V0 ^ a ^ M
 *   V1 = V1 ^ a ^ M2(M)
 *   V2 = V2 ^ a ^ M2(M2(M))
 *
 * Note: "M2 = ..." below assigns the local word M2 declared by
 * DECL_TMP8(M); the function-like macro M2 only expands when followed
 * by a parenthesis.
 */
#define MI3   do { \
		DECL_TMP8(M) \
		DECL_TMP8(a) \
		M0 = sph_dec32be_aligned(buf +  0); \
		M1 = sph_dec32be_aligned(buf +  4); \
		M2 = sph_dec32be_aligned(buf +  8); \
		M3 = sph_dec32be_aligned(buf + 12); \
		M4 = sph_dec32be_aligned(buf + 16); \
		M5 = sph_dec32be_aligned(buf + 20); \
		M6 = sph_dec32be_aligned(buf + 24); \
		M7 = sph_dec32be_aligned(buf + 28); \
		XOR(a, V0, V1); \
		XOR(a, a, V2); \
		M2(a, a); \
		XOR(V0, a, V0); \
		XOR(V0, M, V0); \
		M2(M, M); \
		XOR(V1, a, V1); \
		XOR(V1, M, V1); \
		M2(M, M); \
		XOR(V2, a, V2); \
		XOR(V2, M, V2); \
	} while (0)
392 
/*
 * TWEAK3: rotate words 4..7 of row j left by j bits, for j = 1 and 2
 * (row 0 is left unchanged). Operates on the locals from DECL_STATE3 and
 * is invoked at the start of P3.
 */
#define TWEAK3   do { \
		V14 = SPH_ROTL32(V14, 1); \
		V15 = SPH_ROTL32(V15, 1); \
		V16 = SPH_ROTL32(V16, 1); \
		V17 = SPH_ROTL32(V17, 1); \
		V24 = SPH_ROTL32(V24, 2); \
		V25 = SPH_ROTL32(V25, 2); \
		V26 = SPH_ROTL32(V26, 2); \
		V27 = SPH_ROTL32(V27, 2); \
	} while (0)
403 
/*
 * P3: permute the three-row state held in the DECL_STATE3 locals.
 *
 * Steps: TWEAK3, then for each row j eight rounds of
 *   SUB_CRUMB on words (0,1,2,3) and on words (5,6,7,4),
 *   MIX_WORD on the word pairs (0,4), (1,5), (2,6), (3,7),
 *   XOR of the round constants into word 0 and word 4.
 *
 * Parallel variant: rows 0 and 1 are processed together by packing
 * V0k (low half) and V1k (high half) into the 64-bit Wk and using the
 * SUB_CRUMBW / MIX_WORDW / RCW010 / RCW014 forms; row 2 uses the 32-bit
 * path. The non-parallel variant runs three independent 32-bit loops.
 *
 * The expansion declares a local "r" (and W0..W7 in the parallel case).
 */
#if SPH_LUFFA_PARALLEL

#define P3   do { \
		int r; \
		sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \
		TWEAK3; \
		W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \
		W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \
		W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \
		W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \
		W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \
		W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \
		W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \
		W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMBW(W0, W1, W2, W3); \
			SUB_CRUMBW(W5, W6, W7, W4); \
			MIX_WORDW(W0, W4); \
			MIX_WORDW(W1, W5); \
			MIX_WORDW(W2, W6); \
			MIX_WORDW(W3, W7); \
			W0 ^= RCW010[r]; \
			W4 ^= RCW014[r]; \
		} \
		V00 = SPH_T32((sph_u32)W0); \
		V10 = SPH_T32((sph_u32)(W0 >> 32)); \
		V01 = SPH_T32((sph_u32)W1); \
		V11 = SPH_T32((sph_u32)(W1 >> 32)); \
		V02 = SPH_T32((sph_u32)W2); \
		V12 = SPH_T32((sph_u32)(W2 >> 32)); \
		V03 = SPH_T32((sph_u32)W3); \
		V13 = SPH_T32((sph_u32)(W3 >> 32)); \
		V04 = SPH_T32((sph_u32)W4); \
		V14 = SPH_T32((sph_u32)(W4 >> 32)); \
		V05 = SPH_T32((sph_u32)W5); \
		V15 = SPH_T32((sph_u32)(W5 >> 32)); \
		V06 = SPH_T32((sph_u32)W6); \
		V16 = SPH_T32((sph_u32)(W6 >> 32)); \
		V07 = SPH_T32((sph_u32)W7); \
		V17 = SPH_T32((sph_u32)(W7 >> 32)); \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V20, V21, V22, V23); \
			SUB_CRUMB(V25, V26, V27, V24); \
			MIX_WORD(V20, V24); \
			MIX_WORD(V21, V25); \
			MIX_WORD(V22, V26); \
			MIX_WORD(V23, V27); \
			V20 ^= RC20[r]; \
			V24 ^= RC24[r]; \
		} \
	} while (0)

#else

#define P3   do { \
		int r; \
		TWEAK3; \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V00, V01, V02, V03); \
			SUB_CRUMB(V05, V06, V07, V04); \
			MIX_WORD(V00, V04); \
			MIX_WORD(V01, V05); \
			MIX_WORD(V02, V06); \
			MIX_WORD(V03, V07); \
			V00 ^= RC00[r]; \
			V04 ^= RC04[r]; \
		} \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V10, V11, V12, V13); \
			SUB_CRUMB(V15, V16, V17, V14); \
			MIX_WORD(V10, V14); \
			MIX_WORD(V11, V15); \
			MIX_WORD(V12, V16); \
			MIX_WORD(V13, V17); \
			V10 ^= RC10[r]; \
			V14 ^= RC14[r]; \
		} \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V20, V21, V22, V23); \
			SUB_CRUMB(V25, V26, V27, V24); \
			MIX_WORD(V20, V24); \
			MIX_WORD(V21, V25); \
			MIX_WORD(V22, V26); \
			MIX_WORD(V23, V27); \
			V20 ^= RC20[r]; \
			V24 ^= RC24[r]; \
		} \
	} while (0)

#endif
494 
/*
 * Local-variable view of a four-row state.
 *
 * DECL_STATE4        declares the 32 locals V00..V37 (Vjk = row j, word k).
 *                    The expansion ends with a semicolon; use it without one.
 * READ_STATE4(state) loads Vjk from (state)->V[j][k] for j = 0..3, k = 0..7.
 * WRITE_STATE4(state) stores the locals back into (state)->V[j][k].
 *
 * "state" is a pointer expression and is evaluated once per word.
 */
#define DECL_STATE4 \
	sph_u32 V00, V01, V02, V03, V04, V05, V06, V07; \
	sph_u32 V10, V11, V12, V13, V14, V15, V16, V17; \
	sph_u32 V20, V21, V22, V23, V24, V25, V26, V27; \
	sph_u32 V30, V31, V32, V33, V34, V35, V36, V37;

#define READ_STATE4(state)   do { \
		V00 = (state)->V[0][0]; \
		V01 = (state)->V[0][1]; \
		V02 = (state)->V[0][2]; \
		V03 = (state)->V[0][3]; \
		V04 = (state)->V[0][4]; \
		V05 = (state)->V[0][5]; \
		V06 = (state)->V[0][6]; \
		V07 = (state)->V[0][7]; \
		V10 = (state)->V[1][0]; \
		V11 = (state)->V[1][1]; \
		V12 = (state)->V[1][2]; \
		V13 = (state)->V[1][3]; \
		V14 = (state)->V[1][4]; \
		V15 = (state)->V[1][5]; \
		V16 = (state)->V[1][6]; \
		V17 = (state)->V[1][7]; \
		V20 = (state)->V[2][0]; \
		V21 = (state)->V[2][1]; \
		V22 = (state)->V[2][2]; \
		V23 = (state)->V[2][3]; \
		V24 = (state)->V[2][4]; \
		V25 = (state)->V[2][5]; \
		V26 = (state)->V[2][6]; \
		V27 = (state)->V[2][7]; \
		V30 = (state)->V[3][0]; \
		V31 = (state)->V[3][1]; \
		V32 = (state)->V[3][2]; \
		V33 = (state)->V[3][3]; \
		V34 = (state)->V[3][4]; \
		V35 = (state)->V[3][5]; \
		V36 = (state)->V[3][6]; \
		V37 = (state)->V[3][7]; \
	} while (0)

#define WRITE_STATE4(state)   do { \
		(state)->V[0][0] = V00; \
		(state)->V[0][1] = V01; \
		(state)->V[0][2] = V02; \
		(state)->V[0][3] = V03; \
		(state)->V[0][4] = V04; \
		(state)->V[0][5] = V05; \
		(state)->V[0][6] = V06; \
		(state)->V[0][7] = V07; \
		(state)->V[1][0] = V10; \
		(state)->V[1][1] = V11; \
		(state)->V[1][2] = V12; \
		(state)->V[1][3] = V13; \
		(state)->V[1][4] = V14; \
		(state)->V[1][5] = V15; \
		(state)->V[1][6] = V16; \
		(state)->V[1][7] = V17; \
		(state)->V[2][0] = V20; \
		(state)->V[2][1] = V21; \
		(state)->V[2][2] = V22; \
		(state)->V[2][3] = V23; \
		(state)->V[2][4] = V24; \
		(state)->V[2][5] = V25; \
		(state)->V[2][6] = V26; \
		(state)->V[2][7] = V27; \
		(state)->V[3][0] = V30; \
		(state)->V[3][1] = V31; \
		(state)->V[3][2] = V32; \
		(state)->V[3][3] = V33; \
		(state)->V[3][4] = V34; \
		(state)->V[3][5] = V35; \
		(state)->V[3][6] = V36; \
		(state)->V[3][7] = V37; \
	} while (0)
570 
/*
 * MI4: mix one input block into the four-row state held in V00..V37.
 *
 * Expects in scope: the locals from DECL_STATE4 and a byte pointer "buf"
 * from which eight 32-bit words are decoded with sph_dec32be_aligned()
 * at offsets 0, 4, ..., 28 (so 32 bytes are read).
 *
 * Sequence, as written below:
 *   1. a = M2(V0 ^ V1 ^ V2 ^ V3); every row is XORed with a.
 *   2. A chain of M2/XOR steps combines each row with a neighbouring row,
 *      using b as scratch for the new V0 (the statement order matters:
 *      each step reads rows already updated by earlier steps).
 *   3. The block M is XORed into V0, M2(M) into V1, M2(M2(M)) into V2 and
 *      M2 applied three times into V3.
 *
 * Note: "M2 = ..." assigns the local word M2 declared by DECL_TMP8(M); the
 * function-like macro M2 only expands when followed by a parenthesis.
 */
#define MI4   do { \
		DECL_TMP8(M) \
		DECL_TMP8(a) \
		DECL_TMP8(b) \
		M0 = sph_dec32be_aligned(buf +  0); \
		M1 = sph_dec32be_aligned(buf +  4); \
		M2 = sph_dec32be_aligned(buf +  8); \
		M3 = sph_dec32be_aligned(buf + 12); \
		M4 = sph_dec32be_aligned(buf + 16); \
		M5 = sph_dec32be_aligned(buf + 20); \
		M6 = sph_dec32be_aligned(buf + 24); \
		M7 = sph_dec32be_aligned(buf + 28); \
		XOR(a, V0, V1); \
		XOR(b, V2, V3); \
		XOR(a, a, b); \
		M2(a, a); \
		XOR(V0, a, V0); \
		XOR(V1, a, V1); \
		XOR(V2, a, V2); \
		XOR(V3, a, V3); \
		M2(b, V0); \
		XOR(b, b, V3); \
		M2(V3, V3); \
		XOR(V3, V3, V2); \
		M2(V2, V2); \
		XOR(V2, V2, V1); \
		M2(V1, V1); \
		XOR(V1, V1, V0); \
		XOR(V0, b, M); \
		M2(M, M); \
		XOR(V1, V1, M); \
		M2(M, M); \
		XOR(V2, V2, M); \
		M2(M, M); \
		XOR(V3, V3, M); \
	} while (0)
607 
/*
 * TWEAK4: rotate words 4..7 of row j left by j bits, for j = 1, 2 and 3
 * (row 0 is left unchanged). Operates on the locals from DECL_STATE4 and
 * is invoked at the start of P4.
 */
#define TWEAK4   do { \
		V14 = SPH_ROTL32(V14, 1); \
		V15 = SPH_ROTL32(V15, 1); \
		V16 = SPH_ROTL32(V16, 1); \
		V17 = SPH_ROTL32(V17, 1); \
		V24 = SPH_ROTL32(V24, 2); \
		V25 = SPH_ROTL32(V25, 2); \
		V26 = SPH_ROTL32(V26, 2); \
		V27 = SPH_ROTL32(V27, 2); \
		V34 = SPH_ROTL32(V34, 3); \
		V35 = SPH_ROTL32(V35, 3); \
		V36 = SPH_ROTL32(V36, 3); \
		V37 = SPH_ROTL32(V37, 3); \
	} while (0)
622 
/*
 * P4: permute the four-row state held in the DECL_STATE4 locals.
 *
 * Steps: TWEAK4, then for each row j eight rounds of
 *   SUB_CRUMB on words (0,1,2,3) and on words (5,6,7,4),
 *   MIX_WORD on the word pairs (0,4), (1,5), (2,6), (3,7),
 *   XOR of the round constants into word 0 and word 4.
 *
 * Parallel variant: rows are processed two at a time in 64-bit words,
 * first rows 0/1 (constants RCW010/RCW014), then rows 2/3 (constants
 * RCW230/RCW234); the even row sits in the low half of each Wk and the
 * odd row in the high half. The non-parallel variant runs four
 * independent 32-bit loops.
 *
 * The expansion declares a local "r" (and W0..W7 in the parallel case).
 */
#if SPH_LUFFA_PARALLEL

#define P4   do { \
		int r; \
		sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \
		TWEAK4; \
		W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \
		W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \
		W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \
		W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \
		W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \
		W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \
		W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \
		W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMBW(W0, W1, W2, W3); \
			SUB_CRUMBW(W5, W6, W7, W4); \
			MIX_WORDW(W0, W4); \
			MIX_WORDW(W1, W5); \
			MIX_WORDW(W2, W6); \
			MIX_WORDW(W3, W7); \
			W0 ^= RCW010[r]; \
			W4 ^= RCW014[r]; \
		} \
		V00 = SPH_T32((sph_u32)W0); \
		V10 = SPH_T32((sph_u32)(W0 >> 32)); \
		V01 = SPH_T32((sph_u32)W1); \
		V11 = SPH_T32((sph_u32)(W1 >> 32)); \
		V02 = SPH_T32((sph_u32)W2); \
		V12 = SPH_T32((sph_u32)(W2 >> 32)); \
		V03 = SPH_T32((sph_u32)W3); \
		V13 = SPH_T32((sph_u32)(W3 >> 32)); \
		V04 = SPH_T32((sph_u32)W4); \
		V14 = SPH_T32((sph_u32)(W4 >> 32)); \
		V05 = SPH_T32((sph_u32)W5); \
		V15 = SPH_T32((sph_u32)(W5 >> 32)); \
		V06 = SPH_T32((sph_u32)W6); \
		V16 = SPH_T32((sph_u32)(W6 >> 32)); \
		V07 = SPH_T32((sph_u32)W7); \
		V17 = SPH_T32((sph_u32)(W7 >> 32)); \
		W0 = (sph_u64)V20 | ((sph_u64)V30 << 32); \
		W1 = (sph_u64)V21 | ((sph_u64)V31 << 32); \
		W2 = (sph_u64)V22 | ((sph_u64)V32 << 32); \
		W3 = (sph_u64)V23 | ((sph_u64)V33 << 32); \
		W4 = (sph_u64)V24 | ((sph_u64)V34 << 32); \
		W5 = (sph_u64)V25 | ((sph_u64)V35 << 32); \
		W6 = (sph_u64)V26 | ((sph_u64)V36 << 32); \
		W7 = (sph_u64)V27 | ((sph_u64)V37 << 32); \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMBW(W0, W1, W2, W3); \
			SUB_CRUMBW(W5, W6, W7, W4); \
			MIX_WORDW(W0, W4); \
			MIX_WORDW(W1, W5); \
			MIX_WORDW(W2, W6); \
			MIX_WORDW(W3, W7); \
			W0 ^= RCW230[r]; \
			W4 ^= RCW234[r]; \
		} \
		V20 = SPH_T32((sph_u32)W0); \
		V30 = SPH_T32((sph_u32)(W0 >> 32)); \
		V21 = SPH_T32((sph_u32)W1); \
		V31 = SPH_T32((sph_u32)(W1 >> 32)); \
		V22 = SPH_T32((sph_u32)W2); \
		V32 = SPH_T32((sph_u32)(W2 >> 32)); \
		V23 = SPH_T32((sph_u32)W3); \
		V33 = SPH_T32((sph_u32)(W3 >> 32)); \
		V24 = SPH_T32((sph_u32)W4); \
		V34 = SPH_T32((sph_u32)(W4 >> 32)); \
		V25 = SPH_T32((sph_u32)W5); \
		V35 = SPH_T32((sph_u32)(W5 >> 32)); \
		V26 = SPH_T32((sph_u32)W6); \
		V36 = SPH_T32((sph_u32)(W6 >> 32)); \
		V27 = SPH_T32((sph_u32)W7); \
		V37 = SPH_T32((sph_u32)(W7 >> 32)); \
	} while (0)

#else

#define P4   do { \
		int r; \
		TWEAK4; \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V00, V01, V02, V03); \
			SUB_CRUMB(V05, V06, V07, V04); \
			MIX_WORD(V00, V04); \
			MIX_WORD(V01, V05); \
			MIX_WORD(V02, V06); \
			MIX_WORD(V03, V07); \
			V00 ^= RC00[r]; \
			V04 ^= RC04[r]; \
		} \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V10, V11, V12, V13); \
			SUB_CRUMB(V15, V16, V17, V14); \
			MIX_WORD(V10, V14); \
			MIX_WORD(V11, V15); \
			MIX_WORD(V12, V16); \
			MIX_WORD(V13, V17); \
			V10 ^= RC10[r]; \
			V14 ^= RC14[r]; \
		} \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V20, V21, V22, V23); \
			SUB_CRUMB(V25, V26, V27, V24); \
			MIX_WORD(V20, V24); \
			MIX_WORD(V21, V25); \
			MIX_WORD(V22, V26); \
			MIX_WORD(V23, V27); \
			V20 ^= RC20[r]; \
			V24 ^= RC24[r]; \
		} \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V30, V31, V32, V33); \
			SUB_CRUMB(V35, V36, V37, V34); \
			MIX_WORD(V30, V34); \
			MIX_WORD(V31, V35); \
			MIX_WORD(V32, V36); \
			MIX_WORD(V33, V37); \
			V30 ^= RC30[r]; \
			V34 ^= RC34[r]; \
		} \
	} while (0)

#endif
747 
/*
 * Local-variable view of a five-row state.
 *
 * DECL_STATE5        declares the 40 locals V00..V47 (Vjk = row j, word k).
 *                    The expansion ends with a semicolon; use it without one.
 * READ_STATE5(state) loads Vjk from (state)->V[j][k] for j = 0..4, k = 0..7.
 * WRITE_STATE5(state) stores the locals back into (state)->V[j][k].
 *
 * "state" is a pointer expression and is evaluated once per word.
 */
#define DECL_STATE5 \
	sph_u32 V00, V01, V02, V03, V04, V05, V06, V07; \
	sph_u32 V10, V11, V12, V13, V14, V15, V16, V17; \
	sph_u32 V20, V21, V22, V23, V24, V25, V26, V27; \
	sph_u32 V30, V31, V32, V33, V34, V35, V36, V37; \
	sph_u32 V40, V41, V42, V43, V44, V45, V46, V47;

#define READ_STATE5(state)   do { \
		V00 = (state)->V[0][0]; \
		V01 = (state)->V[0][1]; \
		V02 = (state)->V[0][2]; \
		V03 = (state)->V[0][3]; \
		V04 = (state)->V[0][4]; \
		V05 = (state)->V[0][5]; \
		V06 = (state)->V[0][6]; \
		V07 = (state)->V[0][7]; \
		V10 = (state)->V[1][0]; \
		V11 = (state)->V[1][1]; \
		V12 = (state)->V[1][2]; \
		V13 = (state)->V[1][3]; \
		V14 = (state)->V[1][4]; \
		V15 = (state)->V[1][5]; \
		V16 = (state)->V[1][6]; \
		V17 = (state)->V[1][7]; \
		V20 = (state)->V[2][0]; \
		V21 = (state)->V[2][1]; \
		V22 = (state)->V[2][2]; \
		V23 = (state)->V[2][3]; \
		V24 = (state)->V[2][4]; \
		V25 = (state)->V[2][5]; \
		V26 = (state)->V[2][6]; \
		V27 = (state)->V[2][7]; \
		V30 = (state)->V[3][0]; \
		V31 = (state)->V[3][1]; \
		V32 = (state)->V[3][2]; \
		V33 = (state)->V[3][3]; \
		V34 = (state)->V[3][4]; \
		V35 = (state)->V[3][5]; \
		V36 = (state)->V[3][6]; \
		V37 = (state)->V[3][7]; \
		V40 = (state)->V[4][0]; \
		V41 = (state)->V[4][1]; \
		V42 = (state)->V[4][2]; \
		V43 = (state)->V[4][3]; \
		V44 = (state)->V[4][4]; \
		V45 = (state)->V[4][5]; \
		V46 = (state)->V[4][6]; \
		V47 = (state)->V[4][7]; \
	} while (0)

#define WRITE_STATE5(state)   do { \
		(state)->V[0][0] = V00; \
		(state)->V[0][1] = V01; \
		(state)->V[0][2] = V02; \
		(state)->V[0][3] = V03; \
		(state)->V[0][4] = V04; \
		(state)->V[0][5] = V05; \
		(state)->V[0][6] = V06; \
		(state)->V[0][7] = V07; \
		(state)->V[1][0] = V10; \
		(state)->V[1][1] = V11; \
		(state)->V[1][2] = V12; \
		(state)->V[1][3] = V13; \
		(state)->V[1][4] = V14; \
		(state)->V[1][5] = V15; \
		(state)->V[1][6] = V16; \
		(state)->V[1][7] = V17; \
		(state)->V[2][0] = V20; \
		(state)->V[2][1] = V21; \
		(state)->V[2][2] = V22; \
		(state)->V[2][3] = V23; \
		(state)->V[2][4] = V24; \
		(state)->V[2][5] = V25; \
		(state)->V[2][6] = V26; \
		(state)->V[2][7] = V27; \
		(state)->V[3][0] = V30; \
		(state)->V[3][1] = V31; \
		(state)->V[3][2] = V32; \
		(state)->V[3][3] = V33; \
		(state)->V[3][4] = V34; \
		(state)->V[3][5] = V35; \
		(state)->V[3][6] = V36; \
		(state)->V[3][7] = V37; \
		(state)->V[4][0] = V40; \
		(state)->V[4][1] = V41; \
		(state)->V[4][2] = V42; \
		(state)->V[4][3] = V43; \
		(state)->V[4][4] = V44; \
		(state)->V[4][5] = V45; \
		(state)->V[4][6] = V46; \
		(state)->V[4][7] = V47; \
	} while (0)
840 
/*
 * MI5: mix one input block into the five-row state held in V00..V47.
 *
 * Expects in scope: the locals from DECL_STATE5 and a byte pointer "buf"
 * from which eight 32-bit words are decoded with sph_dec32be_aligned()
 * at offsets 0, 4, ..., 28 (so 32 bytes are read).
 *
 * Sequence, as written below:
 *   1. a = M2(V0 ^ V1 ^ V2 ^ V3 ^ V4); every row is XORed with a.
 *   2. A first chain of M2/XOR steps combines each row with the next one
 *      (V1 with V2, ..., V4 with V0), with b holding M2(V0) ^ V1.
 *   3. A second chain combines the rows in the opposite direction
 *      (V0 with V4, V4 with V3, ..., V1 with b).
 *      The statement order matters in both chains: each step reads rows
 *      already updated by earlier steps.
 *   4. The block M is XORed into V0, and M2 applied j times to M is XORed
 *      into Vj for j = 1..4.
 *
 * Note: "M2 = ..." assigns the local word M2 declared by DECL_TMP8(M); the
 * function-like macro M2 only expands when followed by a parenthesis.
 */
#define MI5   do { \
		DECL_TMP8(M) \
		DECL_TMP8(a) \
		DECL_TMP8(b) \
		M0 = sph_dec32be_aligned(buf +  0); \
		M1 = sph_dec32be_aligned(buf +  4); \
		M2 = sph_dec32be_aligned(buf +  8); \
		M3 = sph_dec32be_aligned(buf + 12); \
		M4 = sph_dec32be_aligned(buf + 16); \
		M5 = sph_dec32be_aligned(buf + 20); \
		M6 = sph_dec32be_aligned(buf + 24); \
		M7 = sph_dec32be_aligned(buf + 28); \
		XOR(a, V0, V1); \
		XOR(b, V2, V3); \
		XOR(a, a, b); \
		XOR(a, a, V4); \
		M2(a, a); \
		XOR(V0, a, V0); \
		XOR(V1, a, V1); \
		XOR(V2, a, V2); \
		XOR(V3, a, V3); \
		XOR(V4, a, V4); \
		M2(b, V0); \
		XOR(b, b, V1); \
		M2(V1, V1); \
		XOR(V1, V1, V2); \
		M2(V2, V2); \
		XOR(V2, V2, V3); \
		M2(V3, V3); \
		XOR(V3, V3, V4); \
		M2(V4, V4); \
		XOR(V4, V4, V0); \
		M2(V0, b); \
		XOR(V0, V0, V4); \
		M2(V4, V4); \
		XOR(V4, V4, V3); \
		M2(V3, V3); \
		XOR(V3, V3, V2); \
		M2(V2, V2); \
		XOR(V2, V2, V1); \
		M2(V1, V1); \
		XOR(V1, V1, b); \
		XOR(V0, V0, M); \
		M2(M, M); \
		XOR(V1, V1, M); \
		M2(M, M); \
		XOR(V2, V2, M); \
		M2(M, M); \
		XOR(V3, V3, M); \
		M2(M, M); \
		XOR(V4, V4, M); \
	} while (0)
893 
/*
 * TWEAK5: rotate words 4..7 of row j left by j bits, for j = 1, 2, 3 and 4
 * (row 0 is left unchanged). Operates on the locals from DECL_STATE5 and
 * is invoked at the start of P5.
 */
#define TWEAK5   do { \
		V14 = SPH_ROTL32(V14, 1); \
		V15 = SPH_ROTL32(V15, 1); \
		V16 = SPH_ROTL32(V16, 1); \
		V17 = SPH_ROTL32(V17, 1); \
		V24 = SPH_ROTL32(V24, 2); \
		V25 = SPH_ROTL32(V25, 2); \
		V26 = SPH_ROTL32(V26, 2); \
		V27 = SPH_ROTL32(V27, 2); \
		V34 = SPH_ROTL32(V34, 3); \
		V35 = SPH_ROTL32(V35, 3); \
		V36 = SPH_ROTL32(V36, 3); \
		V37 = SPH_ROTL32(V37, 3); \
		V44 = SPH_ROTL32(V44, 4); \
		V45 = SPH_ROTL32(V45, 4); \
		V46 = SPH_ROTL32(V46, 4); \
		V47 = SPH_ROTL32(V47, 4); \
	} while (0)
912 
/*
 * P5: permute the five-row state held in the DECL_STATE5 locals.
 *
 * Steps: TWEAK5, then for each row j eight rounds of
 *   SUB_CRUMB on words (0,1,2,3) and on words (5,6,7,4),
 *   MIX_WORD on the word pairs (0,4), (1,5), (2,6), (3,7),
 *   XOR of the round constants into word 0 and word 4.
 *
 * Parallel variant: rows 0/1 and then rows 2/3 are processed as packed
 * 64-bit words (even row in the low half, odd row in the high half,
 * constants RCW010/RCW014 and RCW230/RCW234); row 4 uses the 32-bit path
 * with RC40/RC44. The non-parallel variant runs five independent 32-bit
 * loops.
 *
 * The expansion declares a local "r" (and W0..W7 in the parallel case).
 */
#if SPH_LUFFA_PARALLEL

#define P5   do { \
		int r; \
		sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \
		TWEAK5; \
		W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \
		W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \
		W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \
		W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \
		W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \
		W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \
		W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \
		W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMBW(W0, W1, W2, W3); \
			SUB_CRUMBW(W5, W6, W7, W4); \
			MIX_WORDW(W0, W4); \
			MIX_WORDW(W1, W5); \
			MIX_WORDW(W2, W6); \
			MIX_WORDW(W3, W7); \
			W0 ^= RCW010[r]; \
			W4 ^= RCW014[r]; \
		} \
		V00 = SPH_T32((sph_u32)W0); \
		V10 = SPH_T32((sph_u32)(W0 >> 32)); \
		V01 = SPH_T32((sph_u32)W1); \
		V11 = SPH_T32((sph_u32)(W1 >> 32)); \
		V02 = SPH_T32((sph_u32)W2); \
		V12 = SPH_T32((sph_u32)(W2 >> 32)); \
		V03 = SPH_T32((sph_u32)W3); \
		V13 = SPH_T32((sph_u32)(W3 >> 32)); \
		V04 = SPH_T32((sph_u32)W4); \
		V14 = SPH_T32((sph_u32)(W4 >> 32)); \
		V05 = SPH_T32((sph_u32)W5); \
		V15 = SPH_T32((sph_u32)(W5 >> 32)); \
		V06 = SPH_T32((sph_u32)W6); \
		V16 = SPH_T32((sph_u32)(W6 >> 32)); \
		V07 = SPH_T32((sph_u32)W7); \
		V17 = SPH_T32((sph_u32)(W7 >> 32)); \
		W0 = (sph_u64)V20 | ((sph_u64)V30 << 32); \
		W1 = (sph_u64)V21 | ((sph_u64)V31 << 32); \
		W2 = (sph_u64)V22 | ((sph_u64)V32 << 32); \
		W3 = (sph_u64)V23 | ((sph_u64)V33 << 32); \
		W4 = (sph_u64)V24 | ((sph_u64)V34 << 32); \
		W5 = (sph_u64)V25 | ((sph_u64)V35 << 32); \
		W6 = (sph_u64)V26 | ((sph_u64)V36 << 32); \
		W7 = (sph_u64)V27 | ((sph_u64)V37 << 32); \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMBW(W0, W1, W2, W3); \
			SUB_CRUMBW(W5, W6, W7, W4); \
			MIX_WORDW(W0, W4); \
			MIX_WORDW(W1, W5); \
			MIX_WORDW(W2, W6); \
			MIX_WORDW(W3, W7); \
			W0 ^= RCW230[r]; \
			W4 ^= RCW234[r]; \
		} \
		V20 = SPH_T32((sph_u32)W0); \
		V30 = SPH_T32((sph_u32)(W0 >> 32)); \
		V21 = SPH_T32((sph_u32)W1); \
		V31 = SPH_T32((sph_u32)(W1 >> 32)); \
		V22 = SPH_T32((sph_u32)W2); \
		V32 = SPH_T32((sph_u32)(W2 >> 32)); \
		V23 = SPH_T32((sph_u32)W3); \
		V33 = SPH_T32((sph_u32)(W3 >> 32)); \
		V24 = SPH_T32((sph_u32)W4); \
		V34 = SPH_T32((sph_u32)(W4 >> 32)); \
		V25 = SPH_T32((sph_u32)W5); \
		V35 = SPH_T32((sph_u32)(W5 >> 32)); \
		V26 = SPH_T32((sph_u32)W6); \
		V36 = SPH_T32((sph_u32)(W6 >> 32)); \
		V27 = SPH_T32((sph_u32)W7); \
		V37 = SPH_T32((sph_u32)(W7 >> 32)); \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V40, V41, V42, V43); \
			SUB_CRUMB(V45, V46, V47, V44); \
			MIX_WORD(V40, V44); \
			MIX_WORD(V41, V45); \
			MIX_WORD(V42, V46); \
			MIX_WORD(V43, V47); \
			V40 ^= RC40[r]; \
			V44 ^= RC44[r]; \
		} \
	} while (0)

#else

#define P5   do { \
		int r; \
		TWEAK5; \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V00, V01, V02, V03); \
			SUB_CRUMB(V05, V06, V07, V04); \
			MIX_WORD(V00, V04); \
			MIX_WORD(V01, V05); \
			MIX_WORD(V02, V06); \
			MIX_WORD(V03, V07); \
			V00 ^= RC00[r]; \
			V04 ^= RC04[r]; \
		} \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V10, V11, V12, V13); \
			SUB_CRUMB(V15, V16, V17, V14); \
			MIX_WORD(V10, V14); \
			MIX_WORD(V11, V15); \
			MIX_WORD(V12, V16); \
			MIX_WORD(V13, V17); \
			V10 ^= RC10[r]; \
			V14 ^= RC14[r]; \
		} \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V20, V21, V22, V23); \
			SUB_CRUMB(V25, V26, V27, V24); \
			MIX_WORD(V20, V24); \
			MIX_WORD(V21, V25); \
			MIX_WORD(V22, V26); \
			MIX_WORD(V23, V27); \
			V20 ^= RC20[r]; \
			V24 ^= RC24[r]; \
		} \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V30, V31, V32, V33); \
			SUB_CRUMB(V35, V36, V37, V34); \
			MIX_WORD(V30, V34); \
			MIX_WORD(V31, V35); \
			MIX_WORD(V32, V36); \
			MIX_WORD(V33, V37); \
			V30 ^= RC30[r]; \
			V34 ^= RC34[r]; \
		} \
		for (r = 0; r < 8; r ++) { \
			SUB_CRUMB(V40, V41, V42, V43); \
			SUB_CRUMB(V45, V46, V47, V44); \
			MIX_WORD(V40, V44); \
			MIX_WORD(V41, V45); \
			MIX_WORD(V42, V46); \
			MIX_WORD(V43, V47); \
			V40 ^= RC40[r]; \
			V44 ^= RC44[r]; \
		} \
	} while (0)

#endif
1057 
1058 static void
1059 luffa3(sph_luffa224_context *sc, const void *data, size_t len)
1060 {
1061  unsigned char *buf;
1062  size_t ptr;
1063  DECL_STATE3
1064 
1065  buf = sc->buf;
1066  ptr = sc->ptr;
1067  if (len < (sizeof sc->buf) - ptr) {
1068  memcpy(buf + ptr, data, len);
1069  ptr += len;
1070  sc->ptr = ptr;
1071  return;
1072  }
1073 
1074  READ_STATE3(sc);
1075  while (len > 0) {
1076  size_t clen;
1077 
1078  clen = (sizeof sc->buf) - ptr;
1079  if (clen > len)
1080  clen = len;
1081  memcpy(buf + ptr, data, clen);
1082  ptr += clen;
1083  data = (const unsigned char *)data + clen;
1084  len -= clen;
1085  if (ptr == sizeof sc->buf) {
1086  MI3;
1087  P3;
1088  ptr = 0;
1089  }
1090  }
1091  WRITE_STATE3(sc);
1092  sc->ptr = ptr;
1093 }
1094 
1095 static void
1096 luffa3_close(sph_luffa224_context *sc, unsigned ub, unsigned n,
1097  void *dst, unsigned out_size_w32)
1098 {
1099  unsigned char *buf, *out;
1100  size_t ptr;
1101  unsigned z;
1102  int i;
1103  DECL_STATE3
1104 
1105  buf = sc->buf;
1106  ptr = sc->ptr;
1107  z = 0x80 >> n;
1108  buf[ptr ++] = ((ub & -z) | z) & 0xFF;
1109  memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
1110  READ_STATE3(sc);
1111  for (i = 0; i < 2; i ++) {
1112  MI3;
1113  P3;
1114  memset(buf, 0, sizeof sc->buf);
1115  }
1116  out = dst;
1117  sph_enc32be(out + 0, V00 ^ V10 ^ V20);
1118  sph_enc32be(out + 4, V01 ^ V11 ^ V21);
1119  sph_enc32be(out + 8, V02 ^ V12 ^ V22);
1120  sph_enc32be(out + 12, V03 ^ V13 ^ V23);
1121  sph_enc32be(out + 16, V04 ^ V14 ^ V24);
1122  sph_enc32be(out + 20, V05 ^ V15 ^ V25);
1123  sph_enc32be(out + 24, V06 ^ V16 ^ V26);
1124  if (out_size_w32 > 7)
1125  sph_enc32be(out + 28, V07 ^ V17 ^ V27);
1126 }
1127 
1128 static void
1129 luffa4(sph_luffa384_context *sc, const void *data, size_t len)
1130 {
1131  unsigned char *buf;
1132  size_t ptr;
1133  DECL_STATE4
1134 
1135  buf = sc->buf;
1136  ptr = sc->ptr;
1137  if (len < (sizeof sc->buf) - ptr) {
1138  memcpy(buf + ptr, data, len);
1139  ptr += len;
1140  sc->ptr = ptr;
1141  return;
1142  }
1143 
1144  READ_STATE4(sc);
1145  while (len > 0) {
1146  size_t clen;
1147 
1148  clen = (sizeof sc->buf) - ptr;
1149  if (clen > len)
1150  clen = len;
1151  memcpy(buf + ptr, data, clen);
1152  ptr += clen;
1153  data = (const unsigned char *)data + clen;
1154  len -= clen;
1155  if (ptr == sizeof sc->buf) {
1156  MI4;
1157  P4;
1158  ptr = 0;
1159  }
1160  }
1161  WRITE_STATE4(sc);
1162  sc->ptr = ptr;
1163 }
1164 
1165 static void
1166 luffa4_close(sph_luffa384_context *sc, unsigned ub, unsigned n, void *dst)
1167 {
1168  unsigned char *buf, *out;
1169  size_t ptr;
1170  unsigned z;
1171  int i;
1172  DECL_STATE4
1173 
1174  buf = sc->buf;
1175  ptr = sc->ptr;
1176  out = dst;
1177  z = 0x80 >> n;
1178  buf[ptr ++] = ((ub & -z) | z) & 0xFF;
1179  memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
1180  READ_STATE4(sc);
1181  for (i = 0; i < 3; i ++) {
1182  MI4;
1183  P4;
1184  switch (i) {
1185  case 0:
1186  memset(buf, 0, sizeof sc->buf);
1187  break;
1188  case 1:
1189  sph_enc32be(out + 0, V00 ^ V10 ^ V20 ^ V30);
1190  sph_enc32be(out + 4, V01 ^ V11 ^ V21 ^ V31);
1191  sph_enc32be(out + 8, V02 ^ V12 ^ V22 ^ V32);
1192  sph_enc32be(out + 12, V03 ^ V13 ^ V23 ^ V33);
1193  sph_enc32be(out + 16, V04 ^ V14 ^ V24 ^ V34);
1194  sph_enc32be(out + 20, V05 ^ V15 ^ V25 ^ V35);
1195  sph_enc32be(out + 24, V06 ^ V16 ^ V26 ^ V36);
1196  sph_enc32be(out + 28, V07 ^ V17 ^ V27 ^ V37);
1197  break;
1198  case 2:
1199  sph_enc32be(out + 32, V00 ^ V10 ^ V20 ^ V30);
1200  sph_enc32be(out + 36, V01 ^ V11 ^ V21 ^ V31);
1201  sph_enc32be(out + 40, V02 ^ V12 ^ V22 ^ V32);
1202  sph_enc32be(out + 44, V03 ^ V13 ^ V23 ^ V33);
1203  break;
1204  }
1205  }
1206 }
1207 
1208 static void
1209 luffa5(sph_luffa512_context *sc, const void *data, size_t len)
1210 {
1211  unsigned char *buf;
1212  size_t ptr;
1213  DECL_STATE5
1214 
1215  buf = sc->buf;
1216  ptr = sc->ptr;
1217  if (len < (sizeof sc->buf) - ptr) {
1218  memcpy(buf + ptr, data, len);
1219  ptr += len;
1220  sc->ptr = ptr;
1221  return;
1222  }
1223 
1224  READ_STATE5(sc);
1225  while (len > 0) {
1226  size_t clen;
1227 
1228  clen = (sizeof sc->buf) - ptr;
1229  if (clen > len)
1230  clen = len;
1231  memcpy(buf + ptr, data, clen);
1232  ptr += clen;
1233  data = (const unsigned char *)data + clen;
1234  len -= clen;
1235  if (ptr == sizeof sc->buf) {
1236  MI5;
1237  P5;
1238  ptr = 0;
1239  }
1240  }
1241  WRITE_STATE5(sc);
1242  sc->ptr = ptr;
1243 }
1244 
1245 static void
1246 luffa5_close(sph_luffa512_context *sc, unsigned ub, unsigned n, void *dst)
1247 {
1248  unsigned char *buf, *out;
1249  size_t ptr;
1250  unsigned z;
1251  int i;
1252  DECL_STATE5
1253 
1254  buf = sc->buf;
1255  ptr = sc->ptr;
1256  out = dst;
1257  z = 0x80 >> n;
1258  buf[ptr ++] = ((ub & -z) | z) & 0xFF;
1259  memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
1260  READ_STATE5(sc);
1261  for (i = 0; i < 3; i ++) {
1262  MI5;
1263  P5;
1264  switch (i) {
1265  case 0:
1266  memset(buf, 0, sizeof sc->buf);
1267  break;
1268  case 1:
1269  sph_enc32be(out + 0, V00 ^ V10 ^ V20 ^ V30 ^ V40);
1270  sph_enc32be(out + 4, V01 ^ V11 ^ V21 ^ V31 ^ V41);
1271  sph_enc32be(out + 8, V02 ^ V12 ^ V22 ^ V32 ^ V42);
1272  sph_enc32be(out + 12, V03 ^ V13 ^ V23 ^ V33 ^ V43);
1273  sph_enc32be(out + 16, V04 ^ V14 ^ V24 ^ V34 ^ V44);
1274  sph_enc32be(out + 20, V05 ^ V15 ^ V25 ^ V35 ^ V45);
1275  sph_enc32be(out + 24, V06 ^ V16 ^ V26 ^ V36 ^ V46);
1276  sph_enc32be(out + 28, V07 ^ V17 ^ V27 ^ V37 ^ V47);
1277  break;
1278  case 2:
1279  sph_enc32be(out + 32, V00 ^ V10 ^ V20 ^ V30 ^ V40);
1280  sph_enc32be(out + 36, V01 ^ V11 ^ V21 ^ V31 ^ V41);
1281  sph_enc32be(out + 40, V02 ^ V12 ^ V22 ^ V32 ^ V42);
1282  sph_enc32be(out + 44, V03 ^ V13 ^ V23 ^ V33 ^ V43);
1283  sph_enc32be(out + 48, V04 ^ V14 ^ V24 ^ V34 ^ V44);
1284  sph_enc32be(out + 52, V05 ^ V15 ^ V25 ^ V35 ^ V45);
1285  sph_enc32be(out + 56, V06 ^ V16 ^ V26 ^ V36 ^ V46);
1286  sph_enc32be(out + 60, V07 ^ V17 ^ V27 ^ V37 ^ V47);
1287  break;
1288  }
1289  }
1290 }
1291 
1292 /* see sph_luffa.h */
1293 void
1295 {
1297 
1298  sc = cc;
1299  memcpy(sc->V, V_INIT, sizeof(sc->V));
1300  sc->ptr = 0;
1301 }
1302 
1303 /* see sph_luffa.h */
1304 void
1305 sph_luffa224(void *cc, const void *data, size_t len)
1306 {
1307  luffa3(cc, data, len);
1308 }
1309 
/*
 * Terminate the current Luffa-224 computation and output the result into
 * `dst` (see sph_luffa.h). Same as closing with zero extra bits.
 */
void
sph_luffa224_close(void *cc, void *dst)
{
	const unsigned extra_bits = 0;
	const unsigned extra_count = 0;

	sph_luffa224_addbits_and_close(cc, extra_bits, extra_count, dst);
}
1316 
1317 /* see sph_luffa.h */
1318 void
1319 sph_luffa224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1320 {
1321  luffa3_close(cc, ub, n, dst, 7);
1322  sph_luffa224_init(cc);
1323 }
1324 
1325 /* see sph_luffa.h */
1326 void
1328 {
1330 
1331  sc = cc;
1332  memcpy(sc->V, V_INIT, sizeof(sc->V));
1333  sc->ptr = 0;
1334 }
1335 
1336 /* see sph_luffa.h */
1337 void
1338 sph_luffa256(void *cc, const void *data, size_t len)
1339 {
1340  luffa3(cc, data, len);
1341 }
1342 
/*
 * Terminate the current Luffa-256 computation and output the result into
 * `dst` (see sph_luffa.h). Same as closing with zero extra bits.
 */
void
sph_luffa256_close(void *cc, void *dst)
{
	const unsigned extra_bits = 0;
	const unsigned extra_count = 0;

	sph_luffa256_addbits_and_close(cc, extra_bits, extra_count, dst);
}
1349 
1350 /* see sph_luffa.h */
1351 void
1352 sph_luffa256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1353 {
1354  luffa3_close(cc, ub, n, dst, 8);
1355  sph_luffa256_init(cc);
1356 }
1357 
1358 /* see sph_luffa.h */
1359 void
1361 {
1363 
1364  sc = cc;
1365  memcpy(sc->V, V_INIT, sizeof(sc->V));
1366  sc->ptr = 0;
1367 }
1368 
1369 /* see sph_luffa.h */
1370 void
1371 sph_luffa384(void *cc, const void *data, size_t len)
1372 {
1373  luffa4(cc, data, len);
1374 }
1375 
/*
 * Terminate the current Luffa-384 computation and output the result into
 * `dst` (see sph_luffa.h). Same as closing with zero extra bits.
 */
void
sph_luffa384_close(void *cc, void *dst)
{
	const unsigned extra_bits = 0;
	const unsigned extra_count = 0;

	sph_luffa384_addbits_and_close(cc, extra_bits, extra_count, dst);
}
1382 
1383 /* see sph_luffa.h */
1384 void
1385 sph_luffa384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1386 {
1387  luffa4_close(cc, ub, n, dst);
1388  sph_luffa384_init(cc);
1389 }
1390 
1391 /* see sph_luffa.h */
1392 void
1394 {
1396 
1397  sc = cc;
1398  memcpy(sc->V, V_INIT, sizeof(sc->V));
1399  sc->ptr = 0;
1400 }
1401 
1402 /* see sph_luffa.h */
1403 void
1404 sph_luffa512(void *cc, const void *data, size_t len)
1405 {
1406  luffa5(cc, data, len);
1407 }
1408 
/*
 * Terminate the current Luffa-512 computation and output the result into
 * `dst` (see sph_luffa.h). Same as closing with zero extra bits.
 */
void
sph_luffa512_close(void *cc, void *dst)
{
	const unsigned extra_bits = 0;
	const unsigned extra_count = 0;

	sph_luffa512_addbits_and_close(cc, extra_bits, extra_count, dst);
}
1415 
1416 /* see sph_luffa.h */
1417 void
1418 sph_luffa512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1419 {
1420  luffa5_close(cc, ub, n, dst);
1421  sph_luffa512_init(cc);
1422 }
1423 
1424 #ifdef __cplusplus
1425 }
1426 #endif
void sph_luffa384_close(void *cc, void *dst)
Terminate the current Luffa-384 computation and output the result into the provided buffer.
Definition: luffa.c:1378
void sph_luffa224(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: luffa.c:1305
#define P5
Definition: luffa.c:1001
#define WRITE_STATE3(state)
Definition: luffa.c:342
void sph_luffa384_init(void *cc)
Initialize a Luffa-384 context.
Definition: luffa.c:1360
void sph_luffa256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer.
Definition: luffa.c:1352
#define DECL_STATE3
Definition: luffa.c:310
This structure is a context for Luffa-224 computations: it contains the intermediate values and some data from the last entered block.
Definition: sph_luffa.h:76
unsigned char buf[32]
Definition: sph_luffa.h:95
void sph_luffa256_close(void *cc, void *dst)
Terminate the current Luffa-256 computation and output the result into the provided buffer.
Definition: luffa.c:1345
void sph_luffa224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer.
Definition: luffa.c:1319
This structure is a context for Luffa-384 computations.
Definition: sph_luffa.h:93
void sph_luffa512_close(void *cc, void *dst)
Terminate the current Luffa-512 computation and output the result into the provided buffer.
Definition: luffa.c:1411
void sph_luffa512(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: luffa.c:1404
#define MI5
Definition: luffa.c:841
#define SPH_C32(x)
Definition: sph_types.h:873
void sph_luffa256(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: luffa.c:1338
void sph_luffa224_close(void *cc, void *dst)
Terminate the current Luffa-224 computation and output the result into the provided buffer.
Definition: luffa.c:1312
unsigned char buf[32]
Definition: sph_luffa.h:106
sph_u32 V[3][8]
Definition: sph_luffa.h:80
#define P4
Definition: luffa.c:701
#define P3
Definition: luffa.c:458
#define READ_STATE5(state)
Definition: luffa.c:755
sph_u32 V[4][8]
Definition: sph_luffa.h:97
void sph_luffa512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer.
Definition: luffa.c:1418
Luffa interface.
#define MI4
Definition: luffa.c:571
#define DECL_STATE5
Definition: luffa.c:748
#define MI3
Definition: luffa.c:369
void sph_luffa256_init(void *cc)
Initialize a Luffa-256 context.
Definition: luffa.c:1327
unsigned char buf[32]
Definition: sph_luffa.h:78
void * memcpy(void *a, const void *b, size_t c)
This structure is a context for Luffa-512 computations.
Definition: sph_luffa.h:104
#define READ_STATE4(state)
Definition: luffa.c:501
unsigned long sph_u32
Definition: sph_types.h:870
#define READ_STATE3(state)
Definition: luffa.c:315
#define WRITE_STATE5(state)
Definition: luffa.c:798
#define DECL_STATE4
Definition: luffa.c:495
#define WRITE_STATE4(state)
Definition: luffa.c:536
sph_u32 V[5][8]
Definition: sph_luffa.h:108
void sph_luffa224_init(void *cc)
Initialize a Luffa-224 context.
Definition: luffa.c:1294
void sph_luffa384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer.
Definition: luffa.c:1385
void sph_luffa384(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: luffa.c:1371
void sph_luffa512_init(void *cc)
Initialize a Luffa-512 context.
Definition: luffa.c:1393