Raven Core  3.0.0
P2P Digital Currency
jh.c
Go to the documentation of this file.
1 /* $Id: jh.c 255 2011-06-07 19:50:20Z tp $ */
2 /*
3  * JH implementation.
4  *
5  * ==========================(LICENSE BEGIN)============================
6  *
7  * Copyright (c) 2007-2010 Projet RNRT SAPHIR
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the
11  * "Software"), to deal in the Software without restriction, including
12  * without limitation the rights to use, copy, modify, merge, publish,
13  * distribute, sublicense, and/or sell copies of the Software, and to
14  * permit persons to whom the Software is furnished to do so, subject to
15  * the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be
18  * included in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27  *
28  * ===========================(LICENSE END)=============================
29  *
30  * @author Thomas Pornin <thomas.pornin@cryptolog.com>
31  */
32 
33 #include <stddef.h>
34 #include <string.h>
35 
36 #include "sph_jh.h"
37 
38 #ifdef __cplusplus
39 extern "C"{
40 #endif
41 
42 
/*
 * Build-time configuration.
 *
 * SPH_SMALL_FOOTPRINT_JH defaults to 1 when the general SPH_SMALL_FOOTPRINT
 * switch is nonzero and no JH-specific value was supplied.
 */
43 #if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_JH
44 #define SPH_SMALL_FOOTPRINT_JH 1
45 #endif
46 
/* Select the 64-bit code path by default when SPH_64_TRUE is set. */
47 #if !defined SPH_JH_64 && SPH_64_TRUE
48 #define SPH_JH_64 1
49 #endif
50 
/*
 * SPH_JH_64 is forced off when SPH_64 is not set; this also overrides an
 * SPH_JH_64 value supplied by the build.
 */
51 #if !SPH_64
52 #undef SPH_JH_64
53 #endif
54 
/* Disable MSVC warning C4146 for this file. */
55 #ifdef _MSC_VER
56 #pragma warning (disable: 4146)
57 #endif
58 
59 /*
60  * The internal bitslice representation may use either big-endian or
61  * little-endian (true bitslice operations do not care about the bit
62  * ordering, and the bit-swapping linear operations in JH happen to
63  * be invariant through endianness-swapping). The constants must be
64  * defined according to the chosen endianness; we use some
65  * byte-swapping macros for that.
 *
 * C32e(x) and C64e(x) yield the constant x in the chosen layout: the bytes
 * are reversed when SPH_LITTLE_ENDIAN is set, and x is used unchanged
 * otherwise. dec32e_aligned, enc32e, dec64e_aligned and enc64e are aliases
 * for the sph_ little-endian or big-endian helpers of the same layout.
 * The 64-bit names are only defined when SPH_64 is set.
66  */
67 
68 #if SPH_LITTLE_ENDIAN
69 
70 #define C32e(x) ((SPH_C32(x) >> 24) \
71  | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \
72  | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \
73  | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
74 #define dec32e_aligned sph_dec32le_aligned
75 #define enc32e sph_enc32le
76 
77 #if SPH_64
78 #define C64e(x) ((SPH_C64(x) >> 56) \
79  | ((SPH_C64(x) >> 40) & SPH_C64(0x000000000000FF00)) \
80  | ((SPH_C64(x) >> 24) & SPH_C64(0x0000000000FF0000)) \
81  | ((SPH_C64(x) >> 8) & SPH_C64(0x00000000FF000000)) \
82  | ((SPH_C64(x) << 8) & SPH_C64(0x000000FF00000000)) \
83  | ((SPH_C64(x) << 24) & SPH_C64(0x0000FF0000000000)) \
84  | ((SPH_C64(x) << 40) & SPH_C64(0x00FF000000000000)) \
85  | ((SPH_C64(x) << 56) & SPH_C64(0xFF00000000000000)))
86 #define dec64e_aligned sph_dec64le_aligned
87 #define enc64e sph_enc64le
88 #endif
89 
90 #else
91 
92 #define C32e(x) SPH_C32(x)
93 #define dec32e_aligned sph_dec32be_aligned
94 #define enc32e sph_enc32be
95 #if SPH_64
96 #define C64e(x) SPH_C64(x)
97 #define dec64e_aligned sph_dec64be_aligned
98 #define enc64e sph_enc64be
99 #endif
100 
101 #endif
102 
/*
 * Sb(x0, x1, x2, x3, c): in-place boolean update of the four words x0..x3,
 * controlled by the constant word c. Only bitwise operators are used, so
 * every bit position is processed independently of the others.
 *
 * The expansion assigns to a variable named tmp that must already be in
 * scope at the point of use (DECL_STATE declares it). The statements are
 * order-dependent: later lines read values written by earlier ones.
 * Arguments are expanded several times and must be plain lvalues.
 *
 * NOTE(review): presumably the bitsliced JH S-box layer, with each bit of c
 * choosing between the two S-boxes -- confirm against the JH specification.
 */
103 #define Sb(x0, x1, x2, x3, c) do { \
104  x3 = ~x3; \
105  x0 ^= (c) & ~x2; \
106  tmp = (c) ^ (x0 & x1); \
107  x0 ^= x2 & x3; \
108  x3 ^= ~x1 & x2; \
109  x1 ^= x0 & x2; \
110  x2 ^= x0 & ~x3; \
111  x0 ^= x1 | x3; \
112  x3 ^= x1 & x2; \
113  x1 ^= tmp & x0; \
114  x2 ^= tmp; \
115  } while (0)
116 
/*
 * Lb(x0, ..., x7): in-place XOR mixing of eight words. x4..x7 are updated
 * first from x0..x3; x0..x3 are then updated from the freshly written
 * x4..x7, so the statement order matters. Only XOR is used.
 *
 * NOTE(review): presumably the JH linear layer in bitsliced form -- confirm
 * against the JH specification.
 */
117 #define Lb(x0, x1, x2, x3, x4, x5, x6, x7) do { \
118  x4 ^= x1; \
119  x5 ^= x2; \
120  x6 ^= x3 ^ x0; \
121  x7 ^= x0; \
122  x0 ^= x5; \
123  x1 ^= x6; \
124  x2 ^= x7 ^ x4; \
125  x3 ^= x4; \
126  } while (0)
127 
128 #if SPH_JH_64
129 
/*
 * Round constants for the 64-bit code path, each written through C64e.
 * The Ceven_ and Codd_ accessors index this table with (r << 2), i.e. four
 * words per round index; E8 uses round indices 0..41, which matches the
 * 168 entries below.
 */
130 static const sph_u64 C[] = {
131  C64e(0x72d5dea2df15f867), C64e(0x7b84150ab7231557),
132  C64e(0x81abd6904d5a87f6), C64e(0x4e9f4fc5c3d12b40),
133  C64e(0xea983ae05c45fa9c), C64e(0x03c5d29966b2999a),
134  C64e(0x660296b4f2bb538a), C64e(0xb556141a88dba231),
135  C64e(0x03a35a5c9a190edb), C64e(0x403fb20a87c14410),
136  C64e(0x1c051980849e951d), C64e(0x6f33ebad5ee7cddc),
137  C64e(0x10ba139202bf6b41), C64e(0xdc786515f7bb27d0),
138  C64e(0x0a2c813937aa7850), C64e(0x3f1abfd2410091d3),
139  C64e(0x422d5a0df6cc7e90), C64e(0xdd629f9c92c097ce),
140  C64e(0x185ca70bc72b44ac), C64e(0xd1df65d663c6fc23),
141  C64e(0x976e6c039ee0b81a), C64e(0x2105457e446ceca8),
142  C64e(0xeef103bb5d8e61fa), C64e(0xfd9697b294838197),
143  C64e(0x4a8e8537db03302f), C64e(0x2a678d2dfb9f6a95),
144  C64e(0x8afe7381f8b8696c), C64e(0x8ac77246c07f4214),
145  C64e(0xc5f4158fbdc75ec4), C64e(0x75446fa78f11bb80),
146  C64e(0x52de75b7aee488bc), C64e(0x82b8001e98a6a3f4),
147  C64e(0x8ef48f33a9a36315), C64e(0xaa5f5624d5b7f989),
148  C64e(0xb6f1ed207c5ae0fd), C64e(0x36cae95a06422c36),
149  C64e(0xce2935434efe983d), C64e(0x533af974739a4ba7),
150  C64e(0xd0f51f596f4e8186), C64e(0x0e9dad81afd85a9f),
151  C64e(0xa7050667ee34626a), C64e(0x8b0b28be6eb91727),
152  C64e(0x47740726c680103f), C64e(0xe0a07e6fc67e487b),
153  C64e(0x0d550aa54af8a4c0), C64e(0x91e3e79f978ef19e),
154  C64e(0x8676728150608dd4), C64e(0x7e9e5a41f3e5b062),
155  C64e(0xfc9f1fec4054207a), C64e(0xe3e41a00cef4c984),
156  C64e(0x4fd794f59dfa95d8), C64e(0x552e7e1124c354a5),
157  C64e(0x5bdf7228bdfe6e28), C64e(0x78f57fe20fa5c4b2),
158  C64e(0x05897cefee49d32e), C64e(0x447e9385eb28597f),
159  C64e(0x705f6937b324314a), C64e(0x5e8628f11dd6e465),
160  C64e(0xc71b770451b920e7), C64e(0x74fe43e823d4878a),
161  C64e(0x7d29e8a3927694f2), C64e(0xddcb7a099b30d9c1),
162  C64e(0x1d1b30fb5bdc1be0), C64e(0xda24494ff29c82bf),
163  C64e(0xa4e7ba31b470bfff), C64e(0x0d324405def8bc48),
164  C64e(0x3baefc3253bbd339), C64e(0x459fc3c1e0298ba0),
165  C64e(0xe5c905fdf7ae090f), C64e(0x947034124290f134),
166  C64e(0xa271b701e344ed95), C64e(0xe93b8e364f2f984a),
167  C64e(0x88401d63a06cf615), C64e(0x47c1444b8752afff),
168  C64e(0x7ebb4af1e20ac630), C64e(0x4670b6c5cc6e8ce6),
169  C64e(0xa4d5a456bd4fca00), C64e(0xda9d844bc83e18ae),
170  C64e(0x7357ce453064d1ad), C64e(0xe8a6ce68145c2567),
171  C64e(0xa3da8cf2cb0ee116), C64e(0x33e906589a94999a),
172  C64e(0x1f60b220c26f847b), C64e(0xd1ceac7fa0d18518),
173  C64e(0x32595ba18ddd19d3), C64e(0x509a1cc0aaa5b446),
174  C64e(0x9f3d6367e4046bba), C64e(0xf6ca19ab0b56ee7e),
175  C64e(0x1fb179eaa9282174), C64e(0xe9bdf7353b3651ee),
176  C64e(0x1d57ac5a7550d376), C64e(0x3a46c2fea37d7001),
177  C64e(0xf735c1af98a4d842), C64e(0x78edec209e6b6779),
178  C64e(0x41836315ea3adba8), C64e(0xfac33b4d32832c83),
179  C64e(0xa7403b1f1c2747f3), C64e(0x5940f034b72d769a),
180  C64e(0xe73e4e6cd2214ffd), C64e(0xb8fd8d39dc5759ef),
181  C64e(0x8d9b0c492b49ebda), C64e(0x5ba2d74968f3700d),
182  C64e(0x7d3baed07a8d5584), C64e(0xf5a5e9f0e4f88e65),
183  C64e(0xa0b8a2f436103b53), C64e(0x0ca8079e753eec5a),
184  C64e(0x9168949256e8884f), C64e(0x5bb05c55f8babc4c),
185  C64e(0xe3bb3b99f387947b), C64e(0x75daf4d6726b1c5d),
186  C64e(0x64aeac28dc34b36d), C64e(0x6c34a550b828db71),
187  C64e(0xf861e2f2108d512a), C64e(0xe3db643359dd75fc),
188  C64e(0x1cacbcf143ce3fa2), C64e(0x67bbd13c02e843b0),
189  C64e(0x330a5bca8829a175), C64e(0x7f34194db416535c),
190  C64e(0x923b94c30e794d1e), C64e(0x797475d7b6eeaf3f),
191  C64e(0xeaa8d4f7be1a3921), C64e(0x5cf47e094c232751),
192  C64e(0x26a32453ba323cd2), C64e(0x44a3174a6da6d5ad),
193  C64e(0xb51d3ea6aff2c908), C64e(0x83593d98916b3c56),
194  C64e(0x4cf87ca17286604d), C64e(0x46e23ecc086ec7f6),
195  C64e(0x2f9833b3b1bc765e), C64e(0x2bd666a5efc4e62a),
196  C64e(0x06f4b6e8bec1d436), C64e(0x74ee8215bcef2163),
197  C64e(0xfdc14e0df453c969), C64e(0xa77d5ac406585826),
198  C64e(0x7ec1141606e0fa16), C64e(0x7e90af3d28639d3f),
199  C64e(0xd2c9f2e3009bd20c), C64e(0x5faace30b7d40c30),
200  C64e(0x742a5116f2e03298), C64e(0x0deb30d8e3cef89a),
201  C64e(0x4bc59e7bb5f17992), C64e(0xff51e66e048668d3),
202  C64e(0x9b234d57e6966731), C64e(0xcce6a6f3170a7505),
203  C64e(0xb17681d913326cce), C64e(0x3c175284f805a262),
204  C64e(0xf42bcbb378471547), C64e(0xff46548223936a48),
205  C64e(0x38df58074e5e6565), C64e(0xf2fc7c89fc86508e),
206  C64e(0x31702e44d00bca86), C64e(0xf04009a23078474e),
207  C64e(0x65a0ee39d1f73883), C64e(0xf75ee937e42c3abd),
208  C64e(0x2197b2260113f86f), C64e(0xa344edd1ef9fdee7),
209  C64e(0x8ba0df15762592d9), C64e(0x3c85f7f612dc42be),
210  C64e(0xd8a7ec7cab27b07e), C64e(0x538d7ddaaa3ea8de),
211  C64e(0xaa25ce93bd0269d8), C64e(0x5af643fd1a7308f9),
212  C64e(0xc05fefda174a19a5), C64e(0x974d66334cfd216a),
213  C64e(0x35b49831db411570), C64e(0xea1e0fbbedcd549b),
214  C64e(0x9ad063a151974072), C64e(0xf6759dbf91476fe2)
215 };
216 
/*
 * Round-constant accessors for the 64-bit path: round index r owns
 * C[4r] .. C[4r + 3]. The first two words are the "even" pair (hi, lo) and
 * the last two the "odd" pair (hi, lo). S() builds these names by pasting
 * its cb argument with "hi" / "lo".
 */
217 #define Ceven_hi(r) (C[((r) << 2) + 0])
218 #define Ceven_lo(r) (C[((r) << 2) + 1])
219 #define Codd_hi(r) (C[((r) << 2) + 2])
220 #define Codd_lo(r) (C[((r) << 2) + 3])
221 
/*
 * S(x0, x1, x2, x3, cb, r): run Sb on the h halves and then on the l halves
 * of four state words. x0..x3 are name prefixes (e.g. h0) that get "h" / "l"
 * pasted on; cb is an accessor prefix (Ceven_ or Codd_) that gets "hi" / "lo"
 * pasted on to pick the constants for round index r.
 */
222 #define S(x0, x1, x2, x3, cb, r) do { \
223  Sb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, cb ## hi(r)); \
224  Sb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, cb ## lo(r)); \
225  } while (0)
226 
/*
 * L(x0, ..., x7): run Lb on the h halves and then on the l halves of eight
 * state words. The arguments are name prefixes (e.g. h0) that get "h" / "l"
 * pasted on.
 */
227 #define L(x0, x1, x2, x3, x4, x5, x6, x7) do { \
228  Lb(x0 ## h, x1 ## h, x2 ## h, x3 ## h, \
229  x4 ## h, x5 ## h, x6 ## h, x7 ## h); \
230  Lb(x0 ## l, x1 ## l, x2 ## l, x3 ## l, \
231  x4 ## l, x5 ## l, x6 ## l, x7 ## l); \
232  } while (0)
233 
/*
 * Wz(x, c, n): in each 64-bit half of x (x##h, then x##l), exchange the
 * bits selected by mask c with the bits n positions above them:
 *   v = ((v >> n) & c) | ((v & c) << n)
 * x is a name prefix, not an expression.
 */
234 #define Wz(x, c, n) do { \
235  sph_u64 t = (x ## h & (c)) << (n); \
236  x ## h = ((x ## h >> (n)) & (c)) | t; \
237  t = (x ## l & (c)) << (n); \
238  x ## l = ((x ## l >> (n)) & (c)) | t; \
239  } while (0)
240 
/*
 * Bit-group swaps for the 64-bit path. W0..W5 swap adjacent groups of
 * 1, 2, 4, 8, 16 and 32 bits inside each 64-bit half of x; W6 swaps the
 * two halves (x##h and x##l) with each other. SLu selects one of these by
 * pasting a digit onto "W".
 */
241 #define W0(x) Wz(x, SPH_C64(0x5555555555555555), 1)
242 #define W1(x) Wz(x, SPH_C64(0x3333333333333333), 2)
243 #define W2(x) Wz(x, SPH_C64(0x0F0F0F0F0F0F0F0F), 4)
244 #define W3(x) Wz(x, SPH_C64(0x00FF00FF00FF00FF), 8)
245 #define W4(x) Wz(x, SPH_C64(0x0000FFFF0000FFFF), 16)
246 #define W5(x) Wz(x, SPH_C64(0x00000000FFFFFFFF), 32)
247 #define W6(x) do { \
248  sph_u64 t = x ## h; \
249  x ## h = x ## l; \
250  x ## l = t; \
251  } while (0)
252 
/*
 * DECL_STATE (64-bit path): declares the local working copy of the state,
 * eight words h0..h7 each split into an h and an l half, plus the scratch
 * variable tmp that Sb writes to. The expansion is a list of declarations
 * and already ends with a semicolon.
 */
253 #define DECL_STATE \
254  sph_u64 h0h, h1h, h2h, h3h, h4h, h5h, h6h, h7h; \
255  sph_u64 h0l, h1l, h2l, h3l, h4l, h5l, h6l, h7l; \
256  sph_u64 tmp;
257 
/*
 * READ_STATE(state) (64-bit path): load (state)->H.wide[0..15] into the
 * locals declared by DECL_STATE. Word k of the state occupies slots 2k (the
 * h half) and 2k + 1 (the l half).
 */
258 #define READ_STATE(state) do { \
259  h0h = (state)->H.wide[ 0]; \
260  h0l = (state)->H.wide[ 1]; \
261  h1h = (state)->H.wide[ 2]; \
262  h1l = (state)->H.wide[ 3]; \
263  h2h = (state)->H.wide[ 4]; \
264  h2l = (state)->H.wide[ 5]; \
265  h3h = (state)->H.wide[ 6]; \
266  h3l = (state)->H.wide[ 7]; \
267  h4h = (state)->H.wide[ 8]; \
268  h4l = (state)->H.wide[ 9]; \
269  h5h = (state)->H.wide[10]; \
270  h5l = (state)->H.wide[11]; \
271  h6h = (state)->H.wide[12]; \
272  h6l = (state)->H.wide[13]; \
273  h7h = (state)->H.wide[14]; \
274  h7l = (state)->H.wide[15]; \
275  } while (0)
276 
/*
 * WRITE_STATE(state) (64-bit path): store the DECL_STATE locals back into
 * (state)->H.wide[0..15], using the same slot layout as READ_STATE.
 */
277 #define WRITE_STATE(state) do { \
278  (state)->H.wide[ 0] = h0h; \
279  (state)->H.wide[ 1] = h0l; \
280  (state)->H.wide[ 2] = h1h; \
281  (state)->H.wide[ 3] = h1l; \
282  (state)->H.wide[ 4] = h2h; \
283  (state)->H.wide[ 5] = h2l; \
284  (state)->H.wide[ 6] = h3h; \
285  (state)->H.wide[ 7] = h3l; \
286  (state)->H.wide[ 8] = h4h; \
287  (state)->H.wide[ 9] = h4l; \
288  (state)->H.wide[10] = h5h; \
289  (state)->H.wide[11] = h5l; \
290  (state)->H.wide[12] = h6h; \
291  (state)->H.wide[13] = h6l; \
292  (state)->H.wide[14] = h7h; \
293  (state)->H.wide[15] = h7l; \
294  } while (0)
295 
/*
 * INPUT_BUF1 (64-bit path): decode the 64 bytes at buf into eight words
 * m0h..m3l and XOR them into h0..h3. The expansion is declarations followed
 * by statements (no do/while wrapper), so it must be placed where
 * declarations are allowed, and the m* variables stay in scope for
 * INPUT_BUF2. It uses a variable named buf from the enclosing scope.
 *
 * NOTE(review): the _aligned decoders are used, so buf is presumably
 * expected to be suitably aligned -- confirm against sph_types.h.
 */
296 #define INPUT_BUF1 \
297  sph_u64 m0h = dec64e_aligned(buf + 0); \
298  sph_u64 m0l = dec64e_aligned(buf + 8); \
299  sph_u64 m1h = dec64e_aligned(buf + 16); \
300  sph_u64 m1l = dec64e_aligned(buf + 24); \
301  sph_u64 m2h = dec64e_aligned(buf + 32); \
302  sph_u64 m2l = dec64e_aligned(buf + 40); \
303  sph_u64 m3h = dec64e_aligned(buf + 48); \
304  sph_u64 m3l = dec64e_aligned(buf + 56); \
305  h0h ^= m0h; \
306  h0l ^= m0l; \
307  h1h ^= m1h; \
308  h1l ^= m1l; \
309  h2h ^= m2h; \
310  h2l ^= m2l; \
311  h3h ^= m3h; \
312  h3l ^= m3l;
313 
/*
 * INPUT_BUF2 (64-bit path): XOR the message words m0h..m3l into h4..h7.
 * It must be expanded in the same scope as INPUT_BUF1, which declares the
 * m* variables.
 */
314 #define INPUT_BUF2 \
315  h4h ^= m0h; \
316  h4l ^= m0l; \
317  h5h ^= m1h; \
318  h5l ^= m1l; \
319  h6h ^= m2h; \
320  h6l ^= m2l; \
321  h7h ^= m3h; \
322  h7l ^= m3l;
323 
/*
 * Sixteen 64-bit words written through C64e, one per H.wide slot.
 * NOTE(review): presumably the initial state for the 224-bit output
 * variant, handed to jh_init by code outside this view -- confirm.
 */
324 static const sph_u64 IV224[] = {
325  C64e(0x2dfedd62f99a98ac), C64e(0xae7cacd619d634e7),
326  C64e(0xa4831005bc301216), C64e(0xb86038c6c9661494),
327  C64e(0x66d9899f2580706f), C64e(0xce9ea31b1d9b1adc),
328  C64e(0x11e8325f7b366e10), C64e(0xf994857f02fa06c1),
329  C64e(0x1b4f1b5cd8c840b3), C64e(0x97f6a17f6e738099),
330  C64e(0xdcdf93a5adeaa3d3), C64e(0xa431e8dec9539a68),
331  C64e(0x22b4a98aec86a1e4), C64e(0xd574ac959ce56cf0),
332  C64e(0x15960deab5ab2bbf), C64e(0x9611dcf0dd64ea6e)
333 };
334 
/*
 * Sixteen 64-bit words written through C64e, one per H.wide slot.
 * NOTE(review): presumably the initial state for the 256-bit output
 * variant, handed to jh_init by code outside this view -- confirm.
 */
335 static const sph_u64 IV256[] = {
336  C64e(0xeb98a3412c20d3eb), C64e(0x92cdbe7b9cb245c1),
337  C64e(0x1c93519160d4c7fa), C64e(0x260082d67e508a03),
338  C64e(0xa4239e267726b945), C64e(0xe0fb1a48d41a9477),
339  C64e(0xcdb5ab26026b177a), C64e(0x56f024420fff2fa8),
340  C64e(0x71a396897f2e4d75), C64e(0x1d144908f77de262),
341  C64e(0x277695f776248f94), C64e(0x87d5b6574780296c),
342  C64e(0x5c5e272dac8e0d6c), C64e(0x518450c657057a0f),
343  C64e(0x7be4d367702412ea), C64e(0x89e3ab13d31cd769)
344 };
345 
/*
 * Sixteen 64-bit words written through C64e, one per H.wide slot.
 * NOTE(review): presumably the initial state for the 384-bit output
 * variant, handed to jh_init by code outside this view -- confirm.
 */
346 static const sph_u64 IV384[] = {
347  C64e(0x481e3bc6d813398a), C64e(0x6d3b5e894ade879b),
348  C64e(0x63faea68d480ad2e), C64e(0x332ccb21480f8267),
349  C64e(0x98aec84d9082b928), C64e(0xd455ea3041114249),
350  C64e(0x36f555b2924847ec), C64e(0xc7250a93baf43ce1),
351  C64e(0x569b7f8a27db454c), C64e(0x9efcbd496397af0e),
352  C64e(0x589fc27d26aa80cd), C64e(0x80c08b8c9deb2eda),
353  C64e(0x8a7981e8f8d5373a), C64e(0xf43967adddd17a71),
354  C64e(0xa9b4d3bda475d394), C64e(0x976c3fba9842737f)
355 };
356 
/*
 * Sixteen 64-bit words written through C64e, one per H.wide slot.
 * NOTE(review): presumably the initial state for the 512-bit output
 * variant, handed to jh_init by code outside this view -- confirm.
 */
357 static const sph_u64 IV512[] = {
358  C64e(0x6fd14b963e00aa17), C64e(0x636a2e057a15d543),
359  C64e(0x8a225e8d0c97ef0b), C64e(0xe9341259f2b3c361),
360  C64e(0x891da0c1536f801e), C64e(0x2aa9056bea2b6d80),
361  C64e(0x588eccdb2075baa6), C64e(0xa90f3a76baf83bf7),
362  C64e(0x0169e60541e34a69), C64e(0x46b58a8e2e6fe65a),
363  C64e(0x1047a7d0c1843c24), C64e(0x3b6e71b12d5ac199),
364  C64e(0xcf57f6ec9db1f856), C64e(0xa706887c5716b156),
365  C64e(0xe3c2fcdfe68517fb), C64e(0x545a4678cc8cdd4b)
366 };
367 
368 #else
369 
/*
 * Round constants for the 32-bit code path, each written through C32e.
 * The Ceven_ and Codd_ accessors index this table with (r << 3), i.e. eight
 * words per round index; E8 uses round indices 0..41, which matches the
 * 336 entries below.
 */
370 static const sph_u32 C[] = {
371  C32e(0x72d5dea2), C32e(0xdf15f867), C32e(0x7b84150a),
372  C32e(0xb7231557), C32e(0x81abd690), C32e(0x4d5a87f6),
373  C32e(0x4e9f4fc5), C32e(0xc3d12b40), C32e(0xea983ae0),
374  C32e(0x5c45fa9c), C32e(0x03c5d299), C32e(0x66b2999a),
375  C32e(0x660296b4), C32e(0xf2bb538a), C32e(0xb556141a),
376  C32e(0x88dba231), C32e(0x03a35a5c), C32e(0x9a190edb),
377  C32e(0x403fb20a), C32e(0x87c14410), C32e(0x1c051980),
378  C32e(0x849e951d), C32e(0x6f33ebad), C32e(0x5ee7cddc),
379  C32e(0x10ba1392), C32e(0x02bf6b41), C32e(0xdc786515),
380  C32e(0xf7bb27d0), C32e(0x0a2c8139), C32e(0x37aa7850),
381  C32e(0x3f1abfd2), C32e(0x410091d3), C32e(0x422d5a0d),
382  C32e(0xf6cc7e90), C32e(0xdd629f9c), C32e(0x92c097ce),
383  C32e(0x185ca70b), C32e(0xc72b44ac), C32e(0xd1df65d6),
384  C32e(0x63c6fc23), C32e(0x976e6c03), C32e(0x9ee0b81a),
385  C32e(0x2105457e), C32e(0x446ceca8), C32e(0xeef103bb),
386  C32e(0x5d8e61fa), C32e(0xfd9697b2), C32e(0x94838197),
387  C32e(0x4a8e8537), C32e(0xdb03302f), C32e(0x2a678d2d),
388  C32e(0xfb9f6a95), C32e(0x8afe7381), C32e(0xf8b8696c),
389  C32e(0x8ac77246), C32e(0xc07f4214), C32e(0xc5f4158f),
390  C32e(0xbdc75ec4), C32e(0x75446fa7), C32e(0x8f11bb80),
391  C32e(0x52de75b7), C32e(0xaee488bc), C32e(0x82b8001e),
392  C32e(0x98a6a3f4), C32e(0x8ef48f33), C32e(0xa9a36315),
393  C32e(0xaa5f5624), C32e(0xd5b7f989), C32e(0xb6f1ed20),
394  C32e(0x7c5ae0fd), C32e(0x36cae95a), C32e(0x06422c36),
395  C32e(0xce293543), C32e(0x4efe983d), C32e(0x533af974),
396  C32e(0x739a4ba7), C32e(0xd0f51f59), C32e(0x6f4e8186),
397  C32e(0x0e9dad81), C32e(0xafd85a9f), C32e(0xa7050667),
398  C32e(0xee34626a), C32e(0x8b0b28be), C32e(0x6eb91727),
399  C32e(0x47740726), C32e(0xc680103f), C32e(0xe0a07e6f),
400  C32e(0xc67e487b), C32e(0x0d550aa5), C32e(0x4af8a4c0),
401  C32e(0x91e3e79f), C32e(0x978ef19e), C32e(0x86767281),
402  C32e(0x50608dd4), C32e(0x7e9e5a41), C32e(0xf3e5b062),
403  C32e(0xfc9f1fec), C32e(0x4054207a), C32e(0xe3e41a00),
404  C32e(0xcef4c984), C32e(0x4fd794f5), C32e(0x9dfa95d8),
405  C32e(0x552e7e11), C32e(0x24c354a5), C32e(0x5bdf7228),
406  C32e(0xbdfe6e28), C32e(0x78f57fe2), C32e(0x0fa5c4b2),
407  C32e(0x05897cef), C32e(0xee49d32e), C32e(0x447e9385),
408  C32e(0xeb28597f), C32e(0x705f6937), C32e(0xb324314a),
409  C32e(0x5e8628f1), C32e(0x1dd6e465), C32e(0xc71b7704),
410  C32e(0x51b920e7), C32e(0x74fe43e8), C32e(0x23d4878a),
411  C32e(0x7d29e8a3), C32e(0x927694f2), C32e(0xddcb7a09),
412  C32e(0x9b30d9c1), C32e(0x1d1b30fb), C32e(0x5bdc1be0),
413  C32e(0xda24494f), C32e(0xf29c82bf), C32e(0xa4e7ba31),
414  C32e(0xb470bfff), C32e(0x0d324405), C32e(0xdef8bc48),
415  C32e(0x3baefc32), C32e(0x53bbd339), C32e(0x459fc3c1),
416  C32e(0xe0298ba0), C32e(0xe5c905fd), C32e(0xf7ae090f),
417  C32e(0x94703412), C32e(0x4290f134), C32e(0xa271b701),
418  C32e(0xe344ed95), C32e(0xe93b8e36), C32e(0x4f2f984a),
419  C32e(0x88401d63), C32e(0xa06cf615), C32e(0x47c1444b),
420  C32e(0x8752afff), C32e(0x7ebb4af1), C32e(0xe20ac630),
421  C32e(0x4670b6c5), C32e(0xcc6e8ce6), C32e(0xa4d5a456),
422  C32e(0xbd4fca00), C32e(0xda9d844b), C32e(0xc83e18ae),
423  C32e(0x7357ce45), C32e(0x3064d1ad), C32e(0xe8a6ce68),
424  C32e(0x145c2567), C32e(0xa3da8cf2), C32e(0xcb0ee116),
425  C32e(0x33e90658), C32e(0x9a94999a), C32e(0x1f60b220),
426  C32e(0xc26f847b), C32e(0xd1ceac7f), C32e(0xa0d18518),
427  C32e(0x32595ba1), C32e(0x8ddd19d3), C32e(0x509a1cc0),
428  C32e(0xaaa5b446), C32e(0x9f3d6367), C32e(0xe4046bba),
429  C32e(0xf6ca19ab), C32e(0x0b56ee7e), C32e(0x1fb179ea),
430  C32e(0xa9282174), C32e(0xe9bdf735), C32e(0x3b3651ee),
431  C32e(0x1d57ac5a), C32e(0x7550d376), C32e(0x3a46c2fe),
432  C32e(0xa37d7001), C32e(0xf735c1af), C32e(0x98a4d842),
433  C32e(0x78edec20), C32e(0x9e6b6779), C32e(0x41836315),
434  C32e(0xea3adba8), C32e(0xfac33b4d), C32e(0x32832c83),
435  C32e(0xa7403b1f), C32e(0x1c2747f3), C32e(0x5940f034),
436  C32e(0xb72d769a), C32e(0xe73e4e6c), C32e(0xd2214ffd),
437  C32e(0xb8fd8d39), C32e(0xdc5759ef), C32e(0x8d9b0c49),
438  C32e(0x2b49ebda), C32e(0x5ba2d749), C32e(0x68f3700d),
439  C32e(0x7d3baed0), C32e(0x7a8d5584), C32e(0xf5a5e9f0),
440  C32e(0xe4f88e65), C32e(0xa0b8a2f4), C32e(0x36103b53),
441  C32e(0x0ca8079e), C32e(0x753eec5a), C32e(0x91689492),
442  C32e(0x56e8884f), C32e(0x5bb05c55), C32e(0xf8babc4c),
443  C32e(0xe3bb3b99), C32e(0xf387947b), C32e(0x75daf4d6),
444  C32e(0x726b1c5d), C32e(0x64aeac28), C32e(0xdc34b36d),
445  C32e(0x6c34a550), C32e(0xb828db71), C32e(0xf861e2f2),
446  C32e(0x108d512a), C32e(0xe3db6433), C32e(0x59dd75fc),
447  C32e(0x1cacbcf1), C32e(0x43ce3fa2), C32e(0x67bbd13c),
448  C32e(0x02e843b0), C32e(0x330a5bca), C32e(0x8829a175),
449  C32e(0x7f34194d), C32e(0xb416535c), C32e(0x923b94c3),
450  C32e(0x0e794d1e), C32e(0x797475d7), C32e(0xb6eeaf3f),
451  C32e(0xeaa8d4f7), C32e(0xbe1a3921), C32e(0x5cf47e09),
452  C32e(0x4c232751), C32e(0x26a32453), C32e(0xba323cd2),
453  C32e(0x44a3174a), C32e(0x6da6d5ad), C32e(0xb51d3ea6),
454  C32e(0xaff2c908), C32e(0x83593d98), C32e(0x916b3c56),
455  C32e(0x4cf87ca1), C32e(0x7286604d), C32e(0x46e23ecc),
456  C32e(0x086ec7f6), C32e(0x2f9833b3), C32e(0xb1bc765e),
457  C32e(0x2bd666a5), C32e(0xefc4e62a), C32e(0x06f4b6e8),
458  C32e(0xbec1d436), C32e(0x74ee8215), C32e(0xbcef2163),
459  C32e(0xfdc14e0d), C32e(0xf453c969), C32e(0xa77d5ac4),
460  C32e(0x06585826), C32e(0x7ec11416), C32e(0x06e0fa16),
461  C32e(0x7e90af3d), C32e(0x28639d3f), C32e(0xd2c9f2e3),
462  C32e(0x009bd20c), C32e(0x5faace30), C32e(0xb7d40c30),
463  C32e(0x742a5116), C32e(0xf2e03298), C32e(0x0deb30d8),
464  C32e(0xe3cef89a), C32e(0x4bc59e7b), C32e(0xb5f17992),
465  C32e(0xff51e66e), C32e(0x048668d3), C32e(0x9b234d57),
466  C32e(0xe6966731), C32e(0xcce6a6f3), C32e(0x170a7505),
467  C32e(0xb17681d9), C32e(0x13326cce), C32e(0x3c175284),
468  C32e(0xf805a262), C32e(0xf42bcbb3), C32e(0x78471547),
469  C32e(0xff465482), C32e(0x23936a48), C32e(0x38df5807),
470  C32e(0x4e5e6565), C32e(0xf2fc7c89), C32e(0xfc86508e),
471  C32e(0x31702e44), C32e(0xd00bca86), C32e(0xf04009a2),
472  C32e(0x3078474e), C32e(0x65a0ee39), C32e(0xd1f73883),
473  C32e(0xf75ee937), C32e(0xe42c3abd), C32e(0x2197b226),
474  C32e(0x0113f86f), C32e(0xa344edd1), C32e(0xef9fdee7),
475  C32e(0x8ba0df15), C32e(0x762592d9), C32e(0x3c85f7f6),
476  C32e(0x12dc42be), C32e(0xd8a7ec7c), C32e(0xab27b07e),
477  C32e(0x538d7dda), C32e(0xaa3ea8de), C32e(0xaa25ce93),
478  C32e(0xbd0269d8), C32e(0x5af643fd), C32e(0x1a7308f9),
479  C32e(0xc05fefda), C32e(0x174a19a5), C32e(0x974d6633),
480  C32e(0x4cfd216a), C32e(0x35b49831), C32e(0xdb411570),
481  C32e(0xea1e0fbb), C32e(0xedcd549b), C32e(0x9ad063a1),
482  C32e(0x51974072), C32e(0xf6759dbf), C32e(0x91476fe2)
483 };
484 
/*
 * Round-constant accessors for the 32-bit path: round index r owns
 * C[8r] .. C[8r + 7]. The first four words are the "even" group
 * (w3, w2, w1, w0) and the last four the "odd" group, in the same order.
 * S() builds these names by pasting its cb argument with "w3" .. "w0".
 */
485 #define Ceven_w3(r) (C[((r) << 3) + 0])
486 #define Ceven_w2(r) (C[((r) << 3) + 1])
487 #define Ceven_w1(r) (C[((r) << 3) + 2])
488 #define Ceven_w0(r) (C[((r) << 3) + 3])
489 #define Codd_w3(r) (C[((r) << 3) + 4])
490 #define Codd_w2(r) (C[((r) << 3) + 5])
491 #define Codd_w1(r) (C[((r) << 3) + 6])
492 #define Codd_w0(r) (C[((r) << 3) + 7])
493 
/*
 * S(x0, x1, x2, x3, cb, r) (32-bit path): run Sb on each of the four 32-bit
 * quarters (suffixes 3, 2, 1, 0) of four state words. x0..x3 are name
 * prefixes (e.g. h0); cb is an accessor prefix (Ceven_ or Codd_) that gets
 * "w3" .. "w0" pasted on to pick the constants for round index r.
 */
494 #define S(x0, x1, x2, x3, cb, r) do { \
495  Sb(x0 ## 3, x1 ## 3, x2 ## 3, x3 ## 3, cb ## w3(r)); \
496  Sb(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, cb ## w2(r)); \
497  Sb(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, cb ## w1(r)); \
498  Sb(x0 ## 0, x1 ## 0, x2 ## 0, x3 ## 0, cb ## w0(r)); \
499  } while (0)
500 
/*
 * L(x0, ..., x7) (32-bit path): run Lb on each of the four 32-bit quarters
 * (suffixes 3, 2, 1, 0) of eight state words. The arguments are name
 * prefixes (e.g. h0).
 */
501 #define L(x0, x1, x2, x3, x4, x5, x6, x7) do { \
502  Lb(x0 ## 3, x1 ## 3, x2 ## 3, x3 ## 3, \
503  x4 ## 3, x5 ## 3, x6 ## 3, x7 ## 3); \
504  Lb(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, \
505  x4 ## 2, x5 ## 2, x6 ## 2, x7 ## 2); \
506  Lb(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, \
507  x4 ## 1, x5 ## 1, x6 ## 1, x7 ## 1); \
508  Lb(x0 ## 0, x1 ## 0, x2 ## 0, x3 ## 0, \
509  x4 ## 0, x5 ## 0, x6 ## 0, x7 ## 0); \
510  } while (0)
511 
/*
 * Wz(x, c, n) (32-bit path): in each 32-bit quarter of x (suffixes 3, 2, 1,
 * 0), exchange the bits selected by mask c with the bits n positions above
 * them:
 *   v = ((v >> n) & c) | ((v & c) << n)
 * x is a name prefix, not an expression.
 */
512 #define Wz(x, c, n) do { \
513  sph_u32 t = (x ## 3 & (c)) << (n); \
514  x ## 3 = ((x ## 3 >> (n)) & (c)) | t; \
515  t = (x ## 2 & (c)) << (n); \
516  x ## 2 = ((x ## 2 >> (n)) & (c)) | t; \
517  t = (x ## 1 & (c)) << (n); \
518  x ## 1 = ((x ## 1 >> (n)) & (c)) | t; \
519  t = (x ## 0 & (c)) << (n); \
520  x ## 0 = ((x ## 0 >> (n)) & (c)) | t; \
521  } while (0)
522 
/*
 * Bit-group swaps for the 32-bit path. W0..W4 swap adjacent groups of
 * 1, 2, 4, 8 and 16 bits inside each 32-bit quarter of x. W5 swaps whole
 * quarters pairwise (3 with 2, 1 with 0); W6 swaps them across pairs
 * (3 with 1, 2 with 0). SLu and the rolled E8 select one of these by digit.
 */
523 #define W0(x) Wz(x, SPH_C32(0x55555555), 1)
524 #define W1(x) Wz(x, SPH_C32(0x33333333), 2)
525 #define W2(x) Wz(x, SPH_C32(0x0F0F0F0F), 4)
526 #define W3(x) Wz(x, SPH_C32(0x00FF00FF), 8)
527 #define W4(x) Wz(x, SPH_C32(0x0000FFFF), 16)
528 #define W5(x) do { \
529  sph_u32 t = x ## 3; \
530  x ## 3 = x ## 2; \
531  x ## 2 = t; \
532  t = x ## 1; \
533  x ## 1 = x ## 0; \
534  x ## 0 = t; \
535  } while (0)
536 #define W6(x) do { \
537  sph_u32 t = x ## 3; \
538  x ## 3 = x ## 1; \
539  x ## 1 = t; \
540  t = x ## 2; \
541  x ## 2 = x ## 0; \
542  x ## 0 = t; \
543  } while (0)
544 
/*
 * DECL_STATE (32-bit path): declares the local working copy of the state,
 * eight words h0..h7 each split into four 32-bit quarters (suffixes 3, 2,
 * 1, 0), plus the scratch variable tmp that Sb writes to. The expansion is
 * a list of declarations and already ends with a semicolon.
 */
545 #define DECL_STATE \
546  sph_u32 h03, h02, h01, h00, h13, h12, h11, h10; \
547  sph_u32 h23, h22, h21, h20, h33, h32, h31, h30; \
548  sph_u32 h43, h42, h41, h40, h53, h52, h51, h50; \
549  sph_u32 h63, h62, h61, h60, h73, h72, h71, h70; \
550  sph_u32 tmp;
551 
/*
 * READ_STATE(state) (32-bit path): load (state)->H.narrow[0..31] into the
 * locals declared by DECL_STATE. Word k of the state occupies slots
 * 4k .. 4k + 3, in the order quarter 3, 2, 1, 0.
 */
552 #define READ_STATE(state) do { \
553  h03 = (state)->H.narrow[ 0]; \
554  h02 = (state)->H.narrow[ 1]; \
555  h01 = (state)->H.narrow[ 2]; \
556  h00 = (state)->H.narrow[ 3]; \
557  h13 = (state)->H.narrow[ 4]; \
558  h12 = (state)->H.narrow[ 5]; \
559  h11 = (state)->H.narrow[ 6]; \
560  h10 = (state)->H.narrow[ 7]; \
561  h23 = (state)->H.narrow[ 8]; \
562  h22 = (state)->H.narrow[ 9]; \
563  h21 = (state)->H.narrow[10]; \
564  h20 = (state)->H.narrow[11]; \
565  h33 = (state)->H.narrow[12]; \
566  h32 = (state)->H.narrow[13]; \
567  h31 = (state)->H.narrow[14]; \
568  h30 = (state)->H.narrow[15]; \
569  h43 = (state)->H.narrow[16]; \
570  h42 = (state)->H.narrow[17]; \
571  h41 = (state)->H.narrow[18]; \
572  h40 = (state)->H.narrow[19]; \
573  h53 = (state)->H.narrow[20]; \
574  h52 = (state)->H.narrow[21]; \
575  h51 = (state)->H.narrow[22]; \
576  h50 = (state)->H.narrow[23]; \
577  h63 = (state)->H.narrow[24]; \
578  h62 = (state)->H.narrow[25]; \
579  h61 = (state)->H.narrow[26]; \
580  h60 = (state)->H.narrow[27]; \
581  h73 = (state)->H.narrow[28]; \
582  h72 = (state)->H.narrow[29]; \
583  h71 = (state)->H.narrow[30]; \
584  h70 = (state)->H.narrow[31]; \
585  } while (0)
586 
/*
 * WRITE_STATE(state) (32-bit path): store the DECL_STATE locals back into
 * (state)->H.narrow[0..31], using the same slot layout as READ_STATE.
 */
587 #define WRITE_STATE(state) do { \
588  (state)->H.narrow[ 0] = h03; \
589  (state)->H.narrow[ 1] = h02; \
590  (state)->H.narrow[ 2] = h01; \
591  (state)->H.narrow[ 3] = h00; \
592  (state)->H.narrow[ 4] = h13; \
593  (state)->H.narrow[ 5] = h12; \
594  (state)->H.narrow[ 6] = h11; \
595  (state)->H.narrow[ 7] = h10; \
596  (state)->H.narrow[ 8] = h23; \
597  (state)->H.narrow[ 9] = h22; \
598  (state)->H.narrow[10] = h21; \
599  (state)->H.narrow[11] = h20; \
600  (state)->H.narrow[12] = h33; \
601  (state)->H.narrow[13] = h32; \
602  (state)->H.narrow[14] = h31; \
603  (state)->H.narrow[15] = h30; \
604  (state)->H.narrow[16] = h43; \
605  (state)->H.narrow[17] = h42; \
606  (state)->H.narrow[18] = h41; \
607  (state)->H.narrow[19] = h40; \
608  (state)->H.narrow[20] = h53; \
609  (state)->H.narrow[21] = h52; \
610  (state)->H.narrow[22] = h51; \
611  (state)->H.narrow[23] = h50; \
612  (state)->H.narrow[24] = h63; \
613  (state)->H.narrow[25] = h62; \
614  (state)->H.narrow[26] = h61; \
615  (state)->H.narrow[27] = h60; \
616  (state)->H.narrow[28] = h73; \
617  (state)->H.narrow[29] = h72; \
618  (state)->H.narrow[30] = h71; \
619  (state)->H.narrow[31] = h70; \
620  } while (0)
621 
/*
 * INPUT_BUF1 (32-bit path): decode the 64 bytes at buf into sixteen words
 * m03..m30 and XOR them into h0..h3. The expansion is declarations followed
 * by statements (no do/while wrapper), so it must be placed where
 * declarations are allowed, and the m* variables stay in scope for
 * INPUT_BUF2. It uses a variable named buf from the enclosing scope.
 *
 * NOTE(review): the _aligned decoders are used, so buf is presumably
 * expected to be suitably aligned -- confirm against sph_types.h.
 */
622 #define INPUT_BUF1 \
623  sph_u32 m03 = dec32e_aligned(buf + 0); \
624  sph_u32 m02 = dec32e_aligned(buf + 4); \
625  sph_u32 m01 = dec32e_aligned(buf + 8); \
626  sph_u32 m00 = dec32e_aligned(buf + 12); \
627  sph_u32 m13 = dec32e_aligned(buf + 16); \
628  sph_u32 m12 = dec32e_aligned(buf + 20); \
629  sph_u32 m11 = dec32e_aligned(buf + 24); \
630  sph_u32 m10 = dec32e_aligned(buf + 28); \
631  sph_u32 m23 = dec32e_aligned(buf + 32); \
632  sph_u32 m22 = dec32e_aligned(buf + 36); \
633  sph_u32 m21 = dec32e_aligned(buf + 40); \
634  sph_u32 m20 = dec32e_aligned(buf + 44); \
635  sph_u32 m33 = dec32e_aligned(buf + 48); \
636  sph_u32 m32 = dec32e_aligned(buf + 52); \
637  sph_u32 m31 = dec32e_aligned(buf + 56); \
638  sph_u32 m30 = dec32e_aligned(buf + 60); \
639  h03 ^= m03; \
640  h02 ^= m02; \
641  h01 ^= m01; \
642  h00 ^= m00; \
643  h13 ^= m13; \
644  h12 ^= m12; \
645  h11 ^= m11; \
646  h10 ^= m10; \
647  h23 ^= m23; \
648  h22 ^= m22; \
649  h21 ^= m21; \
650  h20 ^= m20; \
651  h33 ^= m33; \
652  h32 ^= m32; \
653  h31 ^= m31; \
654  h30 ^= m30;
655 
/*
 * INPUT_BUF2 (32-bit path): XOR the message words m03..m30 into h4..h7.
 * It must be expanded in the same scope as INPUT_BUF1, which declares the
 * m* variables.
 */
656 #define INPUT_BUF2 \
657  h43 ^= m03; \
658  h42 ^= m02; \
659  h41 ^= m01; \
660  h40 ^= m00; \
661  h53 ^= m13; \
662  h52 ^= m12; \
663  h51 ^= m11; \
664  h50 ^= m10; \
665  h63 ^= m23; \
666  h62 ^= m22; \
667  h61 ^= m21; \
668  h60 ^= m20; \
669  h73 ^= m33; \
670  h72 ^= m32; \
671  h71 ^= m31; \
672  h70 ^= m30;
673 
/*
 * Thirty-two 32-bit words written through C32e, one per H.narrow slot.
 * NOTE(review): presumably the initial state for the 224-bit output
 * variant, handed to jh_init by code outside this view -- confirm.
 */
674 static const sph_u32 IV224[] = {
675  C32e(0x2dfedd62), C32e(0xf99a98ac), C32e(0xae7cacd6), C32e(0x19d634e7),
676  C32e(0xa4831005), C32e(0xbc301216), C32e(0xb86038c6), C32e(0xc9661494),
677  C32e(0x66d9899f), C32e(0x2580706f), C32e(0xce9ea31b), C32e(0x1d9b1adc),
678  C32e(0x11e8325f), C32e(0x7b366e10), C32e(0xf994857f), C32e(0x02fa06c1),
679  C32e(0x1b4f1b5c), C32e(0xd8c840b3), C32e(0x97f6a17f), C32e(0x6e738099),
680  C32e(0xdcdf93a5), C32e(0xadeaa3d3), C32e(0xa431e8de), C32e(0xc9539a68),
681  C32e(0x22b4a98a), C32e(0xec86a1e4), C32e(0xd574ac95), C32e(0x9ce56cf0),
682  C32e(0x15960dea), C32e(0xb5ab2bbf), C32e(0x9611dcf0), C32e(0xdd64ea6e)
683 };
684 
/*
 * Thirty-two 32-bit words written through C32e, one per H.narrow slot.
 * NOTE(review): presumably the initial state for the 256-bit output
 * variant, handed to jh_init by code outside this view -- confirm.
 */
685 static const sph_u32 IV256[] = {
686  C32e(0xeb98a341), C32e(0x2c20d3eb), C32e(0x92cdbe7b), C32e(0x9cb245c1),
687  C32e(0x1c935191), C32e(0x60d4c7fa), C32e(0x260082d6), C32e(0x7e508a03),
688  C32e(0xa4239e26), C32e(0x7726b945), C32e(0xe0fb1a48), C32e(0xd41a9477),
689  C32e(0xcdb5ab26), C32e(0x026b177a), C32e(0x56f02442), C32e(0x0fff2fa8),
690  C32e(0x71a39689), C32e(0x7f2e4d75), C32e(0x1d144908), C32e(0xf77de262),
691  C32e(0x277695f7), C32e(0x76248f94), C32e(0x87d5b657), C32e(0x4780296c),
692  C32e(0x5c5e272d), C32e(0xac8e0d6c), C32e(0x518450c6), C32e(0x57057a0f),
693  C32e(0x7be4d367), C32e(0x702412ea), C32e(0x89e3ab13), C32e(0xd31cd769)
694 };
695 
/*
 * Thirty-two 32-bit words written through C32e, one per H.narrow slot.
 * NOTE(review): presumably the initial state for the 384-bit output
 * variant, handed to jh_init by code outside this view -- confirm.
 */
696 static const sph_u32 IV384[] = {
697  C32e(0x481e3bc6), C32e(0xd813398a), C32e(0x6d3b5e89), C32e(0x4ade879b),
698  C32e(0x63faea68), C32e(0xd480ad2e), C32e(0x332ccb21), C32e(0x480f8267),
699  C32e(0x98aec84d), C32e(0x9082b928), C32e(0xd455ea30), C32e(0x41114249),
700  C32e(0x36f555b2), C32e(0x924847ec), C32e(0xc7250a93), C32e(0xbaf43ce1),
701  C32e(0x569b7f8a), C32e(0x27db454c), C32e(0x9efcbd49), C32e(0x6397af0e),
702  C32e(0x589fc27d), C32e(0x26aa80cd), C32e(0x80c08b8c), C32e(0x9deb2eda),
703  C32e(0x8a7981e8), C32e(0xf8d5373a), C32e(0xf43967ad), C32e(0xddd17a71),
704  C32e(0xa9b4d3bd), C32e(0xa475d394), C32e(0x976c3fba), C32e(0x9842737f)
705 };
706 
/*
 * Thirty-two 32-bit words written through C32e, one per H.narrow slot.
 * NOTE(review): presumably the initial state for the 512-bit output
 * variant, handed to jh_init by code outside this view -- confirm.
 */
707 static const sph_u32 IV512[] = {
708  C32e(0x6fd14b96), C32e(0x3e00aa17), C32e(0x636a2e05), C32e(0x7a15d543),
709  C32e(0x8a225e8d), C32e(0x0c97ef0b), C32e(0xe9341259), C32e(0xf2b3c361),
710  C32e(0x891da0c1), C32e(0x536f801e), C32e(0x2aa9056b), C32e(0xea2b6d80),
711  C32e(0x588eccdb), C32e(0x2075baa6), C32e(0xa90f3a76), C32e(0xbaf83bf7),
712  C32e(0x0169e605), C32e(0x41e34a69), C32e(0x46b58a8e), C32e(0x2e6fe65a),
713  C32e(0x1047a7d0), C32e(0xc1843c24), C32e(0x3b6e71b1), C32e(0x2d5ac199),
714  C32e(0xcf57f6ec), C32e(0x9db1f856), C32e(0xa706887c), C32e(0x5716b156),
715  C32e(0xe3c2fcdf), C32e(0xe68517fb), C32e(0x545a4678), C32e(0xcc8cdd4b)
716 };
717 
718 #endif
719 
/*
 * SL(ro): loop-body form of SLu. It reads a variable named r from the
 * enclosing scope and runs the step with round index r + ro and bit-swap
 * W<ro>. ro must be a literal digit because SLu pastes it onto "W".
 */
720 #define SL(ro) SLu(r + ro, ro)
721 
/*
 * SLu(r, ro): one step on the DECL_STATE locals. S is applied to the
 * even-numbered words (h0, h2, h4, h6) with the Ceven_ constants of round
 * index r and to the odd-numbered words (h1, h3, h5, h7) with the Codd_
 * constants; L then mixes the two groups; finally W<ro> is applied to each
 * odd-numbered word. ro is pasted onto "W", so it must be a literal digit
 * for which a W macro exists (0..6).
 */
722 #define SLu(r, ro) do { \
723  S(h0, h2, h4, h6, Ceven_, r); \
724  S(h1, h3, h5, h7, Codd_, r); \
725  L(h0, h2, h4, h6, h1, h3, h5, h7); \
726  W ## ro(h1); \
727  W ## ro(h3); \
728  W ## ro(h5); \
729  W ## ro(h7); \
730  } while (0)
731 
732 #if SPH_SMALL_FOOTPRINT_JH
733 
734 #if SPH_JH_64
735 
736 /*
737  * The "small footprint" 64-bit version just uses a partially unrolled
738  * loop.
 *
 * r advances by 7 per iteration, so the six iterations cover round
 * indices 0..41. Within an iteration SL(k) uses round index r + k together
 * with bit-swap Wk. E8 works on the locals declared by DECL_STATE.
739  */
740 
741 #define E8 do { \
742  unsigned r; \
743  for (r = 0; r < 42; r += 7) { \
744  SL(0); \
745  SL(1); \
746  SL(2); \
747  SL(3); \
748  SL(4); \
749  SL(5); \
750  SL(6); \
751  } \
752  } while (0)
753 
754 #else
755 
/*
 * Small-footprint 32-bit E8: a fully rolled loop over round indices 0..41.
 * Each iteration performs the same S / S / L sequence as SLu and then
 * applies the bit-swap selected by g to h1, h3, h5 and h7. g counts
 * 0, 1, ..., 6 and wraps back to 0, so it always equals r modulo 7; the
 * switch is needed because the W macros are selected by name.
 */
756 #define E8 do { \
757  unsigned r, g; \
758  for (r = g = 0; r < 42; r ++) { \
759  S(h0, h2, h4, h6, Ceven_, r); \
760  S(h1, h3, h5, h7, Codd_, r); \
761  L(h0, h2, h4, h6, h1, h3, h5, h7); \
762  switch (g) { \
763  case 0: \
764  W0(h1); \
765  W0(h3); \
766  W0(h5); \
767  W0(h7); \
768  break; \
769  case 1: \
770  W1(h1); \
771  W1(h3); \
772  W1(h5); \
773  W1(h7); \
774  break; \
775  case 2: \
776  W2(h1); \
777  W2(h3); \
778  W2(h5); \
779  W2(h7); \
780  break; \
781  case 3: \
782  W3(h1); \
783  W3(h3); \
784  W3(h5); \
785  W3(h7); \
786  break; \
787  case 4: \
788  W4(h1); \
789  W4(h3); \
790  W4(h5); \
791  W4(h7); \
792  break; \
793  case 5: \
794  W5(h1); \
795  W5(h3); \
796  W5(h5); \
797  W5(h7); \
798  break; \
799  case 6: \
800  W6(h1); \
801  W6(h3); \
802  W6(h5); \
803  W6(h7); \
804  break; \
805  } \
806  if (++ g == 7) \
807  g = 0; \
808  } \
809  } while (0)
810 
811 #endif
812 
813 #else
814 
815 #if SPH_JH_64
816 
817 /*
818  * On a "true 64-bit" architecture, we can unroll at will.
 *
 * Fully unrolled E8: 42 SLu steps for round indices 0..41. The second
 * argument of each step is the round index modulo 7 and selects the
 * bit-swap W0..W6. E8 works on the locals declared by DECL_STATE.
819  */
820 
821 #define E8 do { \
822  SLu( 0, 0); \
823  SLu( 1, 1); \
824  SLu( 2, 2); \
825  SLu( 3, 3); \
826  SLu( 4, 4); \
827  SLu( 5, 5); \
828  SLu( 6, 6); \
829  SLu( 7, 0); \
830  SLu( 8, 1); \
831  SLu( 9, 2); \
832  SLu(10, 3); \
833  SLu(11, 4); \
834  SLu(12, 5); \
835  SLu(13, 6); \
836  SLu(14, 0); \
837  SLu(15, 1); \
838  SLu(16, 2); \
839  SLu(17, 3); \
840  SLu(18, 4); \
841  SLu(19, 5); \
842  SLu(20, 6); \
843  SLu(21, 0); \
844  SLu(22, 1); \
845  SLu(23, 2); \
846  SLu(24, 3); \
847  SLu(25, 4); \
848  SLu(26, 5); \
849  SLu(27, 6); \
850  SLu(28, 0); \
851  SLu(29, 1); \
852  SLu(30, 2); \
853  SLu(31, 3); \
854  SLu(32, 4); \
855  SLu(33, 5); \
856  SLu(34, 6); \
857  SLu(35, 0); \
858  SLu(36, 1); \
859  SLu(37, 2); \
860  SLu(38, 3); \
861  SLu(39, 4); \
862  SLu(40, 5); \
863  SLu(41, 6); \
864  } while (0)
865 
866 #else
867 
/*
 * No small-footprint constraint, but still the 32-bit implementation.
 * Unrolling all 42 rounds would smash the L1 cache on some "big"
 * architectures (32 kB L1 cache), so only seven rounds are unrolled and
 * the group is run six times.
 *
 * NOTE(review): SL() is defined earlier in this file and presumably
 * reads the loop counter by the name "r" -- confirm before renaming it.
 */

#define E8   do { \
		unsigned r; \
		r = 0; \
		while (r < 42) { \
			SL(0); SL(1); SL(2); SL(3); SL(4); SL(5); SL(6); \
			r += 7; \
		} \
	} while (0)
886 
887 #endif
888 
889 #endif
890 
891 static void
892 jh_init(sph_jh_context *sc, const void *iv)
893 {
894  sc->ptr = 0;
895 #if SPH_JH_64
896  memcpy(sc->H.wide, iv, sizeof sc->H.wide);
897 #else
898  memcpy(sc->H.narrow, iv, sizeof sc->H.narrow);
899 #endif
900 #if SPH_64
901  sc->block_count = 0;
902 #else
903  sc->block_count_high = 0;
904  sc->block_count_low = 0;
905 #endif
906 }
907 
908 static void
909 jh_core(sph_jh_context *sc, const void *data, size_t len)
910 {
911  unsigned char *buf;
912  size_t ptr;
913  DECL_STATE
914 
915  buf = sc->buf;
916  ptr = sc->ptr;
917  if (len < (sizeof sc->buf) - ptr) {
918  memcpy(buf + ptr, data, len);
919  ptr += len;
920  sc->ptr = ptr;
921  return;
922  }
923 
924  READ_STATE(sc);
925  while (len > 0) {
926  size_t clen;
927 
928  clen = (sizeof sc->buf) - ptr;
929  if (clen > len)
930  clen = len;
931  memcpy(buf + ptr, data, clen);
932  ptr += clen;
933  data = (const unsigned char *)data + clen;
934  len -= clen;
935  if (ptr == sizeof sc->buf) {
936  INPUT_BUF1;
937  E8;
938  INPUT_BUF2;
939 #if SPH_64
940  sc->block_count ++;
941 #else
942  if ((sc->block_count_low = SPH_T32(
943  sc->block_count_low + 1)) == 0)
944  sc->block_count_high ++;
945 #endif
946  ptr = 0;
947  }
948  }
949  WRITE_STATE(sc);
950  sc->ptr = ptr;
951 }
952 
953 static void
954 jh_close(sph_jh_context *sc, unsigned ub, unsigned n,
955  void *dst, size_t out_size_w32, const void *iv)
956 {
957  unsigned z;
958  unsigned char buf[128];
959  size_t numz, u;
960 #if SPH_64
961  sph_u64 l0, l1;
962 #else
963  sph_u32 l0, l1, l2, l3;
964 #endif
965 
966  z = 0x80 >> n;
967  buf[0] = ((ub & -z) | z) & 0xFF;
968  if (sc->ptr == 0 && n == 0) {
969  numz = 47;
970  } else {
971  numz = 111 - sc->ptr;
972  }
973  memset(buf + 1, 0, numz);
974 #if SPH_64
975  l0 = SPH_T64(sc->block_count << 9) + (sc->ptr << 3) + n;
976  l1 = SPH_T64(sc->block_count >> 55);
977  sph_enc64be(buf + numz + 1, l1);
978  sph_enc64be(buf + numz + 9, l0);
979 #else
980  l0 = SPH_T32(sc->block_count_low << 9) + (sc->ptr << 3) + n;
981  l1 = SPH_T32(sc->block_count_low >> 23)
982  + SPH_T32(sc->block_count_high << 9);
983  l2 = SPH_T32(sc->block_count_high >> 23);
984  l3 = 0;
985  sph_enc32be(buf + numz + 1, l3);
986  sph_enc32be(buf + numz + 5, l2);
987  sph_enc32be(buf + numz + 9, l1);
988  sph_enc32be(buf + numz + 13, l0);
989 #endif
990  jh_core(sc, buf, numz + 17);
991 #if SPH_JH_64
992  for (u = 0; u < 8; u ++)
993  enc64e(buf + (u << 3), sc->H.wide[u + 8]);
994 #else
995  for (u = 0; u < 16; u ++)
996  enc32e(buf + (u << 2), sc->H.narrow[u + 16]);
997 #endif
998  memcpy(dst, buf + ((16 - out_size_w32) << 2), out_size_w32 << 2);
999  jh_init(sc, iv);
1000 }
1001 
1002 /* see sph_jh.h */
1003 void
1005 {
1006  jh_init(cc, IV224);
1007 }
1008 
1009 /* see sph_jh.h */
1010 void
1011 sph_jh224(void *cc, const void *data, size_t len)
1012 {
1013  jh_core(cc, data, len);
1014 }
1015 
1016 /* see sph_jh.h */
1017 void
1018 sph_jh224_close(void *cc, void *dst)
1019 {
1020  jh_close(cc, 0, 0, dst, 7, IV224);
1021 }
1022 
1023 /* see sph_jh.h */
1024 void
1025 sph_jh224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1026 {
1027  jh_close(cc, ub, n, dst, 7, IV224);
1028 }
1029 
1030 /* see sph_jh.h */
1031 void
1033 {
1034  jh_init(cc, IV256);
1035 }
1036 
1037 /* see sph_jh.h */
1038 void
1039 sph_jh256(void *cc, const void *data, size_t len)
1040 {
1041  jh_core(cc, data, len);
1042 }
1043 
1044 /* see sph_jh.h */
1045 void
1046 sph_jh256_close(void *cc, void *dst)
1047 {
1048  jh_close(cc, 0, 0, dst, 8, IV256);
1049 }
1050 
1051 /* see sph_jh.h */
1052 void
1053 sph_jh256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1054 {
1055  jh_close(cc, ub, n, dst, 8, IV256);
1056 }
1057 
1058 /* see sph_jh.h */
1059 void
1061 {
1062  jh_init(cc, IV384);
1063 }
1064 
1065 /* see sph_jh.h */
1066 void
1067 sph_jh384(void *cc, const void *data, size_t len)
1068 {
1069  jh_core(cc, data, len);
1070 }
1071 
1072 /* see sph_jh.h */
1073 void
1074 sph_jh384_close(void *cc, void *dst)
1075 {
1076  jh_close(cc, 0, 0, dst, 12, IV384);
1077 }
1078 
1079 /* see sph_jh.h */
1080 void
1081 sph_jh384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1082 {
1083  jh_close(cc, ub, n, dst, 12, IV384);
1084 }
1085 
1086 /* see sph_jh.h */
1087 void
1089 {
1090  jh_init(cc, IV512);
1091 }
1092 
1093 /* see sph_jh.h */
1094 void
1095 sph_jh512(void *cc, const void *data, size_t len)
1096 {
1097  jh_core(cc, data, len);
1098 }
1099 
1100 /* see sph_jh.h */
1101 void
1102 sph_jh512_close(void *cc, void *dst)
1103 {
1104  jh_close(cc, 0, 0, dst, 16, IV512);
1105 }
1106 
1107 /* see sph_jh.h */
1108 void
1109 sph_jh512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
1110 {
1111  jh_close(cc, ub, n, dst, 16, IV512);
1112 }
1113 
1114 #ifdef __cplusplus
1115 }
1116 #endif
void sph_jh224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
Definition: jh.c:1025
JH interface.
#define C32e(x)
Definition: jh.c:92
sph_u32 block_count_high
Definition: sph_jh.h:89
sph_u32 narrow[32]
Definition: sph_jh.h:84
#define DECL_STATE
Definition: jh.c:545
void sph_jh256(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: jh.c:1039
void sph_jh512_init(void *cc)
Initialize a JH-512 context.
Definition: jh.c:1088
#define READ_STATE(state)
Definition: jh.c:552
#define SPH_T32(x)
Definition: sph_types.h:932
#define INPUT_BUF1
Definition: jh.c:622
void sph_jh256_close(void *cc, void *dst)
Terminate the current JH-256 computation and output the result into the provided buffer.
Definition: jh.c:1046
void sph_jh384_close(void *cc, void *dst)
Terminate the current JH-384 computation and output the result into the provided buffer.
Definition: jh.c:1074
void sph_jh256_init(void *cc)
Initialize a JH-256 context.
Definition: jh.c:1032
void sph_jh512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
Definition: jh.c:1109
void sph_jh384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
Definition: jh.c:1081
void sph_jh224_init(void *cc)
Initialize a JH-224 context.
Definition: jh.c:1004
void sph_jh512_close(void *cc, void *dst)
Terminate the current JH-512 computation and output the result into the provided buffer.
Definition: jh.c:1102
union sph_jh_context::@8 H
#define WRITE_STATE(state)
Definition: jh.c:587
void sph_jh384_init(void *cc)
Initialize a JH-384 context.
Definition: jh.c:1060
unsigned char buf[64]
Definition: sph_jh.h:78
size_t ptr
Definition: sph_jh.h:79
void * memcpy(void *a, const void *b, size_t c)
unsigned long sph_u32
Definition: sph_types.h:870
#define enc32e
Definition: jh.c:94
This structure is a context for JH computations: it contains the intermediate values and some data fr...
Definition: sph_jh.h:76
void sph_jh256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
Definition: jh.c:1053
void sph_jh512(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: jh.c:1095
void sph_jh224(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: jh.c:1011
sph_u32 block_count_low
Definition: sph_jh.h:89
#define INPUT_BUF2
Definition: jh.c:656
void sph_jh384(void *cc, const void *data, size_t len)
Process some data bytes.
Definition: jh.c:1067
void sph_jh224_close(void *cc, void *dst)
Terminate the current JH-224 computation and output the result into the provided buffer.
Definition: jh.c:1018
#define E8
Definition: jh.c:874