/*
 * Small-footprint guard, MSVC pragma (C4146: unary minus on unsigned --
 * presumably for the negative W_* offsets below; confirm), rotation and
 * token-pasting helpers, then the start of alpha_tab.
 *
 * NOTE(review): original physical lines were collapsed by extraction; the
 * embedded numbers ("43", "44", ...) are original line numbers.  Gaps in
 * that numbering mean source lines were dropped.
 *
 * alpha_tab: signed 32-bit constants, 12 per original line, values in
 * 1..256 -- consistent with powers of a root modulo 257 used by the
 * FFT_LOOP twiddle multiply below (TODO confirm against upstream).
 * NOTE(review): numbering jumps from 84 to 93 after the last row shown,
 * so the table's final entries and closing "};" are missing here.
 */
43 #if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SIMD 44 #define SPH_SMALL_FOOTPRINT_SIMD 1 48 #pragma warning (disable: 4146) 55 #define ROL32 SPH_ROTL32 57 #define XCAT(x, y) XCAT_(x, y) 58 #define XCAT_(x, y) x ## y 63 static const s32 alpha_tab[] = {
64 1, 41, 139, 45, 46, 87, 226, 14, 60, 147, 116, 130,
65 190, 80, 196, 69, 2, 82, 21, 90, 92, 174, 195, 28,
66 120, 37, 232, 3, 123, 160, 135, 138, 4, 164, 42, 180,
67 184, 91, 133, 56, 240, 74, 207, 6, 246, 63, 13, 19,
68 8, 71, 84, 103, 111, 182, 9, 112, 223, 148, 157, 12,
69 235, 126, 26, 38, 16, 142, 168, 206, 222, 107, 18, 224,
70 189, 39, 57, 24, 213, 252, 52, 76, 32, 27, 79, 155,
71 187, 214, 36, 191, 121, 78, 114, 48, 169, 247, 104, 152,
72 64, 54, 158, 53, 117, 171, 72, 125, 242, 156, 228, 96,
73 81, 237, 208, 47, 128, 108, 59, 106, 234, 85, 144, 250,
74 227, 55, 199, 192, 162, 217, 159, 94, 256, 216, 118, 212,
75 211, 170, 31, 243, 197, 110, 141, 127, 67, 177, 61, 188,
76 255, 175, 236, 167, 165, 83, 62, 229, 137, 220, 25, 254,
77 134, 97, 122, 119, 253, 93, 215, 77, 73, 166, 124, 201,
78 17, 183, 50, 251, 11, 194, 244, 238, 249, 186, 173, 154,
79 146, 75, 248, 145, 34, 109, 100, 245, 22, 131, 231, 219,
80 241, 115, 89, 51, 35, 150, 239, 33, 68, 218, 200, 233,
81 44, 5, 205, 181, 225, 230, 178, 102, 70, 43, 221, 66,
82 136, 179, 143, 209, 88, 10, 153, 105, 193, 203, 99, 204,
83 140, 86, 185, 132, 15, 101, 29, 161, 176, 20, 49, 210,
84 129, 149, 198, 151, 23, 172, 113, 7, 30, 202, 58, 65,
/*
 * Partial reductions (REDS1 folds an 8-bit carry, REDS2 a 16-bit carry --
 * cheap reductions toward the range of residues mod 257), the generic
 * radix-2 butterfly FFT_LOOP (m/n pairs separated by hk, twiddles taken
 * from alpha_tab with stride "as"), and the FFT8/FFT16/FFT32/FFT64/
 * FFT128/FFT256 building blocks plus fragments of the fft32()/fft64()
 * helper functions used by the small-footprint build.
 *
 * NOTE(review): many continuation lines were dropped by extraction
 * (numbering jumps 133->147, 160->176, 197->202, 215->217, 221->230,
 * 238->245, 249->260 ...): the "} while (0)" closers, FFT8's output
 * statements, the #else/#endif that separates the two FFT128 variants,
 * and the fft32()/fft64() body lines are all missing.  This span is not
 * compilable as shown; verify against the upstream file before editing.
 */
93 #define REDS1(x) (((x) & 0xFF) - ((x) >> 8)) 94 #define REDS2(x) (((x) & 0xFFFF) + ((x) >> 16)) 102 #define FFT_LOOP(rb, hk, as, id) do { \ 105 s32 n = q[(rb) + (hk)]; \ 107 q[(rb) + (hk)] = m - n; \ 110 for (; u < (hk); u += 4, v += 4 * (as)) { \ 112 m = q[(rb) + u + 0]; \ 113 n = q[(rb) + u + 0 + (hk)]; \ 114 t = REDS2(n * alpha_tab[v + 0 * (as)]); \ 115 q[(rb) + u + 0] = m + t; \ 116 q[(rb) + u + 0 + (hk)] = m - t; \ 118 m = q[(rb) + u + 1]; \ 119 n = q[(rb) + u + 1 + (hk)]; \ 120 t = REDS2(n * alpha_tab[v + 1 * (as)]); \ 121 q[(rb) + u + 1] = m + t; \ 122 q[(rb) + u + 1 + (hk)] = m - t; \ 123 m = q[(rb) + u + 2]; \ 124 n = q[(rb) + u + 2 + (hk)]; \ 125 t = REDS2(n * alpha_tab[v + 2 * (as)]); \ 126 q[(rb) + u + 2] = m + t; \ 127 q[(rb) + u + 2 + (hk)] = m - t; \ 128 m = q[(rb) + u + 3]; \ 129 n = q[(rb) + u + 3 + (hk)]; \ 130 t = REDS2(n * alpha_tab[v + 3 * (as)]); \ 131 q[(rb) + u + 3] = m + t; \ 132 q[(rb) + u + 3 + (hk)] = m - t; \ 147 #define FFT8(xb, xs, d) do { \ 149 s32 x1 = x[(xb) + (xs)]; \ 150 s32 x2 = x[(xb) + 2 * (xs)]; \ 151 s32 x3 = x[(xb) + 3 * (xs)]; \ 153 s32 a1 = x0 + (x2 << 4); \ 155 s32 a3 = x0 - (x2 << 4); \ 157 s32 b1 = REDS1((x1 << 2) + (x3 << 6)); \ 158 s32 b2 = (x1 << 4) - (x3 << 4); \ 159 s32 b3 = REDS1((x1 << 6) + (x3 << 2)); \ 176 #define FFT16(xb, xs, rb) do { \ 177 s32 d1_0, d1_1, d1_2, d1_3, d1_4, d1_5, d1_6, d1_7; \ 178 s32 d2_0, d2_1, d2_2, d2_3, d2_4, d2_5, d2_6, d2_7; \ 179 FFT8(xb, (xs) << 1, d1_); \ 180 FFT8((xb) + (xs), (xs) << 1, d2_); \ 181 q[(rb) + 0] = d1_0 + d2_0; \ 182 q[(rb) + 1] = d1_1 + (d2_1 << 1); \ 183 q[(rb) + 2] = d1_2 + (d2_2 << 2); \ 184 q[(rb) + 3] = d1_3 + (d2_3 << 3); \ 185 q[(rb) + 4] = d1_4 + (d2_4 << 4); \ 186 q[(rb) + 5] = d1_5 + (d2_5 << 5); \ 187 q[(rb) + 6] = d1_6 + (d2_6 << 6); \ 188 q[(rb) + 7] = d1_7 + (d2_7 << 7); \ 189 q[(rb) + 8] = d1_0 - d2_0; \ 190 q[(rb) + 9] = d1_1 - (d2_1 << 1); \ 191 q[(rb) + 10] = d1_2 - (d2_2 << 2); \ 192 q[(rb) + 11] = d1_3 - (d2_3 << 3); \ 193 q[(rb) + 12] = d1_4 
- (d2_4 << 4); \ 194 q[(rb) + 13] = d1_5 - (d2_5 << 5); \ 195 q[(rb) + 14] = d1_6 - (d2_6 << 6); \ 196 q[(rb) + 15] = d1_7 - (d2_7 << 7); \ 202 #define FFT32(xb, xs, rb, id) do { \ 203 FFT16(xb, (xs) << 1, rb); \ 204 FFT16((xb) + (xs), (xs) << 1, (rb) + 16); \ 205 FFT_LOOP(rb, 16, 8, id); \ 211 #define FFT64(xb, xs, rb, id) do { \ 212 FFT32(xb, (xs) << 1, rb, XCAT(id, a)); \ 213 FFT32((xb) + (xs), (xs) << 1, (rb) + 32, XCAT(id, b)); \ 214 FFT_LOOP(rb, 32, 4, id); \ 217 #if SPH_SMALL_FOOTPRINT_SIMD 220 fft32(
unsigned char *x,
size_t xs, s32 *q)
/* Small-footprint FFT128 calls the out-of-line fft32(); the second
 * FFT128 definition below is the inlined (non-small-footprint) variant
 * -- the #else between them was dropped by extraction. */
230 #define FFT128(xb, xs, rb, id) do { \ 231 fft32(x + (xb) + ((xs) * 0), (xs) << 2, &q[(rb) + 0]); \ 232 fft32(x + (xb) + ((xs) * 2), (xs) << 2, &q[(rb) + 32]); \ 233 FFT_LOOP(rb, 32, 4, XCAT(id, aa)); \ 234 fft32(x + (xb) + ((xs) * 1), (xs) << 2, &q[(rb) + 64]); \ 235 fft32(x + (xb) + ((xs) * 3), (xs) << 2, &q[(rb) + 96]); \ 236 FFT_LOOP((rb) + 64, 32, 4, XCAT(id, ab)); \ 237 FFT_LOOP(rb, 64, 2, XCAT(id, a)); \ 245 #define FFT128(xb, xs, rb, id) do { \ 246 FFT64(xb, (xs) << 1, rb, XCAT(id, a)); \ 247 FFT64((xb) + (xs), (xs) << 1, (rb) + 64, XCAT(id, b)); \ 248 FFT_LOOP(rb, 64, 2, id); \ 260 fft64(
unsigned char *x,
size_t xs, s32 *q)
265 FFT32(0, xd, 0, label_a);
266 FFT32(xs, xd, 32, label_b);
272 #define FFT256(xb, xs, rb, id) do { \ 274 fft64(x + (xb) + ((xs) * 0), (xs) << 2, &q[(rb) + 0]); \ 275 fft64(x + (xb) + ((xs) * 2), (xs) << 2, &q[(rb) + 64]); \ 276 FFT_LOOP(rb, 64, 2, XCAT(id, aa)); \ 277 fft64(x + (xb) + ((xs) * 1), (xs) << 2, &q[(rb) + 128]); \ 278 fft64(x + (xb) + ((xs) * 3), (xs) << 2, &q[(rb) + 192]); \ 279 FFT_LOOP((rb) + 128, 64, 2, XCAT(id, ab)); \ 280 FFT_LOOP(rb, 128, 1, XCAT(id, a)); \ 286 static const unsigned short yoff_s_n[] = {
/*
 * yoff_s_n: 128 entries (10 rows of 12 + one of 8), matching the
 * "i < 128" non-final reduction loop below that adds yoff_s_n[i] to q[i].
 * NOTE(review): numbering jumps 297 -> 303, so the closing "};" was
 * dropped by extraction.
 */
287 1, 98, 95, 58, 30, 113, 23, 198, 129, 49, 176, 29,
288 15, 185, 140, 99, 193, 153, 88, 143, 136, 221, 70, 178,
289 225, 205, 44, 200, 68, 239, 35, 89, 241, 231, 22, 100,
290 34, 248, 146, 173, 249, 244, 11, 50, 17, 124, 73, 215,
291 253, 122, 134, 25, 137, 62, 165, 236, 255, 61, 67, 141,
292 197, 31, 211, 118, 256, 159, 162, 199, 227, 144, 234, 59,
293 128, 208, 81, 228, 242, 72, 117, 158, 64, 104, 169, 114,
294 121, 36, 187, 79, 32, 52, 213, 57, 189, 18, 222, 168,
295 16, 26, 235, 157, 223, 9, 111, 84, 8, 13, 246, 207,
296 240, 133, 184, 42, 4, 135, 123, 232, 120, 195, 92, 21,
297 2, 196, 190, 116, 60, 226, 46, 139
/*
 * yoff_s_f: 128 entries used by the "final block" variant of the small
 * compression's q[] adjustment (the i < 128 loop adding yoff_s_f[i]).
 * NOTE(review): numbering jumps 314 -> 320; closing "};" dropped by
 * extraction.
 */
303 static const unsigned short yoff_s_f[] = {
304 2, 156, 118, 107, 45, 212, 111, 162, 97, 249, 211, 3,
305 49, 101, 151, 223, 189, 178, 253, 204, 76, 82, 232, 65,
306 96, 176, 161, 47, 189, 61, 248, 107, 0, 131, 133, 113,
307 17, 33, 12, 111, 251, 103, 57, 148, 47, 65, 249, 143,
308 189, 8, 204, 230, 205, 151, 187, 227, 247, 111, 140, 6,
309 77, 10, 21, 149, 255, 101, 139, 150, 212, 45, 146, 95,
310 160, 8, 46, 254, 208, 156, 106, 34, 68, 79, 4, 53,
311 181, 175, 25, 192, 161, 81, 96, 210, 68, 196, 9, 150,
312 0, 126, 124, 144, 240, 224, 245, 146, 6, 154, 200, 109,
313 210, 192, 8, 114, 68, 249, 53, 27, 52, 106, 70, 30,
314 10, 146, 117, 251, 180, 247, 236, 108
/*
 * yoff_b_n: additive offsets for the big (256-word q[]) non-final
 * reduction loop ("i < 256" below).  21 rows of 12 = 252 entries shown;
 * NOTE(review): numbering jumps 341 -> 348, so the final four entries
 * and the closing "};" were dropped by extraction -- the loop expects
 * 256 entries.
 */
320 static const unsigned short yoff_b_n[] = {
321 1, 163, 98, 40, 95, 65, 58, 202, 30, 7, 113, 172,
322 23, 151, 198, 149, 129, 210, 49, 20, 176, 161, 29, 101,
323 15, 132, 185, 86, 140, 204, 99, 203, 193, 105, 153, 10,
324 88, 209, 143, 179, 136, 66, 221, 43, 70, 102, 178, 230,
325 225, 181, 205, 5, 44, 233, 200, 218, 68, 33, 239, 150,
326 35, 51, 89, 115, 241, 219, 231, 131, 22, 245, 100, 109,
327 34, 145, 248, 75, 146, 154, 173, 186, 249, 238, 244, 194,
328 11, 251, 50, 183, 17, 201, 124, 166, 73, 77, 215, 93,
329 253, 119, 122, 97, 134, 254, 25, 220, 137, 229, 62, 83,
330 165, 167, 236, 175, 255, 188, 61, 177, 67, 127, 141, 110,
331 197, 243, 31, 170, 211, 212, 118, 216, 256, 94, 159, 217,
332 162, 192, 199, 55, 227, 250, 144, 85, 234, 106, 59, 108,
333 128, 47, 208, 237, 81, 96, 228, 156, 242, 125, 72, 171,
334 117, 53, 158, 54, 64, 152, 104, 247, 169, 48, 114, 78,
335 121, 191, 36, 214, 187, 155, 79, 27, 32, 76, 52, 252,
336 213, 24, 57, 39, 189, 224, 18, 107, 222, 206, 168, 142,
337 16, 38, 26, 126, 235, 12, 157, 148, 223, 112, 9, 182,
338 111, 103, 84, 71, 8, 19, 13, 63, 246, 6, 207, 74,
339 240, 56, 133, 91, 184, 180, 42, 164, 4, 138, 135, 160,
340 123, 3, 232, 37, 120, 28, 195, 174, 92, 90, 21, 82,
341 2, 69, 196, 80, 190, 130, 116, 147, 60, 14, 226, 87,
/*
 * yoff_b_f: additive offsets for the big final-block reduction loop
 * ("i < 256" below).  21 rows of 12 = 252 entries shown;
 * NOTE(review): numbering jumps 369 -> 373, so the final entries and
 * closing "};" were dropped by extraction -- the loop expects 256.
 */
348 static const unsigned short yoff_b_f[] = {
349 2, 203, 156, 47, 118, 214, 107, 106, 45, 93, 212, 20,
350 111, 73, 162, 251, 97, 215, 249, 53, 211, 19, 3, 89,
351 49, 207, 101, 67, 151, 130, 223, 23, 189, 202, 178, 239,
352 253, 127, 204, 49, 76, 236, 82, 137, 232, 157, 65, 79,
353 96, 161, 176, 130, 161, 30, 47, 9, 189, 247, 61, 226,
354 248, 90, 107, 64, 0, 88, 131, 243, 133, 59, 113, 115,
355 17, 236, 33, 213, 12, 191, 111, 19, 251, 61, 103, 208,
356 57, 35, 148, 248, 47, 116, 65, 119, 249, 178, 143, 40,
357 189, 129, 8, 163, 204, 227, 230, 196, 205, 122, 151, 45,
358 187, 19, 227, 72, 247, 125, 111, 121, 140, 220, 6, 107,
359 77, 69, 10, 101, 21, 65, 149, 171, 255, 54, 101, 210,
360 139, 43, 150, 151, 212, 164, 45, 237, 146, 184, 95, 6,
361 160, 42, 8, 204, 46, 238, 254, 168, 208, 50, 156, 190,
362 106, 127, 34, 234, 68, 55, 79, 18, 4, 130, 53, 208,
363 181, 21, 175, 120, 25, 100, 192, 178, 161, 96, 81, 127,
364 96, 227, 210, 248, 68, 10, 196, 31, 9, 167, 150, 193,
365 0, 169, 126, 14, 124, 198, 144, 142, 240, 21, 224, 44,
366 245, 66, 146, 238, 6, 196, 154, 49, 200, 222, 109, 9,
367 210, 141, 192, 138, 8, 79, 114, 217, 68, 128, 249, 94,
368 53, 30, 27, 61, 52, 135, 106, 212, 70, 238, 30, 185,
369 10, 132, 146, 136, 117, 37, 251, 150, 180, 188, 247, 156,
/*
 * Message-word construction and round-step machinery.
 *
 * INNER packs two 16-bit products (l*mm, h*mm) into one u32.
 * W_SMALL / W_BIG deliberately expand to an UNTERMINATED argument list
 * "(v0, v1, ..." -- the matching close paren is supplied by STEP_SMALL_/
 * STEP_BIG_, whose definitions end with a bare ")".  The WS_*/WB_*
 * macros select permuted q[] sub-blocks with per-round offsets (the
 * negative o1/o2 offsets for rounds 2-3 index q[] relative to a larger
 * sb base).  IF/MAJ are the two bitwise mixers; DECL/READ/WRITE_STATE_*
 * come in an empty variant and a register-cached variant (the #if/#else
 * separating them was dropped by extraction, per the numbering gap
 * 459 -> 539).  STEP_ELT/STEP_SMALL/STEP_BIG implement one Feistel-like
 * step: A <- rot(D + w + fun(A,B,C), s) + rot(A', r) with a pasted
 * permutation suffix ppb choosing which rotated tA feeds each lane.
 */
373 #define INNER(l, h, mm) (((u32)((l) * (mm)) & 0xFFFFU) \ 374 + ((u32)((h) * (mm)) << 16)) 376 #define W_SMALL(sb, o1, o2, mm) \ 377 (INNER(q[8 * (sb) + 2 * 0 + o1], q[8 * (sb) + 2 * 0 + o2], mm), \ 378 INNER(q[8 * (sb) + 2 * 1 + o1], q[8 * (sb) + 2 * 1 + o2], mm), \ 379 INNER(q[8 * (sb) + 2 * 2 + o1], q[8 * (sb) + 2 * 2 + o2], mm), \ 380 INNER(q[8 * (sb) + 2 * 3 + o1], q[8 * (sb) + 2 * 3 + o2], mm) 382 #define WS_0_0 W_SMALL( 4, 0, 1, 185) 383 #define WS_0_1 W_SMALL( 6, 0, 1, 185) 384 #define WS_0_2 W_SMALL( 0, 0, 1, 185) 385 #define WS_0_3 W_SMALL( 2, 0, 1, 185) 386 #define WS_0_4 W_SMALL( 7, 0, 1, 185) 387 #define WS_0_5 W_SMALL( 5, 0, 1, 185) 388 #define WS_0_6 W_SMALL( 3, 0, 1, 185) 389 #define WS_0_7 W_SMALL( 1, 0, 1, 185) 390 #define WS_1_0 W_SMALL(15, 0, 1, 185) 391 #define WS_1_1 W_SMALL(11, 0, 1, 185) 392 #define WS_1_2 W_SMALL(12, 0, 1, 185) 393 #define WS_1_3 W_SMALL( 8, 0, 1, 185) 394 #define WS_1_4 W_SMALL( 9, 0, 1, 185) 395 #define WS_1_5 W_SMALL(13, 0, 1, 185) 396 #define WS_1_6 W_SMALL(10, 0, 1, 185) 397 #define WS_1_7 W_SMALL(14, 0, 1, 185) 398 #define WS_2_0 W_SMALL(17, -128, -64, 233) 399 #define WS_2_1 W_SMALL(18, -128, -64, 233) 400 #define WS_2_2 W_SMALL(23, -128, -64, 233) 401 #define WS_2_3 W_SMALL(20, -128, -64, 233) 402 #define WS_2_4 W_SMALL(22, -128, -64, 233) 403 #define WS_2_5 W_SMALL(21, -128, -64, 233) 404 #define WS_2_6 W_SMALL(16, -128, -64, 233) 405 #define WS_2_7 W_SMALL(19, -128, -64, 233) 406 #define WS_3_0 W_SMALL(30, -191, -127, 233) 407 #define WS_3_1 W_SMALL(24, -191, -127, 233) 408 #define WS_3_2 W_SMALL(25, -191, -127, 233) 409 #define WS_3_3 W_SMALL(31, -191, -127, 233) 410 #define WS_3_4 W_SMALL(27, -191, -127, 233) 411 #define WS_3_5 W_SMALL(29, -191, -127, 233) 412 #define WS_3_6 W_SMALL(28, -191, -127, 233) 413 #define WS_3_7 W_SMALL(26, -191, -127, 233) 415 #define W_BIG(sb, o1, o2, mm) \ 416 (INNER(q[16 * (sb) + 2 * 0 + o1], q[16 * (sb) + 2 * 0 + o2], mm), \ 417 INNER(q[16 * (sb) + 2 * 1 + o1], q[16 * (sb) + 2 
* 1 + o2], mm), \ 418 INNER(q[16 * (sb) + 2 * 2 + o1], q[16 * (sb) + 2 * 2 + o2], mm), \ 419 INNER(q[16 * (sb) + 2 * 3 + o1], q[16 * (sb) + 2 * 3 + o2], mm), \ 420 INNER(q[16 * (sb) + 2 * 4 + o1], q[16 * (sb) + 2 * 4 + o2], mm), \ 421 INNER(q[16 * (sb) + 2 * 5 + o1], q[16 * (sb) + 2 * 5 + o2], mm), \ 422 INNER(q[16 * (sb) + 2 * 6 + o1], q[16 * (sb) + 2 * 6 + o2], mm), \ 423 INNER(q[16 * (sb) + 2 * 7 + o1], q[16 * (sb) + 2 * 7 + o2], mm) 425 #define WB_0_0 W_BIG( 4, 0, 1, 185) 426 #define WB_0_1 W_BIG( 6, 0, 1, 185) 427 #define WB_0_2 W_BIG( 0, 0, 1, 185) 428 #define WB_0_3 W_BIG( 2, 0, 1, 185) 429 #define WB_0_4 W_BIG( 7, 0, 1, 185) 430 #define WB_0_5 W_BIG( 5, 0, 1, 185) 431 #define WB_0_6 W_BIG( 3, 0, 1, 185) 432 #define WB_0_7 W_BIG( 1, 0, 1, 185) 433 #define WB_1_0 W_BIG(15, 0, 1, 185) 434 #define WB_1_1 W_BIG(11, 0, 1, 185) 435 #define WB_1_2 W_BIG(12, 0, 1, 185) 436 #define WB_1_3 W_BIG( 8, 0, 1, 185) 437 #define WB_1_4 W_BIG( 9, 0, 1, 185) 438 #define WB_1_5 W_BIG(13, 0, 1, 185) 439 #define WB_1_6 W_BIG(10, 0, 1, 185) 440 #define WB_1_7 W_BIG(14, 0, 1, 185) 441 #define WB_2_0 W_BIG(17, -256, -128, 233) 442 #define WB_2_1 W_BIG(18, -256, -128, 233) 443 #define WB_2_2 W_BIG(23, -256, -128, 233) 444 #define WB_2_3 W_BIG(20, -256, -128, 233) 445 #define WB_2_4 W_BIG(22, -256, -128, 233) 446 #define WB_2_5 W_BIG(21, -256, -128, 233) 447 #define WB_2_6 W_BIG(16, -256, -128, 233) 448 #define WB_2_7 W_BIG(19, -256, -128, 233) 449 #define WB_3_0 W_BIG(30, -383, -255, 233) 450 #define WB_3_1 W_BIG(24, -383, -255, 233) 451 #define WB_3_2 W_BIG(25, -383, -255, 233) 452 #define WB_3_3 W_BIG(31, -383, -255, 233) 453 #define WB_3_4 W_BIG(27, -383, -255, 233) 454 #define WB_3_5 W_BIG(29, -383, -255, 233) 455 #define WB_3_6 W_BIG(28, -383, -255, 233) 456 #define WB_3_7 W_BIG(26, -383, -255, 233) 458 #define IF(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) 459 #define MAJ(x, y, z) (((x) & (y)) | (((x) | (y)) & (z))) 539 #define DECL_STATE_SMALL 540 #define READ_STATE_SMALL(sc) 541 
#define WRITE_STATE_SMALL(sc) 542 #define DECL_STATE_BIG 543 #define READ_STATE_BIG(sc) 544 #define WRITE_STATE_BIG(sc) 548 #define DECL_STATE_SMALL \ 549 u32 A0, A1, A2, A3, B0, B1, B2, B3, C0, C1, C2, C3, D0, D1, D2, D3; 551 #define READ_STATE_SMALL(sc) do { \ 552 A0 = (sc)->state[ 0]; \ 553 A1 = (sc)->state[ 1]; \ 554 A2 = (sc)->state[ 2]; \ 555 A3 = (sc)->state[ 3]; \ 556 B0 = (sc)->state[ 4]; \ 557 B1 = (sc)->state[ 5]; \ 558 B2 = (sc)->state[ 6]; \ 559 B3 = (sc)->state[ 7]; \ 560 C0 = (sc)->state[ 8]; \ 561 C1 = (sc)->state[ 9]; \ 562 C2 = (sc)->state[10]; \ 563 C3 = (sc)->state[11]; \ 564 D0 = (sc)->state[12]; \ 565 D1 = (sc)->state[13]; \ 566 D2 = (sc)->state[14]; \ 567 D3 = (sc)->state[15]; \ 570 #define WRITE_STATE_SMALL(sc) do { \ 571 (sc)->state[ 0] = A0; \ 572 (sc)->state[ 1] = A1; \ 573 (sc)->state[ 2] = A2; \ 574 (sc)->state[ 3] = A3; \ 575 (sc)->state[ 4] = B0; \ 576 (sc)->state[ 5] = B1; \ 577 (sc)->state[ 6] = B2; \ 578 (sc)->state[ 7] = B3; \ 579 (sc)->state[ 8] = C0; \ 580 (sc)->state[ 9] = C1; \ 581 (sc)->state[10] = C2; \ 582 (sc)->state[11] = C3; \ 583 (sc)->state[12] = D0; \ 584 (sc)->state[13] = D1; \ 585 (sc)->state[14] = D2; \ 586 (sc)->state[15] = D3; \ 589 #define DECL_STATE_BIG \ 590 u32 A0, A1, A2, A3, A4, A5, A6, A7; \ 591 u32 B0, B1, B2, B3, B4, B5, B6, B7; \ 592 u32 C0, C1, C2, C3, C4, C5, C6, C7; \ 593 u32 D0, D1, D2, D3, D4, D5, D6, D7; 595 #define READ_STATE_BIG(sc) do { \ 596 A0 = (sc)->state[ 0]; \ 597 A1 = (sc)->state[ 1]; \ 598 A2 = (sc)->state[ 2]; \ 599 A3 = (sc)->state[ 3]; \ 600 A4 = (sc)->state[ 4]; \ 601 A5 = (sc)->state[ 5]; \ 602 A6 = (sc)->state[ 6]; \ 603 A7 = (sc)->state[ 7]; \ 604 B0 = (sc)->state[ 8]; \ 605 B1 = (sc)->state[ 9]; \ 606 B2 = (sc)->state[10]; \ 607 B3 = (sc)->state[11]; \ 608 B4 = (sc)->state[12]; \ 609 B5 = (sc)->state[13]; \ 610 B6 = (sc)->state[14]; \ 611 B7 = (sc)->state[15]; \ 612 C0 = (sc)->state[16]; \ 613 C1 = (sc)->state[17]; \ 614 C2 = (sc)->state[18]; \ 615 C3 = (sc)->state[19]; \ 616 C4 
= (sc)->state[20]; \ 617 C5 = (sc)->state[21]; \ 618 C6 = (sc)->state[22]; \ 619 C7 = (sc)->state[23]; \ 620 D0 = (sc)->state[24]; \ 621 D1 = (sc)->state[25]; \ 622 D2 = (sc)->state[26]; \ 623 D3 = (sc)->state[27]; \ 624 D4 = (sc)->state[28]; \ 625 D5 = (sc)->state[29]; \ 626 D6 = (sc)->state[30]; \ 627 D7 = (sc)->state[31]; \ 630 #define WRITE_STATE_BIG(sc) do { \ 631 (sc)->state[ 0] = A0; \ 632 (sc)->state[ 1] = A1; \ 633 (sc)->state[ 2] = A2; \ 634 (sc)->state[ 3] = A3; \ 635 (sc)->state[ 4] = A4; \ 636 (sc)->state[ 5] = A5; \ 637 (sc)->state[ 6] = A6; \ 638 (sc)->state[ 7] = A7; \ 639 (sc)->state[ 8] = B0; \ 640 (sc)->state[ 9] = B1; \ 641 (sc)->state[10] = B2; \ 642 (sc)->state[11] = B3; \ 643 (sc)->state[12] = B4; \ 644 (sc)->state[13] = B5; \ 645 (sc)->state[14] = B6; \ 646 (sc)->state[15] = B7; \ 647 (sc)->state[16] = C0; \ 648 (sc)->state[17] = C1; \ 649 (sc)->state[18] = C2; \ 650 (sc)->state[19] = C3; \ 651 (sc)->state[20] = C4; \ 652 (sc)->state[21] = C5; \ 653 (sc)->state[22] = C6; \ 654 (sc)->state[23] = C7; \ 655 (sc)->state[24] = D0; \ 656 (sc)->state[25] = D1; \ 657 (sc)->state[26] = D2; \ 658 (sc)->state[27] = D3; \ 659 (sc)->state[28] = D4; \ 660 (sc)->state[29] = D5; \ 661 (sc)->state[30] = D6; \ 662 (sc)->state[31] = D7; \ 667 #define STEP_ELT(n, w, fun, s, ppb) do { \ 668 u32 tt = T32(D ## n + (w) + fun(A ## n, B ## n, C ## n)); \ 669 A ## n = T32(ROL32(tt, s) + XCAT(tA, XCAT(ppb, n))); \ 675 #define STEP_SMALL(w0, w1, w2, w3, fun, r, s, pp4b) do { \ 676 u32 tA0 = ROL32(A0, r); \ 677 u32 tA1 = ROL32(A1, r); \ 678 u32 tA2 = ROL32(A2, r); \ 679 u32 tA3 = ROL32(A3, r); \ 680 STEP_ELT(0, w0, fun, s, pp4b); \ 681 STEP_ELT(1, w1, fun, s, pp4b); \ 682 STEP_ELT(2, w2, fun, s, pp4b); \ 683 STEP_ELT(3, w3, fun, s, pp4b); \ 686 #define STEP_BIG(w0, w1, w2, w3, w4, w5, w6, w7, fun, r, s, pp8b) do { \ 687 u32 tA0 = ROL32(A0, r); \ 688 u32 tA1 = ROL32(A1, r); \ 689 u32 tA2 = ROL32(A2, r); \ 690 u32 tA3 = ROL32(A3, r); \ 691 u32 tA4 = ROL32(A4, r); \ 692 u32 
tA5 = ROL32(A5, r); \ 693 u32 tA6 = ROL32(A6, r); \ 694 u32 tA7 = ROL32(A7, r); \ 695 STEP_ELT(0, w0, fun, s, pp8b); \ 696 STEP_ELT(1, w1, fun, s, pp8b); \ 697 STEP_ELT(2, w2, fun, s, pp8b); \ 698 STEP_ELT(3, w3, fun, s, pp8b); \ 699 STEP_ELT(4, w4, fun, s, pp8b); \ 700 STEP_ELT(5, w5, fun, s, pp8b); \ 701 STEP_ELT(6, w6, fun, s, pp8b); \ 702 STEP_ELT(7, w7, fun, s, pp8b); \ 732 #define STEP_SMALL_(w, fun, r, s, pp4b) STEP_SMALL w, fun, r, s, pp4b) 734 #define ONE_ROUND_SMALL(ri, isp, p0, p1, p2, p3) do { \ 735 STEP_SMALL_(WS_ ## ri ## 0, \ 736 IF, p0, p1, XCAT(PP4_, M3_0_ ## isp)); \ 737 STEP_SMALL_(WS_ ## ri ## 1, \ 738 IF, p1, p2, XCAT(PP4_, M3_1_ ## isp)); \ 739 STEP_SMALL_(WS_ ## ri ## 2, \ 740 IF, p2, p3, XCAT(PP4_, M3_2_ ## isp)); \ 741 STEP_SMALL_(WS_ ## ri ## 3, \ 742 IF, p3, p0, XCAT(PP4_, M3_3_ ## isp)); \ 743 STEP_SMALL_(WS_ ## ri ## 4, \ 744 MAJ, p0, p1, XCAT(PP4_, M3_4_ ## isp)); \ 745 STEP_SMALL_(WS_ ## ri ## 5, \ 746 MAJ, p1, p2, XCAT(PP4_, M3_5_ ## isp)); \ 747 STEP_SMALL_(WS_ ## ri ## 6, \ 748 MAJ, p2, p3, XCAT(PP4_, M3_6_ ## isp)); \ 749 STEP_SMALL_(WS_ ## ri ## 7, \ 750 MAJ, p3, p0, XCAT(PP4_, M3_7_ ## isp)); \ 789 #define STEP_BIG_(w, fun, r, s, pp8b) STEP_BIG w, fun, r, s, pp8b) 791 #define ONE_ROUND_BIG(ri, isp, p0, p1, p2, p3) do { \ 792 STEP_BIG_(WB_ ## ri ## 0, \ 793 IF, p0, p1, XCAT(PP8_, M7_0_ ## isp)); \ 794 STEP_BIG_(WB_ ## ri ## 1, \ 795 IF, p1, p2, XCAT(PP8_, M7_1_ ## isp)); \ 796 STEP_BIG_(WB_ ## ri ## 2, \ 797 IF, p2, p3, XCAT(PP8_, M7_2_ ## isp)); \ 798 STEP_BIG_(WB_ ## ri ## 3, \ 799 IF, p3, p0, XCAT(PP8_, M7_3_ ## isp)); \ 800 STEP_BIG_(WB_ ## ri ## 4, \ 801 MAJ, p0, p1, XCAT(PP8_, M7_4_ ## isp)); \ 802 STEP_BIG_(WB_ ## ri ## 5, \ 803 MAJ, p1, p2, XCAT(PP8_, M7_5_ ## isp)); \ 804 STEP_BIG_(WB_ ## ri ## 6, \ 805 MAJ, p2, p3, XCAT(PP8_, M7_6_ ## isp)); \ 806 STEP_BIG_(WB_ ## ri ## 7, \ 807 MAJ, p3, p0, XCAT(PP8_, M7_7_ ## isp)); \ 810 #if SPH_SMALL_FOOTPRINT_SIMD 829 #define STEP2_ELT(n, w, fun, s, ppb) do { \ 830 u32 tt = T32(D 
/*
 * Tail of STEP2_ELT/STEP2_SMALL (small-footprint step: same A/B/C/D
 * update as STEP_ELT but the rotated-A permutation is picked from an
 * array tA[] via index XOR, so it can be driven by runtime data), then:
 *
 * one_round_small: one round of the small compression function,
 * small-footprint variant.  Runs eight STEP2_SMALL steps -- four with
 * the IF mixer, then four with MAJ -- over message words w[0..31],
 * taking per-step permutation values from pp4k.  pp4k carries 3 extra
 * trailing entries so isp + 0..7 stays in range for every isp the
 * callers pass (isp in {0, 1, 2}).
 *
 * NOTE(review): the function's storage class / return type (original
 * line 849) was dropped by extraction -- presumably "static void";
 * confirm against upstream.
 */
## n + (w) + fun(A ## n, B ## n, C ## n)); \ 831 A ## n = T32(ROL32(tt, s) + tA[(ppb) ^ n]); \ 837 #define STEP2_SMALL(w0, w1, w2, w3, fun, r, s, pp4b) do { \ 839 tA[0] = ROL32(A0, r); \ 840 tA[1] = ROL32(A1, r); \ 841 tA[2] = ROL32(A2, r); \ 842 tA[3] = ROL32(A3, r); \ 843 STEP2_ELT(0, w0, fun, s, pp4b); \ 844 STEP2_ELT(1, w1, fun, s, pp4b); \ 845 STEP2_ELT(2, w2, fun, s, pp4b); \ 846 STEP2_ELT(3, w3, fun, s, pp4b); \ 850 one_round_small(u32 *state, u32 *w,
int isp,
int p0,
int p1,
int p2,
int p3)
852 static const int pp4k[] = { 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2 };
854 STEP2_SMALL(w[ 0], w[ 1], w[ 2], w[ 3],
IF, p0, p1, pp4k[isp + 0]);
855 STEP2_SMALL(w[ 4], w[ 5], w[ 6], w[ 7],
IF, p1, p2, pp4k[isp + 1]);
856 STEP2_SMALL(w[ 8], w[ 9], w[10], w[11],
IF, p2, p3, pp4k[isp + 2]);
857 STEP2_SMALL(w[12], w[13], w[14], w[15],
IF, p3, p0, pp4k[isp + 3]);
858 STEP2_SMALL(w[16], w[17], w[18], w[19],
MAJ, p0, p1, pp4k[isp + 4]);
859 STEP2_SMALL(w[20], w[21], w[22], w[23],
MAJ, p1, p2, pp4k[isp + 5]);
860 STEP2_SMALL(w[24], w[25], w[26], w[27],
MAJ, p2, p3, pp4k[isp + 6]);
861 STEP2_SMALL(w[28], w[29], w[30], w[31],
MAJ, p3, p0, pp4k[isp + 7]);
/*
 * Interior of the small-footprint compress_small():
 *  - wsp[]: per-step base offsets into q[] (sub-block index << 3),
 *    matching the W_SMALL sb constants (4,6,0,2,7,5,3,1, 15,11,...);
 *  - two "i < 128" loops fold yoff_s_f[] (final block) or yoff_s_n[]
 *    (non-final) into q[] and renormalize each entry into [-128, 128]
 *    via "tq <= 128 ? tq : tq - 257" -- NOTE(review): the surrounding
 *    if (last) / else and the intermediate "tq = REDS..." lines
 *    (original 883-908) were dropped by extraction;
 *  - the "i < 16" loop XORs the 64-byte input block into a local state
 *    copy, little-endian, 4 words per iteration;
 *  - WSREAD builds 32 message words from permuted q[] pairs via INNER;
 *  - four one_round_small calls with per-round (o1, o2, mm) parameters
 *    mirroring the WS_0_* .. WS_3_* constants of the unrolled path.
 */
874 static const size_t wsp[32] = {
875 4 << 3, 6 << 3, 0 << 3, 2 << 3,
876 7 << 3, 5 << 3, 3 << 3, 1 << 3,
877 15 << 3, 11 << 3, 12 << 3, 8 << 3,
878 9 << 3, 13 << 3, 10 << 3, 14 << 3,
879 17 << 3, 18 << 3, 23 << 3, 20 << 3,
880 22 << 3, 21 << 3, 16 << 3, 19 << 3,
881 30 << 3, 24 << 3, 25 << 3, 31 << 3,
882 27 << 3, 29 << 3, 28 << 3, 26 << 3
888 for (i = 0; i < 128; i ++) {
891 tq = q[i] + yoff_s_f[i];
895 q[i] = (tq <= 128 ? tq : tq - 257);
898 for (i = 0; i < 128; i ++) {
901 tq = q[i] + yoff_s_n[i];
905 q[i] = (tq <= 128 ? tq : tq - 257);
909 for (i = 0; i < 16; i += 4) {
910 state[i + 0] = sc->
state[i + 0]
911 ^ sph_dec32le_aligned(x + 4 * (i + 0));
912 state[i + 1] = sc->
state[i + 1]
913 ^ sph_dec32le_aligned(x + 4 * (i + 1));
914 state[i + 2] = sc->
state[i + 2]
915 ^ sph_dec32le_aligned(x + 4 * (i + 2));
916 state[i + 3] = sc->
state[i + 3]
917 ^ sph_dec32le_aligned(x + 4 * (i + 3));
920 #define WSREAD(sb, o1, o2, mm) do { \ 921 for (u = 0; u < 32; u += 4) { \ 922 size_t v = wsp[(u >> 2) + (sb)]; \ 923 w[u + 0] = INNER(q[v + 2 * 0 + (o1)], \ 924 q[v + 2 * 0 + (o2)], mm); \ 925 w[u + 1] = INNER(q[v + 2 * 1 + (o1)], \ 926 q[v + 2 * 1 + (o2)], mm); \ 927 w[u + 2] = INNER(q[v + 2 * 2 + (o1)], \ 928 q[v + 2 * 2 + (o2)], mm); \ 929 w[u + 3] = INNER(q[v + 2 * 3 + (o1)], \ 930 q[v + 2 * 3 + (o2)], mm); \ 934 WSREAD( 0, 0, 1, 185);
935 one_round_small(state, w, 0, 3, 23, 17, 27);
936 WSREAD( 8, 0, 1, 185);
937 one_round_small(state, w, 2, 28, 19, 22, 7);
938 WSREAD(16, -128, -64, 233);
939 one_round_small(state, w, 1, 29, 9, 15, 5);
940 WSREAD(24, -191, -127, 233);
941 one_round_small(state, w, 0, 4, 13, 10, 25);
/*
 * Interior of the fully-unrolled compress_small() (the
 * !SPH_SMALL_FOOTPRINT_SIMD path): A0..D3 alias sc->state[] directly,
 * the two "i < 128" loops perform the same yoff_s_f / yoff_s_n
 * renormalization as the small-footprint path, then the 64-byte block
 * is XORed in word by word.  The trailing STEP_SMALL calls are the
 * Davies-Meyer-style feed-forward that mixes the saved[] pre-block
 * state back in after the four ONE_ROUND_SMALL rounds.
 * NOTE(review): the saved[] capture, the ONE_ROUND_SMALL invocations
 * and several STEP_SMALL argument lines (numbering gaps 992->1012,
 * 1048->1054, 1060->1066) were dropped by extraction.
 */
977 #define A0 (sc->state[ 0]) 978 #define A1 (sc->state[ 1]) 979 #define A2 (sc->state[ 2]) 980 #define A3 (sc->state[ 3]) 981 #define B0 (sc->state[ 4]) 982 #define B1 (sc->state[ 5]) 983 #define B2 (sc->state[ 6]) 984 #define B3 (sc->state[ 7]) 985 #define C0 (sc->state[ 8]) 986 #define C1 (sc->state[ 9]) 987 #define C2 (sc->state[10]) 988 #define C3 (sc->state[11]) 989 #define D0 (sc->state[12]) 990 #define D1 (sc->state[13]) 991 #define D2 (sc->state[14]) 992 #define D3 (sc->state[15]) 1012 for (i = 0; i < 128; i ++) {
1015 tq = q[i] + yoff_s_f[i];
1019 q[i] = (tq <= 128 ? tq : tq - 257);
1022 for (i = 0; i < 128; i ++) {
1025 tq = q[i] + yoff_s_n[i];
1029 q[i] = (tq <= 128 ? tq : tq - 257);
1033 A0 ^= sph_dec32le_aligned(x + 0);
1034 A1 ^= sph_dec32le_aligned(x + 4);
1035 A2 ^= sph_dec32le_aligned(x + 8);
1036 A3 ^= sph_dec32le_aligned(x + 12);
1037 B0 ^= sph_dec32le_aligned(x + 16);
1038 B1 ^= sph_dec32le_aligned(x + 20);
1039 B2 ^= sph_dec32le_aligned(x + 24);
1040 B3 ^= sph_dec32le_aligned(x + 28);
1041 C0 ^= sph_dec32le_aligned(x + 32);
1042 C1 ^= sph_dec32le_aligned(x + 36);
1043 C2 ^= sph_dec32le_aligned(x + 40);
1044 C3 ^= sph_dec32le_aligned(x + 44);
1045 D0 ^= sph_dec32le_aligned(x + 48);
1046 D1 ^= sph_dec32le_aligned(x + 52);
1047 D2 ^= sph_dec32le_aligned(x + 56);
1048 D3 ^= sph_dec32le_aligned(x + 60);
1054 STEP_SMALL(saved[ 0], saved[ 1], saved[ 2], saved[ 3],
1056 STEP_SMALL(saved[ 4], saved[ 5], saved[ 6], saved[ 7],
1057 IF, 13, 10, PP4_0_);
1058 STEP_SMALL(saved[ 8], saved[ 9], saved[10], saved[11],
1059 IF, 10, 25, PP4_1_);
1060 STEP_SMALL(saved[12], saved[13], saved[14], saved[15],
1066 IF, 13, 10, PP4_0_);
1068 IF, 10, 25, PP4_1_);
/*
 * Small-footprint big-state section: A0..D7 alias the 32-word state[]
 * array, STEP2_BIG is the 8-lane analogue of STEP2_SMALL (rotated A
 * lanes cached in tA[], permutation applied via tA[(ppb) ^ n] inside
 * STEP2_ELT), and:
 *
 * one_round_big: one round of the big compression function.  Eight
 * STEP2_BIG steps -- four IF, four MAJ -- over message words w[0..63],
 * with per-step permutation values from pp8k.  pp8k has 3 extra
 * trailing entries so isp + 0..7 stays in range for the callers' isp
 * values 0..3.
 * NOTE(review): the function's storage class / return type (around
 * original line 1163) was dropped by extraction -- presumably
 * "static void"; confirm against upstream.
 */
1096 #if SPH_SMALL_FOOTPRINT_SIMD 1098 #define A0 state[ 0] 1099 #define A1 state[ 1] 1100 #define A2 state[ 2] 1101 #define A3 state[ 3] 1102 #define A4 state[ 4] 1103 #define A5 state[ 5] 1104 #define A6 state[ 6] 1105 #define A7 state[ 7] 1106 #define B0 state[ 8] 1107 #define B1 state[ 9] 1108 #define B2 state[10] 1109 #define B3 state[11] 1110 #define B4 state[12] 1111 #define B5 state[13] 1112 #define B6 state[14] 1113 #define B7 state[15] 1114 #define C0 state[16] 1115 #define C1 state[17] 1116 #define C2 state[18] 1117 #define C3 state[19] 1118 #define C4 state[20] 1119 #define C5 state[21] 1120 #define C6 state[22] 1121 #define C7 state[23] 1122 #define D0 state[24] 1123 #define D1 state[25] 1124 #define D2 state[26] 1125 #define D3 state[27] 1126 #define D4 state[28] 1127 #define D5 state[29] 1128 #define D6 state[30] 1129 #define D7 state[31] 1143 #define STEP2_BIG(w0, w1, w2, w3, w4, w5, w6, w7, fun, r, s, pp8b) do { \ 1145 tA[0] = ROL32(A0, r); \ 1146 tA[1] = ROL32(A1, r); \ 1147 tA[2] = ROL32(A2, r); \ 1148 tA[3] = ROL32(A3, r); \ 1149 tA[4] = ROL32(A4, r); \ 1150 tA[5] = ROL32(A5, r); \ 1151 tA[6] = ROL32(A6, r); \ 1152 tA[7] = ROL32(A7, r); \ 1153 STEP2_ELT(0, w0, fun, s, pp8b); \ 1154 STEP2_ELT(1, w1, fun, s, pp8b); \ 1155 STEP2_ELT(2, w2, fun, s, pp8b); \ 1156 STEP2_ELT(3, w3, fun, s, pp8b); \ 1157 STEP2_ELT(4, w4, fun, s, pp8b); \ 1158 STEP2_ELT(5, w5, fun, s, pp8b); \ 1159 STEP2_ELT(6, w6, fun, s, pp8b); \ 1160 STEP2_ELT(7, w7, fun, s, pp8b); \ 1164 one_round_big(u32 *state, u32 *w,
int isp,
int p0,
int p1,
int p2,
int p3)
1166 static const int pp8k[] = { 1, 6, 2, 3, 5, 7, 4, 1, 6, 2, 3 };
1168 STEP2_BIG(w[ 0], w[ 1], w[ 2], w[ 3], w[ 4], w[ 5], w[ 6], w[ 7],
1169 IF, p0, p1, pp8k[isp + 0]);
1170 STEP2_BIG(w[ 8], w[ 9], w[10], w[11], w[12], w[13], w[14], w[15],
1171 IF, p1, p2, pp8k[isp + 1]);
1172 STEP2_BIG(w[16], w[17], w[18], w[19], w[20], w[21], w[22], w[23],
1173 IF, p2, p3, pp8k[isp + 2]);
1174 STEP2_BIG(w[24], w[25], w[26], w[27], w[28], w[29], w[30], w[31],
1175 IF, p3, p0, pp8k[isp + 3]);
1176 STEP2_BIG(w[32], w[33], w[34], w[35], w[36], w[37], w[38], w[39],
1177 MAJ, p0, p1, pp8k[isp + 4]);
1178 STEP2_BIG(w[40], w[41], w[42], w[43], w[44], w[45], w[46], w[47],
1179 MAJ, p1, p2, pp8k[isp + 5]);
1180 STEP2_BIG(w[48], w[49], w[50], w[51], w[52], w[53], w[54], w[55],
1181 MAJ, p2, p3, pp8k[isp + 6]);
1182 STEP2_BIG(w[56], w[57], w[58], w[59], w[60], w[61], w[62], w[63],
1183 MAJ, p3, p0, pp8k[isp + 7]);
/*
 * Interior of the small-footprint compress_big():
 *  - wbp[]: per-step base offsets into the 256-word q[] (sub-block
 *    index << 4), mirroring the WB_* sb constants;
 *  - two "i < 256" loops fold yoff_b_f[] (final) or yoff_b_n[]
 *    (non-final) into q[] with the same [-128, 128] renormalization as
 *    the small variant (intermediate REDS lines dropped by extraction);
 *  - the "i < 32" loop XORs the 128-byte input block into a local
 *    state copy, little-endian, 8 words per iteration;
 *  - WBREAD builds 64 message words from permuted q[] pairs;
 *  - four one_round_big calls (isp = 0..3), then fragments of the
 *    feed-forward STEP2_BIG calls (most of their argument lines,
 *    original 1282-1296, were dropped by extraction).
 */
1196 static const size_t wbp[32] = {
1197 4 << 4, 6 << 4, 0 << 4, 2 << 4,
1198 7 << 4, 5 << 4, 3 << 4, 1 << 4,
1199 15 << 4, 11 << 4, 12 << 4, 8 << 4,
1200 9 << 4, 13 << 4, 10 << 4, 14 << 4,
1201 17 << 4, 18 << 4, 23 << 4, 20 << 4,
1202 22 << 4, 21 << 4, 16 << 4, 19 << 4,
1203 30 << 4, 24 << 4, 25 << 4, 31 << 4,
1204 27 << 4, 29 << 4, 28 << 4, 26 << 4
1210 for (i = 0; i < 256; i ++) {
1213 tq = q[i] + yoff_b_f[i];
1217 q[i] = (tq <= 128 ? tq : tq - 257);
1220 for (i = 0; i < 256; i ++) {
1223 tq = q[i] + yoff_b_n[i];
1227 q[i] = (tq <= 128 ? tq : tq - 257);
1231 for (i = 0; i < 32; i += 8) {
1232 state[i + 0] = sc->
state[i + 0]
1233 ^ sph_dec32le_aligned(x + 4 * (i + 0));
1234 state[i + 1] = sc->
state[i + 1]
1235 ^ sph_dec32le_aligned(x + 4 * (i + 1));
1236 state[i + 2] = sc->
state[i + 2]
1237 ^ sph_dec32le_aligned(x + 4 * (i + 2));
1238 state[i + 3] = sc->
state[i + 3]
1239 ^ sph_dec32le_aligned(x + 4 * (i + 3));
1240 state[i + 4] = sc->
state[i + 4]
1241 ^ sph_dec32le_aligned(x + 4 * (i + 4));
1242 state[i + 5] = sc->
state[i + 5]
1243 ^ sph_dec32le_aligned(x + 4 * (i + 5));
1244 state[i + 6] = sc->
state[i + 6]
1245 ^ sph_dec32le_aligned(x + 4 * (i + 6));
1246 state[i + 7] = sc->
state[i + 7]
1247 ^ sph_dec32le_aligned(x + 4 * (i + 7));
1250 #define WBREAD(sb, o1, o2, mm) do { \ 1251 for (u = 0; u < 64; u += 8) { \ 1252 size_t v = wbp[(u >> 3) + (sb)]; \ 1253 w[u + 0] = INNER(q[v + 2 * 0 + (o1)], \ 1254 q[v + 2 * 0 + (o2)], mm); \ 1255 w[u + 1] = INNER(q[v + 2 * 1 + (o1)], \ 1256 q[v + 2 * 1 + (o2)], mm); \ 1257 w[u + 2] = INNER(q[v + 2 * 2 + (o1)], \ 1258 q[v + 2 * 2 + (o2)], mm); \ 1259 w[u + 3] = INNER(q[v + 2 * 3 + (o1)], \ 1260 q[v + 2 * 3 + (o2)], mm); \ 1261 w[u + 4] = INNER(q[v + 2 * 4 + (o1)], \ 1262 q[v + 2 * 4 + (o2)], mm); \ 1263 w[u + 5] = INNER(q[v + 2 * 5 + (o1)], \ 1264 q[v + 2 * 5 + (o2)], mm); \ 1265 w[u + 6] = INNER(q[v + 2 * 6 + (o1)], \ 1266 q[v + 2 * 6 + (o2)], mm); \ 1267 w[u + 7] = INNER(q[v + 2 * 7 + (o1)], \ 1268 q[v + 2 * 7 + (o2)], mm); \ 1272 WBREAD( 0, 0, 1, 185);
1273 one_round_big(state, w, 0, 3, 23, 17, 27);
1274 WBREAD( 8, 0, 1, 185);
1275 one_round_big(state, w, 1, 28, 19, 22, 7);
1276 WBREAD(16, -256, -128, 233);
1277 one_round_big(state, w, 2, 29, 9, 15, 5);
1278 WBREAD(24, -383, -255, 233);
1279 one_round_big(state, w, 3, 4, 13, 10, 25);
1290 IF, 13, 10, PP8_5_);
1294 IF, 10, 25, PP8_6_);
/*
 * Interior of the fully-unrolled compress_big(): A0..D7 alias
 * sc->state[0..31], the two "i < 256" loops perform the yoff_b_f /
 * yoff_b_n renormalization, the 128-byte block is XORed in word by
 * word, and the trailing STEP_BIG calls implement the feed-forward of
 * the saved[] pre-block state after the four ONE_ROUND_BIG rounds
 * (the round invocations and some STEP_BIG argument lines, numbering
 * gaps 1443->1451 and 1464->1474, were dropped by extraction).
 */
1339 #define A0 (sc->state[ 0]) 1340 #define A1 (sc->state[ 1]) 1341 #define A2 (sc->state[ 2]) 1342 #define A3 (sc->state[ 3]) 1343 #define A4 (sc->state[ 4]) 1344 #define A5 (sc->state[ 5]) 1345 #define A6 (sc->state[ 6]) 1346 #define A7 (sc->state[ 7]) 1347 #define B0 (sc->state[ 8]) 1348 #define B1 (sc->state[ 9]) 1349 #define B2 (sc->state[10]) 1350 #define B3 (sc->state[11]) 1351 #define B4 (sc->state[12]) 1352 #define B5 (sc->state[13]) 1353 #define B6 (sc->state[14]) 1354 #define B7 (sc->state[15]) 1355 #define C0 (sc->state[16]) 1356 #define C1 (sc->state[17]) 1357 #define C2 (sc->state[18]) 1358 #define C3 (sc->state[19]) 1359 #define C4 (sc->state[20]) 1360 #define C5 (sc->state[21]) 1361 #define C6 (sc->state[22]) 1362 #define C7 (sc->state[23]) 1363 #define D0 (sc->state[24]) 1364 #define D1 (sc->state[25]) 1365 #define D2 (sc->state[26]) 1366 #define D3 (sc->state[27]) 1367 #define D4 (sc->state[28]) 1368 #define D5 (sc->state[29]) 1369 #define D6 (sc->state[30]) 1370 #define D7 (sc->state[31]) 1391 for (i = 0; i < 256; i ++) {
1394 tq = q[i] + yoff_b_f[i];
1398 q[i] = (tq <= 128 ? tq : tq - 257);
1401 for (i = 0; i < 256; i ++) {
1404 tq = q[i] + yoff_b_n[i];
1408 q[i] = (tq <= 128 ? tq : tq - 257);
1412 A0 ^= sph_dec32le_aligned(x + 0);
1413 A1 ^= sph_dec32le_aligned(x + 4);
1414 A2 ^= sph_dec32le_aligned(x + 8);
1415 A3 ^= sph_dec32le_aligned(x + 12);
1416 A4 ^= sph_dec32le_aligned(x + 16);
1417 A5 ^= sph_dec32le_aligned(x + 20);
1418 A6 ^= sph_dec32le_aligned(x + 24);
1419 A7 ^= sph_dec32le_aligned(x + 28);
1420 B0 ^= sph_dec32le_aligned(x + 32);
1421 B1 ^= sph_dec32le_aligned(x + 36);
1422 B2 ^= sph_dec32le_aligned(x + 40);
1423 B3 ^= sph_dec32le_aligned(x + 44);
1424 B4 ^= sph_dec32le_aligned(x + 48);
1425 B5 ^= sph_dec32le_aligned(x + 52);
1426 B6 ^= sph_dec32le_aligned(x + 56);
1427 B7 ^= sph_dec32le_aligned(x + 60);
1428 C0 ^= sph_dec32le_aligned(x + 64);
1429 C1 ^= sph_dec32le_aligned(x + 68);
1430 C2 ^= sph_dec32le_aligned(x + 72);
1431 C3 ^= sph_dec32le_aligned(x + 76);
1432 C4 ^= sph_dec32le_aligned(x + 80);
1433 C5 ^= sph_dec32le_aligned(x + 84);
1434 C6 ^= sph_dec32le_aligned(x + 88);
1435 C7 ^= sph_dec32le_aligned(x + 92);
1436 D0 ^= sph_dec32le_aligned(x + 96);
1437 D1 ^= sph_dec32le_aligned(x + 100);
1438 D2 ^= sph_dec32le_aligned(x + 104);
1439 D3 ^= sph_dec32le_aligned(x + 108);
1440 D4 ^= sph_dec32le_aligned(x + 112);
1441 D5 ^= sph_dec32le_aligned(x + 116);
1442 D6 ^= sph_dec32le_aligned(x + 120);
1443 D7 ^= sph_dec32le_aligned(x + 124);
1451 saved[ 0], saved[ 1], saved[ 2], saved[ 3],
1452 saved[ 4], saved[ 5], saved[ 6], saved[ 7],
1455 saved[ 8], saved[ 9], saved[10], saved[11],
1456 saved[12], saved[13], saved[14], saved[15],
1457 IF, 13, 10, PP8_5_);
1459 saved[16], saved[17], saved[18], saved[19],
1460 saved[20], saved[21], saved[22], saved[23],
1461 IF, 10, 25, PP8_6_);
1463 saved[24], saved[25], saved[26], saved[27],
1464 saved[28], saved[29], saved[30], saved[31],
1474 IF, 13, 10, PP8_5_);
1478 IF, 10, 25, PP8_6_);
/*
 * Initialization vectors for the four digest sizes.  IV224/IV256 hold
 * 16 u32 words each (the small 512-bit state); IV384/IV512 hold 32
 * words each (the big 1024-bit state).  They are passed to
 * init_small()/init_big() below.
 * NOTE(review): after each table's last entry the original-line
 * numbering jumps (e.g. 1528 -> 1531), so every closing "};" was
 * dropped by extraction.
 */
1524 static const u32 IV224[] = {
1525 C32(0x33586E9F),
C32(0x12FFF033),
C32(0xB2D9F64D),
C32(0x6F8FEA53),
1526 C32(0xDE943106),
C32(0x2742E439),
C32(0x4FBAB5AC),
C32(0x62B9FF96),
1527 C32(0x22E7B0AF),
C32(0xC862B3A8),
C32(0x33E00CDC),
C32(0x236B86A6),
1528 C32(0xF64AE77C),
C32(0xFA373B76),
C32(0x7DC1EE5B),
C32(0x7FB29CE8)
1531 static const u32 IV256[] = {
1532 C32(0x4D567983),
C32(0x07190BA9),
C32(0x8474577B),
C32(0x39D726E9),
1533 C32(0xAAF3D925),
C32(0x3EE20B03),
C32(0xAFD5E751),
C32(0xC96006D3),
1534 C32(0xC2C2BA14),
C32(0x49B3BCB4),
C32(0xF67CAF46),
C32(0x668626C9),
1535 C32(0xE2EAA8D2),
C32(0x1FF47833),
C32(0xD0C661A5),
C32(0x55693DE1)
1538 static const u32 IV384[] = {
1539 C32(0x8A36EEBC),
C32(0x94A3BD90),
C32(0xD1537B83),
C32(0xB25B070B),
1540 C32(0xF463F1B5),
C32(0xB6F81E20),
C32(0x0055C339),
C32(0xB4D144D1),
1541 C32(0x7360CA61),
C32(0x18361A03),
C32(0x17DCB4B9),
C32(0x3414C45A),
1542 C32(0xA699A9D2),
C32(0xE39E9664),
C32(0x468BFE77),
C32(0x51D062F8),
1543 C32(0xB9E3BFE8),
C32(0x63BECE2A),
C32(0x8FE506B9),
C32(0xF8CC4AC2),
1544 C32(0x7AE11542),
C32(0xB1AADDA1),
C32(0x64B06794),
C32(0x28D2F462),
1545 C32(0xE64071EC),
C32(0x1DEB91A8),
C32(0x8AC8DB23),
C32(0x3F782AB5),
1546 C32(0x039B5CB8),
C32(0x71DDD962),
C32(0xFADE2CEA),
C32(0x1416DF71)
1549 static const u32 IV512[] = {
1550 C32(0x0BA16B95),
C32(0x72F999AD),
C32(0x9FECC2AE),
C32(0xBA3264FC),
1551 C32(0x5E894929),
C32(0x8E9F30E5),
C32(0x2F1DAA37),
C32(0xF0F2C558),
1552 C32(0xAC506643),
C32(0xA90635A5),
C32(0xE25B878B),
C32(0xAAB7878F),
1553 C32(0x88817F7A),
C32(0x0A02892B),
C32(0x559A7550),
C32(0x598F657E),
1554 C32(0x7EEF60A1),
C32(0x6B70E3E8),
C32(0x9C1714D1),
C32(0xB958E2A8),
1555 C32(0xAB02675E),
C32(0xED1C014F),
C32(0xCD8D65BB),
C32(0xFDB7A257),
1556 C32(0x09254899),
C32(0xD699C7BC),
C32(0x9019B6DC),
C32(0x2B9022E4),
1557 C32(0x8FA14956),
C32(0x21BF9BD3),
C32(0xB94D0943),
C32(0x6FFDDC22)
/*
 * NOTE(review): extraction-garbled fragment. Only the parameter list of
 * the shared initializer for the small (SIMD-224/256) context survived;
 * the storage-class line and the function body were lost. Presumably it
 * copies the 16-word IV into the context state and clears the buffer
 * pointer and block counters -- confirm against upstream sphlib
 * sph_simd.c before editing.
 */
1561 init_small(
void *cc,
const u32 *iv)
/*
 * NOTE(review): extraction-garbled fragment. Only the parameter list of
 * the shared initializer for the big (SIMD-384/512) context survived;
 * the storage-class line and the function body were lost. Presumably it
 * copies the 32-word IV into the context state and clears the buffer
 * pointer and block counters -- confirm against upstream sphlib
 * sph_simd.c before editing.
 */
1572 init_big(
void *cc,
const u32 *iv)
/*
 * NOTE(review): extraction-garbled fragment of the small-variant update
 * function. The buffering loop header, the memcpy into sc->buf, the
 * len bookkeeping and the block-counter update were lost in extraction;
 * the comments below annotate only the surviving statements. Confirm
 * the missing parts against upstream sphlib sph_simd.c.
 */
1583 update_small(
void *cc,
const void *data,
size_t len)
/* Bytes of room left in the context's input buffer. */
1591 clen = (
sizeof sc->
buf) - sc->
ptr;
/* Advance the input pointer past the bytes just consumed. */
1595 data = (
const unsigned char *)data + clen;
/* When the buffer fills exactly, compress it as a non-final block. */
1597 if ((sc->
ptr += clen) ==
sizeof sc->
buf) {
1598 compress_small(sc, 0);
/*
 * NOTE(review): extraction-garbled fragment of the big-variant update
 * function, mirroring update_small() but calling compress_big(). The
 * buffering loop header, the memcpy into sc->buf, the len bookkeeping
 * and the block-counter update were lost in extraction; the comments
 * below annotate only the surviving statements. Confirm the missing
 * parts against upstream sphlib sph_simd.c.
 */
1608 update_big(
void *cc,
const void *data,
size_t len)
/* Bytes of room left in the context's input buffer. */
1616 clen = (
sizeof sc->
buf) - sc->
ptr;
/* Advance the input pointer past the bytes just consumed. */
1620 data = (
const unsigned char *)data + clen;
/* When the buffer fills exactly, compress it as a non-final block. */
1622 if ((sc->
ptr += clen) ==
sizeof sc->
buf) {
1623 compress_big(sc, 0);
1633 encode_count_small(
unsigned char *dst,
1634 u32
low, u32
high,
size_t ptr,
unsigned n)
1636 low =
T32(low << 9);
1637 high =
T32(high << 9) + (low >> 23);
1638 low += (ptr << 3) + n;
1639 sph_enc32le(dst, low);
1640 sph_enc32le(dst + 4, high);
1644 encode_count_big(
unsigned char *dst,
1645 u32 low, u32 high,
size_t ptr,
unsigned n)
1647 low =
T32(low << 10);
1648 high =
T32(high << 10) + (low >> 22);
1649 low += (ptr << 3) + n;
1650 sph_enc32le(dst, low);
1651 sph_enc32le(dst + 4, high);
/*
 * NOTE(review): extraction-garbled fragment of the small-variant
 * finalization. The storage-class line, the local declarations and the
 * length-encoding call between the two compressions were lost in
 * extraction; the comments below annotate only the surviving
 * statements. Confirm the missing parts against upstream sphlib
 * sph_simd.c.
 */
1655 finalize_small(
void *cc,
unsigned ub,
unsigned n,
void *dst,
size_t dst_len)
/* If any data or trailing bits remain, zero-pad the buffer and
   compress it as an ordinary (non-final) block. */
1662 if (sc->
ptr > 0 || n > 0) {
1663 memset(sc->
buf + sc->
ptr, 0,
1664 (
sizeof sc->
buf) - sc->
ptr);
/* Keep only the top n bits of ub as the trailing partial byte. */
1665 sc->
buf[sc->
ptr] = ub & (0xFF << (8 - n));
1666 compress_small(sc, 0);
/* Zero the buffer for the final (length) block. */
1668 memset(sc->
buf, 0,
sizeof sc->
buf);
/* Final compression with the "last block" flag set. */
1670 compress_small(sc, 1);
/* Emit dst_len 32-bit state words, little-endian, into dst. */
1672 for (d = dst, u = 0; u < dst_len; u ++)
1673 sph_enc32le(d + (u << 2), sc->
state[u]);
/*
 * NOTE(review): extraction-garbled fragment of the big-variant
 * finalization, mirroring finalize_small() but calling compress_big().
 * The storage-class line, the local declarations and the
 * length-encoding call between the two compressions were lost in
 * extraction; the comments below annotate only the surviving
 * statements. Confirm the missing parts against upstream sphlib
 * sph_simd.c.
 */
1677 finalize_big(
void *cc,
unsigned ub,
unsigned n,
void *dst,
size_t dst_len)
/* If any data or trailing bits remain, zero-pad the buffer and
   compress it as an ordinary (non-final) block. */
1684 if (sc->
ptr > 0 || n > 0) {
1685 memset(sc->
buf + sc->
ptr, 0,
1686 (
sizeof sc->
buf) - sc->
ptr);
/* Keep only the top n bits of ub as the trailing partial byte. */
1687 sc->
buf[sc->
ptr] = ub & (0xFF << (8 - n));
1688 compress_big(sc, 0);
/* Zero the buffer for the final (length) block. */
1690 memset(sc->
buf, 0,
sizeof sc->
buf);
/* Final compression with the "last block" flag set. */
1692 compress_big(sc, 1);
/* Emit dst_len 32-bit state words, little-endian, into dst. */
1694 for (d = dst, u = 0; u < dst_len; u ++)
1695 sph_enc32le(d + (u << 2), sc->
state[u]);
/*
 * NOTE(review): extraction-garbled fragments of the public wrappers
 * (sph_simd224/256/384/512 init, update and close families); only the
 * single delegating statement of each wrapper survived -- the function
 * signatures and braces were lost. The last argument of finalize_* is
 * the digest length in 32-bit words: 7 -> 224 bits, 8 -> 256 bits,
 * 12 -> 384 bits, 16 -> 512 bits.
 */
1701 init_small(cc, IV224);
1707 update_small(cc, data, len);
1719 finalize_small(cc, ub, n, dst, 7);
1726 init_small(cc, IV256);
1732 update_small(cc, data, len);
1744 finalize_small(cc, ub, n, dst, 8);
1751 init_big(cc, IV384);
1757 update_big(cc, data, len);
1769 finalize_big(cc, ub, n, dst, 12);
1776 init_big(cc, IV512);
1782 update_big(cc, data, len);
1794 finalize_big(cc, ub, n, dst, 16);
void sph_simd224(void *cc, const void *data, size_t len)
Process some data bytes.
#define FFT32(xb, xs, rb, id)
#define READ_STATE_BIG(sc)
#define FFT_LOOP(rb, hk, as, id)
void sph_simd384_close(void *cc, void *dst)
Terminate the current SIMD-384 computation and output the result into the provided buffer. The destination buffer must be wide enough to accommodate the result (48 bytes). The context is automatically reinitialized.
This structure is a context for SIMD computations: it contains the intermediate values and some data from the last entered block. Once a SIMD computation has been performed, the context can be reused for another computation.
void sph_simd384_init(void *cc)
Initialize an SIMD-384 context.
void sph_simd512(void *cc, const void *data, size_t len)
Process some data bytes.
void sph_simd512_close(void *cc, void *dst)
Terminate the current SIMD-512 computation and output the result into the provided buffer. The destination buffer must be wide enough to accommodate the result (64 bytes). The context is automatically reinitialized.
void sph_simd256_init(void *cc)
Initialize an SIMD-256 context.
#define ONE_ROUND_BIG(ri, isp, p0, p1, p2, p3)
#define WRITE_STATE_BIG(sc)
void sph_simd384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
#define STEP_BIG(w0, w1, w2, w3, w4, w5, w6, w7, fun, r, s, pp8b)
#define READ_STATE_SMALL(sc)
#define WRITE_STATE_SMALL(sc)
This structure is a context for SIMD computations: it contains the intermediate values and some data from the last entered block. Once a SIMD computation has been performed, the context can be reused for another computation.
void sph_simd512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
#define ONE_ROUND_SMALL(ri, isp, p0, p1, p2, p3)
#define STEP_SMALL(w0, w1, w2, w3, fun, r, s, pp4b)
void sph_simd256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
void sph_simd256_close(void *cc, void *dst)
Terminate the current SIMD-256 computation and output the result into the provided buffer. The destination buffer must be wide enough to accommodate the result (32 bytes). The context is automatically reinitialized.
void sph_simd224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the resul...
void sph_simd384(void *cc, const void *data, size_t len)
Process some data bytes.
void sph_simd224_init(void *cc)
Initialize an SIMD-224 context.
#define FFT256(xb, xs, rb, id)
void sph_simd256(void *cc, const void *data, size_t len)
Process some data bytes.
void * memcpy(void *a, const void *b, size_t c)
#define FFT128(xb, xs, rb, id)
#define FFT16(xb, xs, rb)
void sph_simd512_init(void *cc)
Initialize an SIMD-512 context.
std::string _(const char *psz)
Translation function: Call Translate signal on UI interface, which returns a boost::optional result...
void sph_simd224_close(void *cc, void *dst)
Terminate the current SIMD-224 computation and output the result into the provided buffer. The destination buffer must be wide enough to accommodate the result (28 bytes). The context is automatically reinitialized.