43 #if SPH_64_TRUE && !defined SPH_LUFFA_PARALLEL 44 #define SPH_LUFFA_PARALLEL 1 48 #pragma warning (disable: 4146) 51 static const sph_u32 V_INIT[5][8] = {
80 static const sph_u32 RC00[8] = {
87 static const sph_u32 RC04[8] = {
94 static const sph_u32 RC10[8] = {
101 static const sph_u32 RC14[8] = {
108 #if SPH_LUFFA_PARALLEL 110 static const sph_u64 RCW010[8] = {
111 SPH_C64(0xb6de10ed303994a6), SPH_C64(0x70f47aaec0e65299),
112 SPH_C64(0x0707a3d46cc33a12), SPH_C64(0x1c1e8f51dc56983e),
113 SPH_C64(0x707a3d451e00108f), SPH_C64(0xaeb285627800423d),
114 SPH_C64(0xbaca15898f5b7882), SPH_C64(0x40a46f3e96e1db12)
117 static const sph_u64 RCW014[8] = {
118 SPH_C64(0x01685f3de0337818), SPH_C64(0x05a17cf4441ba90d),
119 SPH_C64(0xbd09caca7f34d442), SPH_C64(0xf4272b289389217f),
120 SPH_C64(0x144ae5cce5a8bce6), SPH_C64(0xfaa7ae2b5274baf4),
121 SPH_C64(0x2e48f1c126889ba7), SPH_C64(0xb923c7049a226e9d)
126 static const sph_u32 RC20[8] = {
133 static const sph_u32 RC24[8] = {
140 static const sph_u32 RC30[8] = {
147 static const sph_u32 RC34[8] = {
154 #if SPH_LUFFA_PARALLEL 156 static const sph_u64 RCW230[8] = {
157 SPH_C64(0xb213afa5fc20d9d2), SPH_C64(0xc84ebe9534552e25),
158 SPH_C64(0x4e608a227ad8818f), SPH_C64(0x56d858fe8438764a),
159 SPH_C64(0x343b138fbb6de032), SPH_C64(0xd0ec4e3dedb780c8),
160 SPH_C64(0x2ceb4882d9847356), SPH_C64(0xb3ad2208a2c78434)
164 static const sph_u64 RCW234[8] = {
165 SPH_C64(0xe028c9bfe25e72c1), SPH_C64(0x44756f91e623bb72),
166 SPH_C64(0x7e8fce325c58a4a4), SPH_C64(0x956548be1e38e2e7),
167 SPH_C64(0xfe191be278e38b9d), SPH_C64(0x3cb226e527586719),
168 SPH_C64(0x5944a28e36eda57f), SPH_C64(0xa1c4c355703aace7)
173 static const sph_u32 RC40[8] = {
180 static const sph_u32 RC44[8] = {
187 #define DECL_TMP8(w) \ 188 sph_u32 w ## 0, w ## 1, w ## 2, w ## 3, w ## 4, w ## 5, w ## 6, w ## 7; 190 #define M2(d, s) do { \ 191 sph_u32 tmp = s ## 7; \ 195 d ## 4 = s ## 3 ^ tmp; \ 196 d ## 3 = s ## 2 ^ tmp; \ 198 d ## 1 = s ## 0 ^ tmp; \ 202 #define XOR(d, s1, s2) do { \ 203 d ## 0 = s1 ## 0 ^ s2 ## 0; \ 204 d ## 1 = s1 ## 1 ^ s2 ## 1; \ 205 d ## 2 = s1 ## 2 ^ s2 ## 2; \ 206 d ## 3 = s1 ## 3 ^ s2 ## 3; \ 207 d ## 4 = s1 ## 4 ^ s2 ## 4; \ 208 d ## 5 = s1 ## 5 ^ s2 ## 5; \ 209 d ## 6 = s1 ## 6 ^ s2 ## 6; \ 210 d ## 7 = s1 ## 7 ^ s2 ## 7; \ 213 #if SPH_LUFFA_PARALLEL 215 #define SUB_CRUMB_GEN(a0, a1, a2, a3, width) do { \ 216 sph_u ## width tmp; \ 220 (a1) = SPH_T ## width(~(a1)); \ 226 (a0) = SPH_T ## width(~(a0)); \ 236 #define SUB_CRUMB(a0, a1, a2, a3) SUB_CRUMB_GEN(a0, a1, a2, a3, 32) 237 #define SUB_CRUMBW(a0, a1, a2, a3) SUB_CRUMB_GEN(a0, a1, a2, a3, 64) 242 #define ROL32W(x, n) SPH_T64( \ 244 & ~((SPH_C64(0xFFFFFFFF) >> (32 - (n))) << 32)) \ 245 | (((x) >> (32 - (n))) \ 246 & ~((SPH_C64(0xFFFFFFFF) >> (n)) << (n)))) 248 #define MIX_WORDW(u, v) do { \ 250 (u) = ROL32W((u), 2) ^ (v); \ 251 (v) = ROL32W((v), 14) ^ (u); \ 252 (u) = ROL32W((u), 10) ^ (v); \ 253 (v) = ROL32W((v), 1); \ 258 #define MIX_WORDW(u, v) do { \ 259 sph_u32 ul, uh, vl, vh; \ 261 ul = SPH_T32((sph_u32)(u)); \ 262 uh = SPH_T32((sph_u32)((u) >> 32)); \ 263 vl = SPH_T32((sph_u32)(v)); \ 264 vh = SPH_T32((sph_u32)((v) >> 32)); \ 265 ul = SPH_ROTL32(ul, 2) ^ vl; \ 266 vl = SPH_ROTL32(vl, 14) ^ ul; \ 267 ul = SPH_ROTL32(ul, 10) ^ vl; \ 268 vl = SPH_ROTL32(vl, 1); \ 269 uh = SPH_ROTL32(uh, 2) ^ vh; \ 270 vh = SPH_ROTL32(vh, 14) ^ uh; \ 271 uh = SPH_ROTL32(uh, 10) ^ vh; \ 272 vh = SPH_ROTL32(vh, 1); \ 273 (u) = (sph_u64)ul | ((sph_u64)uh << 32); \ 274 (v) = (sph_u64)vl | ((sph_u64)vh << 32); \ 279 #define SUB_CRUMB(a0, a1, a2, a3) do { \ 284 (a1) = SPH_T32(~(a1)); \ 290 (a0) = SPH_T32(~(a0)); \ 302 #define MIX_WORD(u, v) do { \ 304 (u) = SPH_ROTL32((u), 2) ^ (v); \ 305 (v) = SPH_ROTL32((v), 
14) ^ (u); \ 306 (u) = SPH_ROTL32((u), 10) ^ (v); \ 307 (v) = SPH_ROTL32((v), 1); \ 310 #define DECL_STATE3 \ 311 sph_u32 V00, V01, V02, V03, V04, V05, V06, V07; \ 312 sph_u32 V10, V11, V12, V13, V14, V15, V16, V17; \ 313 sph_u32 V20, V21, V22, V23, V24, V25, V26, V27; 315 #define READ_STATE3(state) do { \ 316 V00 = (state)->V[0][0]; \ 317 V01 = (state)->V[0][1]; \ 318 V02 = (state)->V[0][2]; \ 319 V03 = (state)->V[0][3]; \ 320 V04 = (state)->V[0][4]; \ 321 V05 = (state)->V[0][5]; \ 322 V06 = (state)->V[0][6]; \ 323 V07 = (state)->V[0][7]; \ 324 V10 = (state)->V[1][0]; \ 325 V11 = (state)->V[1][1]; \ 326 V12 = (state)->V[1][2]; \ 327 V13 = (state)->V[1][3]; \ 328 V14 = (state)->V[1][4]; \ 329 V15 = (state)->V[1][5]; \ 330 V16 = (state)->V[1][6]; \ 331 V17 = (state)->V[1][7]; \ 332 V20 = (state)->V[2][0]; \ 333 V21 = (state)->V[2][1]; \ 334 V22 = (state)->V[2][2]; \ 335 V23 = (state)->V[2][3]; \ 336 V24 = (state)->V[2][4]; \ 337 V25 = (state)->V[2][5]; \ 338 V26 = (state)->V[2][6]; \ 339 V27 = (state)->V[2][7]; \ 342 #define WRITE_STATE3(state) do { \ 343 (state)->V[0][0] = V00; \ 344 (state)->V[0][1] = V01; \ 345 (state)->V[0][2] = V02; \ 346 (state)->V[0][3] = V03; \ 347 (state)->V[0][4] = V04; \ 348 (state)->V[0][5] = V05; \ 349 (state)->V[0][6] = V06; \ 350 (state)->V[0][7] = V07; \ 351 (state)->V[1][0] = V10; \ 352 (state)->V[1][1] = V11; \ 353 (state)->V[1][2] = V12; \ 354 (state)->V[1][3] = V13; \ 355 (state)->V[1][4] = V14; \ 356 (state)->V[1][5] = V15; \ 357 (state)->V[1][6] = V16; \ 358 (state)->V[1][7] = V17; \ 359 (state)->V[2][0] = V20; \ 360 (state)->V[2][1] = V21; \ 361 (state)->V[2][2] = V22; \ 362 (state)->V[2][3] = V23; \ 363 (state)->V[2][4] = V24; \ 364 (state)->V[2][5] = V25; \ 365 (state)->V[2][6] = V26; \ 366 (state)->V[2][7] = V27; \ 372 M0 = sph_dec32be_aligned(buf + 0); \ 373 M1 = sph_dec32be_aligned(buf + 4); \ 374 M2 = sph_dec32be_aligned(buf + 8); \ 375 M3 = sph_dec32be_aligned(buf + 12); \ 376 M4 = sph_dec32be_aligned(buf + 16); \ 377 
M5 = sph_dec32be_aligned(buf + 20); \ 378 M6 = sph_dec32be_aligned(buf + 24); \ 379 M7 = sph_dec32be_aligned(buf + 28); \ 393 #define TWEAK3 do { \ 394 V14 = SPH_ROTL32(V14, 1); \ 395 V15 = SPH_ROTL32(V15, 1); \ 396 V16 = SPH_ROTL32(V16, 1); \ 397 V17 = SPH_ROTL32(V17, 1); \ 398 V24 = SPH_ROTL32(V24, 2); \ 399 V25 = SPH_ROTL32(V25, 2); \ 400 V26 = SPH_ROTL32(V26, 2); \ 401 V27 = SPH_ROTL32(V27, 2); \ 404 #if SPH_LUFFA_PARALLEL 408 sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \ 410 W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \ 411 W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \ 412 W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \ 413 W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \ 414 W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \ 415 W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \ 416 W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \ 417 W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \ 418 for (r = 0; r < 8; r ++) { \ 419 SUB_CRUMBW(W0, W1, W2, W3); \ 420 SUB_CRUMBW(W5, W6, W7, W4); \ 428 V00 = SPH_T32((sph_u32)W0); \ 429 V10 = SPH_T32((sph_u32)(W0 >> 32)); \ 430 V01 = SPH_T32((sph_u32)W1); \ 431 V11 = SPH_T32((sph_u32)(W1 >> 32)); \ 432 V02 = SPH_T32((sph_u32)W2); \ 433 V12 = SPH_T32((sph_u32)(W2 >> 32)); \ 434 V03 = SPH_T32((sph_u32)W3); \ 435 V13 = SPH_T32((sph_u32)(W3 >> 32)); \ 436 V04 = SPH_T32((sph_u32)W4); \ 437 V14 = SPH_T32((sph_u32)(W4 >> 32)); \ 438 V05 = SPH_T32((sph_u32)W5); \ 439 V15 = SPH_T32((sph_u32)(W5 >> 32)); \ 440 V06 = SPH_T32((sph_u32)W6); \ 441 V16 = SPH_T32((sph_u32)(W6 >> 32)); \ 442 V07 = SPH_T32((sph_u32)W7); \ 443 V17 = SPH_T32((sph_u32)(W7 >> 32)); \ 444 for (r = 0; r < 8; r ++) { \ 445 SUB_CRUMB(V20, V21, V22, V23); \ 446 SUB_CRUMB(V25, V26, V27, V24); \ 447 MIX_WORD(V20, V24); \ 448 MIX_WORD(V21, V25); \ 449 MIX_WORD(V22, V26); \ 450 MIX_WORD(V23, V27); \ 461 for (r = 0; r < 8; r ++) { \ 462 SUB_CRUMB(V00, V01, V02, V03); \ 463 SUB_CRUMB(V05, V06, V07, V04); \ 464 MIX_WORD(V00, V04); \ 465 MIX_WORD(V01, V05); \ 466 MIX_WORD(V02, V06); \ 467 MIX_WORD(V03, V07); \ 471 for 
(r = 0; r < 8; r ++) { \ 472 SUB_CRUMB(V10, V11, V12, V13); \ 473 SUB_CRUMB(V15, V16, V17, V14); \ 474 MIX_WORD(V10, V14); \ 475 MIX_WORD(V11, V15); \ 476 MIX_WORD(V12, V16); \ 477 MIX_WORD(V13, V17); \ 481 for (r = 0; r < 8; r ++) { \ 482 SUB_CRUMB(V20, V21, V22, V23); \ 483 SUB_CRUMB(V25, V26, V27, V24); \ 484 MIX_WORD(V20, V24); \ 485 MIX_WORD(V21, V25); \ 486 MIX_WORD(V22, V26); \ 487 MIX_WORD(V23, V27); \ 495 #define DECL_STATE4 \ 496 sph_u32 V00, V01, V02, V03, V04, V05, V06, V07; \ 497 sph_u32 V10, V11, V12, V13, V14, V15, V16, V17; \ 498 sph_u32 V20, V21, V22, V23, V24, V25, V26, V27; \ 499 sph_u32 V30, V31, V32, V33, V34, V35, V36, V37; 501 #define READ_STATE4(state) do { \ 502 V00 = (state)->V[0][0]; \ 503 V01 = (state)->V[0][1]; \ 504 V02 = (state)->V[0][2]; \ 505 V03 = (state)->V[0][3]; \ 506 V04 = (state)->V[0][4]; \ 507 V05 = (state)->V[0][5]; \ 508 V06 = (state)->V[0][6]; \ 509 V07 = (state)->V[0][7]; \ 510 V10 = (state)->V[1][0]; \ 511 V11 = (state)->V[1][1]; \ 512 V12 = (state)->V[1][2]; \ 513 V13 = (state)->V[1][3]; \ 514 V14 = (state)->V[1][4]; \ 515 V15 = (state)->V[1][5]; \ 516 V16 = (state)->V[1][6]; \ 517 V17 = (state)->V[1][7]; \ 518 V20 = (state)->V[2][0]; \ 519 V21 = (state)->V[2][1]; \ 520 V22 = (state)->V[2][2]; \ 521 V23 = (state)->V[2][3]; \ 522 V24 = (state)->V[2][4]; \ 523 V25 = (state)->V[2][5]; \ 524 V26 = (state)->V[2][6]; \ 525 V27 = (state)->V[2][7]; \ 526 V30 = (state)->V[3][0]; \ 527 V31 = (state)->V[3][1]; \ 528 V32 = (state)->V[3][2]; \ 529 V33 = (state)->V[3][3]; \ 530 V34 = (state)->V[3][4]; \ 531 V35 = (state)->V[3][5]; \ 532 V36 = (state)->V[3][6]; \ 533 V37 = (state)->V[3][7]; \ 536 #define WRITE_STATE4(state) do { \ 537 (state)->V[0][0] = V00; \ 538 (state)->V[0][1] = V01; \ 539 (state)->V[0][2] = V02; \ 540 (state)->V[0][3] = V03; \ 541 (state)->V[0][4] = V04; \ 542 (state)->V[0][5] = V05; \ 543 (state)->V[0][6] = V06; \ 544 (state)->V[0][7] = V07; \ 545 (state)->V[1][0] = V10; \ 546 (state)->V[1][1] = V11; \ 547 
(state)->V[1][2] = V12; \ 548 (state)->V[1][3] = V13; \ 549 (state)->V[1][4] = V14; \ 550 (state)->V[1][5] = V15; \ 551 (state)->V[1][6] = V16; \ 552 (state)->V[1][7] = V17; \ 553 (state)->V[2][0] = V20; \ 554 (state)->V[2][1] = V21; \ 555 (state)->V[2][2] = V22; \ 556 (state)->V[2][3] = V23; \ 557 (state)->V[2][4] = V24; \ 558 (state)->V[2][5] = V25; \ 559 (state)->V[2][6] = V26; \ 560 (state)->V[2][7] = V27; \ 561 (state)->V[3][0] = V30; \ 562 (state)->V[3][1] = V31; \ 563 (state)->V[3][2] = V32; \ 564 (state)->V[3][3] = V33; \ 565 (state)->V[3][4] = V34; \ 566 (state)->V[3][5] = V35; \ 567 (state)->V[3][6] = V36; \ 568 (state)->V[3][7] = V37; \ 575 M0 = sph_dec32be_aligned(buf + 0); \ 576 M1 = sph_dec32be_aligned(buf + 4); \ 577 M2 = sph_dec32be_aligned(buf + 8); \ 578 M3 = sph_dec32be_aligned(buf + 12); \ 579 M4 = sph_dec32be_aligned(buf + 16); \ 580 M5 = sph_dec32be_aligned(buf + 20); \ 581 M6 = sph_dec32be_aligned(buf + 24); \ 582 M7 = sph_dec32be_aligned(buf + 28); \ 608 #define TWEAK4 do { \ 609 V14 = SPH_ROTL32(V14, 1); \ 610 V15 = SPH_ROTL32(V15, 1); \ 611 V16 = SPH_ROTL32(V16, 1); \ 612 V17 = SPH_ROTL32(V17, 1); \ 613 V24 = SPH_ROTL32(V24, 2); \ 614 V25 = SPH_ROTL32(V25, 2); \ 615 V26 = SPH_ROTL32(V26, 2); \ 616 V27 = SPH_ROTL32(V27, 2); \ 617 V34 = SPH_ROTL32(V34, 3); \ 618 V35 = SPH_ROTL32(V35, 3); \ 619 V36 = SPH_ROTL32(V36, 3); \ 620 V37 = SPH_ROTL32(V37, 3); \ 623 #if SPH_LUFFA_PARALLEL 627 sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \ 629 W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \ 630 W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \ 631 W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \ 632 W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \ 633 W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \ 634 W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \ 635 W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \ 636 W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \ 637 for (r = 0; r < 8; r ++) { \ 638 SUB_CRUMBW(W0, W1, W2, W3); \ 639 SUB_CRUMBW(W5, W6, W7, W4); \ 647 V00 = SPH_T32((sph_u32)W0); \ 648 V10 
= SPH_T32((sph_u32)(W0 >> 32)); \ 649 V01 = SPH_T32((sph_u32)W1); \ 650 V11 = SPH_T32((sph_u32)(W1 >> 32)); \ 651 V02 = SPH_T32((sph_u32)W2); \ 652 V12 = SPH_T32((sph_u32)(W2 >> 32)); \ 653 V03 = SPH_T32((sph_u32)W3); \ 654 V13 = SPH_T32((sph_u32)(W3 >> 32)); \ 655 V04 = SPH_T32((sph_u32)W4); \ 656 V14 = SPH_T32((sph_u32)(W4 >> 32)); \ 657 V05 = SPH_T32((sph_u32)W5); \ 658 V15 = SPH_T32((sph_u32)(W5 >> 32)); \ 659 V06 = SPH_T32((sph_u32)W6); \ 660 V16 = SPH_T32((sph_u32)(W6 >> 32)); \ 661 V07 = SPH_T32((sph_u32)W7); \ 662 V17 = SPH_T32((sph_u32)(W7 >> 32)); \ 663 W0 = (sph_u64)V20 | ((sph_u64)V30 << 32); \ 664 W1 = (sph_u64)V21 | ((sph_u64)V31 << 32); \ 665 W2 = (sph_u64)V22 | ((sph_u64)V32 << 32); \ 666 W3 = (sph_u64)V23 | ((sph_u64)V33 << 32); \ 667 W4 = (sph_u64)V24 | ((sph_u64)V34 << 32); \ 668 W5 = (sph_u64)V25 | ((sph_u64)V35 << 32); \ 669 W6 = (sph_u64)V26 | ((sph_u64)V36 << 32); \ 670 W7 = (sph_u64)V27 | ((sph_u64)V37 << 32); \ 671 for (r = 0; r < 8; r ++) { \ 672 SUB_CRUMBW(W0, W1, W2, W3); \ 673 SUB_CRUMBW(W5, W6, W7, W4); \ 681 V20 = SPH_T32((sph_u32)W0); \ 682 V30 = SPH_T32((sph_u32)(W0 >> 32)); \ 683 V21 = SPH_T32((sph_u32)W1); \ 684 V31 = SPH_T32((sph_u32)(W1 >> 32)); \ 685 V22 = SPH_T32((sph_u32)W2); \ 686 V32 = SPH_T32((sph_u32)(W2 >> 32)); \ 687 V23 = SPH_T32((sph_u32)W3); \ 688 V33 = SPH_T32((sph_u32)(W3 >> 32)); \ 689 V24 = SPH_T32((sph_u32)W4); \ 690 V34 = SPH_T32((sph_u32)(W4 >> 32)); \ 691 V25 = SPH_T32((sph_u32)W5); \ 692 V35 = SPH_T32((sph_u32)(W5 >> 32)); \ 693 V26 = SPH_T32((sph_u32)W6); \ 694 V36 = SPH_T32((sph_u32)(W6 >> 32)); \ 695 V27 = SPH_T32((sph_u32)W7); \ 696 V37 = SPH_T32((sph_u32)(W7 >> 32)); \ 704 for (r = 0; r < 8; r ++) { \ 705 SUB_CRUMB(V00, V01, V02, V03); \ 706 SUB_CRUMB(V05, V06, V07, V04); \ 707 MIX_WORD(V00, V04); \ 708 MIX_WORD(V01, V05); \ 709 MIX_WORD(V02, V06); \ 710 MIX_WORD(V03, V07); \ 714 for (r = 0; r < 8; r ++) { \ 715 SUB_CRUMB(V10, V11, V12, V13); \ 716 SUB_CRUMB(V15, V16, V17, V14); \ 717 MIX_WORD(V10, 
V14); \ 718 MIX_WORD(V11, V15); \ 719 MIX_WORD(V12, V16); \ 720 MIX_WORD(V13, V17); \ 724 for (r = 0; r < 8; r ++) { \ 725 SUB_CRUMB(V20, V21, V22, V23); \ 726 SUB_CRUMB(V25, V26, V27, V24); \ 727 MIX_WORD(V20, V24); \ 728 MIX_WORD(V21, V25); \ 729 MIX_WORD(V22, V26); \ 730 MIX_WORD(V23, V27); \ 734 for (r = 0; r < 8; r ++) { \ 735 SUB_CRUMB(V30, V31, V32, V33); \ 736 SUB_CRUMB(V35, V36, V37, V34); \ 737 MIX_WORD(V30, V34); \ 738 MIX_WORD(V31, V35); \ 739 MIX_WORD(V32, V36); \ 740 MIX_WORD(V33, V37); \ 748 #define DECL_STATE5 \ 749 sph_u32 V00, V01, V02, V03, V04, V05, V06, V07; \ 750 sph_u32 V10, V11, V12, V13, V14, V15, V16, V17; \ 751 sph_u32 V20, V21, V22, V23, V24, V25, V26, V27; \ 752 sph_u32 V30, V31, V32, V33, V34, V35, V36, V37; \ 753 sph_u32 V40, V41, V42, V43, V44, V45, V46, V47; 755 #define READ_STATE5(state) do { \ 756 V00 = (state)->V[0][0]; \ 757 V01 = (state)->V[0][1]; \ 758 V02 = (state)->V[0][2]; \ 759 V03 = (state)->V[0][3]; \ 760 V04 = (state)->V[0][4]; \ 761 V05 = (state)->V[0][5]; \ 762 V06 = (state)->V[0][6]; \ 763 V07 = (state)->V[0][7]; \ 764 V10 = (state)->V[1][0]; \ 765 V11 = (state)->V[1][1]; \ 766 V12 = (state)->V[1][2]; \ 767 V13 = (state)->V[1][3]; \ 768 V14 = (state)->V[1][4]; \ 769 V15 = (state)->V[1][5]; \ 770 V16 = (state)->V[1][6]; \ 771 V17 = (state)->V[1][7]; \ 772 V20 = (state)->V[2][0]; \ 773 V21 = (state)->V[2][1]; \ 774 V22 = (state)->V[2][2]; \ 775 V23 = (state)->V[2][3]; \ 776 V24 = (state)->V[2][4]; \ 777 V25 = (state)->V[2][5]; \ 778 V26 = (state)->V[2][6]; \ 779 V27 = (state)->V[2][7]; \ 780 V30 = (state)->V[3][0]; \ 781 V31 = (state)->V[3][1]; \ 782 V32 = (state)->V[3][2]; \ 783 V33 = (state)->V[3][3]; \ 784 V34 = (state)->V[3][4]; \ 785 V35 = (state)->V[3][5]; \ 786 V36 = (state)->V[3][6]; \ 787 V37 = (state)->V[3][7]; \ 788 V40 = (state)->V[4][0]; \ 789 V41 = (state)->V[4][1]; \ 790 V42 = (state)->V[4][2]; \ 791 V43 = (state)->V[4][3]; \ 792 V44 = (state)->V[4][4]; \ 793 V45 = (state)->V[4][5]; \ 794 V46 = 
(state)->V[4][6]; \ 795 V47 = (state)->V[4][7]; \ 798 #define WRITE_STATE5(state) do { \ 799 (state)->V[0][0] = V00; \ 800 (state)->V[0][1] = V01; \ 801 (state)->V[0][2] = V02; \ 802 (state)->V[0][3] = V03; \ 803 (state)->V[0][4] = V04; \ 804 (state)->V[0][5] = V05; \ 805 (state)->V[0][6] = V06; \ 806 (state)->V[0][7] = V07; \ 807 (state)->V[1][0] = V10; \ 808 (state)->V[1][1] = V11; \ 809 (state)->V[1][2] = V12; \ 810 (state)->V[1][3] = V13; \ 811 (state)->V[1][4] = V14; \ 812 (state)->V[1][5] = V15; \ 813 (state)->V[1][6] = V16; \ 814 (state)->V[1][7] = V17; \ 815 (state)->V[2][0] = V20; \ 816 (state)->V[2][1] = V21; \ 817 (state)->V[2][2] = V22; \ 818 (state)->V[2][3] = V23; \ 819 (state)->V[2][4] = V24; \ 820 (state)->V[2][5] = V25; \ 821 (state)->V[2][6] = V26; \ 822 (state)->V[2][7] = V27; \ 823 (state)->V[3][0] = V30; \ 824 (state)->V[3][1] = V31; \ 825 (state)->V[3][2] = V32; \ 826 (state)->V[3][3] = V33; \ 827 (state)->V[3][4] = V34; \ 828 (state)->V[3][5] = V35; \ 829 (state)->V[3][6] = V36; \ 830 (state)->V[3][7] = V37; \ 831 (state)->V[4][0] = V40; \ 832 (state)->V[4][1] = V41; \ 833 (state)->V[4][2] = V42; \ 834 (state)->V[4][3] = V43; \ 835 (state)->V[4][4] = V44; \ 836 (state)->V[4][5] = V45; \ 837 (state)->V[4][6] = V46; \ 838 (state)->V[4][7] = V47; \ 845 M0 = sph_dec32be_aligned(buf + 0); \ 846 M1 = sph_dec32be_aligned(buf + 4); \ 847 M2 = sph_dec32be_aligned(buf + 8); \ 848 M3 = sph_dec32be_aligned(buf + 12); \ 849 M4 = sph_dec32be_aligned(buf + 16); \ 850 M5 = sph_dec32be_aligned(buf + 20); \ 851 M6 = sph_dec32be_aligned(buf + 24); \ 852 M7 = sph_dec32be_aligned(buf + 28); \ 894 #define TWEAK5 do { \ 895 V14 = SPH_ROTL32(V14, 1); \ 896 V15 = SPH_ROTL32(V15, 1); \ 897 V16 = SPH_ROTL32(V16, 1); \ 898 V17 = SPH_ROTL32(V17, 1); \ 899 V24 = SPH_ROTL32(V24, 2); \ 900 V25 = SPH_ROTL32(V25, 2); \ 901 V26 = SPH_ROTL32(V26, 2); \ 902 V27 = SPH_ROTL32(V27, 2); \ 903 V34 = SPH_ROTL32(V34, 3); \ 904 V35 = SPH_ROTL32(V35, 3); \ 905 V36 = SPH_ROTL32(V36, 3); \ 
906 V37 = SPH_ROTL32(V37, 3); \ 907 V44 = SPH_ROTL32(V44, 4); \ 908 V45 = SPH_ROTL32(V45, 4); \ 909 V46 = SPH_ROTL32(V46, 4); \ 910 V47 = SPH_ROTL32(V47, 4); \ 913 #if SPH_LUFFA_PARALLEL 917 sph_u64 W0, W1, W2, W3, W4, W5, W6, W7; \ 919 W0 = (sph_u64)V00 | ((sph_u64)V10 << 32); \ 920 W1 = (sph_u64)V01 | ((sph_u64)V11 << 32); \ 921 W2 = (sph_u64)V02 | ((sph_u64)V12 << 32); \ 922 W3 = (sph_u64)V03 | ((sph_u64)V13 << 32); \ 923 W4 = (sph_u64)V04 | ((sph_u64)V14 << 32); \ 924 W5 = (sph_u64)V05 | ((sph_u64)V15 << 32); \ 925 W6 = (sph_u64)V06 | ((sph_u64)V16 << 32); \ 926 W7 = (sph_u64)V07 | ((sph_u64)V17 << 32); \ 927 for (r = 0; r < 8; r ++) { \ 928 SUB_CRUMBW(W0, W1, W2, W3); \ 929 SUB_CRUMBW(W5, W6, W7, W4); \ 937 V00 = SPH_T32((sph_u32)W0); \ 938 V10 = SPH_T32((sph_u32)(W0 >> 32)); \ 939 V01 = SPH_T32((sph_u32)W1); \ 940 V11 = SPH_T32((sph_u32)(W1 >> 32)); \ 941 V02 = SPH_T32((sph_u32)W2); \ 942 V12 = SPH_T32((sph_u32)(W2 >> 32)); \ 943 V03 = SPH_T32((sph_u32)W3); \ 944 V13 = SPH_T32((sph_u32)(W3 >> 32)); \ 945 V04 = SPH_T32((sph_u32)W4); \ 946 V14 = SPH_T32((sph_u32)(W4 >> 32)); \ 947 V05 = SPH_T32((sph_u32)W5); \ 948 V15 = SPH_T32((sph_u32)(W5 >> 32)); \ 949 V06 = SPH_T32((sph_u32)W6); \ 950 V16 = SPH_T32((sph_u32)(W6 >> 32)); \ 951 V07 = SPH_T32((sph_u32)W7); \ 952 V17 = SPH_T32((sph_u32)(W7 >> 32)); \ 953 W0 = (sph_u64)V20 | ((sph_u64)V30 << 32); \ 954 W1 = (sph_u64)V21 | ((sph_u64)V31 << 32); \ 955 W2 = (sph_u64)V22 | ((sph_u64)V32 << 32); \ 956 W3 = (sph_u64)V23 | ((sph_u64)V33 << 32); \ 957 W4 = (sph_u64)V24 | ((sph_u64)V34 << 32); \ 958 W5 = (sph_u64)V25 | ((sph_u64)V35 << 32); \ 959 W6 = (sph_u64)V26 | ((sph_u64)V36 << 32); \ 960 W7 = (sph_u64)V27 | ((sph_u64)V37 << 32); \ 961 for (r = 0; r < 8; r ++) { \ 962 SUB_CRUMBW(W0, W1, W2, W3); \ 963 SUB_CRUMBW(W5, W6, W7, W4); \ 971 V20 = SPH_T32((sph_u32)W0); \ 972 V30 = SPH_T32((sph_u32)(W0 >> 32)); \ 973 V21 = SPH_T32((sph_u32)W1); \ 974 V31 = SPH_T32((sph_u32)(W1 >> 32)); \ 975 V22 = SPH_T32((sph_u32)W2); \ 
976 V32 = SPH_T32((sph_u32)(W2 >> 32)); \ 977 V23 = SPH_T32((sph_u32)W3); \ 978 V33 = SPH_T32((sph_u32)(W3 >> 32)); \ 979 V24 = SPH_T32((sph_u32)W4); \ 980 V34 = SPH_T32((sph_u32)(W4 >> 32)); \ 981 V25 = SPH_T32((sph_u32)W5); \ 982 V35 = SPH_T32((sph_u32)(W5 >> 32)); \ 983 V26 = SPH_T32((sph_u32)W6); \ 984 V36 = SPH_T32((sph_u32)(W6 >> 32)); \ 985 V27 = SPH_T32((sph_u32)W7); \ 986 V37 = SPH_T32((sph_u32)(W7 >> 32)); \ 987 for (r = 0; r < 8; r ++) { \ 988 SUB_CRUMB(V40, V41, V42, V43); \ 989 SUB_CRUMB(V45, V46, V47, V44); \ 990 MIX_WORD(V40, V44); \ 991 MIX_WORD(V41, V45); \ 992 MIX_WORD(V42, V46); \ 993 MIX_WORD(V43, V47); \ 1004 for (r = 0; r < 8; r ++) { \ 1005 SUB_CRUMB(V00, V01, V02, V03); \ 1006 SUB_CRUMB(V05, V06, V07, V04); \ 1007 MIX_WORD(V00, V04); \ 1008 MIX_WORD(V01, V05); \ 1009 MIX_WORD(V02, V06); \ 1010 MIX_WORD(V03, V07); \ 1014 for (r = 0; r < 8; r ++) { \ 1015 SUB_CRUMB(V10, V11, V12, V13); \ 1016 SUB_CRUMB(V15, V16, V17, V14); \ 1017 MIX_WORD(V10, V14); \ 1018 MIX_WORD(V11, V15); \ 1019 MIX_WORD(V12, V16); \ 1020 MIX_WORD(V13, V17); \ 1024 for (r = 0; r < 8; r ++) { \ 1025 SUB_CRUMB(V20, V21, V22, V23); \ 1026 SUB_CRUMB(V25, V26, V27, V24); \ 1027 MIX_WORD(V20, V24); \ 1028 MIX_WORD(V21, V25); \ 1029 MIX_WORD(V22, V26); \ 1030 MIX_WORD(V23, V27); \ 1034 for (r = 0; r < 8; r ++) { \ 1035 SUB_CRUMB(V30, V31, V32, V33); \ 1036 SUB_CRUMB(V35, V36, V37, V34); \ 1037 MIX_WORD(V30, V34); \ 1038 MIX_WORD(V31, V35); \ 1039 MIX_WORD(V32, V36); \ 1040 MIX_WORD(V33, V37); \ 1044 for (r = 0; r < 8; r ++) { \ 1045 SUB_CRUMB(V40, V41, V42, V43); \ 1046 SUB_CRUMB(V45, V46, V47, V44); \ 1047 MIX_WORD(V40, V44); \ 1048 MIX_WORD(V41, V45); \ 1049 MIX_WORD(V42, V46); \ 1050 MIX_WORD(V43, V47); \ 1067 if (len < (
sizeof sc->
buf) - ptr) {
1068 memcpy(buf + ptr, data, len);
1078 clen = (
sizeof sc->
buf) - ptr;
1081 memcpy(buf + ptr, data, clen);
1083 data = (
const unsigned char *)data + clen;
1085 if (ptr ==
sizeof sc->
buf) {
1097 void *dst,
unsigned out_size_w32)
1099 unsigned char *buf, *out;
1108 buf[ptr ++] = ((ub & -z) | z) & 0xFF;
1109 memset(buf + ptr, 0, (
sizeof sc->
buf) - ptr);
1111 for (i = 0; i < 2; i ++) {
1114 memset(buf, 0,
sizeof sc->
buf);
1117 sph_enc32be(out + 0, V00 ^ V10 ^ V20);
1118 sph_enc32be(out + 4, V01 ^ V11 ^ V21);
1119 sph_enc32be(out + 8, V02 ^ V12 ^ V22);
1120 sph_enc32be(out + 12, V03 ^ V13 ^ V23);
1121 sph_enc32be(out + 16, V04 ^ V14 ^ V24);
1122 sph_enc32be(out + 20, V05 ^ V15 ^ V25);
1123 sph_enc32be(out + 24, V06 ^ V16 ^ V26);
1124 if (out_size_w32 > 7)
1125 sph_enc32be(out + 28, V07 ^ V17 ^ V27);
1137 if (len < (
sizeof sc->
buf) - ptr) {
1138 memcpy(buf + ptr, data, len);
1148 clen = (
sizeof sc->
buf) - ptr;
1151 memcpy(buf + ptr, data, clen);
1153 data = (
const unsigned char *)data + clen;
1155 if (ptr ==
sizeof sc->
buf) {
1168 unsigned char *buf, *out;
1178 buf[ptr ++] = ((ub & -z) | z) & 0xFF;
1179 memset(buf + ptr, 0, (
sizeof sc->
buf) - ptr);
1181 for (i = 0; i < 3; i ++) {
1186 memset(buf, 0,
sizeof sc->
buf);
1189 sph_enc32be(out + 0, V00 ^ V10 ^ V20 ^ V30);
1190 sph_enc32be(out + 4, V01 ^ V11 ^ V21 ^ V31);
1191 sph_enc32be(out + 8, V02 ^ V12 ^ V22 ^ V32);
1192 sph_enc32be(out + 12, V03 ^ V13 ^ V23 ^ V33);
1193 sph_enc32be(out + 16, V04 ^ V14 ^ V24 ^ V34);
1194 sph_enc32be(out + 20, V05 ^ V15 ^ V25 ^ V35);
1195 sph_enc32be(out + 24, V06 ^ V16 ^ V26 ^ V36);
1196 sph_enc32be(out + 28, V07 ^ V17 ^ V27 ^ V37);
1199 sph_enc32be(out + 32, V00 ^ V10 ^ V20 ^ V30);
1200 sph_enc32be(out + 36, V01 ^ V11 ^ V21 ^ V31);
1201 sph_enc32be(out + 40, V02 ^ V12 ^ V22 ^ V32);
1202 sph_enc32be(out + 44, V03 ^ V13 ^ V23 ^ V33);
1217 if (len < (
sizeof sc->
buf) - ptr) {
1218 memcpy(buf + ptr, data, len);
1228 clen = (
sizeof sc->
buf) - ptr;
1231 memcpy(buf + ptr, data, clen);
1233 data = (
const unsigned char *)data + clen;
1235 if (ptr ==
sizeof sc->
buf) {
1248 unsigned char *buf, *out;
1258 buf[ptr ++] = ((ub & -z) | z) & 0xFF;
1259 memset(buf + ptr, 0, (
sizeof sc->
buf) - ptr);
1261 for (i = 0; i < 3; i ++) {
1266 memset(buf, 0,
sizeof sc->
buf);
1269 sph_enc32be(out + 0, V00 ^ V10 ^ V20 ^ V30 ^ V40);
1270 sph_enc32be(out + 4, V01 ^ V11 ^ V21 ^ V31 ^ V41);
1271 sph_enc32be(out + 8, V02 ^ V12 ^ V22 ^ V32 ^ V42);
1272 sph_enc32be(out + 12, V03 ^ V13 ^ V23 ^ V33 ^ V43);
1273 sph_enc32be(out + 16, V04 ^ V14 ^ V24 ^ V34 ^ V44);
1274 sph_enc32be(out + 20, V05 ^ V15 ^ V25 ^ V35 ^ V45);
1275 sph_enc32be(out + 24, V06 ^ V16 ^ V26 ^ V36 ^ V46);
1276 sph_enc32be(out + 28, V07 ^ V17 ^ V27 ^ V37 ^ V47);
1279 sph_enc32be(out + 32, V00 ^ V10 ^ V20 ^ V30 ^ V40);
1280 sph_enc32be(out + 36, V01 ^ V11 ^ V21 ^ V31 ^ V41);
1281 sph_enc32be(out + 40, V02 ^ V12 ^ V22 ^ V32 ^ V42);
1282 sph_enc32be(out + 44, V03 ^ V13 ^ V23 ^ V33 ^ V43);
1283 sph_enc32be(out + 48, V04 ^ V14 ^ V24 ^ V34 ^ V44);
1284 sph_enc32be(out + 52, V05 ^ V15 ^ V25 ^ V35 ^ V45);
1285 sph_enc32be(out + 56, V06 ^ V16 ^ V26 ^ V36 ^ V46);
1286 sph_enc32be(out + 60, V07 ^ V17 ^ V27 ^ V37 ^ V47);
1299 memcpy(sc->
V, V_INIT,
sizeof(sc->
V));
1307 luffa3(cc, data, len);
1321 luffa3_close(cc, ub, n, dst, 7);
1332 memcpy(sc->
V, V_INIT,
sizeof(sc->
V));
1340 luffa3(cc, data, len);
1354 luffa3_close(cc, ub, n, dst, 8);
1365 memcpy(sc->
V, V_INIT,
sizeof(sc->
V));
1373 luffa4(cc, data, len);
1387 luffa4_close(cc, ub, n, dst);
1398 memcpy(sc->
V, V_INIT,
sizeof(sc->
V));
1406 luffa5(cc, data, len);
1420 luffa5_close(cc, ub, n, dst);
void sph_luffa384_close(void *cc, void *dst)
Terminate the current Luffa-384 computation and output the result into the provided buffer...
void sph_luffa224(void *cc, const void *data, size_t len)
Process some data bytes.
#define WRITE_STATE3(state)
void sph_luffa384_init(void *cc)
Initialize a Luffa-384 context.
void sph_luffa256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer...
This structure is a context for Luffa-224 computations: it contains the intermediate values and some ...
void sph_luffa256_close(void *cc, void *dst)
Terminate the current Luffa-256 computation and output the result into the provided buffer...
void sph_luffa224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer...
This structure is a context for Luffa-384 computations.
void sph_luffa512_close(void *cc, void *dst)
Terminate the current Luffa-512 computation and output the result into the provided buffer...
void sph_luffa512(void *cc, const void *data, size_t len)
Process some data bytes.
void sph_luffa256(void *cc, const void *data, size_t len)
Process some data bytes.
void sph_luffa224_close(void *cc, void *dst)
Terminate the current Luffa-224 computation and output the result into the provided buffer...
#define READ_STATE5(state)
void sph_luffa512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer...
void sph_luffa256_init(void *cc)
Initialize a Luffa-256 context.
void * memcpy(void *a, const void *b, size_t c)
This structure is a context for Luffa-512 computations.
#define READ_STATE4(state)
#define READ_STATE3(state)
#define WRITE_STATE5(state)
#define WRITE_STATE4(state)
void sph_luffa224_init(void *cc)
Initialize a Luffa-224 context.
void sph_luffa384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
Add a few additional bits (0 to 7) to the current computation, then terminate it and output the result into the provided buffer...
void sph_luffa384(void *cc, const void *data, size_t len)
Process some data bytes.
void sph_luffa512_init(void *cc)
Initialize a Luffa-512 context.