Raven Core  3.0.0
P2P Digital Currency
field_5x52_asm_impl.h
Go to the documentation of this file.
1 /**********************************************************************
2  * Copyright (c) 2013-2014 Diederik Huys, Pieter Wuille *
3  * Distributed under the MIT software license, see the accompanying *
4  * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
5  **********************************************************************/
6 
14 #ifndef SECP256K1_FIELD_INNER5X52_IMPL_H
15 #define SECP256K1_FIELD_INNER5X52_IMPL_H
16 
/**
 * Multiply the five-word values at a and b and store a five-word result in r,
 * using a single x86-64 GCC-style inline assembly block.
 *
 * Operands, as fixed by the constraints at the end of the asm statement:
 *   %rsi       a on entry ("+S"); reused as scratch once a[0..4] are loaded
 *   %rbx       b ("b"), read at offsets 0..32 throughout the routine
 *   %rdi       r ("D"), written at offsets 0, 8, 16, 24 and 32
 *   %1 %2 %3   tmp1, tmp2, tmp3: memory slots used to spill t3, t4 and tx
 *
 * Register roles inside the block:
 *   %r10..%r14 a0..a4 (%r10 is later overwritten with t3)
 *   %r15:%rcx  accumulator "d" (high:low word)
 *   %r9:%r8    accumulator "c" (high:low word)
 *   %rax,%rdx  mulq operands/results and scratch for constants
 *
 * Constants used in the comments below:
 *   M = 0xfffffffffffff (low 52 bits set), R = 0x1000003d10.
 *
 * NOTE(review): the file name suggests a 5x52-bit limb representation and R is
 * presumably 2^260 reduced modulo the secp256k1 field prime -- confirm against
 * the portable C implementation.
 * NOTE(review): after each "d >>= 52" / "c >>= 52" the high word is either
 * cleared or ignored, which assumes the shifted value fits in 64 bits --
 * confirm the input magnitude bounds that guarantee this.
 * NOTE(review): all of a is loaded into registers before the first store to r,
 * so r == a appears safe; b is still read after r[0] and r[1] are stored, so
 * r must not overlap b (consistent with SECP256K1_RESTRICT on b).
 */
17 SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) {
27  uint64_t tmp1, tmp2, tmp3;
28 __asm__ __volatile__(
    /* load a0..a4 into %%r10..%%r14; %%rsi is free for reuse afterwards */
29  "movq 0(%%rsi),%%r10\n"
30  "movq 8(%%rsi),%%r11\n"
31  "movq 16(%%rsi),%%r12\n"
32  "movq 24(%%rsi),%%r13\n"
33  "movq 32(%%rsi),%%r14\n"
34 
35  /* d = a3 * b0 (first term: initialises %%r15:%%rcx) */
36  "movq 0(%%rbx),%%rax\n"
37  "mulq %%r13\n"
38  "movq %%rax,%%rcx\n"
39  "movq %%rdx,%%r15\n"
40  /* d += a2 * b1 */
41  "movq 8(%%rbx),%%rax\n"
42  "mulq %%r12\n"
43  "addq %%rax,%%rcx\n"
44  "adcq %%rdx,%%r15\n"
45  /* d += a1 * b2 */
46  "movq 16(%%rbx),%%rax\n"
47  "mulq %%r11\n"
48  "addq %%rax,%%rcx\n"
49  "adcq %%rdx,%%r15\n"
50  /* d += a0 * b3 */
51  "movq 24(%%rbx),%%rax\n"
52  "mulq %%r10\n"
53  "addq %%rax,%%rcx\n"
54  "adcq %%rdx,%%r15\n"
55  /* c = a4 * b4 */
56  "movq 32(%%rbx),%%rax\n"
57  "mulq %%r14\n"
58  "movq %%rax,%%r8\n"
59  "movq %%rdx,%%r9\n"
60  /* d += (c & M) * R */
61  "movq $0xfffffffffffff,%%rdx\n"
62  "andq %%rdx,%%rax\n"
63  "movq $0x1000003d10,%%rdx\n"
64  "mulq %%rdx\n"
65  "addq %%rax,%%rcx\n"
66  "adcq %%rdx,%%r15\n"
67  /* c >>= 52 (%%r8 only) */
68  "shrdq $52,%%r9,%%r8\n"
69  /* t3 (tmp1) = d & M */
70  "movq %%rcx,%%rsi\n"
71  "movq $0xfffffffffffff,%%rdx\n"
72  "andq %%rdx,%%rsi\n"
73  "movq %%rsi,%q1\n"
74  /* d >>= 52 */
75  "shrdq $52,%%r15,%%rcx\n"
76  "xorq %%r15,%%r15\n"
77  /* d += a4 * b0 */
78  "movq 0(%%rbx),%%rax\n"
79  "mulq %%r14\n"
80  "addq %%rax,%%rcx\n"
81  "adcq %%rdx,%%r15\n"
82  /* d += a3 * b1 */
83  "movq 8(%%rbx),%%rax\n"
84  "mulq %%r13\n"
85  "addq %%rax,%%rcx\n"
86  "adcq %%rdx,%%r15\n"
87  /* d += a2 * b2 */
88  "movq 16(%%rbx),%%rax\n"
89  "mulq %%r12\n"
90  "addq %%rax,%%rcx\n"
91  "adcq %%rdx,%%r15\n"
92  /* d += a1 * b3 */
93  "movq 24(%%rbx),%%rax\n"
94  "mulq %%r11\n"
95  "addq %%rax,%%rcx\n"
96  "adcq %%rdx,%%r15\n"
97  /* d += a0 * b4 */
98  "movq 32(%%rbx),%%rax\n"
99  "mulq %%r10\n"
100  "addq %%rax,%%rcx\n"
101  "adcq %%rdx,%%r15\n"
102  /* d += c * R */
103  "movq %%r8,%%rax\n"
104  "movq $0x1000003d10,%%rdx\n"
105  "mulq %%rdx\n"
106  "addq %%rax,%%rcx\n"
107  "adcq %%rdx,%%r15\n"
108  /* t4 = d & M (%%rsi) */
109  "movq %%rcx,%%rsi\n"
110  "movq $0xfffffffffffff,%%rdx\n"
111  "andq %%rdx,%%rsi\n"
112  /* d >>= 52 */
113  "shrdq $52,%%r15,%%rcx\n"
114  "xorq %%r15,%%r15\n"
115  /* tx = t4 >> 48 (tmp3) */
116  "movq %%rsi,%%rax\n"
117  "shrq $48,%%rax\n"
118  "movq %%rax,%q3\n"
119  /* t4 &= (M >> 4) (tmp2) */
120  "movq $0xffffffffffff,%%rax\n"
121  "andq %%rax,%%rsi\n"
122  "movq %%rsi,%q2\n"
123  /* c = a0 * b0 */
124  "movq 0(%%rbx),%%rax\n"
125  "mulq %%r10\n"
126  "movq %%rax,%%r8\n"
127  "movq %%rdx,%%r9\n"
128  /* d += a4 * b1 */
129  "movq 8(%%rbx),%%rax\n"
130  "mulq %%r14\n"
131  "addq %%rax,%%rcx\n"
132  "adcq %%rdx,%%r15\n"
133  /* d += a3 * b2 */
134  "movq 16(%%rbx),%%rax\n"
135  "mulq %%r13\n"
136  "addq %%rax,%%rcx\n"
137  "adcq %%rdx,%%r15\n"
138  /* d += a2 * b3 */
139  "movq 24(%%rbx),%%rax\n"
140  "mulq %%r12\n"
141  "addq %%rax,%%rcx\n"
142  "adcq %%rdx,%%r15\n"
143  /* d += a1 * b4 */
144  "movq 32(%%rbx),%%rax\n"
145  "mulq %%r11\n"
146  "addq %%rax,%%rcx\n"
147  "adcq %%rdx,%%r15\n"
148  /* u0 = d & M (%%rsi) */
149  "movq %%rcx,%%rsi\n"
150  "movq $0xfffffffffffff,%%rdx\n"
151  "andq %%rdx,%%rsi\n"
152  /* d >>= 52 */
153  "shrdq $52,%%r15,%%rcx\n"
154  "xorq %%r15,%%r15\n"
155  /* u0 = (u0 << 4) | tx (%%rsi) */
156  "shlq $4,%%rsi\n"
157  "movq %q3,%%rax\n"
158  "orq %%rax,%%rsi\n"
159  /* c += u0 * (R >> 4) */
160  "movq $0x1000003d1,%%rax\n"
161  "mulq %%rsi\n"
162  "addq %%rax,%%r8\n"
163  "adcq %%rdx,%%r9\n"
164  /* r[0] = c & M */
165  "movq %%r8,%%rax\n"
166  "movq $0xfffffffffffff,%%rdx\n"
167  "andq %%rdx,%%rax\n"
168  "movq %%rax,0(%%rdi)\n"
169  /* c >>= 52 */
170  "shrdq $52,%%r9,%%r8\n"
171  "xorq %%r9,%%r9\n"
172  /* c += a1 * b0 */
173  "movq 0(%%rbx),%%rax\n"
174  "mulq %%r11\n"
175  "addq %%rax,%%r8\n"
176  "adcq %%rdx,%%r9\n"
177  /* c += a0 * b1 */
178  "movq 8(%%rbx),%%rax\n"
179  "mulq %%r10\n"
180  "addq %%rax,%%r8\n"
181  "adcq %%rdx,%%r9\n"
182  /* d += a4 * b2 */
183  "movq 16(%%rbx),%%rax\n"
184  "mulq %%r14\n"
185  "addq %%rax,%%rcx\n"
186  "adcq %%rdx,%%r15\n"
187  /* d += a3 * b3 */
188  "movq 24(%%rbx),%%rax\n"
189  "mulq %%r13\n"
190  "addq %%rax,%%rcx\n"
191  "adcq %%rdx,%%r15\n"
192  /* d += a2 * b4 */
193  "movq 32(%%rbx),%%rax\n"
194  "mulq %%r12\n"
195  "addq %%rax,%%rcx\n"
196  "adcq %%rdx,%%r15\n"
197  /* c += (d & M) * R */
198  "movq %%rcx,%%rax\n"
199  "movq $0xfffffffffffff,%%rdx\n"
200  "andq %%rdx,%%rax\n"
201  "movq $0x1000003d10,%%rdx\n"
202  "mulq %%rdx\n"
203  "addq %%rax,%%r8\n"
204  "adcq %%rdx,%%r9\n"
205  /* d >>= 52 */
206  "shrdq $52,%%r15,%%rcx\n"
207  "xorq %%r15,%%r15\n"
208  /* r[1] = c & M */
209  "movq %%r8,%%rax\n"
210  "movq $0xfffffffffffff,%%rdx\n"
211  "andq %%rdx,%%rax\n"
212  "movq %%rax,8(%%rdi)\n"
213  /* c >>= 52 */
214  "shrdq $52,%%r9,%%r8\n"
215  "xorq %%r9,%%r9\n"
216  /* c += a2 * b0 */
217  "movq 0(%%rbx),%%rax\n"
218  "mulq %%r12\n"
219  "addq %%rax,%%r8\n"
220  "adcq %%rdx,%%r9\n"
221  /* c += a1 * b1 */
222  "movq 8(%%rbx),%%rax\n"
223  "mulq %%r11\n"
224  "addq %%rax,%%r8\n"
225  "adcq %%rdx,%%r9\n"
226  /* c += a0 * b2 (last use of %%r10 = a0) */
227  "movq 16(%%rbx),%%rax\n"
228  "mulq %%r10\n"
229  "addq %%rax,%%r8\n"
230  "adcq %%rdx,%%r9\n"
231  /* fetch t3 (%%r10, overwrites a0), t4 (%%rsi) */
232  "movq %q2,%%rsi\n"
233  "movq %q1,%%r10\n"
234  /* d += a4 * b3 */
235  "movq 24(%%rbx),%%rax\n"
236  "mulq %%r14\n"
237  "addq %%rax,%%rcx\n"
238  "adcq %%rdx,%%r15\n"
239  /* d += a3 * b4 */
240  "movq 32(%%rbx),%%rax\n"
241  "mulq %%r13\n"
242  "addq %%rax,%%rcx\n"
243  "adcq %%rdx,%%r15\n"
244  /* c += (d & M) * R */
245  "movq %%rcx,%%rax\n"
246  "movq $0xfffffffffffff,%%rdx\n"
247  "andq %%rdx,%%rax\n"
248  "movq $0x1000003d10,%%rdx\n"
249  "mulq %%rdx\n"
250  "addq %%rax,%%r8\n"
251  "adcq %%rdx,%%r9\n"
252  /* d >>= 52 (%%rcx only) */
253  "shrdq $52,%%r15,%%rcx\n"
254  /* r[2] = c & M */
255  "movq %%r8,%%rax\n"
256  "movq $0xfffffffffffff,%%rdx\n"
257  "andq %%rdx,%%rax\n"
258  "movq %%rax,16(%%rdi)\n"
259  /* c >>= 52 */
260  "shrdq $52,%%r9,%%r8\n"
261  "xorq %%r9,%%r9\n"
262  /* c += t3 */
263  "addq %%r10,%%r8\n"
264  /* c += d * R */
265  "movq %%rcx,%%rax\n"
266  "movq $0x1000003d10,%%rdx\n"
267  "mulq %%rdx\n"
268  "addq %%rax,%%r8\n"
269  "adcq %%rdx,%%r9\n"
270  /* r[3] = c & M */
271  "movq %%r8,%%rax\n"
272  "movq $0xfffffffffffff,%%rdx\n"
273  "andq %%rdx,%%rax\n"
274  "movq %%rax,24(%%rdi)\n"
275  /* c >>= 52 (%%r8 only) */
276  "shrdq $52,%%r9,%%r8\n"
277  /* c += t4 (%%r8 only) */
278  "addq %%rsi,%%r8\n"
279  /* r[4] = c */
280  "movq %%r8,32(%%rdi)\n"
    /* outputs: a is read-write because %%rsi is reused as scratch; tmp1..tmp3 are spill slots */
281 : "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3)
    /* inputs: b in %%rbx, r in %%rdi */
282 : "b"(b), "D"(r)
    /* clobbers: every scratch register used above, the flags, and memory (r is stored through %%rdi) */
283 : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
284 );
285 }
286 
/**
 * Square the five-word value at a and store a five-word result in r, using a
 * single x86-64 GCC-style inline assembly block.
 *
 * Operands, as fixed by the constraints at the end of the asm statement:
 *   %rsi       a on entry ("+S"); reused as scratch once a[0..4] are loaded
 *   %rdi       r ("D"), written at offsets 0, 8, 16, 24 and 32
 *   %1 %2 %3   tmp1, tmp2, tmp3: memory slots used to spill t3, t4 and tx
 *
 * Register roles inside the block:
 *   %r10..%r14 a0..a4; %r14 is doubled in place ("a4 *= 2") and %r10 is
 *              doubled in place ("a0 *= 2"), so later comments that say a4 or
 *              a0 refer to the doubled value; %r10 finally holds t3
 *   %r15       the constant M = 0xfffffffffffff (low 52 bits set)
 *   %rcx:%rbx  accumulator "d" (high:low word)
 *   %r9:%r8    accumulator "c" (high:low word)
 *   %rax,%rdx  mulq operands/results and scratch for constants
 *
 * R in the comments below is the constant 0x1000003d10.
 *
 * NOTE(review): the file name suggests a 5x52-bit limb representation and R is
 * presumably 2^260 reduced modulo the secp256k1 field prime -- confirm against
 * the portable C implementation.
 * NOTE(review): the in-register doublings (leaq/addq) and the clearing of the
 * high accumulator word after each ">>= 52" assume the inputs are small enough
 * not to overflow 64 bits -- confirm the input magnitude bounds.
 * NOTE(review): all of a is loaded into registers before the first store to r,
 * so r == a appears safe.
 */
287 SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) {
297  uint64_t tmp1, tmp2, tmp3;
298 __asm__ __volatile__(
    /* load a0..a4 into %%r10..%%r14 and keep M in %%r15 for the whole routine */
299  "movq 0(%%rsi),%%r10\n"
300  "movq 8(%%rsi),%%r11\n"
301  "movq 16(%%rsi),%%r12\n"
302  "movq 24(%%rsi),%%r13\n"
303  "movq 32(%%rsi),%%r14\n"
304  "movq $0xfffffffffffff,%%r15\n"
305 
306  /* d = (a0*2) * a3 */
307  "leaq (%%r10,%%r10,1),%%rax\n"
308  "mulq %%r13\n"
309  "movq %%rax,%%rbx\n"
310  "movq %%rdx,%%rcx\n"
311  /* d += (a1*2) * a2 */
312  "leaq (%%r11,%%r11,1),%%rax\n"
313  "mulq %%r12\n"
314  "addq %%rax,%%rbx\n"
315  "adcq %%rdx,%%rcx\n"
316  /* c = a4 * a4 */
317  "movq %%r14,%%rax\n"
318  "mulq %%r14\n"
319  "movq %%rax,%%r8\n"
320  "movq %%rdx,%%r9\n"
321  /* d += (c & M) * R */
322  "andq %%r15,%%rax\n"
323  "movq $0x1000003d10,%%rdx\n"
324  "mulq %%rdx\n"
325  "addq %%rax,%%rbx\n"
326  "adcq %%rdx,%%rcx\n"
327  /* c >>= 52 (%%r8 only) */
328  "shrdq $52,%%r9,%%r8\n"
329  /* t3 (tmp1) = d & M */
330  "movq %%rbx,%%rsi\n"
331  "andq %%r15,%%rsi\n"
332  "movq %%rsi,%q1\n"
333  /* d >>= 52 */
334  "shrdq $52,%%rcx,%%rbx\n"
335  "xorq %%rcx,%%rcx\n"
336  /* a4 *= 2 (%%r14 holds 2*a4 from here on) */
337  "addq %%r14,%%r14\n"
338  /* d += a0 * a4 (a4 already doubled) */
339  "movq %%r10,%%rax\n"
340  "mulq %%r14\n"
341  "addq %%rax,%%rbx\n"
342  "adcq %%rdx,%%rcx\n"
343  /* d+= (a1*2) * a3 */
344  "leaq (%%r11,%%r11,1),%%rax\n"
345  "mulq %%r13\n"
346  "addq %%rax,%%rbx\n"
347  "adcq %%rdx,%%rcx\n"
348  /* d += a2 * a2 */
349  "movq %%r12,%%rax\n"
350  "mulq %%r12\n"
351  "addq %%rax,%%rbx\n"
352  "adcq %%rdx,%%rcx\n"
353  /* d += c * R */
354  "movq %%r8,%%rax\n"
355  "movq $0x1000003d10,%%rdx\n"
356  "mulq %%rdx\n"
357  "addq %%rax,%%rbx\n"
358  "adcq %%rdx,%%rcx\n"
359  /* t4 = d & M (%%rsi) */
360  "movq %%rbx,%%rsi\n"
361  "andq %%r15,%%rsi\n"
362  /* d >>= 52 */
363  "shrdq $52,%%rcx,%%rbx\n"
364  "xorq %%rcx,%%rcx\n"
365  /* tx = t4 >> 48 (tmp3) */
366  "movq %%rsi,%%rax\n"
367  "shrq $48,%%rax\n"
368  "movq %%rax,%q3\n"
369  /* t4 &= (M >> 4) (tmp2) */
370  "movq $0xffffffffffff,%%rax\n"
371  "andq %%rax,%%rsi\n"
372  "movq %%rsi,%q2\n"
373  /* c = a0 * a0 */
374  "movq %%r10,%%rax\n"
375  "mulq %%r10\n"
376  "movq %%rax,%%r8\n"
377  "movq %%rdx,%%r9\n"
378  /* d += a1 * a4 (a4 already doubled) */
379  "movq %%r11,%%rax\n"
380  "mulq %%r14\n"
381  "addq %%rax,%%rbx\n"
382  "adcq %%rdx,%%rcx\n"
383  /* d += (a2*2) * a3 */
384  "leaq (%%r12,%%r12,1),%%rax\n"
385  "mulq %%r13\n"
386  "addq %%rax,%%rbx\n"
387  "adcq %%rdx,%%rcx\n"
388  /* u0 = d & M (%%rsi) */
389  "movq %%rbx,%%rsi\n"
390  "andq %%r15,%%rsi\n"
391  /* d >>= 52 */
392  "shrdq $52,%%rcx,%%rbx\n"
393  "xorq %%rcx,%%rcx\n"
394  /* u0 = (u0 << 4) | tx (%%rsi) */
395  "shlq $4,%%rsi\n"
396  "movq %q3,%%rax\n"
397  "orq %%rax,%%rsi\n"
398  /* c += u0 * (R >> 4) */
399  "movq $0x1000003d1,%%rax\n"
400  "mulq %%rsi\n"
401  "addq %%rax,%%r8\n"
402  "adcq %%rdx,%%r9\n"
403  /* r[0] = c & M */
404  "movq %%r8,%%rax\n"
405  "andq %%r15,%%rax\n"
406  "movq %%rax,0(%%rdi)\n"
407  /* c >>= 52 */
408  "shrdq $52,%%r9,%%r8\n"
409  "xorq %%r9,%%r9\n"
410  /* a0 *= 2 (%%r10 holds 2*a0 from here on) */
411  "addq %%r10,%%r10\n"
412  /* c += a0 * a1 (a0 already doubled) */
413  "movq %%r10,%%rax\n"
414  "mulq %%r11\n"
415  "addq %%rax,%%r8\n"
416  "adcq %%rdx,%%r9\n"
417  /* d += a2 * a4 (a4 already doubled) */
418  "movq %%r12,%%rax\n"
419  "mulq %%r14\n"
420  "addq %%rax,%%rbx\n"
421  "adcq %%rdx,%%rcx\n"
422  /* d += a3 * a3 */
423  "movq %%r13,%%rax\n"
424  "mulq %%r13\n"
425  "addq %%rax,%%rbx\n"
426  "adcq %%rdx,%%rcx\n"
427  /* c += (d & M) * R */
428  "movq %%rbx,%%rax\n"
429  "andq %%r15,%%rax\n"
430  "movq $0x1000003d10,%%rdx\n"
431  "mulq %%rdx\n"
432  "addq %%rax,%%r8\n"
433  "adcq %%rdx,%%r9\n"
434  /* d >>= 52 */
435  "shrdq $52,%%rcx,%%rbx\n"
436  "xorq %%rcx,%%rcx\n"
437  /* r[1] = c & M */
438  "movq %%r8,%%rax\n"
439  "andq %%r15,%%rax\n"
440  "movq %%rax,8(%%rdi)\n"
441  /* c >>= 52 */
442  "shrdq $52,%%r9,%%r8\n"
443  "xorq %%r9,%%r9\n"
444  /* c += a0 * a2 (a0 already doubled; last use of %%r10) */
445  "movq %%r10,%%rax\n"
446  "mulq %%r12\n"
447  "addq %%rax,%%r8\n"
448  "adcq %%rdx,%%r9\n"
449  /* fetch t3 (%%r10, overwrites a0),t4 (%%rsi) */
450  "movq %q2,%%rsi\n"
451  "movq %q1,%%r10\n"
452  /* c += a1 * a1 */
453  "movq %%r11,%%rax\n"
454  "mulq %%r11\n"
455  "addq %%rax,%%r8\n"
456  "adcq %%rdx,%%r9\n"
457  /* d += a3 * a4 (a4 already doubled) */
458  "movq %%r13,%%rax\n"
459  "mulq %%r14\n"
460  "addq %%rax,%%rbx\n"
461  "adcq %%rdx,%%rcx\n"
462  /* c += (d & M) * R */
463  "movq %%rbx,%%rax\n"
464  "andq %%r15,%%rax\n"
465  "movq $0x1000003d10,%%rdx\n"
466  "mulq %%rdx\n"
467  "addq %%rax,%%r8\n"
468  "adcq %%rdx,%%r9\n"
469  /* d >>= 52 (%%rbx only) */
470  "shrdq $52,%%rcx,%%rbx\n"
471  /* r[2] = c & M */
472  "movq %%r8,%%rax\n"
473  "andq %%r15,%%rax\n"
474  "movq %%rax,16(%%rdi)\n"
475  /* c >>= 52 */
476  "shrdq $52,%%r9,%%r8\n"
477  "xorq %%r9,%%r9\n"
478  /* c += t3 */
479  "addq %%r10,%%r8\n"
480  /* c += d * R */
481  "movq %%rbx,%%rax\n"
482  "movq $0x1000003d10,%%rdx\n"
483  "mulq %%rdx\n"
484  "addq %%rax,%%r8\n"
485  "adcq %%rdx,%%r9\n"
486  /* r[3] = c & M */
487  "movq %%r8,%%rax\n"
488  "andq %%r15,%%rax\n"
489  "movq %%rax,24(%%rdi)\n"
490  /* c >>= 52 (%%r8 only) */
491  "shrdq $52,%%r9,%%r8\n"
492  /* c += t4 (%%r8 only) */
493  "addq %%rsi,%%r8\n"
494  /* r[4] = c */
495  "movq %%r8,32(%%rdi)\n"
    /* outputs: a is read-write because %%rsi is reused as scratch; tmp1..tmp3 are spill slots */
496 : "+S"(a), "=m"(tmp1), "=m"(tmp2), "=m"(tmp3)
    /* input: r in %%rdi */
497 : "D"(r)
    /* clobbers: every scratch register used above (including %%rbx), the flags, and memory (r is stored through %%rdi) */
498 : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "cc", "memory"
499 );
500 }
501 
502 #endif /* SECP256K1_FIELD_INNER5X52_IMPL_H */
#define SECP256K1_INLINE
Definition: secp256k1.h:110
#define SECP256K1_RESTRICT
Definition: util.h:89