Source file
src/crypto/sha1/_asm/sha1block_amd64_asm.go
1
2
3
4
5 package main
6
7 import (
8 . "github.com/mmcloughlin/avo/build"
9 . "github.com/mmcloughlin/avo/operand"
10 . "github.com/mmcloughlin/avo/reg"
11 )
12
13
14
15
16
17
18
19
20
21
22
23 func main() {
24 Package("crypto/sha1")
25 ConstraintExpr("!purego")
26 blockAMD64()
27 blockAVX2()
28 Generate()
29 }
30
31 func LOAD(index int) {
32 MOVL(Mem{Base: SI}.Offset(index*4), R10L)
33 BSWAPL(R10L)
34 MOVL(R10L, Mem{Base: SP}.Offset(index*4))
35 }
36
37 func SHUFFLE(index int) {
38 MOVL(Mem{Base: SP}.Offset(((index)&0xf)*4), R10L)
39 XORL(Mem{Base: SP}.Offset(((index-3)&0xf)*4), R10L)
40 XORL(Mem{Base: SP}.Offset(((index-8)&0xf)*4), R10L)
41 XORL(Mem{Base: SP}.Offset(((index-14)&0xf)*4), R10L)
42 ROLL(Imm(1), R10L)
43 MOVL(R10L, Mem{Base: SP}.Offset(((index)&0xf)*4))
44 }
45
// FUNC1 emits code that leaves ((c ^ d) & b) ^ d in R9L, i.e. a bitwise
// select that takes c where b is set and d where b is clear.
// a and e are unused; they keep the signature parallel to the other FUNCn.
func FUNC1(a, b, c, d, e GPPhysical) {
	MOVL(d, R9L)
	XORL(c, R9L)
	ANDL(b, R9L)
	XORL(d, R9L)
}
52
// FUNC2 emits code that leaves b ^ c ^ d in R9L.
// a and e are unused.
func FUNC2(a, b, c, d, e GPPhysical) {
	MOVL(b, R9L)
	XORL(c, R9L)
	XORL(d, R9L)
}
58
// FUNC3 emits code that leaves (b & c) | ((b | c) & d) in R9L, the bitwise
// majority of b, c and d. R8L is clobbered as scratch. a and e are unused.
func FUNC3(a, b, c, d, e GPPhysical) {
	MOVL(b, R8L)
	ORL(c, R8L)
	ANDL(d, R8L) // R8L = (b | c) & d
	MOVL(b, R9L)
	ANDL(c, R9L) // R9L = b & c
	ORL(R8L, R9L)
}
67
// FUNC4 emits the same b ^ c ^ d computation as FUNC2 (result in R9L).
func FUNC4(a, b, c, d, e GPPhysical) {
	FUNC2(a, b, c, d, e)
}
71
// MIX emits the common tail of a scalar round. It consumes the value left in
// R9L by one of the FUNCn helpers and the word left in R10L by LOAD or
// SHUFFLE, and computes
//
//	b = rol(b, 30)
//	e = e + R9L + R10L + konst + rol(a, 5)
//
// R8L is clobbered as scratch. c and d are unused.
func MIX(a, b, c, d, e GPPhysical, konst int) {
	ROLL(Imm(30), b)
	ADDL(R9L, e)
	MOVL(a, R8L)
	ROLL(Imm(5), R8L)
	// A single LEA adds both the word in R10L and the round constant.
	LEAL(Mem{Base: e, Index: R10L, Scale: 1}.Offset(konst), e)
	ADDL(R8L, e)
}
80
// ROUND1 emits one round that takes its word straight from the input block
// (LOAD), applies FUNC1, and mixes with constant 0x5A827999.
func ROUND1(a, b, c, d, e GPPhysical, index int) {
	LOAD(index)
	FUNC1(a, b, c, d, e)
	MIX(a, b, c, d, e, 0x5A827999)
}
86
// ROUND1x is ROUND1 for rounds whose word must be derived from earlier words
// (SHUFFLE) instead of loaded from the input; FUNC1 and the constant are the
// same as in ROUND1.
func ROUND1x(a, b, c, d, e GPPhysical, index int) {
	SHUFFLE(index)
	FUNC1(a, b, c, d, e)
	MIX(a, b, c, d, e, 0x5A827999)
}
92
// ROUND2 emits one round using SHUFFLE, FUNC2 and constant 0x6ED9EBA1.
func ROUND2(a, b, c, d, e GPPhysical, index int) {
	SHUFFLE(index)
	FUNC2(a, b, c, d, e)
	MIX(a, b, c, d, e, 0x6ED9EBA1)
}
98
// ROUND3 emits one round using SHUFFLE, FUNC3 and constant 0x8F1BBCDC.
func ROUND3(a, b, c, d, e GPPhysical, index int) {
	SHUFFLE(index)
	FUNC3(a, b, c, d, e)
	MIX(a, b, c, d, e, 0x8F1BBCDC)
}
104
// ROUND4 emits one round using SHUFFLE, FUNC4 and constant 0xCA62C1D6.
func ROUND4(a, b, c, d, e GPPhysical, index int) {
	SHUFFLE(index)
	FUNC4(a, b, c, d, e)
	MIX(a, b, c, d, e, 0xCA62C1D6)
}
110
111 func blockAMD64() {
112 Implement("blockAMD64")
113 Attributes(NOSPLIT)
114 AllocLocal(64)
115
116 Load(Param("dig"), RBP)
117 Load(Param("p").Base(), RSI)
118 Load(Param("p").Len(), RDX)
119 SHRQ(Imm(6), RDX)
120 SHLQ(Imm(6), RDX)
121
122 LEAQ(Mem{Base: SI, Index: DX, Scale: 1}, RDI)
123 MOVL(Mem{Base: BP}.Offset(0*4), EAX)
124 MOVL(Mem{Base: BP}.Offset(1*4), EBX)
125 MOVL(Mem{Base: BP}.Offset(2*4), ECX)
126 MOVL(Mem{Base: BP}.Offset(3*4), EDX)
127 MOVL(Mem{Base: BP}.Offset(4*4), EBP)
128
129 CMPQ(RSI, RDI)
130 JEQ(LabelRef("end"))
131
132 loop_amd64()
133 end()
134 }
135
136 func loop_amd64() {
137 Label("loop")
138 MOVL(EAX, R11L)
139 MOVL(EBX, R12L)
140 MOVL(ECX, R13L)
141 MOVL(EDX, R14L)
142 MOVL(EBP, R15L)
143
144 ROUND1(EAX, EBX, ECX, EDX, EBP, 0)
145 ROUND1(EBP, EAX, EBX, ECX, EDX, 1)
146 ROUND1(EDX, EBP, EAX, EBX, ECX, 2)
147 ROUND1(ECX, EDX, EBP, EAX, EBX, 3)
148 ROUND1(EBX, ECX, EDX, EBP, EAX, 4)
149 ROUND1(EAX, EBX, ECX, EDX, EBP, 5)
150 ROUND1(EBP, EAX, EBX, ECX, EDX, 6)
151 ROUND1(EDX, EBP, EAX, EBX, ECX, 7)
152 ROUND1(ECX, EDX, EBP, EAX, EBX, 8)
153 ROUND1(EBX, ECX, EDX, EBP, EAX, 9)
154 ROUND1(EAX, EBX, ECX, EDX, EBP, 10)
155 ROUND1(EBP, EAX, EBX, ECX, EDX, 11)
156 ROUND1(EDX, EBP, EAX, EBX, ECX, 12)
157 ROUND1(ECX, EDX, EBP, EAX, EBX, 13)
158 ROUND1(EBX, ECX, EDX, EBP, EAX, 14)
159 ROUND1(EAX, EBX, ECX, EDX, EBP, 15)
160
161 ROUND1x(EBP, EAX, EBX, ECX, EDX, 16)
162 ROUND1x(EDX, EBP, EAX, EBX, ECX, 17)
163 ROUND1x(ECX, EDX, EBP, EAX, EBX, 18)
164 ROUND1x(EBX, ECX, EDX, EBP, EAX, 19)
165
166 ROUND2(EAX, EBX, ECX, EDX, EBP, 20)
167 ROUND2(EBP, EAX, EBX, ECX, EDX, 21)
168 ROUND2(EDX, EBP, EAX, EBX, ECX, 22)
169 ROUND2(ECX, EDX, EBP, EAX, EBX, 23)
170 ROUND2(EBX, ECX, EDX, EBP, EAX, 24)
171 ROUND2(EAX, EBX, ECX, EDX, EBP, 25)
172 ROUND2(EBP, EAX, EBX, ECX, EDX, 26)
173 ROUND2(EDX, EBP, EAX, EBX, ECX, 27)
174 ROUND2(ECX, EDX, EBP, EAX, EBX, 28)
175 ROUND2(EBX, ECX, EDX, EBP, EAX, 29)
176 ROUND2(EAX, EBX, ECX, EDX, EBP, 30)
177 ROUND2(EBP, EAX, EBX, ECX, EDX, 31)
178 ROUND2(EDX, EBP, EAX, EBX, ECX, 32)
179 ROUND2(ECX, EDX, EBP, EAX, EBX, 33)
180 ROUND2(EBX, ECX, EDX, EBP, EAX, 34)
181 ROUND2(EAX, EBX, ECX, EDX, EBP, 35)
182 ROUND2(EBP, EAX, EBX, ECX, EDX, 36)
183 ROUND2(EDX, EBP, EAX, EBX, ECX, 37)
184 ROUND2(ECX, EDX, EBP, EAX, EBX, 38)
185 ROUND2(EBX, ECX, EDX, EBP, EAX, 39)
186
187 ROUND3(EAX, EBX, ECX, EDX, EBP, 40)
188 ROUND3(EBP, EAX, EBX, ECX, EDX, 41)
189 ROUND3(EDX, EBP, EAX, EBX, ECX, 42)
190 ROUND3(ECX, EDX, EBP, EAX, EBX, 43)
191 ROUND3(EBX, ECX, EDX, EBP, EAX, 44)
192 ROUND3(EAX, EBX, ECX, EDX, EBP, 45)
193 ROUND3(EBP, EAX, EBX, ECX, EDX, 46)
194 ROUND3(EDX, EBP, EAX, EBX, ECX, 47)
195 ROUND3(ECX, EDX, EBP, EAX, EBX, 48)
196 ROUND3(EBX, ECX, EDX, EBP, EAX, 49)
197 ROUND3(EAX, EBX, ECX, EDX, EBP, 50)
198 ROUND3(EBP, EAX, EBX, ECX, EDX, 51)
199 ROUND3(EDX, EBP, EAX, EBX, ECX, 52)
200 ROUND3(ECX, EDX, EBP, EAX, EBX, 53)
201 ROUND3(EBX, ECX, EDX, EBP, EAX, 54)
202 ROUND3(EAX, EBX, ECX, EDX, EBP, 55)
203 ROUND3(EBP, EAX, EBX, ECX, EDX, 56)
204 ROUND3(EDX, EBP, EAX, EBX, ECX, 57)
205 ROUND3(ECX, EDX, EBP, EAX, EBX, 58)
206 ROUND3(EBX, ECX, EDX, EBP, EAX, 59)
207
208 ROUND4(EAX, EBX, ECX, EDX, EBP, 60)
209 ROUND4(EBP, EAX, EBX, ECX, EDX, 61)
210 ROUND4(EDX, EBP, EAX, EBX, ECX, 62)
211 ROUND4(ECX, EDX, EBP, EAX, EBX, 63)
212 ROUND4(EBX, ECX, EDX, EBP, EAX, 64)
213 ROUND4(EAX, EBX, ECX, EDX, EBP, 65)
214 ROUND4(EBP, EAX, EBX, ECX, EDX, 66)
215 ROUND4(EDX, EBP, EAX, EBX, ECX, 67)
216 ROUND4(ECX, EDX, EBP, EAX, EBX, 68)
217 ROUND4(EBX, ECX, EDX, EBP, EAX, 69)
218 ROUND4(EAX, EBX, ECX, EDX, EBP, 70)
219 ROUND4(EBP, EAX, EBX, ECX, EDX, 71)
220 ROUND4(EDX, EBP, EAX, EBX, ECX, 72)
221 ROUND4(ECX, EDX, EBP, EAX, EBX, 73)
222 ROUND4(EBX, ECX, EDX, EBP, EAX, 74)
223 ROUND4(EAX, EBX, ECX, EDX, EBP, 75)
224 ROUND4(EBP, EAX, EBX, ECX, EDX, 76)
225 ROUND4(EDX, EBP, EAX, EBX, ECX, 77)
226 ROUND4(ECX, EDX, EBP, EAX, EBX, 78)
227 ROUND4(EBX, ECX, EDX, EBP, EAX, 79)
228
229 ADDL(R11L, EAX)
230 ADDL(R12L, EBX)
231 ADDL(R13L, ECX)
232 ADDL(R14L, EDX)
233 ADDL(R15L, EBP)
234
235 ADDQ(Imm(64), RSI)
236 CMPQ(RSI, RDI)
237 JB(LabelRef("loop"))
238 }
239
240 func end() {
241 Label("end")
242 Load(Param("dig"), RDI)
243 MOVL(EAX, Mem{Base: DI}.Offset(0*4))
244 MOVL(EBX, Mem{Base: DI}.Offset(1*4))
245 MOVL(ECX, Mem{Base: DI}.Offset(2*4))
246 MOVL(EDX, Mem{Base: DI}.Offset(3*4))
247 MOVL(EBP, Mem{Base: DI}.Offset(4*4))
248 RET()
249 }
250
251
252
253
254
255
256
257
258
259
260
261 func UPDATE_HASH(A, TB, C, D, E GPPhysical) {
262 ADDL(Mem{Base: R9}, A)
263 MOVL(A, Mem{Base: R9})
264 ADDL(Mem{Base: R9}.Offset(4), TB)
265 MOVL(TB, Mem{Base: R9}.Offset(4))
266 ADDL(Mem{Base: R9}.Offset(8), C)
267 MOVL(C, Mem{Base: R9}.Offset(8))
268 ADDL(Mem{Base: R9}.Offset(12), D)
269 MOVL(D, Mem{Base: R9}.Offset(12))
270 ADDL(Mem{Base: R9}.Offset(16), E)
271 MOVL(E, Mem{Base: R9}.Offset(16))
272 }
273
274
275
// PRECALC_0 emits an unaligned 16-byte load from OFFSET(R10) into X0.
// NOTE(review): R10 presumably addresses the first of the two input blocks
// being scheduled — confirm against the routine that sets it up.
func PRECALC_0(OFFSET int) {
	VMOVDQU(Mem{Base: R10}.Offset(OFFSET), X0)
}
279
// PRECALC_1 emits a VINSERTI128 that places the 16 bytes at OFFSET(R13) in
// the upper 128-bit lane of Y0 (selector 1), keeping the lower lane.
// NOTE(review): R13 presumably addresses the second input block — confirm.
func PRECALC_1(OFFSET int) {
	VINSERTI128(Imm(1), Mem{Base: R13}.Offset(OFFSET), Y0, Y0)
}
283
// PRECALC_2 emits a byte shuffle of Y0 under the control mask in Y10, writing
// the result to YREG.
// NOTE(review): Y10 presumably holds a byte-swap mask loaded elsewhere.
func PRECALC_2(YREG VecPhysical) {
	VPSHUFB(Y10, Y0, YREG)
}
287
// PRECALC_4 emits Y0 = YREG + the packed 32-bit values at K_OFFSET(R8).
// NOTE(review): R8 presumably points at the table of round constants.
func PRECALC_4(YREG VecPhysical, K_OFFSET int) {
	VPADDD(Mem{Base: R8}.Offset(K_OFFSET), YREG, Y0)
}
291
// PRECALC_7 emits an unaligned 32-byte store of Y0 to (OFFSET*2)(R14).
// The offset is doubled because each 16-byte input OFFSET corresponds to a
// 32-byte Y-register store.
func PRECALC_7(OFFSET int) {
	VMOVDQU(Y0, Mem{Base: R14}.Offset(OFFSET*2))
}
295
296
297
298
299
300
301
302
303
304
305
306
// PRECALC_00_15 emits the vector pre-computation for one group of four input
// words: load 16 bytes from each of (R10) and (R13) at OFFSET into the two
// lanes of Y0, shuffle into YREG, add the constants at 0(R8), and store the
// sums at (OFFSET*2)(R14). YREG keeps the shuffled words without the constant.
func PRECALC_00_15(OFFSET int, YREG VecPhysical) {
	PRECALC_0(OFFSET)
	PRECALC_1(OFFSET)
	PRECALC_2(YREG)
	PRECALC_4(YREG, 0x0)
	PRECALC_7(OFFSET)
}
314
315
316
// PRECALC_16 emits the first step of the 16..31 schedule computation:
// REG receives the 8-byte VPALIGNR combination of REG_SUB_12 and REG_SUB_16,
// and Y0 receives REG_SUB_4 shifted right by 4 bytes within each 128-bit lane.
func PRECALC_16(REG_SUB_16, REG_SUB_12, REG_SUB_4, REG VecPhysical) {
	VPALIGNR(Imm(8), REG_SUB_16, REG_SUB_12, REG)
	VPSRLDQ(Imm(4), REG_SUB_4, Y0)
}
321
// PRECALC_17 emits REG ^= REG_SUB_8 and Y0 ^= REG_SUB_16.
func PRECALC_17(REG_SUB_16, REG_SUB_8, REG VecPhysical) {
	VPXOR(REG_SUB_8, REG, REG)
	VPXOR(REG_SUB_16, Y0, Y0)
}
326
// PRECALC_18 emits REG ^= Y0, then Y9 = REG shifted left by 12 bytes within
// each 128-bit lane.
func PRECALC_18(REG VecPhysical) {
	VPXOR(Y0, REG, REG)
	VPSLLDQ(Imm(12), REG, Y9)
}
331
// PRECALC_19 emits the two halves of a 32-bit rotate-left-by-1 of REG:
// Y0 = REG << 1 and REG = REG >> 31. PRECALC_20 ORs them together.
func PRECALC_19(REG VecPhysical) {
	VPSLLD(Imm(1), REG, Y0)
	VPSRLD(Imm(31), REG, REG)
}
336
// PRECALC_20 emits Y0 |= REG (completing the rotate started in PRECALC_19),
// then REG = Y9 << 2 per 32-bit element.
func PRECALC_20(REG VecPhysical) {
	VPOR(REG, Y0, Y0)
	VPSLLD(Imm(2), Y9, REG)
}
341
// PRECALC_21 emits Y9 = Y9 >> 30 per 32-bit element, then Y0 ^= REG.
func PRECALC_21(REG VecPhysical) {
	VPSRLD(Imm(30), Y9, Y9)
	VPXOR(REG, Y0, Y0)
}
346
// PRECALC_23 emits the final step of the 16..31 schedule computation:
// REG = Y0 ^ Y9 (the finished words), Y0 = REG + the constants at
// K_OFFSET(R8), and a 32-byte store of Y0 to OFFSET(R14).
func PRECALC_23(REG VecPhysical, K_OFFSET, OFFSET int) {
	VPXOR(Y9, Y0, REG)
	VPADDD(Mem{Base: R8}.Offset(K_OFFSET), REG, Y0)
	VMOVDQU(Y0, Mem{Base: R14}.Offset(OFFSET))
}
352
353
354
355
356
357
358
359
360
361
362
// PRECALC_16_31 emits, back to back, the full vector computation of one
// group of schedule words in the 16..31 range. REG receives the new words;
// the REG_SUB_n arguments are the registers holding the words n positions
// earlier. The sum with the constants at K_OFFSET(R8) is stored at
// OFFSET(R14). Y0 and Y9 are clobbered as temporaries.
func PRECALC_16_31(REG, REG_SUB_4, REG_SUB_8, REG_SUB_12, REG_SUB_16 VecPhysical, K_OFFSET, OFFSET int) {
	PRECALC_16(REG_SUB_16, REG_SUB_12, REG_SUB_4, REG)
	PRECALC_17(REG_SUB_16, REG_SUB_8, REG)
	PRECALC_18(REG)
	PRECALC_19(REG)
	PRECALC_20(REG)
	PRECALC_21(REG)
	PRECALC_23(REG, K_OFFSET, OFFSET)
}
372
373
374
// PRECALC_32 emits the first step of the 32..79 schedule computation:
// Y0 receives the 8-byte VPALIGNR combination of REG_SUB_4 and REG_SUB_8.
func PRECALC_32(REG_SUB_8, REG_SUB_4 VecPhysical) {
	VPALIGNR(Imm(8), REG_SUB_8, REG_SUB_4, Y0)
}
378
// PRECALC_33 emits REG ^= REG_SUB_28.
func PRECALC_33(REG_SUB_28, REG VecPhysical) {
	VPXOR(REG_SUB_28, REG, REG)
}
382
// PRECALC_34 emits Y0 ^= REG_SUB_16.
func PRECALC_34(REG_SUB_16 VecPhysical) {
	VPXOR(REG_SUB_16, Y0, Y0)
}
386
// PRECALC_35 emits REG ^= Y0.
func PRECALC_35(REG VecPhysical) {
	VPXOR(Y0, REG, REG)
}
390
// PRECALC_36 emits Y0 = REG << 2 per 32-bit element, the first half of a
// rotate-left-by-2 that PRECALC_37 completes.
func PRECALC_36(REG VecPhysical) {
	VPSLLD(Imm(2), REG, Y0)
}
394
// PRECALC_37 emits REG = REG >> 30 followed by REG |= Y0; together with
// PRECALC_36 this rotates each 32-bit element of REG left by 2.
func PRECALC_37(REG VecPhysical) {
	VPSRLD(Imm(30), REG, REG)
	VPOR(REG, Y0, REG)
}
399
// PRECALC_39 emits the final step of the 32..79 schedule computation:
// Y0 = REG + the constants at K_OFFSET(R8), stored as 32 bytes at OFFSET(R14).
func PRECALC_39(REG VecPhysical, K_OFFSET, OFFSET int) {
	VPADDD(Mem{Base: R8}.Offset(K_OFFSET), REG, Y0)
	VMOVDQU(Y0, Mem{Base: R14}.Offset(OFFSET))
}
404
405
406
407
408
409
410
411
412
// PRECALC_32_79 emits, back to back, the full vector computation of one
// group of schedule words in the 32..79 range. REG is updated in place; the
// REG_SUB_n arguments are the registers holding the words n positions
// earlier. The sum with the constants at K_OFFSET(R8) is stored at
// OFFSET(R14). Y0 is clobbered as a temporary.
func PRECALC_32_79(REG, REG_SUB_4, REG_SUB_8, REG_SUB_16, REG_SUB_28 VecPhysical, K_OFFSET, OFFSET int) {
	PRECALC_32(REG_SUB_8, REG_SUB_4)
	PRECALC_33(REG_SUB_28, REG)
	PRECALC_34(REG_SUB_16)
	PRECALC_35(REG)
	PRECALC_36(REG)
	PRECALC_37(REG)
	PRECALC_39(REG, K_OFFSET, OFFSET)
}
422
423 func PRECALC() {
424 PRECALC_00_15(0, Y15)
425 PRECALC_00_15(0x10, Y14)
426 PRECALC_00_15(0x20, Y13)
427 PRECALC_00_15(0x30, Y12)
428 PRECALC_16_31(Y8, Y12, Y13, Y14, Y15, 0, 0x80)
429 PRECALC_16_31(Y7, Y8, Y12, Y13, Y14, 0x20, 0xa0)
430 PRECALC_16_31(Y5, Y7, Y8, Y12, Y13, 0x20, 0xc0)
431 PRECALC_16_31(Y3, Y5, Y7, Y8, Y12, 0x20, 0xe0)
432 PRECALC_32_79(Y15, Y3, Y5, Y8, Y14, 0x20, 0x100)
433 PRECALC_32_79(Y14, Y15, Y3, Y7, Y13, 0x20, 0x120)
434 PRECALC_32_79(Y13, Y14, Y15, Y5, Y12, 0x40, 0x140)
435 PRECALC_32_79(Y12, Y13, Y14, Y3, Y8, 0x40, 0x160)
436 PRECALC_32_79(Y8, Y12, Y13, Y15, Y7, 0x40, 0x180)
437 PRECALC_32_79(Y7, Y8, Y12, Y14, Y5, 0x40, 0x1a0)
438 PRECALC_32_79(Y5, Y7, Y8, Y13, Y3, 0x40, 0x1c0)
439 PRECALC_32_79(Y3, Y5, Y7, Y12, Y15, 0x60, 0x1e0)
440 PRECALC_32_79(Y15, Y3, Y5, Y8, Y14, 0x60, 0x200)
441 PRECALC_32_79(Y14, Y15, Y3, Y7, Y13, 0x60, 0x220)
442 PRECALC_32_79(Y13, Y14, Y15, Y5, Y12, 0x60, 0x240)
443 PRECALC_32_79(Y12, Y13, Y14, Y3, Y8, 0x60, 0x260)
444 }
445
446
447
448
449
// CALC_F1_PRE emits the first half of an F1 step. In order:
//
//	REG_E += 32-bit word at OFFSET(R15)
//	EBP    = ^REG_A & REG_C
//	REG_E += REG_B            (REG_B's incoming value)
//	R12L   = REG_A rotated right by 27 (= left by 5)
//	REG_B  = REG_A rotated right by 2
//
// EBP and R12L are consumed by CALC_F1_POST.
func CALC_F1_PRE(OFFSET int, REG_A, REG_B, REG_C, REG_E GPPhysical) {
	ADDL(Mem{Base: R15}.Offset(OFFSET), REG_E)
	ANDNL(REG_C, REG_A, EBP)
	LEAL(Mem{Base: REG_E, Index: REG_B, Scale: 1}, REG_E)
	RORXL(Imm(0x1b), REG_A, R12L)
	RORXL(Imm(2), REG_A, REG_B)
}
457
// CALC_F1_POST emits the second half of an F1 step:
// REG_A = (REG_A & REG_B) ^ EBP, then REG_E += R12, using the EBP and R12
// values prepared by CALC_F1_PRE. REG_A is overwritten with the F1 result.
func CALC_F1_POST(REG_A, REG_B, REG_E GPPhysical) {
	ANDL(REG_B, REG_A)
	XORL(EBP, REG_A)
	LEAL(Mem{Base: REG_E, Index: R12, Scale: 1}, REG_E)
}
463
464
465
// CALC_0 emits step 0. There is no previous step to have prepared its inputs,
// so it first builds (ESI & EDI) ^ (^ESI & EAX) in EBX from the unrotated ESI
// and rotates ESI right by 2. It then runs the F1 halves on the word at
// 0x0(R15) with PRECALC_0(0x80) interleaved.
func CALC_0() {
	MOVL(ESI, EBX)
	RORXL(Imm(2), ESI, ESI)
	ANDNL(EAX, EBX, EBP)
	ANDL(EDI, EBX)
	XORL(EBP, EBX)
	CALC_F1_PRE(0x0, ECX, EBX, EDI, EDX)
	PRECALC_0(0x80)
	CALC_F1_POST(ECX, ESI, EDX)
}
476
// CALC_1 emits step 1: F1 on the word at 0x4(R15), with PRECALC_1(0x80) interleaved.
func CALC_1() {
	CALC_F1_PRE(0x4, EDX, ECX, ESI, EAX)
	PRECALC_1(0x80)
	CALC_F1_POST(EDX, EBX, EAX)
}
482
// CALC_2 emits step 2: F1 on the word at 0x8(R15), with PRECALC_2(Y15) interleaved.
func CALC_2() {
	CALC_F1_PRE(0x8, EAX, EDX, EBX, EDI)
	PRECALC_2(Y15)
	CALC_F1_POST(EAX, ECX, EDI)
}
488
// CALC_3 emits step 3: F1 on the word at 0xc(R15), with nothing interleaved.
func CALC_3() {
	CALC_F1_PRE(0xc, EDI, EAX, ECX, ESI)
	CALC_F1_POST(EDI, EDX, ESI)
}
493
// CALC_4 emits step 4: F1 on the word at 0x20(R15), with PRECALC_4(Y15, 0x0) interleaved.
func CALC_4() {
	CALC_F1_PRE(0x20, ESI, EDI, EDX, EBX)
	PRECALC_4(Y15, 0x0)
	CALC_F1_POST(ESI, EAX, EBX)
}
499
// CALC_5 emits step 5: F1 on the word at 0x24(R15), with nothing interleaved.
func CALC_5() {
	CALC_F1_PRE(0x24, EBX, ESI, EAX, ECX)
	CALC_F1_POST(EBX, EDI, ECX)
}
504
// CALC_6 emits step 6: F1 on the word at 0x28(R15), with nothing interleaved.
func CALC_6() {
	CALC_F1_PRE(0x28, ECX, EBX, EDI, EDX)
	CALC_F1_POST(ECX, ESI, EDX)
}
509
// CALC_7 emits step 7: F1 on the word at 0x2c(R15), with PRECALC_7(0x0) interleaved.
func CALC_7() {
	CALC_F1_PRE(0x2c, EDX, ECX, ESI, EAX)
	PRECALC_7(0x0)
	CALC_F1_POST(EDX, EBX, EAX)
}
515
// CALC_8 emits step 8: F1 on the word at 0x40(R15), with PRECALC_0(0x90) interleaved.
func CALC_8() {
	CALC_F1_PRE(0x40, EAX, EDX, EBX, EDI)
	PRECALC_0(0x90)
	CALC_F1_POST(EAX, ECX, EDI)
}
521
// CALC_9 emits step 9: F1 on the word at 0x44(R15), with PRECALC_1(0x90) interleaved.
func CALC_9() {
	CALC_F1_PRE(0x44, EDI, EAX, ECX, ESI)
	PRECALC_1(0x90)
	CALC_F1_POST(EDI, EDX, ESI)
}
527
// CALC_10 emits step 10: F1 on the word at 0x48(R15), with PRECALC_2(Y14) interleaved.
func CALC_10() {
	CALC_F1_PRE(0x48, ESI, EDI, EDX, EBX)
	PRECALC_2(Y14)
	CALC_F1_POST(ESI, EAX, EBX)
}
533
// CALC_11 emits step 11: F1 on the word at 0x4c(R15), with nothing interleaved.
func CALC_11() {
	CALC_F1_PRE(0x4c, EBX, ESI, EAX, ECX)
	CALC_F1_POST(EBX, EDI, ECX)
}
538
// CALC_12 emits step 12: F1 on the word at 0x60(R15), with PRECALC_4(Y14, 0x0) interleaved.
func CALC_12() {
	CALC_F1_PRE(0x60, ECX, EBX, EDI, EDX)
	PRECALC_4(Y14, 0x0)
	CALC_F1_POST(ECX, ESI, EDX)
}
544
// CALC_13 emits step 13: F1 on the word at 0x64(R15), with nothing interleaved.
func CALC_13() {
	CALC_F1_PRE(0x64, EDX, ECX, ESI, EAX)
	CALC_F1_POST(EDX, EBX, EAX)
}
549
// CALC_14 emits step 14: F1 on the word at 0x68(R15), with nothing interleaved.
func CALC_14() {
	CALC_F1_PRE(0x68, EAX, EDX, EBX, EDI)
	CALC_F1_POST(EAX, ECX, EDI)
}
554
// CALC_15 emits step 15: F1 on the word at 0x6c(R15), with PRECALC_7(0x10) interleaved.
func CALC_15() {
	CALC_F1_PRE(0x6c, EDI, EAX, ECX, ESI)
	PRECALC_7(0x10)
	CALC_F1_POST(EDI, EDX, ESI)
}
560
// CALC_16 emits step 16: F1 on the word at 0x80(R15), with PRECALC_0(0xa0) interleaved.
func CALC_16() {
	CALC_F1_PRE(0x80, ESI, EDI, EDX, EBX)
	PRECALC_0(0xa0)
	CALC_F1_POST(ESI, EAX, EBX)
}
566
// CALC_17 emits step 17: F1 on the word at 0x84(R15), with PRECALC_1(0xa0) interleaved.
func CALC_17() {
	CALC_F1_PRE(0x84, EBX, ESI, EAX, ECX)
	PRECALC_1(0xa0)
	CALC_F1_POST(EBX, EDI, ECX)
}
572
// CALC_18 emits step 18: F1 on the word at 0x88(R15), with PRECALC_2(Y13) interleaved.
func CALC_18() {
	CALC_F1_PRE(0x88, ECX, EBX, EDI, EDX)
	PRECALC_2(Y13)
	CALC_F1_POST(ECX, ESI, EDX)
}
578
// CALC_F2_PRE emits the first half of an F2 step. In order:
//
//	REG_E += 32-bit word at OFFSET(R15)
//	REG_E += REG_B            (REG_B's incoming value)
//	R12L   = REG_A rotated right by 27 (= left by 5)
//	REG_B  = REG_A rotated right by 2
//
// R12L is consumed by CALC_F2_POST.
func CALC_F2_PRE(OFFSET int, REG_A, REG_B, REG_E GPPhysical) {
	ADDL(Mem{Base: R15}.Offset(OFFSET), REG_E)
	LEAL(Mem{Base: REG_E, Index: REG_B, Scale: 1}, REG_E)
	RORXL(Imm(0x1b), REG_A, R12L)
	RORXL(Imm(2), REG_A, REG_B)
}
585
// CALC_F2_POST emits the second half of an F2 step:
// REG_A = REG_A ^ REG_B ^ REG_C, with REG_E += R12L issued between the two
// XORs. REG_A is overwritten with the F2 result.
func CALC_F2_POST(REG_A, REG_B, REG_C, REG_E GPPhysical) {
	XORL(REG_B, REG_A)
	ADDL(R12L, REG_E)
	XORL(REG_C, REG_A)
}
591
// CALC_19 emits step 19: F2 on the word at 0x8c(R15), with nothing interleaved.
func CALC_19() {
	CALC_F2_PRE(0x8c, EDX, ECX, EAX)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
596
// CALC_20 emits step 20: F2 on the word at 0xa0(R15), with PRECALC_4(Y13, 0x0) interleaved.
func CALC_20() {
	CALC_F2_PRE(0xa0, EAX, EDX, EDI)
	PRECALC_4(Y13, 0x0)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
602
// CALC_21 emits step 21: F2 on the word at 0xa4(R15), with nothing interleaved.
func CALC_21() {
	CALC_F2_PRE(0xa4, EDI, EAX, ESI)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
607
// CALC_22 emits step 22: F2 on the word at 0xa8(R15), with nothing interleaved.
func CALC_22() {
	CALC_F2_PRE(0xa8, ESI, EDI, EBX)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
612
// CALC_23 emits step 23: F2 on the word at 0xac(R15), with PRECALC_7(0x20) interleaved.
func CALC_23() {
	CALC_F2_PRE(0xac, EBX, ESI, ECX)
	PRECALC_7(0x20)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
618
// CALC_24 emits step 24: F2 on the word at 0xc0(R15), with PRECALC_0(0xb0) interleaved.
func CALC_24() {
	CALC_F2_PRE(0xc0, ECX, EBX, EDX)
	PRECALC_0(0xb0)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
624
// CALC_25 emits step 25: F2 on the word at 0xc4(R15), with PRECALC_1(0xb0) interleaved.
func CALC_25() {
	CALC_F2_PRE(0xc4, EDX, ECX, EAX)
	PRECALC_1(0xb0)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
630
// CALC_26 emits step 26: F2 on the word at 0xc8(R15), with PRECALC_2(Y12) interleaved.
func CALC_26() {
	CALC_F2_PRE(0xc8, EAX, EDX, EDI)
	PRECALC_2(Y12)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
636
// CALC_27 emits step 27: F2 on the word at 0xcc(R15), with nothing interleaved.
func CALC_27() {
	CALC_F2_PRE(0xcc, EDI, EAX, ESI)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
641
// CALC_28 emits step 28: F2 on the word at 0xe0(R15), with PRECALC_4(Y12, 0x0) interleaved.
func CALC_28() {
	CALC_F2_PRE(0xe0, ESI, EDI, EBX)
	PRECALC_4(Y12, 0x0)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
647
// CALC_29 emits step 29: F2 on the word at 0xe4(R15), with nothing interleaved.
func CALC_29() {
	CALC_F2_PRE(0xe4, EBX, ESI, ECX)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
652
// CALC_30 emits step 30: F2 on the word at 0xe8(R15), with nothing interleaved.
func CALC_30() {
	CALC_F2_PRE(0xe8, ECX, EBX, EDX)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
657
// CALC_31 emits step 31: F2 on the word at 0xec(R15), with PRECALC_7(0x30) interleaved.
func CALC_31() {
	CALC_F2_PRE(0xec, EDX, ECX, EAX)
	PRECALC_7(0x30)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
663
// CALC_32 emits step 32: F2 on the word at 0x100(R15), with
// PRECALC_16(Y15, Y14, Y12, Y8) interleaved.
func CALC_32() {
	CALC_F2_PRE(0x100, EAX, EDX, EDI)
	PRECALC_16(Y15, Y14, Y12, Y8)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
669
// CALC_33 emits step 33: F2 on the word at 0x104(R15), with PRECALC_17(Y15, Y13, Y8) interleaved.
func CALC_33() {
	CALC_F2_PRE(0x104, EDI, EAX, ESI)
	PRECALC_17(Y15, Y13, Y8)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
675
// CALC_34 emits step 34: F2 on the word at 0x108(R15), with PRECALC_18(Y8) interleaved.
func CALC_34() {
	CALC_F2_PRE(0x108, ESI, EDI, EBX)
	PRECALC_18(Y8)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
681
// CALC_35 emits step 35: F2 on the word at 0x10c(R15), with PRECALC_19(Y8) interleaved.
func CALC_35() {
	CALC_F2_PRE(0x10c, EBX, ESI, ECX)
	PRECALC_19(Y8)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
687
// CALC_36 emits step 36: F2 on the word at 0x120(R15), with PRECALC_20(Y8) interleaved.
func CALC_36() {
	CALC_F2_PRE(0x120, ECX, EBX, EDX)
	PRECALC_20(Y8)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
693
// CALC_37 emits step 37: F2 on the word at 0x124(R15), with PRECALC_21(Y8) interleaved.
func CALC_37() {
	CALC_F2_PRE(0x124, EDX, ECX, EAX)
	PRECALC_21(Y8)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
699
// CALC_38 emits step 38: F2 on the word at 0x128(R15), with nothing interleaved.
func CALC_38() {
	CALC_F2_PRE(0x128, EAX, EDX, EDI)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
704
// CALC_F3_PRE emits the first half of an F3 step: REG_E += the 32-bit word at
// OFFSET(R15).
func CALC_F3_PRE(OFFSET int, REG_E GPPhysical) {
	ADDL(Mem{Base: R15}.Offset(OFFSET), REG_E)
}
708
// CALC_F3_POST emits the second half of an F3 step. It adds REG_TB's incoming
// value and REG_A rotated left by 5 into REG_E, replaces REG_TB with REG_A
// rotated right by 2, and overwrites REG_A with
// (REG_A & REG_B) | ((REG_A | REG_B) & REG_C), the bitwise majority of the
// three. EBP and R12L are clobbered as scratch.
func CALC_F3_POST(REG_A, REG_B, REG_C, REG_E, REG_TB GPPhysical) {
	LEAL(Mem{Base: REG_E, Index: REG_TB, Scale: 1}, REG_E)
	MOVL(REG_B, EBP)
	ORL(REG_A, EBP)
	RORXL(Imm(0x1b), REG_A, R12L) // rotate right by 27 == rotate left by 5
	RORXL(Imm(2), REG_A, REG_TB)  // must read REG_A before it is overwritten below
	ANDL(REG_C, EBP)
	ANDL(REG_B, REG_A)
	ORL(EBP, REG_A)
	ADDL(R12L, REG_E)
}
720
// CALC_39 emits step 39: F3 on the word at 0x12c(R15), with
// PRECALC_23(Y8, 0x0, 0x80) interleaved.
func CALC_39() {
	CALC_F3_PRE(0x12c, ESI)
	PRECALC_23(Y8, 0x0, 0x80)
	CALC_F3_POST(EDI, EDX, ECX, ESI, EAX)
}
726
// CALC_40 emits step 40: F3 on the word at 0x140(R15), with
// PRECALC_16(Y14, Y13, Y8, Y7) interleaved.
func CALC_40() {
	CALC_F3_PRE(0x140, EBX)
	PRECALC_16(Y14, Y13, Y8, Y7)
	CALC_F3_POST(ESI, EAX, EDX, EBX, EDI)
}
732
// CALC_41 emits step 41: F3 on the word at 0x144(R15), with PRECALC_17(Y14, Y12, Y7) interleaved.
func CALC_41() {
	CALC_F3_PRE(0x144, ECX)
	PRECALC_17(Y14, Y12, Y7)
	CALC_F3_POST(EBX, EDI, EAX, ECX, ESI)
}
738
// CALC_42 emits step 42: F3 on the word at 0x148(R15), with PRECALC_18(Y7) interleaved.
func CALC_42() {
	CALC_F3_PRE(0x148, EDX)
	PRECALC_18(Y7)
	CALC_F3_POST(ECX, ESI, EDI, EDX, EBX)
}
744
// CALC_43 emits step 43: F3 on the word at 0x14c(R15), with PRECALC_19(Y7) interleaved.
func CALC_43() {
	CALC_F3_PRE(0x14c, EAX)
	PRECALC_19(Y7)
	CALC_F3_POST(EDX, EBX, ESI, EAX, ECX)
}
750
// CALC_44 emits step 44: F3 on the word at 0x160(R15), with PRECALC_20(Y7) interleaved.
func CALC_44() {
	CALC_F3_PRE(0x160, EDI)
	PRECALC_20(Y7)
	CALC_F3_POST(EAX, ECX, EBX, EDI, EDX)
}
756
// CALC_45 emits step 45: F3 on the word at 0x164(R15), with PRECALC_21(Y7) interleaved.
func CALC_45() {
	CALC_F3_PRE(0x164, ESI)
	PRECALC_21(Y7)
	CALC_F3_POST(EDI, EDX, ECX, ESI, EAX)
}
762
// CALC_46 emits step 46: F3 on the word at 0x168(R15), with nothing interleaved.
func CALC_46() {
	CALC_F3_PRE(0x168, EBX)
	CALC_F3_POST(ESI, EAX, EDX, EBX, EDI)
}
767
768 func CALC_47() {
769 CALC_F3_PRE(0x16c, ECX)
770 VPXOR(Y9, Y0, Y7)
771 VPADDD(Mem{Base: R8}.Offset(0x20), Y7, Y0)
772 VMOVDQU(Y0, Mem{Base: R14}.Offset(0xa0))
773 CALC_F3_POST(EBX, EDI, EAX, ECX, ESI)
774 }
775
// CALC_48 emits step 48: F3 on the word at 0x180(R15), with
// PRECALC_16(Y13, Y12, Y7, Y5) interleaved.
func CALC_48() {
	CALC_F3_PRE(0x180, EDX)
	PRECALC_16(Y13, Y12, Y7, Y5)
	CALC_F3_POST(ECX, ESI, EDI, EDX, EBX)
}
781
// CALC_49 emits step 49: F3 on the word at 0x184(R15), with PRECALC_17(Y13, Y8, Y5) interleaved.
func CALC_49() {
	CALC_F3_PRE(0x184, EAX)
	PRECALC_17(Y13, Y8, Y5)
	CALC_F3_POST(EDX, EBX, ESI, EAX, ECX)
}
787
// CALC_50 emits step 50: F3 on the word at 0x188(R15), with PRECALC_18(Y5) interleaved.
func CALC_50() {
	CALC_F3_PRE(0x188, EDI)
	PRECALC_18(Y5)
	CALC_F3_POST(EAX, ECX, EBX, EDI, EDX)
}
793
// CALC_51 emits step 51: F3 on the word at 0x18c(R15), with PRECALC_19(Y5) interleaved.
func CALC_51() {
	CALC_F3_PRE(0x18c, ESI)
	PRECALC_19(Y5)
	CALC_F3_POST(EDI, EDX, ECX, ESI, EAX)
}
799
// CALC_52 emits step 52: F3 on the word at 0x1a0(R15), with PRECALC_20(Y5) interleaved.
func CALC_52() {
	CALC_F3_PRE(0x1a0, EBX)
	PRECALC_20(Y5)
	CALC_F3_POST(ESI, EAX, EDX, EBX, EDI)
}
805
// CALC_53 emits step 53: F3 on the word at 0x1a4(R15), with PRECALC_21(Y5) interleaved.
func CALC_53() {
	CALC_F3_PRE(0x1a4, ECX)
	PRECALC_21(Y5)
	CALC_F3_POST(EBX, EDI, EAX, ECX, ESI)
}
811
// CALC_54 emits step 54: F3 on the word at 0x1a8(R15), with nothing interleaved.
func CALC_54() {
	CALC_F3_PRE(0x1a8, EDX)
	CALC_F3_POST(ECX, ESI, EDI, EDX, EBX)
}
816
// CALC_55 emits step 55: F3 on the word at 0x1ac(R15), with
// PRECALC_23(Y5, 0x20, 0xc0) interleaved.
func CALC_55() {
	CALC_F3_PRE(0x1ac, EAX)
	PRECALC_23(Y5, 0x20, 0xc0)
	CALC_F3_POST(EDX, EBX, ESI, EAX, ECX)
}
822
// CALC_56 emits step 56: F3 on the word at 0x1c0(R15), with
// PRECALC_16(Y12, Y8, Y5, Y3) interleaved.
func CALC_56() {
	CALC_F3_PRE(0x1c0, EDI)
	PRECALC_16(Y12, Y8, Y5, Y3)
	CALC_F3_POST(EAX, ECX, EBX, EDI, EDX)
}
828
// CALC_57 emits step 57: F3 on the word at 0x1c4(R15), with PRECALC_17(Y12, Y7, Y3) interleaved.
func CALC_57() {
	CALC_F3_PRE(0x1c4, ESI)
	PRECALC_17(Y12, Y7, Y3)
	CALC_F3_POST(EDI, EDX, ECX, ESI, EAX)
}
834
// CALC_58 emits step 58: F3 on the word at 0x1c8(R15), with PRECALC_18(Y3) interleaved.
func CALC_58() {
	CALC_F3_PRE(0x1c8, EBX)
	PRECALC_18(Y3)
	CALC_F3_POST(ESI, EAX, EDX, EBX, EDI)
}
840
// CALC_59 emits step 59: F2 on the word at 0x1cc(R15), with PRECALC_19(Y3) interleaved.
func CALC_59() {
	CALC_F2_PRE(0x1cc, EBX, ESI, ECX)
	PRECALC_19(Y3)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
846
// CALC_60 emits step 60: F2 on the word at 0x1e0(R15), with PRECALC_20(Y3) interleaved.
func CALC_60() {
	CALC_F2_PRE(0x1e0, ECX, EBX, EDX)
	PRECALC_20(Y3)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
852
// CALC_61 emits step 61: F2 on the word at 0x1e4(R15), with PRECALC_21(Y3) interleaved.
func CALC_61() {
	CALC_F2_PRE(0x1e4, EDX, ECX, EAX)
	PRECALC_21(Y3)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
858
// CALC_62 emits step 62: F2 on the word at 0x1e8(R15), with nothing interleaved.
func CALC_62() {
	CALC_F2_PRE(0x1e8, EAX, EDX, EDI)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
863
// CALC_63 emits step 63: F2 on the word at 0x1ec(R15), with
// PRECALC_23(Y3, 0x20, 0xe0) interleaved.
func CALC_63() {
	CALC_F2_PRE(0x1ec, EDI, EAX, ESI)
	PRECALC_23(Y3, 0x20, 0xe0)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
869
// CALC_64 emits step 64: F2 on the word at 0x200(R15), with PRECALC_32(Y5, Y3) interleaved.
func CALC_64() {
	CALC_F2_PRE(0x200, ESI, EDI, EBX)
	PRECALC_32(Y5, Y3)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
875
// CALC_65 emits step 65: F2 on the word at 0x204(R15), with PRECALC_33(Y14, Y15) interleaved.
func CALC_65() {
	CALC_F2_PRE(0x204, EBX, ESI, ECX)
	PRECALC_33(Y14, Y15)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
881
// CALC_66 emits step 66: F2 on the word at 0x208(R15), with PRECALC_34(Y8) interleaved.
func CALC_66() {
	CALC_F2_PRE(0x208, ECX, EBX, EDX)
	PRECALC_34(Y8)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
887
// CALC_67 emits step 67: F2 on the word at 0x20c(R15), with PRECALC_35(Y15) interleaved.
func CALC_67() {
	CALC_F2_PRE(0x20c, EDX, ECX, EAX)
	PRECALC_35(Y15)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
893
// CALC_68 emits step 68: F2 on the word at 0x220(R15), with PRECALC_36(Y15) interleaved.
func CALC_68() {
	CALC_F2_PRE(0x220, EAX, EDX, EDI)
	PRECALC_36(Y15)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
899
// CALC_69 emits step 69: F2 on the word at 0x224(R15), with PRECALC_37(Y15) interleaved.
func CALC_69() {
	CALC_F2_PRE(0x224, EDI, EAX, ESI)
	PRECALC_37(Y15)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
905
// CALC_70 emits step 70: F2 on the word at 0x228(R15), with nothing interleaved.
func CALC_70() {
	CALC_F2_PRE(0x228, ESI, EDI, EBX)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
910
// CALC_71 emits step 71: F2 on the word at 0x22c(R15), with
// PRECALC_39(Y15, 0x20, 0x100) interleaved.
func CALC_71() {
	CALC_F2_PRE(0x22c, EBX, ESI, ECX)
	PRECALC_39(Y15, 0x20, 0x100)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
916
// CALC_72 emits step 72: F2 on the word at 0x240(R15), with PRECALC_32(Y3, Y15) interleaved.
func CALC_72() {
	CALC_F2_PRE(0x240, ECX, EBX, EDX)
	PRECALC_32(Y3, Y15)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
922
// CALC_73 emits step 73: F2 on the word at 0x244(R15), with PRECALC_33(Y13, Y14) interleaved.
func CALC_73() {
	CALC_F2_PRE(0x244, EDX, ECX, EAX)
	PRECALC_33(Y13, Y14)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
928
// CALC_74 emits step 74: F2 on the word at 0x248(R15), with PRECALC_34(Y7) interleaved.
func CALC_74() {
	CALC_F2_PRE(0x248, EAX, EDX, EDI)
	PRECALC_34(Y7)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
934
// CALC_75 emits step 75: F2 on the word at 0x24c(R15), with PRECALC_35(Y14) interleaved.
func CALC_75() {
	CALC_F2_PRE(0x24c, EDI, EAX, ESI)
	PRECALC_35(Y14)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
940
// CALC_76 emits step 76: F2 on the word at 0x260(R15), with PRECALC_36(Y14) interleaved.
func CALC_76() {
	CALC_F2_PRE(0x260, ESI, EDI, EBX)
	PRECALC_36(Y14)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
946
// CALC_77 emits step 77: F2 on the word at 0x264(R15), with PRECALC_37(Y14) interleaved.
func CALC_77() {
	CALC_F2_PRE(0x264, EBX, ESI, ECX)
	PRECALC_37(Y14)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
952
// CALC_78 emits step 78: F2 on the word at 0x268(R15), with nothing interleaved.
func CALC_78() {
	CALC_F2_PRE(0x268, ECX, EBX, EDX)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
957
// CALC_79 emits step 79, written out by hand instead of via the F2 helpers:
// EAX += word at 0x26c(R15), EAX += ECX, EAX += EDX rotated left by 5.
// Unlike the F2 helpers it does not rotate or combine registers for a
// following step. PRECALC_39(Y14, 0x20, 0x120) is interleaved.
func CALC_79() {
	ADDL(Mem{Base: R15}.Offset(0x26c), EAX)
	LEAL(Mem{Base: AX, Index: CX, Scale: 1}, EAX)
	RORXL(Imm(0x1b), EDX, R12L)
	PRECALC_39(Y14, 0x20, 0x120)
	ADDL(R12L, EAX)
}
965
966
// CALC_80 emits step 80 and mirrors CALC_0 with a different register
// assignment: it first builds (ECX & EBX) ^ (^ECX & ESI) in EDX from the
// unrotated ECX and rotates ECX right by 2, then runs the F1 halves on the
// word at 0x10(R15) with PRECALC_32(Y15, Y14) interleaved.
func CALC_80() {
	MOVL(ECX, EDX)
	RORXL(Imm(2), ECX, ECX)
	ANDNL(ESI, EDX, EBP)
	ANDL(EBX, EDX)
	XORL(EBP, EDX)
	CALC_F1_PRE(0x10, EAX, EDX, EBX, EDI)
	PRECALC_32(Y15, Y14)
	CALC_F1_POST(EAX, ECX, EDI)
}
977
// CALC_81 emits step 81: F1 on the word at 0x14(R15), with PRECALC_33(Y12, Y13) interleaved.
func CALC_81() {
	CALC_F1_PRE(0x14, EDI, EAX, ECX, ESI)
	PRECALC_33(Y12, Y13)
	CALC_F1_POST(EDI, EDX, ESI)
}
983
// CALC_82 emits step 82: F1 on the word at 0x18(R15), with PRECALC_34(Y5) interleaved.
func CALC_82() {
	CALC_F1_PRE(0x18, ESI, EDI, EDX, EBX)
	PRECALC_34(Y5)
	CALC_F1_POST(ESI, EAX, EBX)
}
989
// CALC_83 emits step 83: F1 on the word at 0x1c(R15), with PRECALC_35(Y13) interleaved.
func CALC_83() {
	CALC_F1_PRE(0x1c, EBX, ESI, EAX, ECX)
	PRECALC_35(Y13)
	CALC_F1_POST(EBX, EDI, ECX)
}
995
// CALC_84 emits step 84: F1 on the word at 0x30(R15), with PRECALC_36(Y13) interleaved.
func CALC_84() {
	CALC_F1_PRE(0x30, ECX, EBX, EDI, EDX)
	PRECALC_36(Y13)
	CALC_F1_POST(ECX, ESI, EDX)
}
1001
// CALC_85 emits step 85: F1 on the word at 0x34(R15), with PRECALC_37(Y13) interleaved.
func CALC_85() {
	CALC_F1_PRE(0x34, EDX, ECX, ESI, EAX)
	PRECALC_37(Y13)
	CALC_F1_POST(EDX, EBX, EAX)
}
1007
// CALC_86 emits step 86: F1 on the word at 0x38(R15), with nothing interleaved.
func CALC_86() {
	CALC_F1_PRE(0x38, EAX, EDX, EBX, EDI)
	CALC_F1_POST(EAX, ECX, EDI)
}
1012
// CALC_87 emits step 87: F1 on the word at 0x3c(R15), with
// PRECALC_39(Y13, 0x40, 0x140) interleaved.
func CALC_87() {
	CALC_F1_PRE(0x3c, EDI, EAX, ECX, ESI)
	PRECALC_39(Y13, 0x40, 0x140)
	CALC_F1_POST(EDI, EDX, ESI)
}
1018
// CALC_88 emits step 88: F1 on the word at 0x50(R15), with PRECALC_32(Y14, Y13) interleaved.
func CALC_88() {
	CALC_F1_PRE(0x50, ESI, EDI, EDX, EBX)
	PRECALC_32(Y14, Y13)
	CALC_F1_POST(ESI, EAX, EBX)
}
1024
// CALC_89 emits step 89: F1 on the word at 0x54(R15), with PRECALC_33(Y8, Y12) interleaved.
func CALC_89() {
	CALC_F1_PRE(0x54, EBX, ESI, EAX, ECX)
	PRECALC_33(Y8, Y12)
	CALC_F1_POST(EBX, EDI, ECX)
}
1030
// CALC_90 emits step 90: F1 on the word at 0x58(R15), with PRECALC_34(Y3) interleaved.
func CALC_90() {
	CALC_F1_PRE(0x58, ECX, EBX, EDI, EDX)
	PRECALC_34(Y3)
	CALC_F1_POST(ECX, ESI, EDX)
}
1036
// CALC_91 emits step 91: F1 on the word at 0x5c(R15), with PRECALC_35(Y12) interleaved.
func CALC_91() {
	CALC_F1_PRE(0x5c, EDX, ECX, ESI, EAX)
	PRECALC_35(Y12)
	CALC_F1_POST(EDX, EBX, EAX)
}
1042
// CALC_92 emits step 92: F1 on the word at 0x70(R15), with PRECALC_36(Y12) interleaved.
func CALC_92() {
	CALC_F1_PRE(0x70, EAX, EDX, EBX, EDI)
	PRECALC_36(Y12)
	CALC_F1_POST(EAX, ECX, EDI)
}
1048
// CALC_93 emits step 93: F1 on the word at 0x74(R15), with PRECALC_37(Y12) interleaved.
func CALC_93() {
	CALC_F1_PRE(0x74, EDI, EAX, ECX, ESI)
	PRECALC_37(Y12)
	CALC_F1_POST(EDI, EDX, ESI)
}
1054
// CALC_94 emits step 94: F1 on the word at 0x78(R15), with nothing interleaved.
func CALC_94() {
	CALC_F1_PRE(0x78, ESI, EDI, EDX, EBX)
	CALC_F1_POST(ESI, EAX, EBX)
}
1059
// CALC_95 emits step 95: F1 on the word at 0x7c(R15), with
// PRECALC_39(Y12, 0x40, 0x160) interleaved.
func CALC_95() {
	CALC_F1_PRE(0x7c, EBX, ESI, EAX, ECX)
	PRECALC_39(Y12, 0x40, 0x160)
	CALC_F1_POST(EBX, EDI, ECX)
}
1065
// CALC_96 emits step 96: F1 on the word at 0x90(R15), with PRECALC_32(Y13, Y12) interleaved.
func CALC_96() {
	CALC_F1_PRE(0x90, ECX, EBX, EDI, EDX)
	PRECALC_32(Y13, Y12)
	CALC_F1_POST(ECX, ESI, EDX)
}
1071
// CALC_97 emits step 97: F1 on the word at 0x94(R15), with PRECALC_33(Y7, Y8) interleaved.
func CALC_97() {
	CALC_F1_PRE(0x94, EDX, ECX, ESI, EAX)
	PRECALC_33(Y7, Y8)
	CALC_F1_POST(EDX, EBX, EAX)
}
1077
// CALC_98 emits step 98: F1 on the word at 0x98(R15), with PRECALC_34(Y15) interleaved.
func CALC_98() {
	CALC_F1_PRE(0x98, EAX, EDX, EBX, EDI)
	PRECALC_34(Y15)
	CALC_F1_POST(EAX, ECX, EDI)
}
1083
// CALC_99 emits step 99: F2 on the word at 0x9c(R15), with PRECALC_35(Y8) interleaved.
func CALC_99() {
	CALC_F2_PRE(0x9c, EDI, EAX, ESI)
	PRECALC_35(Y8)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
1089
// CALC_100 emits step 100: F2 on the word at 0xb0(R15), with PRECALC_36(Y8) interleaved.
func CALC_100() {
	CALC_F2_PRE(0xb0, ESI, EDI, EBX)
	PRECALC_36(Y8)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
1095
// CALC_101 emits step 101: F2 on the word at 0xb4(R15), with PRECALC_37(Y8) interleaved.
func CALC_101() {
	CALC_F2_PRE(0xb4, EBX, ESI, ECX)
	PRECALC_37(Y8)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
1101
// CALC_102 emits step 102: F2 on the word at 0xb8(R15), with nothing interleaved.
func CALC_102() {
	CALC_F2_PRE(0xb8, ECX, EBX, EDX)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
1106
// CALC_103 emits step 103: F2 on the word at 0xbc(R15), with
// PRECALC_39(Y8, 0x40, 0x180) interleaved.
func CALC_103() {
	CALC_F2_PRE(0xbc, EDX, ECX, EAX)
	PRECALC_39(Y8, 0x40, 0x180)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
1112
// CALC_104 emits step 104: F2 on the word at 0xd0(R15), with PRECALC_32(Y12, Y8) interleaved.
func CALC_104() {
	CALC_F2_PRE(0xd0, EAX, EDX, EDI)
	PRECALC_32(Y12, Y8)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
1118
// CALC_105 emits step 105: F2 on the word at 0xd4(R15), with PRECALC_33(Y5, Y7) interleaved.
func CALC_105() {
	CALC_F2_PRE(0xd4, EDI, EAX, ESI)
	PRECALC_33(Y5, Y7)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
1124
// CALC_106 emits step 106: F2 on the word at 0xd8(R15), with PRECALC_34(Y14) interleaved.
func CALC_106() {
	CALC_F2_PRE(0xd8, ESI, EDI, EBX)
	PRECALC_34(Y14)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
1130
// CALC_107 emits step 107: F2 on the word at 0xdc(R15), with PRECALC_35(Y7) interleaved.
func CALC_107() {
	CALC_F2_PRE(0xdc, EBX, ESI, ECX)
	PRECALC_35(Y7)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
1136
// CALC_108 emits step 108: F2 on the word at 0xf0(R15), with PRECALC_36(Y7) interleaved.
func CALC_108() {
	CALC_F2_PRE(0xf0, ECX, EBX, EDX)
	PRECALC_36(Y7)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
1142
// CALC_109 emits step 109: F2 on the word at 0xf4(R15), with PRECALC_37(Y7) interleaved.
func CALC_109() {
	CALC_F2_PRE(0xf4, EDX, ECX, EAX)
	PRECALC_37(Y7)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
1148
// CALC_110 emits step 110: F2 on the word at 0xf8(R15), with nothing interleaved.
func CALC_110() {
	CALC_F2_PRE(0xf8, EAX, EDX, EDI)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
1153
// CALC_111 emits step 111: F2 on the word at 0xfc(R15), with
// PRECALC_39(Y7, 0x40, 0x1a0) interleaved.
func CALC_111() {
	CALC_F2_PRE(0xfc, EDI, EAX, ESI)
	PRECALC_39(Y7, 0x40, 0x1a0)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
1159
// CALC_112 emits step 112: F2 on the word at 0x110(R15), with PRECALC_32(Y8, Y7) interleaved.
func CALC_112() {
	CALC_F2_PRE(0x110, ESI, EDI, EBX)
	PRECALC_32(Y8, Y7)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
1165
// CALC_113 emits step 113: F2 on the word at 0x114(R15), with PRECALC_33(Y3, Y5) interleaved.
func CALC_113() {
	CALC_F2_PRE(0x114, EBX, ESI, ECX)
	PRECALC_33(Y3, Y5)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
1171
// CALC_114 emits step 114: F2 on the word at 0x118(R15), with PRECALC_34(Y13) interleaved.
func CALC_114() {
	CALC_F2_PRE(0x118, ECX, EBX, EDX)
	PRECALC_34(Y13)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
1177
// CALC_115 emits step 115: F2 on the word at 0x11c(R15), with PRECALC_35(Y5) interleaved.
func CALC_115() {
	CALC_F2_PRE(0x11c, EDX, ECX, EAX)
	PRECALC_35(Y5)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
1183
// CALC_116 emits step 116: F2 on the word at 0x130(R15), with PRECALC_36(Y5) interleaved.
func CALC_116() {
	CALC_F2_PRE(0x130, EAX, EDX, EDI)
	PRECALC_36(Y5)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
1189
// CALC_117 emits step 117: F2 on the word at 0x134(R15), with PRECALC_37(Y5) interleaved.
func CALC_117() {
	CALC_F2_PRE(0x134, EDI, EAX, ESI)
	PRECALC_37(Y5)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
1195
// CALC_118 emits step 118: F2 on the word at 0x138(R15), with nothing interleaved.
func CALC_118() {
	CALC_F2_PRE(0x138, ESI, EDI, EBX)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
1200
// CALC_119 emits step 119: F3 on the word at 0x13c(R15), with
// PRECALC_39(Y5, 0x40, 0x1c0) interleaved.
func CALC_119() {
	CALC_F3_PRE(0x13c, ECX)
	PRECALC_39(Y5, 0x40, 0x1c0)
	CALC_F3_POST(EBX, EDI, EAX, ECX, ESI)
}
1206
// CALC_120 emits step 120: F3 on the word at 0x150(R15), with PRECALC_32(Y7, Y5) interleaved.
func CALC_120() {
	CALC_F3_PRE(0x150, EDX)
	PRECALC_32(Y7, Y5)
	CALC_F3_POST(ECX, ESI, EDI, EDX, EBX)
}
1212
// CALC_121 emits step 121: F3 on the word at 0x154(R15), with PRECALC_33(Y15, Y3) interleaved.
func CALC_121() {
	CALC_F3_PRE(0x154, EAX)
	PRECALC_33(Y15, Y3)
	CALC_F3_POST(EDX, EBX, ESI, EAX, ECX)
}
1218
// CALC_122 emits step 122: F3 on the word at 0x158(R15), with PRECALC_34(Y12) interleaved.
func CALC_122() {
	CALC_F3_PRE(0x158, EDI)
	PRECALC_34(Y12)
	CALC_F3_POST(EAX, ECX, EBX, EDI, EDX)
}
1224
// CALC_123 emits step 123: F3 on the word at 0x15c(R15), with PRECALC_35(Y3) interleaved.
func CALC_123() {
	CALC_F3_PRE(0x15c, ESI)
	PRECALC_35(Y3)
	CALC_F3_POST(EDI, EDX, ECX, ESI, EAX)
}
1230
// CALC_124 emits step 124: F3 on the word at 0x170(R15), with PRECALC_36(Y3) interleaved.
func CALC_124() {
	CALC_F3_PRE(0x170, EBX)
	PRECALC_36(Y3)
	CALC_F3_POST(ESI, EAX, EDX, EBX, EDI)
}
1236
// CALC_125 emits step 125: F3 on the word at 0x174(R15), with PRECALC_37(Y3) interleaved.
func CALC_125() {
	CALC_F3_PRE(0x174, ECX)
	PRECALC_37(Y3)
	CALC_F3_POST(EBX, EDI, EAX, ECX, ESI)
}
1242
// CALC_126 emits step 126: F3 on the word at 0x178(R15), with nothing interleaved.
func CALC_126() {
	CALC_F3_PRE(0x178, EDX)
	CALC_F3_POST(ECX, ESI, EDI, EDX, EBX)
}
1247
// CALC_127 emits step 127: F3 on the word at 0x17c(R15), with
// PRECALC_39(Y3, 0x60, 0x1e0) interleaved.
func CALC_127() {
	CALC_F3_PRE(0x17c, EAX)
	PRECALC_39(Y3, 0x60, 0x1e0)
	CALC_F3_POST(EDX, EBX, ESI, EAX, ECX)
}
1253
// CALC_128 emits step 128: F3 on the word at 0x190(R15), with PRECALC_32(Y5, Y3) interleaved.
func CALC_128() {
	CALC_F3_PRE(0x190, EDI)
	PRECALC_32(Y5, Y3)
	CALC_F3_POST(EAX, ECX, EBX, EDI, EDX)
}
1259
// CALC_129 emits step 129: F3 on the word at 0x194(R15), with PRECALC_33(Y14, Y15) interleaved.
func CALC_129() {
	CALC_F3_PRE(0x194, ESI)
	PRECALC_33(Y14, Y15)
	CALC_F3_POST(EDI, EDX, ECX, ESI, EAX)
}
1265
// CALC_130 emits step 130: F3 on the word at 0x198(R15), with PRECALC_34(Y8) interleaved.
func CALC_130() {
	CALC_F3_PRE(0x198, EBX)
	PRECALC_34(Y8)
	CALC_F3_POST(ESI, EAX, EDX, EBX, EDI)
}
1271
// CALC_131 emits step 131: F3 on the word at 0x19c(R15), with PRECALC_35(Y15) interleaved.
func CALC_131() {
	CALC_F3_PRE(0x19c, ECX)
	PRECALC_35(Y15)
	CALC_F3_POST(EBX, EDI, EAX, ECX, ESI)
}
1277
// CALC_132 emits step 132: F3 on the word at 0x1b0(R15), with PRECALC_36(Y15) interleaved.
func CALC_132() {
	CALC_F3_PRE(0x1b0, EDX)
	PRECALC_36(Y15)
	CALC_F3_POST(ECX, ESI, EDI, EDX, EBX)
}
1283
// CALC_133 emits step 133: F3 on the word at 0x1b4(R15), with PRECALC_37(Y15) interleaved.
func CALC_133() {
	CALC_F3_PRE(0x1b4, EAX)
	PRECALC_37(Y15)
	CALC_F3_POST(EDX, EBX, ESI, EAX, ECX)
}
1289
// CALC_134 emits step 134: F3 on the word at 0x1b8(R15), with nothing interleaved.
func CALC_134() {
	CALC_F3_PRE(0x1b8, EDI)
	CALC_F3_POST(EAX, ECX, EBX, EDI, EDX)
}
1294
// CALC_135 emits step 135: F3 on the word at 0x1bc(R15), with
// PRECALC_39(Y15, 0x60, 0x200) interleaved.
func CALC_135() {
	CALC_F3_PRE(0x1bc, ESI)
	PRECALC_39(Y15, 0x60, 0x200)
	CALC_F3_POST(EDI, EDX, ECX, ESI, EAX)
}
1300
// CALC_136 emits step 136: F3 on the word at 0x1d0(R15), with PRECALC_32(Y3, Y15) interleaved.
func CALC_136() {
	CALC_F3_PRE(0x1d0, EBX)
	PRECALC_32(Y3, Y15)
	CALC_F3_POST(ESI, EAX, EDX, EBX, EDI)
}
1306
// CALC_137 emits step 137: F3 on the word at 0x1d4(R15), with PRECALC_33(Y13, Y14) interleaved.
func CALC_137() {
	CALC_F3_PRE(0x1d4, ECX)
	PRECALC_33(Y13, Y14)
	CALC_F3_POST(EBX, EDI, EAX, ECX, ESI)
}
1312
// CALC_138 emits step 138: F3 on the word at 0x1d8(R15), with PRECALC_34(Y7) interleaved.
func CALC_138() {
	CALC_F3_PRE(0x1d8, EDX)
	PRECALC_34(Y7)
	CALC_F3_POST(ECX, ESI, EDI, EDX, EBX)
}
1318
// CALC_139 emits step 139: F2 on the word at 0x1dc(R15), with PRECALC_35(Y14) interleaved.
func CALC_139() {
	CALC_F2_PRE(0x1dc, EDX, ECX, EAX)
	PRECALC_35(Y14)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
1324
// CALC_140 emits step 140: F2 on the word at 0x1f0(R15), with PRECALC_36(Y14) interleaved.
func CALC_140() {
	CALC_F2_PRE(0x1f0, EAX, EDX, EDI)
	PRECALC_36(Y14)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
1330
// CALC_141 emits step 141: F2 on the word at 0x1f4(R15), with PRECALC_37(Y14) interleaved.
func CALC_141() {
	CALC_F2_PRE(0x1f4, EDI, EAX, ESI)
	PRECALC_37(Y14)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
1336
// CALC_142 emits step 142: F2 on the word at 0x1f8(R15), with nothing interleaved.
func CALC_142() {
	CALC_F2_PRE(0x1f8, ESI, EDI, EBX)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
1341
// CALC_143 emits step 143: F2 on the word at 0x1fc(R15), with
// PRECALC_39(Y14, 0x60, 0x220) interleaved.
func CALC_143() {
	CALC_F2_PRE(0x1fc, EBX, ESI, ECX)
	PRECALC_39(Y14, 0x60, 0x220)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
1347
// CALC_144 emits step 144: F2 on the word at 0x210(R15), with PRECALC_32(Y15, Y14) interleaved.
func CALC_144() {
	CALC_F2_PRE(0x210, ECX, EBX, EDX)
	PRECALC_32(Y15, Y14)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
1353
// CALC_145 emits step 145: F2 on the word at 0x214(R15), with PRECALC_33(Y12, Y13) interleaved.
func CALC_145() {
	CALC_F2_PRE(0x214, EDX, ECX, EAX)
	PRECALC_33(Y12, Y13)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
1359
// CALC_146 emits step 146: F2 on the word at 0x218(R15), with PRECALC_34(Y5) interleaved.
func CALC_146() {
	CALC_F2_PRE(0x218, EAX, EDX, EDI)
	PRECALC_34(Y5)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
1365
// CALC_147 emits step 147: F2 on the word at 0x21c(R15), with PRECALC_35(Y13) interleaved.
func CALC_147() {
	CALC_F2_PRE(0x21c, EDI, EAX, ESI)
	PRECALC_35(Y13)
	CALC_F2_POST(EDI, EDX, ECX, ESI)
}
1371
// CALC_148 emits step 148: F2 on the word at 0x230(R15), with PRECALC_36(Y13) interleaved.
func CALC_148() {
	CALC_F2_PRE(0x230, ESI, EDI, EBX)
	PRECALC_36(Y13)
	CALC_F2_POST(ESI, EAX, EDX, EBX)
}
1377
// CALC_149 emits step 149: F2 on the word at 0x234(R15), with PRECALC_37(Y13) interleaved.
func CALC_149() {
	CALC_F2_PRE(0x234, EBX, ESI, ECX)
	PRECALC_37(Y13)
	CALC_F2_POST(EBX, EDI, EAX, ECX)
}
1383
// CALC_150 emits step 150: F2 on the word at 0x238(R15), with nothing interleaved.
func CALC_150() {
	CALC_F2_PRE(0x238, ECX, EBX, EDX)
	CALC_F2_POST(ECX, ESI, EDI, EDX)
}
1388
// CALC_151 emits step 151: F2 on the word at 0x23c(R15), with
// PRECALC_39(Y13, 0x60, 0x240) interleaved.
func CALC_151() {
	CALC_F2_PRE(0x23c, EDX, ECX, EAX)
	PRECALC_39(Y13, 0x60, 0x240)
	CALC_F2_POST(EDX, EBX, ESI, EAX)
}
1394
// CALC_152 emits step 152: F2 on the word at 0x250(R15), with PRECALC_32(Y14, Y13) interleaved.
func CALC_152() {
	CALC_F2_PRE(0x250, EAX, EDX, EDI)
	PRECALC_32(Y14, Y13)
	CALC_F2_POST(EAX, ECX, EBX, EDI)
}
1400
1401 func CALC_153() {
1402 CALC_F2_PRE(0x254, EDI, EAX, ESI)
1403 PRECALC_33(Y8, Y12)
1404 CALC_F2_POST(EDI, EDX, ECX, ESI)
1405 }
1406
1407 func CALC_154() {
1408 CALC_F2_PRE(0x258, ESI, EDI, EBX)
1409 PRECALC_34(Y3)
1410 CALC_F2_POST(ESI, EAX, EDX, EBX)
1411 }
1412
1413 func CALC_155() {
1414 CALC_F2_PRE(0x25c, EBX, ESI, ECX)
1415 PRECALC_35(Y12)
1416 CALC_F2_POST(EBX, EDI, EAX, ECX)
1417 }
1418
1419 func CALC_156() {
1420 CALC_F2_PRE(0x270, ECX, EBX, EDX)
1421 PRECALC_36(Y12)
1422 CALC_F2_POST(ECX, ESI, EDI, EDX)
1423 }
1424
1425 func CALC_157() {
1426 CALC_F2_PRE(0x274, EDX, ECX, EAX)
1427 PRECALC_37(Y12)
1428 CALC_F2_POST(EDX, EBX, ESI, EAX)
1429 }
1430
1431 func CALC_158() {
1432 CALC_F2_PRE(0x278, EAX, EDX, EDI)
1433 CALC_F2_POST(EAX, ECX, EBX, EDI)
1434 }
1435
1436 func CALC_159() {
1437 ADDL(Mem{Base: R15}.Offset(0x27c), ESI)
1438 LEAL(Mem{Base: SI, Index: AX, Scale: 1}, ESI)
1439 RORXL(Imm(0x1b), EDI, R12L)
1440 PRECALC_39(Y12, 0x60, 0x260)
1441 ADDL(R12L, ESI)
1442 }
1443
1444 func CALC() {
1445 MOVL(Mem{Base: R9}, ECX)
1446 MOVL(Mem{Base: R9}.Offset(4), ESI)
1447 MOVL(Mem{Base: R9}.Offset(8), EDI)
1448 MOVL(Mem{Base: R9}.Offset(12), EAX)
1449 MOVL(Mem{Base: R9}.Offset(16), EDX)
1450 MOVQ(RSP, R14)
1451 LEAQ(Mem{Base: SP}.Offset(2*4*80+32), R15)
1452 PRECALC()
1453 XCHGQ(R15, R14)
1454 loop_avx2()
1455 begin()
1456 }
1457
1458
1459 func loop_avx2() {
1460 Label("loop")
1461 CMPQ(R10, R8)
1462 JNE(LabelRef("begin"))
1463 VZEROUPPER()
1464 RET()
1465 }
1466
1467 func begin() {
1468 Label("begin")
1469 CALC_0()
1470 CALC_1()
1471 CALC_2()
1472 CALC_3()
1473 CALC_4()
1474 CALC_5()
1475 CALC_6()
1476 CALC_7()
1477 CALC_8()
1478 CALC_9()
1479 CALC_10()
1480 CALC_11()
1481 CALC_12()
1482 CALC_13()
1483 CALC_14()
1484 CALC_15()
1485 CALC_16()
1486 CALC_17()
1487 CALC_18()
1488 CALC_19()
1489 CALC_20()
1490 CALC_21()
1491 CALC_22()
1492 CALC_23()
1493 CALC_24()
1494 CALC_25()
1495 CALC_26()
1496 CALC_27()
1497 CALC_28()
1498 CALC_29()
1499 CALC_30()
1500 CALC_31()
1501 CALC_32()
1502 CALC_33()
1503 CALC_34()
1504 CALC_35()
1505 CALC_36()
1506 CALC_37()
1507 CALC_38()
1508 CALC_39()
1509 CALC_40()
1510 CALC_41()
1511 CALC_42()
1512 CALC_43()
1513 CALC_44()
1514 CALC_45()
1515 CALC_46()
1516 CALC_47()
1517 CALC_48()
1518 CALC_49()
1519 CALC_50()
1520 CALC_51()
1521 CALC_52()
1522 CALC_53()
1523 CALC_54()
1524 CALC_55()
1525 CALC_56()
1526 CALC_57()
1527 CALC_58()
1528 CALC_59()
1529 ADDQ(Imm(128), R10)
1530 CMPQ(R10, R11)
1531 CMOVQCC(R8, R10)
1532 CALC_60()
1533 CALC_61()
1534 CALC_62()
1535 CALC_63()
1536 CALC_64()
1537 CALC_65()
1538 CALC_66()
1539 CALC_67()
1540 CALC_68()
1541 CALC_69()
1542 CALC_70()
1543 CALC_71()
1544 CALC_72()
1545 CALC_73()
1546 CALC_74()
1547 CALC_75()
1548 CALC_76()
1549 CALC_77()
1550 CALC_78()
1551 CALC_79()
1552 UPDATE_HASH(EAX, EDX, EBX, ESI, EDI)
1553 CMPQ(R10, R8)
1554 JE(LabelRef("loop"))
1555 MOVL(EDX, ECX)
1556 CALC_80()
1557 CALC_81()
1558 CALC_82()
1559 CALC_83()
1560 CALC_84()
1561 CALC_85()
1562 CALC_86()
1563 CALC_87()
1564 CALC_88()
1565 CALC_89()
1566 CALC_90()
1567 CALC_91()
1568 CALC_92()
1569 CALC_93()
1570 CALC_94()
1571 CALC_95()
1572 CALC_96()
1573 CALC_97()
1574 CALC_98()
1575 CALC_99()
1576 CALC_100()
1577 CALC_101()
1578 CALC_102()
1579 CALC_103()
1580 CALC_104()
1581 CALC_105()
1582 CALC_106()
1583 CALC_107()
1584 CALC_108()
1585 CALC_109()
1586 CALC_110()
1587 CALC_111()
1588 CALC_112()
1589 CALC_113()
1590 CALC_114()
1591 CALC_115()
1592 CALC_116()
1593 CALC_117()
1594 CALC_118()
1595 CALC_119()
1596 CALC_120()
1597 CALC_121()
1598 CALC_122()
1599 CALC_123()
1600 CALC_124()
1601 CALC_125()
1602 CALC_126()
1603 CALC_127()
1604 CALC_128()
1605 CALC_129()
1606 CALC_130()
1607 CALC_131()
1608 CALC_132()
1609 CALC_133()
1610 CALC_134()
1611 CALC_135()
1612 CALC_136()
1613 CALC_137()
1614 CALC_138()
1615 CALC_139()
1616 ADDQ(Imm(128), R13)
1617 CMPQ(R13, R11)
1618 CMOVQCC(R8, R10)
1619 CALC_140()
1620 CALC_141()
1621 CALC_142()
1622 CALC_143()
1623 CALC_144()
1624 CALC_145()
1625 CALC_146()
1626 CALC_147()
1627 CALC_148()
1628 CALC_149()
1629 CALC_150()
1630 CALC_151()
1631 CALC_152()
1632 CALC_153()
1633 CALC_154()
1634 CALC_155()
1635 CALC_156()
1636 CALC_157()
1637 CALC_158()
1638 CALC_159()
1639 UPDATE_HASH(ESI, EDI, EDX, ECX, EBX)
1640 MOVL(ESI, R12L)
1641 MOVL(EDI, ESI)
1642 MOVL(EDX, EDI)
1643 MOVL(EBX, EDX)
1644 MOVL(ECX, EAX)
1645 MOVL(R12L, ECX)
1646 XCHGQ(R15, R14)
1647 JMP(LabelRef("loop"))
1648 }
1649
1650 func blockAVX2() {
1651 Implement("blockAVX2")
1652 AllocLocal(1408)
1653
1654 Load(Param("dig"), RDI)
1655 Load(Param("p").Base(), RSI)
1656 Load(Param("p").Len(), RDX)
1657 SHRQ(Imm(6), RDX)
1658 SHLQ(Imm(6), RDX)
1659
1660 K_XMM_AR := K_XMM_AR_DATA()
1661 LEAQ(K_XMM_AR, R8)
1662
1663 MOVQ(RDI, R9)
1664 MOVQ(RSI, R10)
1665 LEAQ(Mem{Base: SI}.Offset(64), R13)
1666
1667 ADDQ(RSI, RDX)
1668 ADDQ(Imm(64), RDX)
1669 MOVQ(RDX, R11)
1670
1671 CMPQ(R13, R11)
1672 CMOVQCC(R8, R13)
1673
1674 BSWAP_SHUFB_CTL := BSWAP_SHUFB_CTL_DATA()
1675 VMOVDQU(BSWAP_SHUFB_CTL, Y10)
1676 CALC()
1677 }
1678
1679
1680
1681
1682 var (
1683 K_XMM_AR_ptr, BSWAP_SHUFB_CTL_ptr *Mem
1684 )
1685
1686
1687
// _K lists the four 32-bit constants that K_XMM_AR_DATA replicates into the
// K_XMM_AR table. The first three are the same values the scalar ROUND1,
// ROUND2 and ROUND3 helpers pass to MIX.
var _K = []uint32{0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6}
1694
1695 func K_XMM_AR_DATA() Mem {
1696 if K_XMM_AR_ptr != nil {
1697 return *K_XMM_AR_ptr
1698 }
1699
1700 K_XMM_AR := GLOBL("K_XMM_AR", RODATA)
1701 K_XMM_AR_ptr = &K_XMM_AR
1702
1703 offset_idx := 0
1704 for _, v := range _K {
1705 DATA((offset_idx+0)*4, U32(v))
1706 DATA((offset_idx+1)*4, U32(v))
1707 DATA((offset_idx+2)*4, U32(v))
1708 DATA((offset_idx+3)*4, U32(v))
1709 DATA((offset_idx+4)*4, U32(v))
1710 DATA((offset_idx+5)*4, U32(v))
1711 DATA((offset_idx+6)*4, U32(v))
1712 DATA((offset_idx+7)*4, U32(v))
1713 offset_idx += 8
1714 }
1715 return K_XMM_AR
1716 }
1717
// BSWAP_SHUFB_CTL_CONSTANTS holds the eight 32-bit words written to the
// "BSWAP_SHUFB_CTL" data section: the same four-word pattern twice, which
// blockAVX2 loads into Y10 as a 256-bit value.
var BSWAP_SHUFB_CTL_CONSTANTS = [8]uint32{
	0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, // first 128 bits
	0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, // second 128 bits
}
1728
1729 func BSWAP_SHUFB_CTL_DATA() Mem {
1730 if BSWAP_SHUFB_CTL_ptr != nil {
1731 return *BSWAP_SHUFB_CTL_ptr
1732 }
1733
1734 BSWAP_SHUFB_CTL := GLOBL("BSWAP_SHUFB_CTL", RODATA)
1735 BSWAP_SHUFB_CTL_ptr = &BSWAP_SHUFB_CTL
1736 for i, v := range BSWAP_SHUFB_CTL_CONSTANTS {
1737
1738 DATA(i*4, U32(v))
1739 }
1740 return BSWAP_SHUFB_CTL
1741 }
1742
View as plain text