Text file
src/crypto/sha1/sha1block_amd64.s
1 // Code generated by command: go run sha1block_amd64_asm.go -out ../sha1block_amd64.s -pkg sha1. DO NOT EDIT.
2
3 //go:build !purego
4
5 #include "textflag.h"
6
7 // func blockAVX2(dig *digest, p []byte)
8 // Requires: AVX, AVX2, BMI, BMI2, CMOV
9 TEXT ·blockAVX2(SB), $1408-32
10 MOVQ dig+0(FP), DI
11 MOVQ p_base+8(FP), SI
12 MOVQ p_len+16(FP), DX
13 SHRQ $0x06, DX
14 SHLQ $0x06, DX
15 LEAQ K_XMM_AR<>+0(SB), R8
16 MOVQ DI, R9
17 MOVQ SI, R10
18 LEAQ 64(SI), R13
19 ADDQ SI, DX
20 ADDQ $0x40, DX
21 MOVQ DX, R11
22 CMPQ R13, R11
23 CMOVQCC R8, R13
24 VMOVDQU BSWAP_SHUFB_CTL<>+0(SB), Y10
25 MOVL (R9), CX
26 MOVL 4(R9), SI
27 MOVL 8(R9), DI
28 MOVL 12(R9), AX
29 MOVL 16(R9), DX
30 MOVQ SP, R14
31 LEAQ 672(SP), R15
32 VMOVDQU (R10), X0
33 VINSERTI128 $0x01, (R13), Y0, Y0
34 VPSHUFB Y10, Y0, Y15
35 VPADDD (R8), Y15, Y0
36 VMOVDQU Y0, (R14)
37 VMOVDQU 16(R10), X0
38 VINSERTI128 $0x01, 16(R13), Y0, Y0
39 VPSHUFB Y10, Y0, Y14
40 VPADDD (R8), Y14, Y0
41 VMOVDQU Y0, 32(R14)
42 VMOVDQU 32(R10), X0
43 VINSERTI128 $0x01, 32(R13), Y0, Y0
44 VPSHUFB Y10, Y0, Y13
45 VPADDD (R8), Y13, Y0
46 VMOVDQU Y0, 64(R14)
47 VMOVDQU 48(R10), X0
48 VINSERTI128 $0x01, 48(R13), Y0, Y0
49 VPSHUFB Y10, Y0, Y12
50 VPADDD (R8), Y12, Y0
51 VMOVDQU Y0, 96(R14)
52 VPALIGNR $0x08, Y15, Y14, Y8
53 VPSRLDQ $0x04, Y12, Y0
54 VPXOR Y13, Y8, Y8
55 VPXOR Y15, Y0, Y0
56 VPXOR Y0, Y8, Y8
57 VPSLLDQ $0x0c, Y8, Y9
58 VPSLLD $0x01, Y8, Y0
59 VPSRLD $0x1f, Y8, Y8
60 VPOR Y8, Y0, Y0
61 VPSLLD $0x02, Y9, Y8
62 VPSRLD $0x1e, Y9, Y9
63 VPXOR Y8, Y0, Y0
64 VPXOR Y9, Y0, Y8
65 VPADDD (R8), Y8, Y0
66 VMOVDQU Y0, 128(R14)
67 VPALIGNR $0x08, Y14, Y13, Y7
68 VPSRLDQ $0x04, Y8, Y0
69 VPXOR Y12, Y7, Y7
70 VPXOR Y14, Y0, Y0
71 VPXOR Y0, Y7, Y7
72 VPSLLDQ $0x0c, Y7, Y9
73 VPSLLD $0x01, Y7, Y0
74 VPSRLD $0x1f, Y7, Y7
75 VPOR Y7, Y0, Y0
76 VPSLLD $0x02, Y9, Y7
77 VPSRLD $0x1e, Y9, Y9
78 VPXOR Y7, Y0, Y0
79 VPXOR Y9, Y0, Y7
80 VPADDD 32(R8), Y7, Y0
81 VMOVDQU Y0, 160(R14)
82 VPALIGNR $0x08, Y13, Y12, Y5
83 VPSRLDQ $0x04, Y7, Y0
84 VPXOR Y8, Y5, Y5
85 VPXOR Y13, Y0, Y0
86 VPXOR Y0, Y5, Y5
87 VPSLLDQ $0x0c, Y5, Y9
88 VPSLLD $0x01, Y5, Y0
89 VPSRLD $0x1f, Y5, Y5
90 VPOR Y5, Y0, Y0
91 VPSLLD $0x02, Y9, Y5
92 VPSRLD $0x1e, Y9, Y9
93 VPXOR Y5, Y0, Y0
94 VPXOR Y9, Y0, Y5
95 VPADDD 32(R8), Y5, Y0
96 VMOVDQU Y0, 192(R14)
97 VPALIGNR $0x08, Y12, Y8, Y3
98 VPSRLDQ $0x04, Y5, Y0
99 VPXOR Y7, Y3, Y3
100 VPXOR Y12, Y0, Y0
101 VPXOR Y0, Y3, Y3
102 VPSLLDQ $0x0c, Y3, Y9
103 VPSLLD $0x01, Y3, Y0
104 VPSRLD $0x1f, Y3, Y3
105 VPOR Y3, Y0, Y0
106 VPSLLD $0x02, Y9, Y3
107 VPSRLD $0x1e, Y9, Y9
108 VPXOR Y3, Y0, Y0
109 VPXOR Y9, Y0, Y3
110 VPADDD 32(R8), Y3, Y0
111 VMOVDQU Y0, 224(R14)
112 VPALIGNR $0x08, Y5, Y3, Y0
113 VPXOR Y14, Y15, Y15
114 VPXOR Y8, Y0, Y0
115 VPXOR Y0, Y15, Y15
116 VPSLLD $0x02, Y15, Y0
117 VPSRLD $0x1e, Y15, Y15
118 VPOR Y15, Y0, Y15
119 VPADDD 32(R8), Y15, Y0
120 VMOVDQU Y0, 256(R14)
121 VPALIGNR $0x08, Y3, Y15, Y0
122 VPXOR Y13, Y14, Y14
123 VPXOR Y7, Y0, Y0
124 VPXOR Y0, Y14, Y14
125 VPSLLD $0x02, Y14, Y0
126 VPSRLD $0x1e, Y14, Y14
127 VPOR Y14, Y0, Y14
128 VPADDD 32(R8), Y14, Y0
129 VMOVDQU Y0, 288(R14)
130 VPALIGNR $0x08, Y15, Y14, Y0
131 VPXOR Y12, Y13, Y13
132 VPXOR Y5, Y0, Y0
133 VPXOR Y0, Y13, Y13
134 VPSLLD $0x02, Y13, Y0
135 VPSRLD $0x1e, Y13, Y13
136 VPOR Y13, Y0, Y13
137 VPADDD 64(R8), Y13, Y0
138 VMOVDQU Y0, 320(R14)
139 VPALIGNR $0x08, Y14, Y13, Y0
140 VPXOR Y8, Y12, Y12
141 VPXOR Y3, Y0, Y0
142 VPXOR Y0, Y12, Y12
143 VPSLLD $0x02, Y12, Y0
144 VPSRLD $0x1e, Y12, Y12
145 VPOR Y12, Y0, Y12
146 VPADDD 64(R8), Y12, Y0
147 VMOVDQU Y0, 352(R14)
148 VPALIGNR $0x08, Y13, Y12, Y0
149 VPXOR Y7, Y8, Y8
150 VPXOR Y15, Y0, Y0
151 VPXOR Y0, Y8, Y8
152 VPSLLD $0x02, Y8, Y0
153 VPSRLD $0x1e, Y8, Y8
154 VPOR Y8, Y0, Y8
155 VPADDD 64(R8), Y8, Y0
156 VMOVDQU Y0, 384(R14)
157 VPALIGNR $0x08, Y12, Y8, Y0
158 VPXOR Y5, Y7, Y7
159 VPXOR Y14, Y0, Y0
160 VPXOR Y0, Y7, Y7
161 VPSLLD $0x02, Y7, Y0
162 VPSRLD $0x1e, Y7, Y7
163 VPOR Y7, Y0, Y7
164 VPADDD 64(R8), Y7, Y0
165 VMOVDQU Y0, 416(R14)
166 VPALIGNR $0x08, Y8, Y7, Y0
167 VPXOR Y3, Y5, Y5
168 VPXOR Y13, Y0, Y0
169 VPXOR Y0, Y5, Y5
170 VPSLLD $0x02, Y5, Y0
171 VPSRLD $0x1e, Y5, Y5
172 VPOR Y5, Y0, Y5
173 VPADDD 64(R8), Y5, Y0
174 VMOVDQU Y0, 448(R14)
175 VPALIGNR $0x08, Y7, Y5, Y0
176 VPXOR Y15, Y3, Y3
177 VPXOR Y12, Y0, Y0
178 VPXOR Y0, Y3, Y3
179 VPSLLD $0x02, Y3, Y0
180 VPSRLD $0x1e, Y3, Y3
181 VPOR Y3, Y0, Y3
182 VPADDD 96(R8), Y3, Y0
183 VMOVDQU Y0, 480(R14)
184 VPALIGNR $0x08, Y5, Y3, Y0
185 VPXOR Y14, Y15, Y15
186 VPXOR Y8, Y0, Y0
187 VPXOR Y0, Y15, Y15
188 VPSLLD $0x02, Y15, Y0
189 VPSRLD $0x1e, Y15, Y15
190 VPOR Y15, Y0, Y15
191 VPADDD 96(R8), Y15, Y0
192 VMOVDQU Y0, 512(R14)
193 VPALIGNR $0x08, Y3, Y15, Y0
194 VPXOR Y13, Y14, Y14
195 VPXOR Y7, Y0, Y0
196 VPXOR Y0, Y14, Y14
197 VPSLLD $0x02, Y14, Y0
198 VPSRLD $0x1e, Y14, Y14
199 VPOR Y14, Y0, Y14
200 VPADDD 96(R8), Y14, Y0
201 VMOVDQU Y0, 544(R14)
202 VPALIGNR $0x08, Y15, Y14, Y0
203 VPXOR Y12, Y13, Y13
204 VPXOR Y5, Y0, Y0
205 VPXOR Y0, Y13, Y13
206 VPSLLD $0x02, Y13, Y0
207 VPSRLD $0x1e, Y13, Y13
208 VPOR Y13, Y0, Y13
209 VPADDD 96(R8), Y13, Y0
210 VMOVDQU Y0, 576(R14)
211 VPALIGNR $0x08, Y14, Y13, Y0
212 VPXOR Y8, Y12, Y12
213 VPXOR Y3, Y0, Y0
214 VPXOR Y0, Y12, Y12
215 VPSLLD $0x02, Y12, Y0
216 VPSRLD $0x1e, Y12, Y12
217 VPOR Y12, Y0, Y12
218 VPADDD 96(R8), Y12, Y0
219 VMOVDQU Y0, 608(R14)
220 XCHGQ R15, R14
221
222 loop:
223 CMPQ R10, R8
224 JNE begin
225 VZEROUPPER
226 RET
227
228 begin:
229 MOVL SI, BX
230 RORXL $0x02, SI, SI
231 ANDNL AX, BX, BP
232 ANDL DI, BX
233 XORL BP, BX
234 ADDL (R15), DX
235 ANDNL DI, CX, BP
236 LEAL (DX)(BX*1), DX
237 RORXL $0x1b, CX, R12
238 RORXL $0x02, CX, BX
239 VMOVDQU 128(R10), X0
240 ANDL SI, CX
241 XORL BP, CX
242 LEAL (DX)(R12*1), DX
243 ADDL 4(R15), AX
244 ANDNL SI, DX, BP
245 LEAL (AX)(CX*1), AX
246 RORXL $0x1b, DX, R12
247 RORXL $0x02, DX, CX
248 VINSERTI128 $0x01, 128(R13), Y0, Y0
249 ANDL BX, DX
250 XORL BP, DX
251 LEAL (AX)(R12*1), AX
252 ADDL 8(R15), DI
253 ANDNL BX, AX, BP
254 LEAL (DI)(DX*1), DI
255 RORXL $0x1b, AX, R12
256 RORXL $0x02, AX, DX
257 VPSHUFB Y10, Y0, Y15
258 ANDL CX, AX
259 XORL BP, AX
260 LEAL (DI)(R12*1), DI
261 ADDL 12(R15), SI
262 ANDNL CX, DI, BP
263 LEAL (SI)(AX*1), SI
264 RORXL $0x1b, DI, R12
265 RORXL $0x02, DI, AX
266 ANDL DX, DI
267 XORL BP, DI
268 LEAL (SI)(R12*1), SI
269 ADDL 32(R15), BX
270 ANDNL DX, SI, BP
271 LEAL (BX)(DI*1), BX
272 RORXL $0x1b, SI, R12
273 RORXL $0x02, SI, DI
274 VPADDD (R8), Y15, Y0
275 ANDL AX, SI
276 XORL BP, SI
277 LEAL (BX)(R12*1), BX
278 ADDL 36(R15), CX
279 ANDNL AX, BX, BP
280 LEAL (CX)(SI*1), CX
281 RORXL $0x1b, BX, R12
282 RORXL $0x02, BX, SI
283 ANDL DI, BX
284 XORL BP, BX
285 LEAL (CX)(R12*1), CX
286 ADDL 40(R15), DX
287 ANDNL DI, CX, BP
288 LEAL (DX)(BX*1), DX
289 RORXL $0x1b, CX, R12
290 RORXL $0x02, CX, BX
291 ANDL SI, CX
292 XORL BP, CX
293 LEAL (DX)(R12*1), DX
294 ADDL 44(R15), AX
295 ANDNL SI, DX, BP
296 LEAL (AX)(CX*1), AX
297 RORXL $0x1b, DX, R12
298 RORXL $0x02, DX, CX
299 VMOVDQU Y0, (R14)
300 ANDL BX, DX
301 XORL BP, DX
302 LEAL (AX)(R12*1), AX
303 ADDL 64(R15), DI
304 ANDNL BX, AX, BP
305 LEAL (DI)(DX*1), DI
306 RORXL $0x1b, AX, R12
307 RORXL $0x02, AX, DX
308 VMOVDQU 144(R10), X0
309 ANDL CX, AX
310 XORL BP, AX
311 LEAL (DI)(R12*1), DI
312 ADDL 68(R15), SI
313 ANDNL CX, DI, BP
314 LEAL (SI)(AX*1), SI
315 RORXL $0x1b, DI, R12
316 RORXL $0x02, DI, AX
317 VINSERTI128 $0x01, 144(R13), Y0, Y0
318 ANDL DX, DI
319 XORL BP, DI
320 LEAL (SI)(R12*1), SI
321 ADDL 72(R15), BX
322 ANDNL DX, SI, BP
323 LEAL (BX)(DI*1), BX
324 RORXL $0x1b, SI, R12
325 RORXL $0x02, SI, DI
326 VPSHUFB Y10, Y0, Y14
327 ANDL AX, SI
328 XORL BP, SI
329 LEAL (BX)(R12*1), BX
330 ADDL 76(R15), CX
331 ANDNL AX, BX, BP
332 LEAL (CX)(SI*1), CX
333 RORXL $0x1b, BX, R12
334 RORXL $0x02, BX, SI
335 ANDL DI, BX
336 XORL BP, BX
337 LEAL (CX)(R12*1), CX
338 ADDL 96(R15), DX
339 ANDNL DI, CX, BP
340 LEAL (DX)(BX*1), DX
341 RORXL $0x1b, CX, R12
342 RORXL $0x02, CX, BX
343 VPADDD (R8), Y14, Y0
344 ANDL SI, CX
345 XORL BP, CX
346 LEAL (DX)(R12*1), DX
347 ADDL 100(R15), AX
348 ANDNL SI, DX, BP
349 LEAL (AX)(CX*1), AX
350 RORXL $0x1b, DX, R12
351 RORXL $0x02, DX, CX
352 ANDL BX, DX
353 XORL BP, DX
354 LEAL (AX)(R12*1), AX
355 ADDL 104(R15), DI
356 ANDNL BX, AX, BP
357 LEAL (DI)(DX*1), DI
358 RORXL $0x1b, AX, R12
359 RORXL $0x02, AX, DX
360 ANDL CX, AX
361 XORL BP, AX
362 LEAL (DI)(R12*1), DI
363 ADDL 108(R15), SI
364 ANDNL CX, DI, BP
365 LEAL (SI)(AX*1), SI
366 RORXL $0x1b, DI, R12
367 RORXL $0x02, DI, AX
368 VMOVDQU Y0, 32(R14)
369 ANDL DX, DI
370 XORL BP, DI
371 LEAL (SI)(R12*1), SI
372 ADDL 128(R15), BX
373 ANDNL DX, SI, BP
374 LEAL (BX)(DI*1), BX
375 RORXL $0x1b, SI, R12
376 RORXL $0x02, SI, DI
377 VMOVDQU 160(R10), X0
378 ANDL AX, SI
379 XORL BP, SI
380 LEAL (BX)(R12*1), BX
381 ADDL 132(R15), CX
382 ANDNL AX, BX, BP
383 LEAL (CX)(SI*1), CX
384 RORXL $0x1b, BX, R12
385 RORXL $0x02, BX, SI
386 VINSERTI128 $0x01, 160(R13), Y0, Y0
387 ANDL DI, BX
388 XORL BP, BX
389 LEAL (CX)(R12*1), CX
390 ADDL 136(R15), DX
391 ANDNL DI, CX, BP
392 LEAL (DX)(BX*1), DX
393 RORXL $0x1b, CX, R12
394 RORXL $0x02, CX, BX
395 VPSHUFB Y10, Y0, Y13
396 ANDL SI, CX
397 XORL BP, CX
398 LEAL (DX)(R12*1), DX
399 ADDL 140(R15), AX
400 LEAL (AX)(CX*1), AX
401 RORXL $0x1b, DX, R12
402 RORXL $0x02, DX, CX
403 XORL BX, DX
404 ADDL R12, AX
405 XORL SI, DX
406 ADDL 160(R15), DI
407 LEAL (DI)(DX*1), DI
408 RORXL $0x1b, AX, R12
409 RORXL $0x02, AX, DX
410 VPADDD (R8), Y13, Y0
411 XORL CX, AX
412 ADDL R12, DI
413 XORL BX, AX
414 ADDL 164(R15), SI
415 LEAL (SI)(AX*1), SI
416 RORXL $0x1b, DI, R12
417 RORXL $0x02, DI, AX
418 XORL DX, DI
419 ADDL R12, SI
420 XORL CX, DI
421 ADDL 168(R15), BX
422 LEAL (BX)(DI*1), BX
423 RORXL $0x1b, SI, R12
424 RORXL $0x02, SI, DI
425 XORL AX, SI
426 ADDL R12, BX
427 XORL DX, SI
428 ADDL 172(R15), CX
429 LEAL (CX)(SI*1), CX
430 RORXL $0x1b, BX, R12
431 RORXL $0x02, BX, SI
432 VMOVDQU Y0, 64(R14)
433 XORL DI, BX
434 ADDL R12, CX
435 XORL AX, BX
436 ADDL 192(R15), DX
437 LEAL (DX)(BX*1), DX
438 RORXL $0x1b, CX, R12
439 RORXL $0x02, CX, BX
440 VMOVDQU 176(R10), X0
441 XORL SI, CX
442 ADDL R12, DX
443 XORL DI, CX
444 ADDL 196(R15), AX
445 LEAL (AX)(CX*1), AX
446 RORXL $0x1b, DX, R12
447 RORXL $0x02, DX, CX
448 VINSERTI128 $0x01, 176(R13), Y0, Y0
449 XORL BX, DX
450 ADDL R12, AX
451 XORL SI, DX
452 ADDL 200(R15), DI
453 LEAL (DI)(DX*1), DI
454 RORXL $0x1b, AX, R12
455 RORXL $0x02, AX, DX
456 VPSHUFB Y10, Y0, Y12
457 XORL CX, AX
458 ADDL R12, DI
459 XORL BX, AX
460 ADDL 204(R15), SI
461 LEAL (SI)(AX*1), SI
462 RORXL $0x1b, DI, R12
463 RORXL $0x02, DI, AX
464 XORL DX, DI
465 ADDL R12, SI
466 XORL CX, DI
467 ADDL 224(R15), BX
468 LEAL (BX)(DI*1), BX
469 RORXL $0x1b, SI, R12
470 RORXL $0x02, SI, DI
471 VPADDD (R8), Y12, Y0
472 XORL AX, SI
473 ADDL R12, BX
474 XORL DX, SI
475 ADDL 228(R15), CX
476 LEAL (CX)(SI*1), CX
477 RORXL $0x1b, BX, R12
478 RORXL $0x02, BX, SI
479 XORL DI, BX
480 ADDL R12, CX
481 XORL AX, BX
482 ADDL 232(R15), DX
483 LEAL (DX)(BX*1), DX
484 RORXL $0x1b, CX, R12
485 RORXL $0x02, CX, BX
486 XORL SI, CX
487 ADDL R12, DX
488 XORL DI, CX
489 ADDL 236(R15), AX
490 LEAL (AX)(CX*1), AX
491 RORXL $0x1b, DX, R12
492 RORXL $0x02, DX, CX
493 VMOVDQU Y0, 96(R14)
494 XORL BX, DX
495 ADDL R12, AX
496 XORL SI, DX
497 ADDL 256(R15), DI
498 LEAL (DI)(DX*1), DI
499 RORXL $0x1b, AX, R12
500 RORXL $0x02, AX, DX
501 VPALIGNR $0x08, Y15, Y14, Y8
502 VPSRLDQ $0x04, Y12, Y0
503 XORL CX, AX
504 ADDL R12, DI
505 XORL BX, AX
506 ADDL 260(R15), SI
507 LEAL (SI)(AX*1), SI
508 RORXL $0x1b, DI, R12
509 RORXL $0x02, DI, AX
510 VPXOR Y13, Y8, Y8
511 VPXOR Y15, Y0, Y0
512 XORL DX, DI
513 ADDL R12, SI
514 XORL CX, DI
515 ADDL 264(R15), BX
516 LEAL (BX)(DI*1), BX
517 RORXL $0x1b, SI, R12
518 RORXL $0x02, SI, DI
519 VPXOR Y0, Y8, Y8
520 VPSLLDQ $0x0c, Y8, Y9
521 XORL AX, SI
522 ADDL R12, BX
523 XORL DX, SI
524 ADDL 268(R15), CX
525 LEAL (CX)(SI*1), CX
526 RORXL $0x1b, BX, R12
527 RORXL $0x02, BX, SI
528 VPSLLD $0x01, Y8, Y0
529 VPSRLD $0x1f, Y8, Y8
530 XORL DI, BX
531 ADDL R12, CX
532 XORL AX, BX
533 ADDL 288(R15), DX
534 LEAL (DX)(BX*1), DX
535 RORXL $0x1b, CX, R12
536 RORXL $0x02, CX, BX
537 VPOR Y8, Y0, Y0
538 VPSLLD $0x02, Y9, Y8
539 XORL SI, CX
540 ADDL R12, DX
541 XORL DI, CX
542 ADDL 292(R15), AX
543 LEAL (AX)(CX*1), AX
544 RORXL $0x1b, DX, R12
545 RORXL $0x02, DX, CX
546 VPSRLD $0x1e, Y9, Y9
547 VPXOR Y8, Y0, Y0
548 XORL BX, DX
549 ADDL R12, AX
550 XORL SI, DX
551 ADDL 296(R15), DI
552 LEAL (DI)(DX*1), DI
553 RORXL $0x1b, AX, R12
554 RORXL $0x02, AX, DX
555 XORL CX, AX
556 ADDL R12, DI
557 XORL BX, AX
558 ADDL 300(R15), SI
559 VPXOR Y9, Y0, Y8
560 VPADDD (R8), Y8, Y0
561 VMOVDQU Y0, 128(R14)
562 LEAL (SI)(AX*1), SI
563 MOVL DX, BP
564 ORL DI, BP
565 RORXL $0x1b, DI, R12
566 RORXL $0x02, DI, AX
567 ANDL CX, BP
568 ANDL DX, DI
569 ORL BP, DI
570 ADDL R12, SI
571 ADDL 320(R15), BX
572 VPALIGNR $0x08, Y14, Y13, Y7
573 VPSRLDQ $0x04, Y8, Y0
574 LEAL (BX)(DI*1), BX
575 MOVL AX, BP
576 ORL SI, BP
577 RORXL $0x1b, SI, R12
578 RORXL $0x02, SI, DI
579 ANDL DX, BP
580 ANDL AX, SI
581 ORL BP, SI
582 ADDL R12, BX
583 ADDL 324(R15), CX
584 VPXOR Y12, Y7, Y7
585 VPXOR Y14, Y0, Y0
586 LEAL (CX)(SI*1), CX
587 MOVL DI, BP
588 ORL BX, BP
589 RORXL $0x1b, BX, R12
590 RORXL $0x02, BX, SI
591 ANDL AX, BP
592 ANDL DI, BX
593 ORL BP, BX
594 ADDL R12, CX
595 ADDL 328(R15), DX
596 VPXOR Y0, Y7, Y7
597 VPSLLDQ $0x0c, Y7, Y9
598 LEAL (DX)(BX*1), DX
599 MOVL SI, BP
600 ORL CX, BP
601 RORXL $0x1b, CX, R12
602 RORXL $0x02, CX, BX
603 ANDL DI, BP
604 ANDL SI, CX
605 ORL BP, CX
606 ADDL R12, DX
607 ADDL 332(R15), AX
608 VPSLLD $0x01, Y7, Y0
609 VPSRLD $0x1f, Y7, Y7
610 LEAL (AX)(CX*1), AX
611 MOVL BX, BP
612 ORL DX, BP
613 RORXL $0x1b, DX, R12
614 RORXL $0x02, DX, CX
615 ANDL SI, BP
616 ANDL BX, DX
617 ORL BP, DX
618 ADDL R12, AX
619 ADDL 352(R15), DI
620 VPOR Y7, Y0, Y0
621 VPSLLD $0x02, Y9, Y7
622 LEAL (DI)(DX*1), DI
623 MOVL CX, BP
624 ORL AX, BP
625 RORXL $0x1b, AX, R12
626 RORXL $0x02, AX, DX
627 ANDL BX, BP
628 ANDL CX, AX
629 ORL BP, AX
630 ADDL R12, DI
631 ADDL 356(R15), SI
632 VPSRLD $0x1e, Y9, Y9
633 VPXOR Y7, Y0, Y0
634 LEAL (SI)(AX*1), SI
635 MOVL DX, BP
636 ORL DI, BP
637 RORXL $0x1b, DI, R12
638 RORXL $0x02, DI, AX
639 ANDL CX, BP
640 ANDL DX, DI
641 ORL BP, DI
642 ADDL R12, SI
643 ADDL 360(R15), BX
644 LEAL (BX)(DI*1), BX
645 MOVL AX, BP
646 ORL SI, BP
647 RORXL $0x1b, SI, R12
648 RORXL $0x02, SI, DI
649 ANDL DX, BP
650 ANDL AX, SI
651 ORL BP, SI
652 ADDL R12, BX
653 ADDL 364(R15), CX
654 VPXOR Y9, Y0, Y7
655 VPADDD 32(R8), Y7, Y0
656 VMOVDQU Y0, 160(R14)
657 LEAL (CX)(SI*1), CX
658 MOVL DI, BP
659 ORL BX, BP
660 RORXL $0x1b, BX, R12
661 RORXL $0x02, BX, SI
662 ANDL AX, BP
663 ANDL DI, BX
664 ORL BP, BX
665 ADDL R12, CX
666 ADDL 384(R15), DX
667 VPALIGNR $0x08, Y13, Y12, Y5
668 VPSRLDQ $0x04, Y7, Y0
669 LEAL (DX)(BX*1), DX
670 MOVL SI, BP
671 ORL CX, BP
672 RORXL $0x1b, CX, R12
673 RORXL $0x02, CX, BX
674 ANDL DI, BP
675 ANDL SI, CX
676 ORL BP, CX
677 ADDL R12, DX
678 ADDL 388(R15), AX
679 VPXOR Y8, Y5, Y5
680 VPXOR Y13, Y0, Y0
681 LEAL (AX)(CX*1), AX
682 MOVL BX, BP
683 ORL DX, BP
684 RORXL $0x1b, DX, R12
685 RORXL $0x02, DX, CX
686 ANDL SI, BP
687 ANDL BX, DX
688 ORL BP, DX
689 ADDL R12, AX
690 ADDL 392(R15), DI
691 VPXOR Y0, Y5, Y5
692 VPSLLDQ $0x0c, Y5, Y9
693 LEAL (DI)(DX*1), DI
694 MOVL CX, BP
695 ORL AX, BP
696 RORXL $0x1b, AX, R12
697 RORXL $0x02, AX, DX
698 ANDL BX, BP
699 ANDL CX, AX
700 ORL BP, AX
701 ADDL R12, DI
702 ADDL 396(R15), SI
703 VPSLLD $0x01, Y5, Y0
704 VPSRLD $0x1f, Y5, Y5
705 LEAL (SI)(AX*1), SI
706 MOVL DX, BP
707 ORL DI, BP
708 RORXL $0x1b, DI, R12
709 RORXL $0x02, DI, AX
710 ANDL CX, BP
711 ANDL DX, DI
712 ORL BP, DI
713 ADDL R12, SI
714 ADDL 416(R15), BX
715 VPOR Y5, Y0, Y0
716 VPSLLD $0x02, Y9, Y5
717 LEAL (BX)(DI*1), BX
718 MOVL AX, BP
719 ORL SI, BP
720 RORXL $0x1b, SI, R12
721 RORXL $0x02, SI, DI
722 ANDL DX, BP
723 ANDL AX, SI
724 ORL BP, SI
725 ADDL R12, BX
726 ADDL 420(R15), CX
727 VPSRLD $0x1e, Y9, Y9
728 VPXOR Y5, Y0, Y0
729 LEAL (CX)(SI*1), CX
730 MOVL DI, BP
731 ORL BX, BP
732 RORXL $0x1b, BX, R12
733 RORXL $0x02, BX, SI
734 ANDL AX, BP
735 ANDL DI, BX
736 ORL BP, BX
737 ADDL R12, CX
738 ADDL 424(R15), DX
739 LEAL (DX)(BX*1), DX
740 MOVL SI, BP
741 ORL CX, BP
742 RORXL $0x1b, CX, R12
743 RORXL $0x02, CX, BX
744 ANDL DI, BP
745 ANDL SI, CX
746 ORL BP, CX
747 ADDL R12, DX
748 ADDL 428(R15), AX
749 VPXOR Y9, Y0, Y5
750 VPADDD 32(R8), Y5, Y0
751 VMOVDQU Y0, 192(R14)
752 LEAL (AX)(CX*1), AX
753 MOVL BX, BP
754 ORL DX, BP
755 RORXL $0x1b, DX, R12
756 RORXL $0x02, DX, CX
757 ANDL SI, BP
758 ANDL BX, DX
759 ORL BP, DX
760 ADDL R12, AX
761 ADDL 448(R15), DI
762 VPALIGNR $0x08, Y12, Y8, Y3
763 VPSRLDQ $0x04, Y5, Y0
764 LEAL (DI)(DX*1), DI
765 MOVL CX, BP
766 ORL AX, BP
767 RORXL $0x1b, AX, R12
768 RORXL $0x02, AX, DX
769 ANDL BX, BP
770 ANDL CX, AX
771 ORL BP, AX
772 ADDL R12, DI
773 ADDL 452(R15), SI
774 VPXOR Y7, Y3, Y3
775 VPXOR Y12, Y0, Y0
776 LEAL (SI)(AX*1), SI
777 MOVL DX, BP
778 ORL DI, BP
779 RORXL $0x1b, DI, R12
780 RORXL $0x02, DI, AX
781 ANDL CX, BP
782 ANDL DX, DI
783 ORL BP, DI
784 ADDL R12, SI
785 ADDL 456(R15), BX
786 VPXOR Y0, Y3, Y3
787 VPSLLDQ $0x0c, Y3, Y9
788 LEAL (BX)(DI*1), BX
789 MOVL AX, BP
790 ORL SI, BP
791 RORXL $0x1b, SI, R12
792 RORXL $0x02, SI, DI
793 ANDL DX, BP
794 ANDL AX, SI
795 ORL BP, SI
796 ADDL R12, BX
797 ADDL 460(R15), CX
798 LEAL (CX)(SI*1), CX
799 RORXL $0x1b, BX, R12
800 RORXL $0x02, BX, SI
801 VPSLLD $0x01, Y3, Y0
802 VPSRLD $0x1f, Y3, Y3
803 XORL DI, BX
804 ADDL R12, CX
805 XORL AX, BX
806 ADDQ $0x80, R10
807 CMPQ R10, R11
808 CMOVQCC R8, R10
809 ADDL 480(R15), DX
810 LEAL (DX)(BX*1), DX
811 RORXL $0x1b, CX, R12
812 RORXL $0x02, CX, BX
813 VPOR Y3, Y0, Y0
814 VPSLLD $0x02, Y9, Y3
815 XORL SI, CX
816 ADDL R12, DX
817 XORL DI, CX
818 ADDL 484(R15), AX
819 LEAL (AX)(CX*1), AX
820 RORXL $0x1b, DX, R12
821 RORXL $0x02, DX, CX
822 VPSRLD $0x1e, Y9, Y9
823 VPXOR Y3, Y0, Y0
824 XORL BX, DX
825 ADDL R12, AX
826 XORL SI, DX
827 ADDL 488(R15), DI
828 LEAL (DI)(DX*1), DI
829 RORXL $0x1b, AX, R12
830 RORXL $0x02, AX, DX
831 XORL CX, AX
832 ADDL R12, DI
833 XORL BX, AX
834 ADDL 492(R15), SI
835 LEAL (SI)(AX*1), SI
836 RORXL $0x1b, DI, R12
837 RORXL $0x02, DI, AX
838 VPXOR Y9, Y0, Y3
839 VPADDD 32(R8), Y3, Y0
840 VMOVDQU Y0, 224(R14)
841 XORL DX, DI
842 ADDL R12, SI
843 XORL CX, DI
844 ADDL 512(R15), BX
845 LEAL (BX)(DI*1), BX
846 RORXL $0x1b, SI, R12
847 RORXL $0x02, SI, DI
848 VPALIGNR $0x08, Y5, Y3, Y0
849 XORL AX, SI
850 ADDL R12, BX
851 XORL DX, SI
852 ADDL 516(R15), CX
853 LEAL (CX)(SI*1), CX
854 RORXL $0x1b, BX, R12
855 RORXL $0x02, BX, SI
856 VPXOR Y14, Y15, Y15
857 XORL DI, BX
858 ADDL R12, CX
859 XORL AX, BX
860 ADDL 520(R15), DX
861 LEAL (DX)(BX*1), DX
862 RORXL $0x1b, CX, R12
863 RORXL $0x02, CX, BX
864 VPXOR Y8, Y0, Y0
865 XORL SI, CX
866 ADDL R12, DX
867 XORL DI, CX
868 ADDL 524(R15), AX
869 LEAL (AX)(CX*1), AX
870 RORXL $0x1b, DX, R12
871 RORXL $0x02, DX, CX
872 VPXOR Y0, Y15, Y15
873 XORL BX, DX
874 ADDL R12, AX
875 XORL SI, DX
876 ADDL 544(R15), DI
877 LEAL (DI)(DX*1), DI
878 RORXL $0x1b, AX, R12
879 RORXL $0x02, AX, DX
880 VPSLLD $0x02, Y15, Y0
881 XORL CX, AX
882 ADDL R12, DI
883 XORL BX, AX
884 ADDL 548(R15), SI
885 LEAL (SI)(AX*1), SI
886 RORXL $0x1b, DI, R12
887 RORXL $0x02, DI, AX
888 VPSRLD $0x1e, Y15, Y15
889 VPOR Y15, Y0, Y15
890 XORL DX, DI
891 ADDL R12, SI
892 XORL CX, DI
893 ADDL 552(R15), BX
894 LEAL (BX)(DI*1), BX
895 RORXL $0x1b, SI, R12
896 RORXL $0x02, SI, DI
897 XORL AX, SI
898 ADDL R12, BX
899 XORL DX, SI
900 ADDL 556(R15), CX
901 LEAL (CX)(SI*1), CX
902 RORXL $0x1b, BX, R12
903 RORXL $0x02, BX, SI
904 VPADDD 32(R8), Y15, Y0
905 VMOVDQU Y0, 256(R14)
906 XORL DI, BX
907 ADDL R12, CX
908 XORL AX, BX
909 ADDL 576(R15), DX
910 LEAL (DX)(BX*1), DX
911 RORXL $0x1b, CX, R12
912 RORXL $0x02, CX, BX
913 VPALIGNR $0x08, Y3, Y15, Y0
914 XORL SI, CX
915 ADDL R12, DX
916 XORL DI, CX
917 ADDL 580(R15), AX
918 LEAL (AX)(CX*1), AX
919 RORXL $0x1b, DX, R12
920 RORXL $0x02, DX, CX
921 VPXOR Y13, Y14, Y14
922 XORL BX, DX
923 ADDL R12, AX
924 XORL SI, DX
925 ADDL 584(R15), DI
926 LEAL (DI)(DX*1), DI
927 RORXL $0x1b, AX, R12
928 RORXL $0x02, AX, DX
929 VPXOR Y7, Y0, Y0
930 XORL CX, AX
931 ADDL R12, DI
932 XORL BX, AX
933 ADDL 588(R15), SI
934 LEAL (SI)(AX*1), SI
935 RORXL $0x1b, DI, R12
936 RORXL $0x02, DI, AX
937 VPXOR Y0, Y14, Y14
938 XORL DX, DI
939 ADDL R12, SI
940 XORL CX, DI
941 ADDL 608(R15), BX
942 LEAL (BX)(DI*1), BX
943 RORXL $0x1b, SI, R12
944 RORXL $0x02, SI, DI
945 VPSLLD $0x02, Y14, Y0
946 XORL AX, SI
947 ADDL R12, BX
948 XORL DX, SI
949 ADDL 612(R15), CX
950 LEAL (CX)(SI*1), CX
951 RORXL $0x1b, BX, R12
952 RORXL $0x02, BX, SI
953 VPSRLD $0x1e, Y14, Y14
954 VPOR Y14, Y0, Y14
955 XORL DI, BX
956 ADDL R12, CX
957 XORL AX, BX
958 ADDL 616(R15), DX
959 LEAL (DX)(BX*1), DX
960 RORXL $0x1b, CX, R12
961 RORXL $0x02, CX, BX
962 XORL SI, CX
963 ADDL R12, DX
964 XORL DI, CX
965 ADDL 620(R15), AX
966 LEAL (AX)(CX*1), AX
967 RORXL $0x1b, DX, R12
968 VPADDD 32(R8), Y14, Y0
969 VMOVDQU Y0, 288(R14)
970 ADDL R12, AX
971 ADDL (R9), AX
972 MOVL AX, (R9)
973 ADDL 4(R9), DX
974 MOVL DX, 4(R9)
975 ADDL 8(R9), BX
976 MOVL BX, 8(R9)
977 ADDL 12(R9), SI
978 MOVL SI, 12(R9)
979 ADDL 16(R9), DI
980 MOVL DI, 16(R9)
981 CMPQ R10, R8
982 JE loop
983 MOVL DX, CX
984 MOVL CX, DX
985 RORXL $0x02, CX, CX
986 ANDNL SI, DX, BP
987 ANDL BX, DX
988 XORL BP, DX
989 ADDL 16(R15), DI
990 ANDNL BX, AX, BP
991 LEAL (DI)(DX*1), DI
992 RORXL $0x1b, AX, R12
993 RORXL $0x02, AX, DX
994 VPALIGNR $0x08, Y15, Y14, Y0
995 ANDL CX, AX
996 XORL BP, AX
997 LEAL (DI)(R12*1), DI
998 ADDL 20(R15), SI
999 ANDNL CX, DI, BP
1000 LEAL (SI)(AX*1), SI
1001 RORXL $0x1b, DI, R12
1002 RORXL $0x02, DI, AX
1003 VPXOR Y12, Y13, Y13
1004 ANDL DX, DI
1005 XORL BP, DI
1006 LEAL (SI)(R12*1), SI
1007 ADDL 24(R15), BX
1008 ANDNL DX, SI, BP
1009 LEAL (BX)(DI*1), BX
1010 RORXL $0x1b, SI, R12
1011 RORXL $0x02, SI, DI
1012 VPXOR Y5, Y0, Y0
1013 ANDL AX, SI
1014 XORL BP, SI
1015 LEAL (BX)(R12*1), BX
1016 ADDL 28(R15), CX
1017 ANDNL AX, BX, BP
1018 LEAL (CX)(SI*1), CX
1019 RORXL $0x1b, BX, R12
1020 RORXL $0x02, BX, SI
1021 VPXOR Y0, Y13, Y13
1022 ANDL DI, BX
1023 XORL BP, BX
1024 LEAL (CX)(R12*1), CX
1025 ADDL 48(R15), DX
1026 ANDNL DI, CX, BP
1027 LEAL (DX)(BX*1), DX
1028 RORXL $0x1b, CX, R12
1029 RORXL $0x02, CX, BX
1030 VPSLLD $0x02, Y13, Y0
1031 ANDL SI, CX
1032 XORL BP, CX
1033 LEAL (DX)(R12*1), DX
1034 ADDL 52(R15), AX
1035 ANDNL SI, DX, BP
1036 LEAL (AX)(CX*1), AX
1037 RORXL $0x1b, DX, R12
1038 RORXL $0x02, DX, CX
1039 VPSRLD $0x1e, Y13, Y13
1040 VPOR Y13, Y0, Y13
1041 ANDL BX, DX
1042 XORL BP, DX
1043 LEAL (AX)(R12*1), AX
1044 ADDL 56(R15), DI
1045 ANDNL BX, AX, BP
1046 LEAL (DI)(DX*1), DI
1047 RORXL $0x1b, AX, R12
1048 RORXL $0x02, AX, DX
1049 ANDL CX, AX
1050 XORL BP, AX
1051 LEAL (DI)(R12*1), DI
1052 ADDL 60(R15), SI
1053 ANDNL CX, DI, BP
1054 LEAL (SI)(AX*1), SI
1055 RORXL $0x1b, DI, R12
1056 RORXL $0x02, DI, AX
1057 VPADDD 64(R8), Y13, Y0
1058 VMOVDQU Y0, 320(R14)
1059 ANDL DX, DI
1060 XORL BP, DI
1061 LEAL (SI)(R12*1), SI
1062 ADDL 80(R15), BX
1063 ANDNL DX, SI, BP
1064 LEAL (BX)(DI*1), BX
1065 RORXL $0x1b, SI, R12
1066 RORXL $0x02, SI, DI
1067 VPALIGNR $0x08, Y14, Y13, Y0
1068 ANDL AX, SI
1069 XORL BP, SI
1070 LEAL (BX)(R12*1), BX
1071 ADDL 84(R15), CX
1072 ANDNL AX, BX, BP
1073 LEAL (CX)(SI*1), CX
1074 RORXL $0x1b, BX, R12
1075 RORXL $0x02, BX, SI
1076 VPXOR Y8, Y12, Y12
1077 ANDL DI, BX
1078 XORL BP, BX
1079 LEAL (CX)(R12*1), CX
1080 ADDL 88(R15), DX
1081 ANDNL DI, CX, BP
1082 LEAL (DX)(BX*1), DX
1083 RORXL $0x1b, CX, R12
1084 RORXL $0x02, CX, BX
1085 VPXOR Y3, Y0, Y0
1086 ANDL SI, CX
1087 XORL BP, CX
1088 LEAL (DX)(R12*1), DX
1089 ADDL 92(R15), AX
1090 ANDNL SI, DX, BP
1091 LEAL (AX)(CX*1), AX
1092 RORXL $0x1b, DX, R12
1093 RORXL $0x02, DX, CX
1094 VPXOR Y0, Y12, Y12
1095 ANDL BX, DX
1096 XORL BP, DX
1097 LEAL (AX)(R12*1), AX
1098 ADDL 112(R15), DI
1099 ANDNL BX, AX, BP
1100 LEAL (DI)(DX*1), DI
1101 RORXL $0x1b, AX, R12
1102 RORXL $0x02, AX, DX
1103 VPSLLD $0x02, Y12, Y0
1104 ANDL CX, AX
1105 XORL BP, AX
1106 LEAL (DI)(R12*1), DI
1107 ADDL 116(R15), SI
1108 ANDNL CX, DI, BP
1109 LEAL (SI)(AX*1), SI
1110 RORXL $0x1b, DI, R12
1111 RORXL $0x02, DI, AX
1112 VPSRLD $0x1e, Y12, Y12
1113 VPOR Y12, Y0, Y12
1114 ANDL DX, DI
1115 XORL BP, DI
1116 LEAL (SI)(R12*1), SI
1117 ADDL 120(R15), BX
1118 ANDNL DX, SI, BP
1119 LEAL (BX)(DI*1), BX
1120 RORXL $0x1b, SI, R12
1121 RORXL $0x02, SI, DI
1122 ANDL AX, SI
1123 XORL BP, SI
1124 LEAL (BX)(R12*1), BX
1125 ADDL 124(R15), CX
1126 ANDNL AX, BX, BP
1127 LEAL (CX)(SI*1), CX
1128 RORXL $0x1b, BX, R12
1129 RORXL $0x02, BX, SI
1130 VPADDD 64(R8), Y12, Y0
1131 VMOVDQU Y0, 352(R14)
1132 ANDL DI, BX
1133 XORL BP, BX
1134 LEAL (CX)(R12*1), CX
1135 ADDL 144(R15), DX
1136 ANDNL DI, CX, BP
1137 LEAL (DX)(BX*1), DX
1138 RORXL $0x1b, CX, R12
1139 RORXL $0x02, CX, BX
1140 VPALIGNR $0x08, Y13, Y12, Y0
1141 ANDL SI, CX
1142 XORL BP, CX
1143 LEAL (DX)(R12*1), DX
1144 ADDL 148(R15), AX
1145 ANDNL SI, DX, BP
1146 LEAL (AX)(CX*1), AX
1147 RORXL $0x1b, DX, R12
1148 RORXL $0x02, DX, CX
1149 VPXOR Y7, Y8, Y8
1150 ANDL BX, DX
1151 XORL BP, DX
1152 LEAL (AX)(R12*1), AX
1153 ADDL 152(R15), DI
1154 ANDNL BX, AX, BP
1155 LEAL (DI)(DX*1), DI
1156 RORXL $0x1b, AX, R12
1157 RORXL $0x02, AX, DX
1158 VPXOR Y15, Y0, Y0
1159 ANDL CX, AX
1160 XORL BP, AX
1161 LEAL (DI)(R12*1), DI
1162 ADDL 156(R15), SI
1163 LEAL (SI)(AX*1), SI
1164 RORXL $0x1b, DI, R12
1165 RORXL $0x02, DI, AX
1166 VPXOR Y0, Y8, Y8
1167 XORL DX, DI
1168 ADDL R12, SI
1169 XORL CX, DI
1170 ADDL 176(R15), BX
1171 LEAL (BX)(DI*1), BX
1172 RORXL $0x1b, SI, R12
1173 RORXL $0x02, SI, DI
1174 VPSLLD $0x02, Y8, Y0
1175 XORL AX, SI
1176 ADDL R12, BX
1177 XORL DX, SI
1178 ADDL 180(R15), CX
1179 LEAL (CX)(SI*1), CX
1180 RORXL $0x1b, BX, R12
1181 RORXL $0x02, BX, SI
1182 VPSRLD $0x1e, Y8, Y8
1183 VPOR Y8, Y0, Y8
1184 XORL DI, BX
1185 ADDL R12, CX
1186 XORL AX, BX
1187 ADDL 184(R15), DX
1188 LEAL (DX)(BX*1), DX
1189 RORXL $0x1b, CX, R12
1190 RORXL $0x02, CX, BX
1191 XORL SI, CX
1192 ADDL R12, DX
1193 XORL DI, CX
1194 ADDL 188(R15), AX
1195 LEAL (AX)(CX*1), AX
1196 RORXL $0x1b, DX, R12
1197 RORXL $0x02, DX, CX
1198 VPADDD 64(R8), Y8, Y0
1199 VMOVDQU Y0, 384(R14)
1200 XORL BX, DX
1201 ADDL R12, AX
1202 XORL SI, DX
1203 ADDL 208(R15), DI
1204 LEAL (DI)(DX*1), DI
1205 RORXL $0x1b, AX, R12
1206 RORXL $0x02, AX, DX
1207 VPALIGNR $0x08, Y12, Y8, Y0
1208 XORL CX, AX
1209 ADDL R12, DI
1210 XORL BX, AX
1211 ADDL 212(R15), SI
1212 LEAL (SI)(AX*1), SI
1213 RORXL $0x1b, DI, R12
1214 RORXL $0x02, DI, AX
1215 VPXOR Y5, Y7, Y7
1216 XORL DX, DI
1217 ADDL R12, SI
1218 XORL CX, DI
1219 ADDL 216(R15), BX
1220 LEAL (BX)(DI*1), BX
1221 RORXL $0x1b, SI, R12
1222 RORXL $0x02, SI, DI
1223 VPXOR Y14, Y0, Y0
1224 XORL AX, SI
1225 ADDL R12, BX
1226 XORL DX, SI
1227 ADDL 220(R15), CX
1228 LEAL (CX)(SI*1), CX
1229 RORXL $0x1b, BX, R12
1230 RORXL $0x02, BX, SI
1231 VPXOR Y0, Y7, Y7
1232 XORL DI, BX
1233 ADDL R12, CX
1234 XORL AX, BX
1235 ADDL 240(R15), DX
1236 LEAL (DX)(BX*1), DX
1237 RORXL $0x1b, CX, R12
1238 RORXL $0x02, CX, BX
1239 VPSLLD $0x02, Y7, Y0
1240 XORL SI, CX
1241 ADDL R12, DX
1242 XORL DI, CX
1243 ADDL 244(R15), AX
1244 LEAL (AX)(CX*1), AX
1245 RORXL $0x1b, DX, R12
1246 RORXL $0x02, DX, CX
1247 VPSRLD $0x1e, Y7, Y7
1248 VPOR Y7, Y0, Y7
1249 XORL BX, DX
1250 ADDL R12, AX
1251 XORL SI, DX
1252 ADDL 248(R15), DI
1253 LEAL (DI)(DX*1), DI
1254 RORXL $0x1b, AX, R12
1255 RORXL $0x02, AX, DX
1256 XORL CX, AX
1257 ADDL R12, DI
1258 XORL BX, AX
1259 ADDL 252(R15), SI
1260 LEAL (SI)(AX*1), SI
1261 RORXL $0x1b, DI, R12
1262 RORXL $0x02, DI, AX
1263 VPADDD 64(R8), Y7, Y0
1264 VMOVDQU Y0, 416(R14)
1265 XORL DX, DI
1266 ADDL R12, SI
1267 XORL CX, DI
1268 ADDL 272(R15), BX
1269 LEAL (BX)(DI*1), BX
1270 RORXL $0x1b, SI, R12
1271 RORXL $0x02, SI, DI
1272 VPALIGNR $0x08, Y8, Y7, Y0
1273 XORL AX, SI
1274 ADDL R12, BX
1275 XORL DX, SI
1276 ADDL 276(R15), CX
1277 LEAL (CX)(SI*1), CX
1278 RORXL $0x1b, BX, R12
1279 RORXL $0x02, BX, SI
1280 VPXOR Y3, Y5, Y5
1281 XORL DI, BX
1282 ADDL R12, CX
1283 XORL AX, BX
1284 ADDL 280(R15), DX
1285 LEAL (DX)(BX*1), DX
1286 RORXL $0x1b, CX, R12
1287 RORXL $0x02, CX, BX
1288 VPXOR Y13, Y0, Y0
1289 XORL SI, CX
1290 ADDL R12, DX
1291 XORL DI, CX
1292 ADDL 284(R15), AX
1293 LEAL (AX)(CX*1), AX
1294 RORXL $0x1b, DX, R12
1295 RORXL $0x02, DX, CX
1296 VPXOR Y0, Y5, Y5
1297 XORL BX, DX
1298 ADDL R12, AX
1299 XORL SI, DX
1300 ADDL 304(R15), DI
1301 LEAL (DI)(DX*1), DI
1302 RORXL $0x1b, AX, R12
1303 RORXL $0x02, AX, DX
1304 VPSLLD $0x02, Y5, Y0
1305 XORL CX, AX
1306 ADDL R12, DI
1307 XORL BX, AX
1308 ADDL 308(R15), SI
1309 LEAL (SI)(AX*1), SI
1310 RORXL $0x1b, DI, R12
1311 RORXL $0x02, DI, AX
1312 VPSRLD $0x1e, Y5, Y5
1313 VPOR Y5, Y0, Y5
1314 XORL DX, DI
1315 ADDL R12, SI
1316 XORL CX, DI
1317 ADDL 312(R15), BX
1318 LEAL (BX)(DI*1), BX
1319 RORXL $0x1b, SI, R12
1320 RORXL $0x02, SI, DI
1321 XORL AX, SI
1322 ADDL R12, BX
1323 XORL DX, SI
1324 ADDL 316(R15), CX
1325 VPADDD 64(R8), Y5, Y0
1326 VMOVDQU Y0, 448(R14)
1327 LEAL (CX)(SI*1), CX
1328 MOVL DI, BP
1329 ORL BX, BP
1330 RORXL $0x1b, BX, R12
1331 RORXL $0x02, BX, SI
1332 ANDL AX, BP
1333 ANDL DI, BX
1334 ORL BP, BX
1335 ADDL R12, CX
1336 ADDL 336(R15), DX
1337 VPALIGNR $0x08, Y7, Y5, Y0
1338 LEAL (DX)(BX*1), DX
1339 MOVL SI, BP
1340 ORL CX, BP
1341 RORXL $0x1b, CX, R12
1342 RORXL $0x02, CX, BX
1343 ANDL DI, BP
1344 ANDL SI, CX
1345 ORL BP, CX
1346 ADDL R12, DX
1347 ADDL 340(R15), AX
1348 VPXOR Y15, Y3, Y3
1349 LEAL (AX)(CX*1), AX
1350 MOVL BX, BP
1351 ORL DX, BP
1352 RORXL $0x1b, DX, R12
1353 RORXL $0x02, DX, CX
1354 ANDL SI, BP
1355 ANDL BX, DX
1356 ORL BP, DX
1357 ADDL R12, AX
1358 ADDL 344(R15), DI
1359 VPXOR Y12, Y0, Y0
1360 LEAL (DI)(DX*1), DI
1361 MOVL CX, BP
1362 ORL AX, BP
1363 RORXL $0x1b, AX, R12
1364 RORXL $0x02, AX, DX
1365 ANDL BX, BP
1366 ANDL CX, AX
1367 ORL BP, AX
1368 ADDL R12, DI
1369 ADDL 348(R15), SI
1370 VPXOR Y0, Y3, Y3
1371 LEAL (SI)(AX*1), SI
1372 MOVL DX, BP
1373 ORL DI, BP
1374 RORXL $0x1b, DI, R12
1375 RORXL $0x02, DI, AX
1376 ANDL CX, BP
1377 ANDL DX, DI
1378 ORL BP, DI
1379 ADDL R12, SI
1380 ADDL 368(R15), BX
1381 VPSLLD $0x02, Y3, Y0
1382 LEAL (BX)(DI*1), BX
1383 MOVL AX, BP
1384 ORL SI, BP
1385 RORXL $0x1b, SI, R12
1386 RORXL $0x02, SI, DI
1387 ANDL DX, BP
1388 ANDL AX, SI
1389 ORL BP, SI
1390 ADDL R12, BX
1391 ADDL 372(R15), CX
1392 VPSRLD $0x1e, Y3, Y3
1393 VPOR Y3, Y0, Y3
1394 LEAL (CX)(SI*1), CX
1395 MOVL DI, BP
1396 ORL BX, BP
1397 RORXL $0x1b, BX, R12
1398 RORXL $0x02, BX, SI
1399 ANDL AX, BP
1400 ANDL DI, BX
1401 ORL BP, BX
1402 ADDL R12, CX
1403 ADDL 376(R15), DX
1404 LEAL (DX)(BX*1), DX
1405 MOVL SI, BP
1406 ORL CX, BP
1407 RORXL $0x1b, CX, R12
1408 RORXL $0x02, CX, BX
1409 ANDL DI, BP
1410 ANDL SI, CX
1411 ORL BP, CX
1412 ADDL R12, DX
1413 ADDL 380(R15), AX
1414 VPADDD 96(R8), Y3, Y0
1415 VMOVDQU Y0, 480(R14)
1416 LEAL (AX)(CX*1), AX
1417 MOVL BX, BP
1418 ORL DX, BP
1419 RORXL $0x1b, DX, R12
1420 RORXL $0x02, DX, CX
1421 ANDL SI, BP
1422 ANDL BX, DX
1423 ORL BP, DX
1424 ADDL R12, AX
1425 ADDL 400(R15), DI
1426 VPALIGNR $0x08, Y5, Y3, Y0
1427 LEAL (DI)(DX*1), DI
1428 MOVL CX, BP
1429 ORL AX, BP
1430 RORXL $0x1b, AX, R12
1431 RORXL $0x02, AX, DX
1432 ANDL BX, BP
1433 ANDL CX, AX
1434 ORL BP, AX
1435 ADDL R12, DI
1436 ADDL 404(R15), SI
1437 VPXOR Y14, Y15, Y15
1438 LEAL (SI)(AX*1), SI
1439 MOVL DX, BP
1440 ORL DI, BP
1441 RORXL $0x1b, DI, R12
1442 RORXL $0x02, DI, AX
1443 ANDL CX, BP
1444 ANDL DX, DI
1445 ORL BP, DI
1446 ADDL R12, SI
1447 ADDL 408(R15), BX
1448 VPXOR Y8, Y0, Y0
1449 LEAL (BX)(DI*1), BX
1450 MOVL AX, BP
1451 ORL SI, BP
1452 RORXL $0x1b, SI, R12
1453 RORXL $0x02, SI, DI
1454 ANDL DX, BP
1455 ANDL AX, SI
1456 ORL BP, SI
1457 ADDL R12, BX
1458 ADDL 412(R15), CX
1459 VPXOR Y0, Y15, Y15
1460 LEAL (CX)(SI*1), CX
1461 MOVL DI, BP
1462 ORL BX, BP
1463 RORXL $0x1b, BX, R12
1464 RORXL $0x02, BX, SI
1465 ANDL AX, BP
1466 ANDL DI, BX
1467 ORL BP, BX
1468 ADDL R12, CX
1469 ADDL 432(R15), DX
1470 VPSLLD $0x02, Y15, Y0
1471 LEAL (DX)(BX*1), DX
1472 MOVL SI, BP
1473 ORL CX, BP
1474 RORXL $0x1b, CX, R12
1475 RORXL $0x02, CX, BX
1476 ANDL DI, BP
1477 ANDL SI, CX
1478 ORL BP, CX
1479 ADDL R12, DX
1480 ADDL 436(R15), AX
1481 VPSRLD $0x1e, Y15, Y15
1482 VPOR Y15, Y0, Y15
1483 LEAL (AX)(CX*1), AX
1484 MOVL BX, BP
1485 ORL DX, BP
1486 RORXL $0x1b, DX, R12
1487 RORXL $0x02, DX, CX
1488 ANDL SI, BP
1489 ANDL BX, DX
1490 ORL BP, DX
1491 ADDL R12, AX
1492 ADDL 440(R15), DI
1493 LEAL (DI)(DX*1), DI
1494 MOVL CX, BP
1495 ORL AX, BP
1496 RORXL $0x1b, AX, R12
1497 RORXL $0x02, AX, DX
1498 ANDL BX, BP
1499 ANDL CX, AX
1500 ORL BP, AX
1501 ADDL R12, DI
1502 ADDL 444(R15), SI
1503 VPADDD 96(R8), Y15, Y0
1504 VMOVDQU Y0, 512(R14)
1505 LEAL (SI)(AX*1), SI
1506 MOVL DX, BP
1507 ORL DI, BP
1508 RORXL $0x1b, DI, R12
1509 RORXL $0x02, DI, AX
1510 ANDL CX, BP
1511 ANDL DX, DI
1512 ORL BP, DI
1513 ADDL R12, SI
1514 ADDL 464(R15), BX
1515 VPALIGNR $0x08, Y3, Y15, Y0
1516 LEAL (BX)(DI*1), BX
1517 MOVL AX, BP
1518 ORL SI, BP
1519 RORXL $0x1b, SI, R12
1520 RORXL $0x02, SI, DI
1521 ANDL DX, BP
1522 ANDL AX, SI
1523 ORL BP, SI
1524 ADDL R12, BX
1525 ADDL 468(R15), CX
1526 VPXOR Y13, Y14, Y14
1527 LEAL (CX)(SI*1), CX
1528 MOVL DI, BP
1529 ORL BX, BP
1530 RORXL $0x1b, BX, R12
1531 RORXL $0x02, BX, SI
1532 ANDL AX, BP
1533 ANDL DI, BX
1534 ORL BP, BX
1535 ADDL R12, CX
1536 ADDL 472(R15), DX
1537 VPXOR Y7, Y0, Y0
1538 LEAL (DX)(BX*1), DX
1539 MOVL SI, BP
1540 ORL CX, BP
1541 RORXL $0x1b, CX, R12
1542 RORXL $0x02, CX, BX
1543 ANDL DI, BP
1544 ANDL SI, CX
1545 ORL BP, CX
1546 ADDL R12, DX
1547 ADDL 476(R15), AX
1548 LEAL (AX)(CX*1), AX
1549 RORXL $0x1b, DX, R12
1550 RORXL $0x02, DX, CX
1551 VPXOR Y0, Y14, Y14
1552 XORL BX, DX
1553 ADDL R12, AX
1554 XORL SI, DX
1555 ADDQ $0x80, R13
1556 CMPQ R13, R11
1557 CMOVQCC R8, R10
1558 ADDL 496(R15), DI
1559 LEAL (DI)(DX*1), DI
1560 RORXL $0x1b, AX, R12
1561 RORXL $0x02, AX, DX
1562 VPSLLD $0x02, Y14, Y0
1563 XORL CX, AX
1564 ADDL R12, DI
1565 XORL BX, AX
1566 ADDL 500(R15), SI
1567 LEAL (SI)(AX*1), SI
1568 RORXL $0x1b, DI, R12
1569 RORXL $0x02, DI, AX
1570 VPSRLD $0x1e, Y14, Y14
1571 VPOR Y14, Y0, Y14
1572 XORL DX, DI
1573 ADDL R12, SI
1574 XORL CX, DI
1575 ADDL 504(R15), BX
1576 LEAL (BX)(DI*1), BX
1577 RORXL $0x1b, SI, R12
1578 RORXL $0x02, SI, DI
1579 XORL AX, SI
1580 ADDL R12, BX
1581 XORL DX, SI
1582 ADDL 508(R15), CX
1583 LEAL (CX)(SI*1), CX
1584 RORXL $0x1b, BX, R12
1585 RORXL $0x02, BX, SI
1586 VPADDD 96(R8), Y14, Y0
1587 VMOVDQU Y0, 544(R14)
1588 XORL DI, BX
1589 ADDL R12, CX
1590 XORL AX, BX
1591 ADDL 528(R15), DX
1592 LEAL (DX)(BX*1), DX
1593 RORXL $0x1b, CX, R12
1594 RORXL $0x02, CX, BX
1595 VPALIGNR $0x08, Y15, Y14, Y0
1596 XORL SI, CX
1597 ADDL R12, DX
1598 XORL DI, CX
1599 ADDL 532(R15), AX
1600 LEAL (AX)(CX*1), AX
1601 RORXL $0x1b, DX, R12
1602 RORXL $0x02, DX, CX
1603 VPXOR Y12, Y13, Y13
1604 XORL BX, DX
1605 ADDL R12, AX
1606 XORL SI, DX
1607 ADDL 536(R15), DI
1608 LEAL (DI)(DX*1), DI
1609 RORXL $0x1b, AX, R12
1610 RORXL $0x02, AX, DX
1611 VPXOR Y5, Y0, Y0
1612 XORL CX, AX
1613 ADDL R12, DI
1614 XORL BX, AX
1615 ADDL 540(R15), SI
1616 LEAL (SI)(AX*1), SI
1617 RORXL $0x1b, DI, R12
1618 RORXL $0x02, DI, AX
1619 VPXOR Y0, Y13, Y13
1620 XORL DX, DI
1621 ADDL R12, SI
1622 XORL CX, DI
1623 ADDL 560(R15), BX
1624 LEAL (BX)(DI*1), BX
1625 RORXL $0x1b, SI, R12
1626 RORXL $0x02, SI, DI
1627 VPSLLD $0x02, Y13, Y0
1628 XORL AX, SI
1629 ADDL R12, BX
1630 XORL DX, SI
1631 ADDL 564(R15), CX
1632 LEAL (CX)(SI*1), CX
1633 RORXL $0x1b, BX, R12
1634 RORXL $0x02, BX, SI
1635 VPSRLD $0x1e, Y13, Y13
1636 VPOR Y13, Y0, Y13
1637 XORL DI, BX
1638 ADDL R12, CX
1639 XORL AX, BX
1640 ADDL 568(R15), DX
1641 LEAL (DX)(BX*1), DX
1642 RORXL $0x1b, CX, R12
1643 RORXL $0x02, CX, BX
1644 XORL SI, CX
1645 ADDL R12, DX
1646 XORL DI, CX
1647 ADDL 572(R15), AX
1648 LEAL (AX)(CX*1), AX
1649 RORXL $0x1b, DX, R12
1650 RORXL $0x02, DX, CX
1651 VPADDD 96(R8), Y13, Y0
1652 VMOVDQU Y0, 576(R14)
1653 XORL BX, DX
1654 ADDL R12, AX
1655 XORL SI, DX
1656 ADDL 592(R15), DI
1657 LEAL (DI)(DX*1), DI
1658 RORXL $0x1b, AX, R12
1659 RORXL $0x02, AX, DX
1660 VPALIGNR $0x08, Y14, Y13, Y0
1661 XORL CX, AX
1662 ADDL R12, DI
1663 XORL BX, AX
1664 ADDL 596(R15), SI
1665 LEAL (SI)(AX*1), SI
1666 RORXL $0x1b, DI, R12
1667 RORXL $0x02, DI, AX
1668 VPXOR Y8, Y12, Y12
1669 XORL DX, DI
1670 ADDL R12, SI
1671 XORL CX, DI
1672 ADDL 600(R15), BX
1673 LEAL (BX)(DI*1), BX
1674 RORXL $0x1b, SI, R12
1675 RORXL $0x02, SI, DI
1676 VPXOR Y3, Y0, Y0
1677 XORL AX, SI
1678 ADDL R12, BX
1679 XORL DX, SI
1680 ADDL 604(R15), CX
1681 LEAL (CX)(SI*1), CX
1682 RORXL $0x1b, BX, R12
1683 RORXL $0x02, BX, SI
1684 VPXOR Y0, Y12, Y12
1685 XORL DI, BX
1686 ADDL R12, CX
1687 XORL AX, BX
1688 ADDL 624(R15), DX
1689 LEAL (DX)(BX*1), DX
1690 RORXL $0x1b, CX, R12
1691 RORXL $0x02, CX, BX
1692 VPSLLD $0x02, Y12, Y0
1693 XORL SI, CX
1694 ADDL R12, DX
1695 XORL DI, CX
1696 ADDL 628(R15), AX
1697 LEAL (AX)(CX*1), AX
1698 RORXL $0x1b, DX, R12
1699 RORXL $0x02, DX, CX
1700 VPSRLD $0x1e, Y12, Y12
1701 VPOR Y12, Y0, Y12
1702 XORL BX, DX
1703 ADDL R12, AX
1704 XORL SI, DX
1705 ADDL 632(R15), DI
1706 LEAL (DI)(DX*1), DI
1707 RORXL $0x1b, AX, R12
1708 RORXL $0x02, AX, DX
1709 XORL CX, AX
1710 ADDL R12, DI
1711 XORL BX, AX
1712 ADDL 636(R15), SI
1713 LEAL (SI)(AX*1), SI
1714 RORXL $0x1b, DI, R12
1715 VPADDD 96(R8), Y12, Y0
1716 VMOVDQU Y0, 608(R14)
1717 ADDL R12, SI
1718 ADDL (R9), SI
1719 MOVL SI, (R9)
1720 ADDL 4(R9), DI
1721 MOVL DI, 4(R9)
1722 ADDL 8(R9), DX
1723 MOVL DX, 8(R9)
1724 ADDL 12(R9), CX
1725 MOVL CX, 12(R9)
1726 ADDL 16(R9), BX
1727 MOVL BX, 16(R9)
1728 MOVL SI, R12
1729 MOVL DI, SI
1730 MOVL DX, DI
1731 MOVL BX, DX
1732 MOVL CX, AX
1733 MOVL R12, CX
1734 XCHGQ R15, R14
1735 JMP loop
1736
1737 DATA K_XMM_AR<>+0(SB)/4, $0x5a827999 // bytes 0-31: eight copies of 0x5a827999, the SHA-1 constant K for rounds 0-19 (FIPS 180-4); eight dwords fill one 256-bit VPADDD operand
1738 DATA K_XMM_AR<>+4(SB)/4, $0x5a827999
1739 DATA K_XMM_AR<>+8(SB)/4, $0x5a827999
1740 DATA K_XMM_AR<>+12(SB)/4, $0x5a827999
1741 DATA K_XMM_AR<>+16(SB)/4, $0x5a827999
1742 DATA K_XMM_AR<>+20(SB)/4, $0x5a827999
1743 DATA K_XMM_AR<>+24(SB)/4, $0x5a827999
1744 DATA K_XMM_AR<>+28(SB)/4, $0x5a827999
1745 DATA K_XMM_AR<>+32(SB)/4, $0x6ed9eba1 // bytes 32-63: eight copies of 0x6ed9eba1, the SHA-1 constant K for rounds 20-39
1746 DATA K_XMM_AR<>+36(SB)/4, $0x6ed9eba1
1747 DATA K_XMM_AR<>+40(SB)/4, $0x6ed9eba1
1748 DATA K_XMM_AR<>+44(SB)/4, $0x6ed9eba1
1749 DATA K_XMM_AR<>+48(SB)/4, $0x6ed9eba1
1750 DATA K_XMM_AR<>+52(SB)/4, $0x6ed9eba1
1751 DATA K_XMM_AR<>+56(SB)/4, $0x6ed9eba1
1752 DATA K_XMM_AR<>+60(SB)/4, $0x6ed9eba1
1753 DATA K_XMM_AR<>+64(SB)/4, $0x8f1bbcdc // bytes 64-95: eight copies of 0x8f1bbcdc, the SHA-1 constant K for rounds 40-59
1754 DATA K_XMM_AR<>+68(SB)/4, $0x8f1bbcdc
1755 DATA K_XMM_AR<>+72(SB)/4, $0x8f1bbcdc
1756 DATA K_XMM_AR<>+76(SB)/4, $0x8f1bbcdc
1757 DATA K_XMM_AR<>+80(SB)/4, $0x8f1bbcdc
1758 DATA K_XMM_AR<>+84(SB)/4, $0x8f1bbcdc
1759 DATA K_XMM_AR<>+88(SB)/4, $0x8f1bbcdc
1760 DATA K_XMM_AR<>+92(SB)/4, $0x8f1bbcdc
1761 DATA K_XMM_AR<>+96(SB)/4, $0xca62c1d6 // bytes 96-127: eight copies of 0xca62c1d6, the SHA-1 constant K for rounds 60-79; blockAVX2 reads this group as 96(R8)
1762 DATA K_XMM_AR<>+100(SB)/4, $0xca62c1d6
1763 DATA K_XMM_AR<>+104(SB)/4, $0xca62c1d6
1764 DATA K_XMM_AR<>+108(SB)/4, $0xca62c1d6
1765 DATA K_XMM_AR<>+112(SB)/4, $0xca62c1d6
1766 DATA K_XMM_AR<>+116(SB)/4, $0xca62c1d6
1767 DATA K_XMM_AR<>+120(SB)/4, $0xca62c1d6
1768 DATA K_XMM_AR<>+124(SB)/4, $0xca62c1d6
1769 GLOBL K_XMM_AR<>(SB), RODATA, $128 // 128-byte read-only table; the <> suffix keeps the symbol local to this file. blockAVX2 holds its address in R8
1770
1771 DATA BSWAP_SHUFB_CTL<>+0(SB)/4, $0x00010203 // stored little-endian as bytes 03 02 01 00: as a VPSHUFB control this reverses the byte order inside dword 0
1772 DATA BSWAP_SHUFB_CTL<>+4(SB)/4, $0x04050607 // bytes 07 06 05 04: same reversal for dword 1
1773 DATA BSWAP_SHUFB_CTL<>+8(SB)/4, $0x08090a0b // bytes 0b 0a 09 08: same reversal for dword 2
1774 DATA BSWAP_SHUFB_CTL<>+12(SB)/4, $0x0c0d0e0f // bytes 0f 0e 0d 0c: same reversal for dword 3
1775 DATA BSWAP_SHUFB_CTL<>+16(SB)/4, $0x00010203 // bytes 16-31 repeat the same pattern for the upper 128-bit lane of a YMM register
1776 DATA BSWAP_SHUFB_CTL<>+20(SB)/4, $0x04050607
1777 DATA BSWAP_SHUFB_CTL<>+24(SB)/4, $0x08090a0b
1778 DATA BSWAP_SHUFB_CTL<>+28(SB)/4, $0x0c0d0e0f
1779 GLOBL BSWAP_SHUFB_CTL<>(SB), RODATA, $32 // 32-byte read-only table, file-local (<>); blockAVX2 loads it into Y10 and uses it as the VPSHUFB control
1780
1781 // func blockSHANI(dig *digest, p []byte)
1782 // Requires: AVX, SHA, SSE2, SSE4.1, SSSE3
1783 TEXT ·blockSHANI(SB), $48-32 // updates the 20 bytes of state at dig from p, consuming 64 bytes per loop pass; 48-byte frame, 32 bytes of arguments
1784 MOVQ dig+0(FP), DI // DI = dig
1785 MOVQ p_base+8(FP), SI // SI = address of the next input byte
1786 MOVQ p_len+16(FP), DX // DX = input length in bytes
1787 CMPQ DX, $0x00
1788 JEQ done // zero-length input: return without reading or writing dig
1789 ADDQ SI, DX // DX = p_base + p_len, the address at which the loop stops
1790
1791 // Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes
1792 LEAQ 15(SP), AX // AX = SP + 15
1793 MOVQ $0x000000000000000f, CX
1794 NOTQ CX // CX = ^15
1795 ANDQ CX, AX // AX = (SP + 15) &^ 15, the lowest 16-byte-aligned address >= SP; 32 bytes are used from here (32 + up to 15 of padding fits the 48-byte frame)
1796
1797 // Load initial hash state
1798 PINSRD $0x03, 16(DI), X5 // dword at dig+16 (E) -> lane 3 of X5; the other lanes keep whatever X5 held on entry
1799 VMOVDQU (DI), X0 // 16 bytes at dig+0 (ABCD) -> X0
1800 PAND upper_mask<>+0(SB), X5 // clear lanes 0-2 so X5 holds only E in lane 3
1801 PSHUFD $0x1b, X0, X0 // 0x1b reverses the order of the four dwords in X0
1802 VMOVDQA shuffle_mask<>+0(SB), X7 // X7 = PSHUFB control applied to every 16 bytes of input below
1803
1804 loop:
1805 // Save ABCD and E working values
1806 VMOVDQA X5, (AX) // E at offset 0 of the aligned save area
1807 VMOVDQA X0, 16(AX) // ABCD at offset 16
1808
1809 // Rounds 0-3
1810 VMOVDQU (SI), X1 // input bytes 0-15 of this block
1811 PSHUFB X7, X1 // reorder the 16 bytes using shuffle_mask
1812 PADDD X1, X5 // X5 = message words with E added into lane 3
1813 VMOVDQA X0, X6 // copy ABCD before the rounds; SHA1NEXTE in the next group consumes it
1814 SHA1RNDS4 $0x00, X5, X0 // four rounds on X0; immediate 0x00 is used through round 19
1815
1816 // Rounds 4-7
1817 VMOVDQU 16(SI), X2 // input bytes 16-31
1818 PSHUFB X7, X2
1819 SHA1NEXTE X2, X6 // X6 = message words from X2 plus the next E derived from the ABCD copy in X6
1820 VMOVDQA X0, X5 // X5 and X6 alternate roles from group to group
1821 SHA1RNDS4 $0x00, X6, X0
1822 SHA1MSG1 X2, X1 // message schedule, first step: start turning X1 into the words for rounds 16-19
1823
1824 // Rounds 8-11
1825 VMOVDQU 32(SI), X3 // input bytes 32-47
1826 PSHUFB X7, X3
1827 SHA1NEXTE X3, X5
1828 VMOVDQA X0, X6
1829 SHA1RNDS4 $0x00, X5, X0
1830 SHA1MSG1 X3, X2
1831 PXOR X3, X1 // message schedule, XOR step between SHA1MSG1 and SHA1MSG2
1832
1833 // Rounds 12-15
1834 VMOVDQU 48(SI), X4 // input bytes 48-63, the last of this block
1835 PSHUFB X7, X4
1836 SHA1NEXTE X4, X6
1837 VMOVDQA X0, X5
1838 SHA1MSG2 X4, X1 // message schedule, final step: X1 now holds the words consumed in rounds 16-19
1839 SHA1RNDS4 $0x00, X6, X0
1840 SHA1MSG1 X4, X3
1841 PXOR X4, X2
1842
1843 // Rounds 16-19
1844 SHA1NEXTE X1, X5 // from here on the message words come from the schedule, cycling through X1, X2, X3, X4
1845 VMOVDQA X0, X6
1846 SHA1MSG2 X1, X2
1847 SHA1RNDS4 $0x00, X5, X0 // last group using immediate 0x00
1848 SHA1MSG1 X1, X4
1849 PXOR X1, X3
1850
1851 // Rounds 20-23
1852 SHA1NEXTE X2, X6
1853 VMOVDQA X0, X5
1854 SHA1MSG2 X2, X3
1855 SHA1RNDS4 $0x01, X6, X0 // immediate switches to 0x01 for rounds 20-39
1856 SHA1MSG1 X2, X1
1857 PXOR X2, X4
1858
1859 // Rounds 24-27
1860 SHA1NEXTE X3, X5
1861 VMOVDQA X0, X6
1862 SHA1MSG2 X3, X4
1863 SHA1RNDS4 $0x01, X5, X0
1864 SHA1MSG1 X3, X2
1865 PXOR X3, X1
1866
1867 // Rounds 28-31
1868 SHA1NEXTE X4, X6
1869 VMOVDQA X0, X5
1870 SHA1MSG2 X4, X1
1871 SHA1RNDS4 $0x01, X6, X0
1872 SHA1MSG1 X4, X3
1873 PXOR X4, X2
1874
1875 // Rounds 32-35
1876 SHA1NEXTE X1, X5
1877 VMOVDQA X0, X6
1878 SHA1MSG2 X1, X2
1879 SHA1RNDS4 $0x01, X5, X0
1880 SHA1MSG1 X1, X4
1881 PXOR X1, X3
1882
1883 // Rounds 36-39
1884 SHA1NEXTE X2, X6
1885 VMOVDQA X0, X5
1886 SHA1MSG2 X2, X3
1887 SHA1RNDS4 $0x01, X6, X0 // last group using immediate 0x01
1888 SHA1MSG1 X2, X1
1889 PXOR X2, X4
1890
1891 // Rounds 40-43
1892 SHA1NEXTE X3, X5
1893 VMOVDQA X0, X6
1894 SHA1MSG2 X3, X4
1895 SHA1RNDS4 $0x02, X5, X0 // immediate switches to 0x02 for rounds 40-59
1896 SHA1MSG1 X3, X2
1897 PXOR X3, X1
1898
1899 // Rounds 44-47
1900 SHA1NEXTE X4, X6
1901 VMOVDQA X0, X5
1902 SHA1MSG2 X4, X1
1903 SHA1RNDS4 $0x02, X6, X0
1904 SHA1MSG1 X4, X3
1905 PXOR X4, X2
1906
1907 // Rounds 48-51
1908 SHA1NEXTE X1, X5
1909 VMOVDQA X0, X6
1910 SHA1MSG2 X1, X2
1911 SHA1RNDS4 $0x02, X5, X0
1912 SHA1MSG1 X1, X4
1913 PXOR X1, X3
1914
1915 // Rounds 52-55
1916 SHA1NEXTE X2, X6
1917 VMOVDQA X0, X5
1918 SHA1MSG2 X2, X3
1919 SHA1RNDS4 $0x02, X6, X0
1920 SHA1MSG1 X2, X1
1921 PXOR X2, X4
1922
1923 // Rounds 56-59
1924 SHA1NEXTE X3, X5
1925 VMOVDQA X0, X6
1926 SHA1MSG2 X3, X4
1927 SHA1RNDS4 $0x02, X5, X0 // last group using immediate 0x02
1928 SHA1MSG1 X3, X2
1929 PXOR X3, X1
1930
1931 // Rounds 60-63
1932 SHA1NEXTE X4, X6
1933 VMOVDQA X0, X5
1934 SHA1MSG2 X4, X1
1935 SHA1RNDS4 $0x03, X6, X0 // immediate switches to 0x03 for rounds 60-79
1936 SHA1MSG1 X4, X3
1937 PXOR X4, X2
1938
1939 // Rounds 64-67
1940 SHA1NEXTE X1, X5
1941 VMOVDQA X0, X6
1942 SHA1MSG2 X1, X2
1943 SHA1RNDS4 $0x03, X5, X0
1944 SHA1MSG1 X1, X4 // last SHA1MSG1: it prepares X4, which is consumed in rounds 76-79
1945 PXOR X1, X3
1946
1947 // Rounds 68-71
1948 SHA1NEXTE X2, X6
1949 VMOVDQA X0, X5
1950 SHA1MSG2 X2, X3
1951 SHA1RNDS4 $0x03, X6, X0
1952 PXOR X2, X4 // last schedule XOR; no SHA1MSG1 in this group because no words past round 79 are needed
1953
1954 // Rounds 72-75
1955 SHA1NEXTE X3, X5
1956 VMOVDQA X0, X6
1957 SHA1MSG2 X3, X4 // last schedule step: completes X4 for rounds 76-79
1958 SHA1RNDS4 $0x03, X5, X0
1959
1960 // Rounds 76-79
1961 SHA1NEXTE X4, X6
1962 VMOVDQA X0, X5 // ABCD copy from which the final E is derived below
1963 SHA1RNDS4 $0x03, X6, X0
1964
1965 // Add saved E and ABCD
1966 SHA1NEXTE (AX), X5 // X5 = saved E (save area offset 0) plus the E derived from the ABCD copy in X5
1967 PADDD 16(AX), X0 // ABCD += saved ABCD (save area offset 16)
1968
1969 // Check if we are done, if not return to the loop
1970 ADDQ $0x40, SI // advance 64 bytes to the next block
1971 CMPQ SI, DX
1972 JNE loop // NOTE(review): the loop exits only when SI == DX exactly, so p_len is presumably always a multiple of 64 - confirm against callers
1973
1974 // Write the hash state back to digest
1975 PSHUFD $0x1b, X0, X0 // undo the dword reversal applied when the state was loaded
1976 VMOVDQU X0, (DI) // store ABCD to the 16 bytes at dig+0
1977 PEXTRD $0x03, X5, 16(DI) // store lane 3 of X5 (E) to dig+16
1978
1979 done:
1980 RET
1981
1982 DATA upper_mask<>+0(SB)/8, $0x0000000000000000 // bytes 0-7: all zero
1983 DATA upper_mask<>+8(SB)/8, $0xffffffff00000000 // bytes 8-15: only the top 32 bits set, so the 128-bit mask keeps dword lane 3 and clears lanes 0-2
1984 GLOBL upper_mask<>(SB), RODATA, $16 // 16-byte read-only constant, file-local (<>); blockSHANI applies it to X5 with PAND
1985
1986 DATA shuffle_mask<>+0(SB)/8, $0x08090a0b0c0d0e0f // stored little-endian as bytes 0f 0e 0d 0c 0b 0a 09 08: PSHUFB destination bytes 0-7 take source bytes 15 down to 8
1987 DATA shuffle_mask<>+8(SB)/8, $0x0001020304050607 // bytes 07 06 05 04 03 02 01 00: destination bytes 8-15 take source bytes 7 down to 0, so the whole 16-byte vector is reversed
1988 GLOBL shuffle_mask<>(SB), RODATA, $16 // 16-byte read-only constant, file-local (<>); blockSHANI loads it into X7 with an aligned VMOVDQA
1989
View as plain text