1 // Code generated by command: go run gcm_amd64_asm.go -out ../../gcm_amd64.s -pkg aes. DO NOT EDIT.
2
3 //go:build !purego
4
5 #include "textflag.h"
6
7 // func gcmAesFinish(productTable *[256]byte, tagMask *[16]byte, T *[16]byte, pLen uint64, dLen uint64)
8 // Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
//
// gcmAesFinish performs the final hash step on the 16-byte state at T and
// masks the result:
//   1. pLen and dLen are each shifted left by 3 (i.e. multiplied by 8) and
//      packed into one 128-bit value: pLen<<3 in the low quadword, dLen<<3
//      in the high quadword.
//   2. That value is XORed with the 16 bytes read from T and carry-less
//      multiplied (three PCLMULQDQ in Karatsuba form) by the table entry at
//      productTable[224:240]; productTable[240:256] supplies the operand for
//      the middle term. The product is then folded twice with gcmPoly.
//   3. The result is byte-reversed with bswapMask, XORed with the 16 bytes at
//      tagMask, and stored back to T.
// Only T is written; productTable and tagMask are only read.
// NOTE(review): the <<3 presumably converts byte lengths to bit lengths -
// confirm against the Go caller.
9 TEXT ·gcmAesFinish(SB), NOSPLIT, $0-40
10 MOVQ productTable+0(FP), DI
11 MOVQ tagMask+8(FP), SI
12 MOVQ T+16(FP), DX
13 MOVQ pLen+24(FP), AX
14 MOVQ dLen+32(FP), CX
15 MOVOU (DX), X8 // X8 = current state read from T
16 MOVOU (SI), X13 // X13 = tagMask
17 MOVOU bswapMask<>+0(SB), X15
18 MOVOU gcmPoly<>+0(SB), X14
19 SHLQ $0x03, AX // pLen * 8
20 SHLQ $0x03, CX // dLen * 8
21 MOVQ AX, X0 // low quadword  = pLen<<3
22 PINSRQ $0x01, CX, X0 // high quadword = dLen<<3
23 PXOR X8, X0 // fold the lengths into the state
24 MOVOU 224(DI), X8 // multiplier from the table
25 MOVOU 240(DI), X10 // its companion value for the middle term
26 MOVOU X8, X9
27 PCLMULQDQ $0x00, X0, X8 // low  x low
28 PCLMULQDQ $0x11, X0, X9 // high x high
29 PSHUFD $0x4e, X0, X11 // swap the two quadwords of X0 ...
30 PXOR X0, X11 // ... so X11 = (hi ^ lo) in both halves
31 PCLMULQDQ $0x00, X11, X10 // middle term
32 PXOR X8, X10
33 PXOR X9, X10
34 MOVOU X10, X11
35 PSRLDQ $0x08, X10 // upper half of the middle term -> into the high product
36 PSLLDQ $0x08, X11 // lower half of the middle term -> into the low product
37 PXOR X10, X9
38 PXOR X11, X8
39 MOVOU X14, X11 // first fold of the low product with gcmPoly
40 PCLMULQDQ $0x01, X8, X11
41 PSHUFD $0x4e, X8, X8
42 PXOR X11, X8
43 MOVOU X14, X11 // second fold
44 PCLMULQDQ $0x01, X8, X11
45 PSHUFD $0x4e, X8, X8
46 PXOR X11, X8
47 PXOR X9, X8 // combine with the high product
48 PSHUFB X15, X8 // reverse the byte order of the result
49 PXOR X13, X8 // apply tagMask
50 MOVOU X8, (DX) // write the result back to T
51 RET
52
// bswapMask is a 16-byte read-only PSHUFB control mask. In memory order its
// bytes are 0x0f, 0x0e, ..., 0x01, 0x00, so PSHUFB with this mask reverses the
// order of all 16 bytes of the destination register.
53 DATA bswapMask<>+0(SB)/8, $0x08090a0b0c0d0e0f
54 DATA bswapMask<>+8(SB)/8, $0x0001020304050607
55 GLOBL bswapMask<>(SB), RODATA|NOPTR, $16
56
// gcmPoly is a 16-byte read-only constant: low quadword 0x1, high quadword
// 0xc2 << 56. The routines in this file load it into X14 and use it as the
// PCLMULQDQ operand in the two folding steps that follow each multiplication;
// gcmAesInit additionally uses it as the value conditionally XORed in when
// doubling H.
57 DATA gcmPoly<>+0(SB)/8, $0x0000000000000001
58 DATA gcmPoly<>+8(SB)/8, $0xc200000000000000
59 GLOBL gcmPoly<>(SB), RODATA|NOPTR, $16
60
61 // func gcmAesInit(productTable *[256]byte, ks []uint32)
62 // Requires: AES, PCLMULQDQ, SSE2, SSSE3
//
// gcmAesInit fills productTable from the expanded key schedule ks.
// It derives H by running the AES rounds starting from round key 0,
// byte-reverses and doubles it, and then stores eight 32-byte entries. Each
// entry is a 16-byte value followed by the XOR of its two quadwords in both
// halves (the "Karatsuba pre-computation"). The doubled H goes to offset 224;
// each subsequent product with the doubled H goes 32 bytes lower, ending at
// offset 0.
// The number of AES rounds is chosen from len(ks)/4 - 1: below 12 the final
// round key is at byte offset 160, exactly 12 at 192, above 12 at 224.
63 TEXT ·gcmAesInit(SB), NOSPLIT, $0-32
64 MOVQ productTable+0(FP), DI
65 MOVQ ks_base+8(FP), SI
66 MOVQ ks_len+16(FP), DX
67 SHRQ $0x02, DX // DX = len(ks) / 4
68 DECQ DX // DX = len(ks)/4 - 1, compared against 12 below
69 MOVOU bswapMask<>+0(SB), X15
70 MOVOU gcmPoly<>+0(SB), X14
71
72 // Encrypt block 0, with the AES key to generate the hash key H
73 MOVOU (SI), X0 // round key 0 (equals the all-zero block XOR round key 0)
74 MOVOU 16(SI), X11
75 AESENC X11, X0
76 MOVOU 32(SI), X11
77 AESENC X11, X0
78 MOVOU 48(SI), X11
79 AESENC X11, X0
80 MOVOU 64(SI), X11
81 AESENC X11, X0
82 MOVOU 80(SI), X11
83 AESENC X11, X0
84 MOVOU 96(SI), X11
85 AESENC X11, X0
86 MOVOU 112(SI), X11
87 AESENC X11, X0
88 MOVOU 128(SI), X11
89 AESENC X11, X0
90 MOVOU 144(SI), X11
91 AESENC X11, X0
92 MOVOU 160(SI), X11
93 CMPQ DX, $0x0c
94 JB initEncLast // DX < 12: the key at offset 160 is the last round key
95 AESENC X11, X0
96 MOVOU 176(SI), X11
97 AESENC X11, X0
98 MOVOU 192(SI), X11
99 JE initEncLast // flags still come from the CMPQ above; DX == 12: last key at 192
100 AESENC X11, X0
101 MOVOU 208(SI), X11
102 AESENC X11, X0
103 MOVOU 224(SI), X11
104
105 initEncLast:
106 AESENCLAST X11, X0
107 PSHUFB X15, X0 // reverse the byte order of H
108
109 // H * 2
110 PSHUFD $0xff, X0, X11 // broadcast the top doubleword
111 MOVOU X0, X12
112 PSRAL $0x1f, X11 // all-ones in every lane if the top bit of H is set, else zero
113 PAND X14, X11 // gcmPoly or zero
114 PSRLL $0x1f, X12 // top bit of each doubleword ...
115 PSLLDQ $0x04, X12 // ... moved to the bottom of the next doubleword (carry)
116 PSLLL $0x01, X0 // shift each doubleword left by one
117 PXOR X11, X0 // conditional gcmPoly
118 PXOR X12, X0 // propagate the inter-lane carries
119
120 // Karatsuba pre-computations
121 MOVOU X0, 224(DI)
122 PSHUFD $0x4e, X0, X1 // swap quadwords
123 PXOR X0, X1 // X1 = (hi ^ lo) in both halves
124 MOVOU X1, 240(DI)
125 MOVOU X0, X2 // X2/X3 track the most recent table value and its companion
126 MOVOU X1, X3
127
128 // Now prepare powers of H and pre-computations for them
129 MOVQ $0x00000007, AX // seven more table entries
130
131 initLoop:
132 MOVOU X2, X11
133 MOVOU X2, X12
134 MOVOU X3, X13
135 PCLMULQDQ $0x00, X0, X11 // low  x low
136 PCLMULQDQ $0x11, X0, X12 // high x high
137 PCLMULQDQ $0x00, X1, X13 // middle term
138 PXOR X11, X13
139 PXOR X12, X13
140 MOVOU X13, X4
141 PSLLDQ $0x08, X4
142 PSRLDQ $0x08, X13
143 PXOR X4, X11
144 PXOR X13, X12
145 MOVOU X14, X2 // first fold with gcmPoly
146 PCLMULQDQ $0x01, X11, X2
147 PSHUFD $0x4e, X11, X11
148 PXOR X2, X11
149 MOVOU X14, X2 // second fold
150 PCLMULQDQ $0x01, X11, X2
151 PSHUFD $0x4e, X11, X11
152 PXOR X11, X2
153 PXOR X12, X2 // X2 = new table value
154 MOVOU X2, 192(DI)
155 PSHUFD $0x4e, X2, X3
156 PXOR X2, X3 // X3 = its (hi ^ lo) companion
157 MOVOU X3, 208(DI)
158 DECQ AX
159 LEAQ -32(DI), DI // step to the next-lower entry; LEAQ leaves the flags from DECQ intact
160 JNE initLoop
161 RET
162
163 // func gcmAesData(productTable *[256]byte, data []byte, T *[16]byte)
164 // Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
//
// gcmAesData hashes data using the values in productTable and stores the
// 16-byte accumulator to T. The accumulator starts at zero; the previous
// contents of T are not read.
//
// Paths, chosen by len(data):
//   - 0 bytes: the zero accumulator is stored immediately.
//   - exactly 13 bytes (dataTLS): the bytes are loaded with three inserts
//     (8 + 4 + 1), zero-padded to 16, and hashed as one block.
//     NOTE(review): the label name suggests this targets the 13-byte TLS
//     additional data - confirm with the caller.
//   - 128 bytes or more: dataOctaLoop consumes 128 bytes (eight blocks) per
//     iteration using all eight table entries, with a single reduction.
//   - then dataSinglesLoop consumes 16 bytes per iteration using the entry at
//     offsets 224/240.
//   - a final partial block (< 16 bytes) is loaded byte by byte, zero-padded,
//     and hashed through dataMul.
165 TEXT ·gcmAesData(SB), NOSPLIT, $0-40
166 MOVQ productTable+0(FP), DI
167 MOVQ data_base+8(FP), SI
168 MOVQ data_len+16(FP), DX
169 MOVQ T+32(FP), CX
170 PXOR X8, X8 // accumulator starts at zero
171 MOVOU bswapMask<>+0(SB), X15
172 MOVOU gcmPoly<>+0(SB), X14
173 TESTQ DX, DX
174 JEQ dataBail
175 CMPQ DX, $0x0d
176 JE dataTLS
177 CMPQ DX, $0x80
178 JB startSinglesLoop
179 JMP dataOctaLoop
180
181 dataTLS:
182 MOVOU 224(DI), X12
183 MOVOU 240(DI), X13
184 PXOR X0, X0
185 MOVQ (SI), X0 // bytes 0..7
186 PINSRD $0x02, 8(SI), X0 // bytes 8..11
187 PINSRB $0x0c, 12(SI), X0 // byte 12; bytes 13..15 stay zero
188 XORQ DX, DX // nothing left after this block
189 JMP dataMul
190
191 dataOctaLoop:
192 CMPQ DX, $0x80
193 JB startSinglesLoop
194 SUBQ $0x80, DX
195 MOVOU (SI), X0
196 MOVOU 16(SI), X1
197 MOVOU 32(SI), X2
198 MOVOU 48(SI), X3
199 MOVOU 64(SI), X4
200 MOVOU 80(SI), X5
201 MOVOU 96(SI), X6
202 MOVOU 112(SI), X7
203 LEAQ 128(SI), SI
204 PSHUFB X15, X0
205 PSHUFB X15, X1
206 PSHUFB X15, X2
207 PSHUFB X15, X3
208 PSHUFB X15, X4
209 PSHUFB X15, X5
210 PSHUFB X15, X6
211 PSHUFB X15, X7
212 PXOR X8, X0 // fold the running accumulator into the first block
// Block 0 x table entry at offsets 0/16 initialises the three partial sums:
// X8 = low products, X9 = high products, X10 = middle terms.
213 MOVOU (DI), X8
214 MOVOU 16(DI), X10
215 MOVOU X8, X9
216 PSHUFD $0x4e, X0, X12
217 PXOR X0, X12
218 PCLMULQDQ $0x00, X0, X8
219 PCLMULQDQ $0x11, X0, X9
220 PCLMULQDQ $0x00, X12, X10
// Blocks 1..7: same three products with the entries at 32/48 ... 224/240,
// XOR-accumulated into X8 / X9 / X10.
221 MOVOU 32(DI), X12
222 MOVOU X12, X13
223 PCLMULQDQ $0x00, X1, X12
224 PXOR X12, X8
225 PCLMULQDQ $0x11, X1, X13
226 PXOR X13, X9
227 PSHUFD $0x4e, X1, X12
228 PXOR X12, X1
229 MOVOU 48(DI), X12
230 PCLMULQDQ $0x00, X1, X12
231 PXOR X12, X10
232 MOVOU 64(DI), X12
233 MOVOU X12, X13
234 PCLMULQDQ $0x00, X2, X12
235 PXOR X12, X8
236 PCLMULQDQ $0x11, X2, X13
237 PXOR X13, X9
238 PSHUFD $0x4e, X2, X12
239 PXOR X12, X2
240 MOVOU 80(DI), X12
241 PCLMULQDQ $0x00, X2, X12
242 PXOR X12, X10
243 MOVOU 96(DI), X12
244 MOVOU X12, X13
245 PCLMULQDQ $0x00, X3, X12
246 PXOR X12, X8
247 PCLMULQDQ $0x11, X3, X13
248 PXOR X13, X9
249 PSHUFD $0x4e, X3, X12
250 PXOR X12, X3
251 MOVOU 112(DI), X12
252 PCLMULQDQ $0x00, X3, X12
253 PXOR X12, X10
254 MOVOU 128(DI), X12
255 MOVOU X12, X13
256 PCLMULQDQ $0x00, X4, X12
257 PXOR X12, X8
258 PCLMULQDQ $0x11, X4, X13
259 PXOR X13, X9
260 PSHUFD $0x4e, X4, X12
261 PXOR X12, X4
262 MOVOU 144(DI), X12
263 PCLMULQDQ $0x00, X4, X12
264 PXOR X12, X10
265 MOVOU 160(DI), X12
266 MOVOU X12, X13
267 PCLMULQDQ $0x00, X5, X12
268 PXOR X12, X8
269 PCLMULQDQ $0x11, X5, X13
270 PXOR X13, X9
271 PSHUFD $0x4e, X5, X12
272 PXOR X12, X5
273 MOVOU 176(DI), X12
274 PCLMULQDQ $0x00, X5, X12
275 PXOR X12, X10
276 MOVOU 192(DI), X12
277 MOVOU X12, X13
278 PCLMULQDQ $0x00, X6, X12
279 PXOR X12, X8
280 PCLMULQDQ $0x11, X6, X13
281 PXOR X13, X9
282 PSHUFD $0x4e, X6, X12
283 PXOR X12, X6
284 MOVOU 208(DI), X12
285 PCLMULQDQ $0x00, X6, X12
286 PXOR X12, X10
287 MOVOU 224(DI), X12
288 MOVOU X12, X13
289 PCLMULQDQ $0x00, X7, X12
290 PXOR X12, X8
291 PCLMULQDQ $0x11, X7, X13
292 PXOR X13, X9
293 PSHUFD $0x4e, X7, X12
294 PXOR X12, X7
295 MOVOU 240(DI), X12
296 PCLMULQDQ $0x00, X7, X12
297 PXOR X12, X10
// Combine the middle terms into the low/high sums, then fold twice with gcmPoly.
298 PXOR X8, X10
299 PXOR X9, X10
300 MOVOU X10, X11
301 PSRLDQ $0x08, X10
302 PSLLDQ $0x08, X11
303 PXOR X10, X9
304 PXOR X11, X8
305 MOVOU X14, X11
306 PCLMULQDQ $0x01, X8, X11
307 PSHUFD $0x4e, X8, X8
308 PXOR X11, X8
309 MOVOU X14, X11
310 PCLMULQDQ $0x01, X8, X11
311 PSHUFD $0x4e, X8, X8
312 PXOR X11, X8
313 PXOR X9, X8 // X8 = updated accumulator
314 JMP dataOctaLoop
315
316 startSinglesLoop:
317 MOVOU 224(DI), X12 // table value used for single-block multiplies
318 MOVOU 240(DI), X13 // and its companion for the middle term
319
320 dataSinglesLoop:
321 CMPQ DX, $0x10
322 JB dataEnd
323 SUBQ $0x10, DX
324 MOVOU (SI), X0
325
// dataMul: accumulator = (accumulator ^ byte-reversed X0) * (X12 with
// companion X13), followed by the two folds. Entered by fall-through from the
// singles loop and by jumps from dataTLS and dataLoadLoop.
326 dataMul:
327 PSHUFB X15, X0
328 PXOR X8, X0
329 MOVOU X12, X8
330 MOVOU X13, X10
331 MOVOU X12, X9
332 PSHUFD $0x4e, X0, X11
333 PXOR X0, X11
334 PCLMULQDQ $0x00, X0, X8
335 PCLMULQDQ $0x11, X0, X9
336 PCLMULQDQ $0x00, X11, X10
337 PXOR X8, X10
338 PXOR X9, X10
339 MOVOU X10, X11
340 PSRLDQ $0x08, X10
341 PSLLDQ $0x08, X11
342 PXOR X10, X9
343 PXOR X11, X8
344 MOVOU X14, X11
345 PCLMULQDQ $0x01, X8, X11
346 PSHUFD $0x4e, X8, X8
347 PXOR X11, X8
348 MOVOU X14, X11
349 PCLMULQDQ $0x01, X8, X11
350 PSHUFD $0x4e, X8, X8
351 PXOR X11, X8
352 PXOR X9, X8
353 LEAQ 16(SI), SI
354 JMP dataSinglesLoop
355
356 dataEnd:
357 TESTQ DX, DX
358 JEQ dataBail
359 PXOR X0, X0
360 LEAQ -1(SI)(DX*1), SI // point at the last remaining byte
361
// Build the partial block back to front: shift X0 up one byte, insert the
// current byte at position 0, step SI backwards. When DX reaches zero X0
// holds the remaining bytes in order, zero-padded at the top.
362 dataLoadLoop:
363 PSLLDQ $0x01, X0
364 PINSRB $0x00, (SI), X0
365 LEAQ -1(SI), SI // LEAQ leaves flags alone, so JNE below tests the DECQ result
366 DECQ DX
367 JNE dataLoadLoop
368 JMP dataMul // DX is now 0, so after dataMul the loop exits via dataEnd -> dataBail
369
370 dataBail:
371 MOVOU X8, (CX) // store the accumulator to T
372 RET
373
374 // func gcmAesEnc(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
375 // Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
//
// gcmAesEnc encrypts len(src) bytes from src into dst in counter mode with the
// round keys in ks, and hashes the produced ciphertext into the 16-byte
// accumulator at T (read at entry, written back at exit).
//
// Counter handling: the 32-bit word at ctr[12:16] is treated as big-endian and
// is incremented BEFORE the first keystream block is generated, so the first
// block uses counter+1. ctr itself is only read, never written back.
//
// Register roles: DI = productTable, DX = dst, SI = src, R9 = bytes left,
// R8 = T, AX = ks, R13 = len(ks)/4 - 1 (selects 160/192/224 as the offset of
// the last round key), R10 = running counter (host order), R12 = byte-swapped
// word 3 of round key 0, X8/X9/X10 = hash partial sums, X14 = gcmPoly,
// X15 = bswapMask.
//
// Stack frame (256 bytes):
//     0..127(SP)  byte-reversed ciphertext of the most recent 8 blocks, kept
//                 until it is hashed
//   128..255(SP)  8 counter blocks, each already XORed with round key 0; only
//                 the last 4 bytes of a slot change when its counter advances
//
// NOTE: the tail path stores a full 16 bytes to dst even when fewer than 16
// bytes remain (bytes past the remaining length are zero), so dst must have
// room for that store.
376 TEXT ·gcmAesEnc(SB), $256-96
377 MOVQ productTable+0(FP), DI
378 MOVQ dst_base+8(FP), DX
379 MOVQ src_base+32(FP), SI
380 MOVQ src_len+40(FP), R9
381 MOVQ ctr+56(FP), CX
382 MOVQ T+64(FP), R8
383 MOVQ ks_base+72(FP), AX
384 MOVQ ks_len+80(FP), R13
385 SHRQ $0x02, R13
386 DECQ R13 // R13 = len(ks)/4 - 1
387 MOVOU bswapMask<>+0(SB), X15
388 MOVOU gcmPoly<>+0(SB), X14
389 MOVOU (R8), X8 // accumulator from T
390 PXOR X9, X9
391 PXOR X10, X10
392 MOVOU (CX), X0 // counter block
393 MOVL 12(CX), R10 // its last 32-bit word
394 MOVOU (AX), X11 // round key 0
395 MOVL 12(AX), R12 // last 32-bit word of round key 0
396 BSWAPL R10 // counter in host byte order
397 BSWAPL R12
398 PXOR X0, X11 // counter block ^ round key 0
399 MOVOU X11, 128(SP) // counter slot 0
// Advance slot 0: R10++, XOR with the key word, swap back, patch bytes 12..15.
400 ADDL $0x01, R10
401 MOVL R10, R11
402 XORL R12, R11
403 BSWAPL R11
404 MOVL R11, 140(SP)
405 CMPQ R9, $0x80
406 JB gcmAesEncSingles
407 SUBQ $0x80, R9
408
409 // We have at least 8 blocks to encrypt, prepare the rest of the counters
410 MOVOU X11, 144(SP)
411 ADDL $0x01, R10
412 MOVL R10, R11
413 XORL R12, R11
414 BSWAPL R11
415 MOVL R11, 156(SP)
416 MOVOU X11, 160(SP)
417 ADDL $0x01, R10
418 MOVL R10, R11
419 XORL R12, R11
420 BSWAPL R11
421 MOVL R11, 172(SP)
422 MOVOU X11, 176(SP)
423 ADDL $0x01, R10
424 MOVL R10, R11
425 XORL R12, R11
426 BSWAPL R11
427 MOVL R11, 188(SP)
428 MOVOU X11, 192(SP)
429 ADDL $0x01, R10
430 MOVL R10, R11
431 XORL R12, R11
432 BSWAPL R11
433 MOVL R11, 204(SP)
434 MOVOU X11, 208(SP)
435 ADDL $0x01, R10
436 MOVL R10, R11
437 XORL R12, R11
438 BSWAPL R11
439 MOVL R11, 220(SP)
440 MOVOU X11, 224(SP)
441 ADDL $0x01, R10
442 MOVL R10, R11
443 XORL R12, R11
444 BSWAPL R11
445 MOVL R11, 236(SP)
446 MOVOU X11, 240(SP)
447 ADDL $0x01, R10
448 MOVL R10, R11
449 XORL R12, R11
450 BSWAPL R11
451 MOVL R11, 252(SP)
// First 8 blocks: load the counter slots into X0..X7 and run the AES rounds.
// Nothing is hashed yet. Between rounds each slot is advanced for the next
// group of 8 (the current values are already in registers).
452 MOVOU 128(SP), X0
453 MOVOU 144(SP), X1
454 MOVOU 160(SP), X2
455 MOVOU 176(SP), X3
456 MOVOU 192(SP), X4
457 MOVOU 208(SP), X5
458 MOVOU 224(SP), X6
459 MOVOU 240(SP), X7
460 MOVOU 16(AX), X11
461 AESENC X11, X0
462 AESENC X11, X1
463 AESENC X11, X2
464 AESENC X11, X3
465 AESENC X11, X4
466 AESENC X11, X5
467 AESENC X11, X6
468 AESENC X11, X7
469 ADDL $0x01, R10
470 MOVL R10, R11
471 XORL R12, R11
472 BSWAPL R11
473 MOVL R11, 140(SP)
474 MOVOU 32(AX), X11
475 AESENC X11, X0
476 AESENC X11, X1
477 AESENC X11, X2
478 AESENC X11, X3
479 AESENC X11, X4
480 AESENC X11, X5
481 AESENC X11, X6
482 AESENC X11, X7
483 ADDL $0x01, R10
484 MOVL R10, R11
485 XORL R12, R11
486 BSWAPL R11
487 MOVL R11, 156(SP)
488 MOVOU 48(AX), X11
489 AESENC X11, X0
490 AESENC X11, X1
491 AESENC X11, X2
492 AESENC X11, X3
493 AESENC X11, X4
494 AESENC X11, X5
495 AESENC X11, X6
496 AESENC X11, X7
497 ADDL $0x01, R10
498 MOVL R10, R11
499 XORL R12, R11
500 BSWAPL R11
501 MOVL R11, 172(SP)
502 MOVOU 64(AX), X11
503 AESENC X11, X0
504 AESENC X11, X1
505 AESENC X11, X2
506 AESENC X11, X3
507 AESENC X11, X4
508 AESENC X11, X5
509 AESENC X11, X6
510 AESENC X11, X7
511 ADDL $0x01, R10
512 MOVL R10, R11
513 XORL R12, R11
514 BSWAPL R11
515 MOVL R11, 188(SP)
516 MOVOU 80(AX), X11
517 AESENC X11, X0
518 AESENC X11, X1
519 AESENC X11, X2
520 AESENC X11, X3
521 AESENC X11, X4
522 AESENC X11, X5
523 AESENC X11, X6
524 AESENC X11, X7
525 ADDL $0x01, R10
526 MOVL R10, R11
527 XORL R12, R11
528 BSWAPL R11
529 MOVL R11, 204(SP)
530 MOVOU 96(AX), X11
531 AESENC X11, X0
532 AESENC X11, X1
533 AESENC X11, X2
534 AESENC X11, X3
535 AESENC X11, X4
536 AESENC X11, X5
537 AESENC X11, X6
538 AESENC X11, X7
539 ADDL $0x01, R10
540 MOVL R10, R11
541 XORL R12, R11
542 BSWAPL R11
543 MOVL R11, 220(SP)
544 MOVOU 112(AX), X11
545 AESENC X11, X0
546 AESENC X11, X1
547 AESENC X11, X2
548 AESENC X11, X3
549 AESENC X11, X4
550 AESENC X11, X5
551 AESENC X11, X6
552 AESENC X11, X7
553 ADDL $0x01, R10
554 MOVL R10, R11
555 XORL R12, R11
556 BSWAPL R11
557 MOVL R11, 236(SP)
558 MOVOU 128(AX), X11
559 AESENC X11, X0
560 AESENC X11, X1
561 AESENC X11, X2
562 AESENC X11, X3
563 AESENC X11, X4
564 AESENC X11, X5
565 AESENC X11, X6
566 AESENC X11, X7
567 ADDL $0x01, R10
568 MOVL R10, R11
569 XORL R12, R11
570 BSWAPL R11
571 MOVL R11, 252(SP)
572 MOVOU 144(AX), X11
573 AESENC X11, X0
574 AESENC X11, X1
575 AESENC X11, X2
576 AESENC X11, X3
577 AESENC X11, X4
578 AESENC X11, X5
579 AESENC X11, X6
580 AESENC X11, X7
581 MOVOU 160(AX), X11
582 CMPQ R13, $0x0c
583 JB encLast1 // R13 < 12: key at offset 160 is the last round key
584 AESENC X11, X0
585 AESENC X11, X1
586 AESENC X11, X2
587 AESENC X11, X3
588 AESENC X11, X4
589 AESENC X11, X5
590 AESENC X11, X6
591 AESENC X11, X7
592 MOVOU 176(AX), X11
593 AESENC X11, X0
594 AESENC X11, X1
595 AESENC X11, X2
596 AESENC X11, X3
597 AESENC X11, X4
598 AESENC X11, X5
599 AESENC X11, X6
600 AESENC X11, X7
601 MOVOU 192(AX), X11
602 JE encLast1 // flags still from the CMPQ above; R13 == 12: last key at 192
603 AESENC X11, X0
604 AESENC X11, X1
605 AESENC X11, X2
606 AESENC X11, X3
607 AESENC X11, X4
608 AESENC X11, X5
609 AESENC X11, X6
610 AESENC X11, X7
611 MOVOU 208(AX), X11
612 AESENC X11, X0
613 AESENC X11, X1
614 AESENC X11, X2
615 AESENC X11, X3
616 AESENC X11, X4
617 AESENC X11, X5
618 AESENC X11, X6
619 AESENC X11, X7
620 MOVOU 224(AX), X11
621
622 encLast1:
623 AESENCLAST X11, X0
624 AESENCLAST X11, X1
625 AESENCLAST X11, X2
626 AESENCLAST X11, X3
627 AESENCLAST X11, X4
628 AESENCLAST X11, X5
629 AESENCLAST X11, X6
630 AESENCLAST X11, X7
// XOR the keystream with 128 bytes of src.
631 MOVOU (SI), X11
632 PXOR X11, X0
633 MOVOU 16(SI), X11
634 PXOR X11, X1
635 MOVOU 32(SI), X11
636 PXOR X11, X2
637 MOVOU 48(SI), X11
638 PXOR X11, X3
639 MOVOU 64(SI), X11
640 PXOR X11, X4
641 MOVOU 80(SI), X11
642 PXOR X11, X5
643 MOVOU 96(SI), X11
644 PXOR X11, X6
645 MOVOU 112(SI), X11
646 PXOR X11, X7
// Write the ciphertext to dst, then byte-reverse each block for hashing.
647 MOVOU X0, (DX)
648 PSHUFB X15, X0
649 PXOR X8, X0 // fold the accumulator into the first block of the group
650 MOVOU X1, 16(DX)
651 PSHUFB X15, X1
652 MOVOU X2, 32(DX)
653 PSHUFB X15, X2
654 MOVOU X3, 48(DX)
655 PSHUFB X15, X3
656 MOVOU X4, 64(DX)
657 PSHUFB X15, X4
658 MOVOU X5, 80(DX)
659 PSHUFB X15, X5
660 MOVOU X6, 96(DX)
661 PSHUFB X15, X6
662 MOVOU X7, 112(DX)
663 PSHUFB X15, X7
// Stash the byte-reversed ciphertext; it is hashed during the next iteration
// (or in gcmAesEncOctetsEnd).
664 MOVOU X0, (SP)
665 MOVOU X1, 16(SP)
666 MOVOU X2, 32(SP)
667 MOVOU X3, 48(SP)
668 MOVOU X4, 64(SP)
669 MOVOU X5, 80(SP)
670 MOVOU X6, 96(SP)
671 MOVOU X7, 112(SP)
672 LEAQ 128(SI), SI
673 LEAQ 128(DX), DX
674
// Main loop: encrypt the next 8 blocks while hashing the 8 stashed blocks of
// the previous group. Each step interleaves one AES round over X0..X7 with
// the three carry-less products for one stashed block and one counter update.
675 gcmAesEncOctetsLoop:
676 CMPQ R9, $0x80
677 JB gcmAesEncOctetsEnd
678 SUBQ $0x80, R9
679 MOVOU 128(SP), X0
680 MOVOU 144(SP), X1
681 MOVOU 160(SP), X2
682 MOVOU 176(SP), X3
683 MOVOU 192(SP), X4
684 MOVOU 208(SP), X5
685 MOVOU 224(SP), X6
686 MOVOU 240(SP), X7
// Stashed block 0 x table entry 0/16 initialises X8 (low), X9 (high), X10 (middle).
687 MOVOU (SP), X11
688 PSHUFD $0x4e, X11, X12
689 PXOR X11, X12
690 MOVOU (DI), X8
691 MOVOU 16(DI), X10
692 MOVOU X8, X9
693 PCLMULQDQ $0x00, X12, X10
694 PCLMULQDQ $0x00, X11, X8
695 PCLMULQDQ $0x11, X11, X9
// Round 1 + stashed block 1 (table 32/48) + advance slot 0.
696 MOVOU 16(AX), X11
697 AESENC X11, X0
698 AESENC X11, X1
699 AESENC X11, X2
700 AESENC X11, X3
701 MOVOU 32(DI), X12
702 MOVOU X12, X13
703 AESENC X11, X4
704 AESENC X11, X5
705 AESENC X11, X6
706 AESENC X11, X7
707 MOVOU 16(SP), X11
708 PCLMULQDQ $0x00, X11, X12
709 PXOR X12, X8
710 PSHUFD $0x4e, X11, X12
711 PCLMULQDQ $0x11, X11, X13
712 PXOR X12, X11
713 PXOR X13, X9
714 MOVOU 48(DI), X13
715 PCLMULQDQ $0x00, X13, X11
716 PXOR X11, X10
717 ADDL $0x01, R10
718 MOVL R10, R11
719 XORL R12, R11
720 BSWAPL R11
721 MOVL R11, 140(SP)
// Round 2 + stashed block 2 (table 64/80) + advance slot 1.
722 MOVOU 32(AX), X11
723 AESENC X11, X0
724 AESENC X11, X1
725 AESENC X11, X2
726 AESENC X11, X3
727 MOVOU 64(DI), X12
728 MOVOU X12, X13
729 AESENC X11, X4
730 AESENC X11, X5
731 AESENC X11, X6
732 AESENC X11, X7
733 MOVOU 32(SP), X11
734 PCLMULQDQ $0x00, X11, X12
735 PXOR X12, X8
736 PSHUFD $0x4e, X11, X12
737 PCLMULQDQ $0x11, X11, X13
738 PXOR X12, X11
739 PXOR X13, X9
740 MOVOU 80(DI), X13
741 PCLMULQDQ $0x00, X13, X11
742 PXOR X11, X10
743 ADDL $0x01, R10
744 MOVL R10, R11
745 XORL R12, R11
746 BSWAPL R11
747 MOVL R11, 156(SP)
// Round 3 + stashed block 3 (table 96/112) + advance slot 2.
748 MOVOU 48(AX), X11
749 AESENC X11, X0
750 AESENC X11, X1
751 AESENC X11, X2
752 AESENC X11, X3
753 MOVOU 96(DI), X12
754 MOVOU X12, X13
755 AESENC X11, X4
756 AESENC X11, X5
757 AESENC X11, X6
758 AESENC X11, X7
759 MOVOU 48(SP), X11
760 PCLMULQDQ $0x00, X11, X12
761 PXOR X12, X8
762 PSHUFD $0x4e, X11, X12
763 PCLMULQDQ $0x11, X11, X13
764 PXOR X12, X11
765 PXOR X13, X9
766 MOVOU 112(DI), X13
767 PCLMULQDQ $0x00, X13, X11
768 PXOR X11, X10
769 ADDL $0x01, R10
770 MOVL R10, R11
771 XORL R12, R11
772 BSWAPL R11
773 MOVL R11, 172(SP)
// Round 4 + stashed block 4 (table 128/144) + advance slot 3.
774 MOVOU 64(AX), X11
775 AESENC X11, X0
776 AESENC X11, X1
777 AESENC X11, X2
778 AESENC X11, X3
779 MOVOU 128(DI), X12
780 MOVOU X12, X13
781 AESENC X11, X4
782 AESENC X11, X5
783 AESENC X11, X6
784 AESENC X11, X7
785 MOVOU 64(SP), X11
786 PCLMULQDQ $0x00, X11, X12
787 PXOR X12, X8
788 PSHUFD $0x4e, X11, X12
789 PCLMULQDQ $0x11, X11, X13
790 PXOR X12, X11
791 PXOR X13, X9
792 MOVOU 144(DI), X13
793 PCLMULQDQ $0x00, X13, X11
794 PXOR X11, X10
795 ADDL $0x01, R10
796 MOVL R10, R11
797 XORL R12, R11
798 BSWAPL R11
799 MOVL R11, 188(SP)
// Round 5 + stashed block 5 (table 160/176) + advance slot 4.
800 MOVOU 80(AX), X11
801 AESENC X11, X0
802 AESENC X11, X1
803 AESENC X11, X2
804 AESENC X11, X3
805 MOVOU 160(DI), X12
806 MOVOU X12, X13
807 AESENC X11, X4
808 AESENC X11, X5
809 AESENC X11, X6
810 AESENC X11, X7
811 MOVOU 80(SP), X11
812 PCLMULQDQ $0x00, X11, X12
813 PXOR X12, X8
814 PSHUFD $0x4e, X11, X12
815 PCLMULQDQ $0x11, X11, X13
816 PXOR X12, X11
817 PXOR X13, X9
818 MOVOU 176(DI), X13
819 PCLMULQDQ $0x00, X13, X11
820 PXOR X11, X10
821 ADDL $0x01, R10
822 MOVL R10, R11
823 XORL R12, R11
824 BSWAPL R11
825 MOVL R11, 204(SP)
// Round 6 + stashed block 6 (table 192/208) + advance slot 5.
826 MOVOU 96(AX), X11
827 AESENC X11, X0
828 AESENC X11, X1
829 AESENC X11, X2
830 AESENC X11, X3
831 MOVOU 192(DI), X12
832 MOVOU X12, X13
833 AESENC X11, X4
834 AESENC X11, X5
835 AESENC X11, X6
836 AESENC X11, X7
837 MOVOU 96(SP), X11
838 PCLMULQDQ $0x00, X11, X12
839 PXOR X12, X8
840 PSHUFD $0x4e, X11, X12
841 PCLMULQDQ $0x11, X11, X13
842 PXOR X12, X11
843 PXOR X13, X9
844 MOVOU 208(DI), X13
845 PCLMULQDQ $0x00, X13, X11
846 PXOR X11, X10
847 ADDL $0x01, R10
848 MOVL R10, R11
849 XORL R12, R11
850 BSWAPL R11
851 MOVL R11, 220(SP)
// Round 7 + stashed block 7 (table 224/240) + advance slot 6.
852 MOVOU 112(AX), X11
853 AESENC X11, X0
854 AESENC X11, X1
855 AESENC X11, X2
856 AESENC X11, X3
857 MOVOU 224(DI), X12
858 MOVOU X12, X13
859 AESENC X11, X4
860 AESENC X11, X5
861 AESENC X11, X6
862 AESENC X11, X7
863 MOVOU 112(SP), X11
864 PCLMULQDQ $0x00, X11, X12
865 PXOR X12, X8
866 PSHUFD $0x4e, X11, X12
867 PCLMULQDQ $0x11, X11, X13
868 PXOR X12, X11
869 PXOR X13, X9
870 MOVOU 240(DI), X13
871 PCLMULQDQ $0x00, X13, X11
872 PXOR X11, X10
873 ADDL $0x01, R10
874 MOVL R10, R11
875 XORL R12, R11
876 BSWAPL R11
877 MOVL R11, 236(SP)
// Round 8 + advance slot 7.
878 MOVOU 128(AX), X11
879 AESENC X11, X0
880 AESENC X11, X1
881 AESENC X11, X2
882 AESENC X11, X3
883 AESENC X11, X4
884 AESENC X11, X5
885 AESENC X11, X6
886 AESENC X11, X7
887 ADDL $0x01, R10
888 MOVL R10, R11
889 XORL R12, R11
890 BSWAPL R11
891 MOVL R11, 252(SP)
// Combine the middle terms, then the two folds with gcmPoly, with AES round 9
// placed between them.
892 PXOR X8, X10
893 PXOR X9, X10
894 MOVOU X10, X11
895 PSRLDQ $0x08, X10
896 PSLLDQ $0x08, X11
897 PXOR X10, X9
898 PXOR X11, X8
899 MOVOU X14, X11
900 PCLMULQDQ $0x01, X8, X11
901 PSHUFD $0x4e, X8, X8
902 PXOR X11, X8
903 MOVOU 144(AX), X11
904 AESENC X11, X0
905 AESENC X11, X1
906 AESENC X11, X2
907 AESENC X11, X3
908 AESENC X11, X4
909 AESENC X11, X5
910 AESENC X11, X6
911 AESENC X11, X7
912 MOVOU X14, X11
913 PCLMULQDQ $0x01, X8, X11
914 PSHUFD $0x4e, X8, X8
915 PXOR X11, X8
916 PXOR X9, X8 // X8 = accumulator after the previous 8 blocks
917 MOVOU 160(AX), X11
918 CMPQ R13, $0x0c
919 JB encLast2
920 AESENC X11, X0
921 AESENC X11, X1
922 AESENC X11, X2
923 AESENC X11, X3
924 AESENC X11, X4
925 AESENC X11, X5
926 AESENC X11, X6
927 AESENC X11, X7
928 MOVOU 176(AX), X11
929 AESENC X11, X0
930 AESENC X11, X1
931 AESENC X11, X2
932 AESENC X11, X3
933 AESENC X11, X4
934 AESENC X11, X5
935 AESENC X11, X6
936 AESENC X11, X7
937 MOVOU 192(AX), X11
938 JE encLast2 // flags still from the CMPQ above
939 AESENC X11, X0
940 AESENC X11, X1
941 AESENC X11, X2
942 AESENC X11, X3
943 AESENC X11, X4
944 AESENC X11, X5
945 AESENC X11, X6
946 AESENC X11, X7
947 MOVOU 208(AX), X11
948 AESENC X11, X0
949 AESENC X11, X1
950 AESENC X11, X2
951 AESENC X11, X3
952 AESENC X11, X4
953 AESENC X11, X5
954 AESENC X11, X6
955 AESENC X11, X7
956 MOVOU 224(AX), X11
957
958 encLast2:
959 AESENCLAST X11, X0
960 AESENCLAST X11, X1
961 AESENCLAST X11, X2
962 AESENCLAST X11, X3
963 AESENCLAST X11, X4
964 AESENCLAST X11, X5
965 AESENCLAST X11, X6
966 AESENCLAST X11, X7
967 MOVOU (SI), X11
968 PXOR X11, X0
969 MOVOU 16(SI), X11
970 PXOR X11, X1
971 MOVOU 32(SI), X11
972 PXOR X11, X2
973 MOVOU 48(SI), X11
974 PXOR X11, X3
975 MOVOU 64(SI), X11
976 PXOR X11, X4
977 MOVOU 80(SI), X11
978 PXOR X11, X5
979 MOVOU 96(SI), X11
980 PXOR X11, X6
981 MOVOU 112(SI), X11
982 PXOR X11, X7
983 MOVOU X0, (DX)
984 PSHUFB X15, X0
985 PXOR X8, X0 // fold the accumulator into the first block of this group
986 MOVOU X1, 16(DX)
987 PSHUFB X15, X1
988 MOVOU X2, 32(DX)
989 PSHUFB X15, X2
990 MOVOU X3, 48(DX)
991 PSHUFB X15, X3
992 MOVOU X4, 64(DX)
993 PSHUFB X15, X4
994 MOVOU X5, 80(DX)
995 PSHUFB X15, X5
996 MOVOU X6, 96(DX)
997 PSHUFB X15, X6
998 MOVOU X7, 112(DX)
999 PSHUFB X15, X7
1000 MOVOU X0, (SP)
1001 MOVOU X1, 16(SP)
1002 MOVOU X2, 32(SP)
1003 MOVOU X3, 48(SP)
1004 MOVOU X4, 64(SP)
1005 MOVOU X5, 80(SP)
1006 MOVOU X6, 96(SP)
1007 MOVOU X7, 112(SP)
1008 LEAQ 128(SI), SI
1009 LEAQ 128(DX), DX
1010 JMP gcmAesEncOctetsLoop
1011
// Fewer than 128 bytes remain: hash the 8 ciphertext blocks still stashed at
// 0..127(SP) (no encryption interleaved here).
1012 gcmAesEncOctetsEnd:
1013 MOVOU (SP), X11
1014 MOVOU (DI), X8
1015 MOVOU 16(DI), X10
1016 MOVOU X8, X9
1017 PSHUFD $0x4e, X11, X12
1018 PXOR X11, X12
1019 PCLMULQDQ $0x00, X11, X8
1020 PCLMULQDQ $0x11, X11, X9
1021 PCLMULQDQ $0x00, X12, X10
1022 MOVOU 16(SP), X11
1023 MOVOU 32(DI), X12
1024 MOVOU X12, X13
1025 PCLMULQDQ $0x00, X11, X12
1026 PXOR X12, X8
1027 PCLMULQDQ $0x11, X11, X13
1028 PXOR X13, X9
1029 PSHUFD $0x4e, X11, X12
1030 PXOR X12, X11
1031 MOVOU 48(DI), X12
1032 PCLMULQDQ $0x00, X11, X12
1033 PXOR X12, X10
1034 MOVOU 32(SP), X11
1035 MOVOU 64(DI), X12
1036 MOVOU X12, X13
1037 PCLMULQDQ $0x00, X11, X12
1038 PXOR X12, X8
1039 PCLMULQDQ $0x11, X11, X13
1040 PXOR X13, X9
1041 PSHUFD $0x4e, X11, X12
1042 PXOR X12, X11
1043 MOVOU 80(DI), X12
1044 PCLMULQDQ $0x00, X11, X12
1045 PXOR X12, X10
1046 MOVOU 48(SP), X11
1047 MOVOU 96(DI), X12
1048 MOVOU X12, X13
1049 PCLMULQDQ $0x00, X11, X12
1050 PXOR X12, X8
1051 PCLMULQDQ $0x11, X11, X13
1052 PXOR X13, X9
1053 PSHUFD $0x4e, X11, X12
1054 PXOR X12, X11
1055 MOVOU 112(DI), X12
1056 PCLMULQDQ $0x00, X11, X12
1057 PXOR X12, X10
1058 MOVOU 64(SP), X11
1059 MOVOU 128(DI), X12
1060 MOVOU X12, X13
1061 PCLMULQDQ $0x00, X11, X12
1062 PXOR X12, X8
1063 PCLMULQDQ $0x11, X11, X13
1064 PXOR X13, X9
1065 PSHUFD $0x4e, X11, X12
1066 PXOR X12, X11
1067 MOVOU 144(DI), X12
1068 PCLMULQDQ $0x00, X11, X12
1069 PXOR X12, X10
1070 MOVOU 80(SP), X11
1071 MOVOU 160(DI), X12
1072 MOVOU X12, X13
1073 PCLMULQDQ $0x00, X11, X12
1074 PXOR X12, X8
1075 PCLMULQDQ $0x11, X11, X13
1076 PXOR X13, X9
1077 PSHUFD $0x4e, X11, X12
1078 PXOR X12, X11
1079 MOVOU 176(DI), X12
1080 PCLMULQDQ $0x00, X11, X12
1081 PXOR X12, X10
1082 MOVOU 96(SP), X11
1083 MOVOU 192(DI), X12
1084 MOVOU X12, X13
1085 PCLMULQDQ $0x00, X11, X12
1086 PXOR X12, X8
1087 PCLMULQDQ $0x11, X11, X13
1088 PXOR X13, X9
1089 PSHUFD $0x4e, X11, X12
1090 PXOR X12, X11
1091 MOVOU 208(DI), X12
1092 PCLMULQDQ $0x00, X11, X12
1093 PXOR X12, X10
1094 MOVOU 112(SP), X11
1095 MOVOU 224(DI), X12
1096 MOVOU X12, X13
1097 PCLMULQDQ $0x00, X11, X12
1098 PXOR X12, X8
1099 PCLMULQDQ $0x11, X11, X13
1100 PXOR X13, X9
1101 PSHUFD $0x4e, X11, X12
1102 PXOR X12, X11
1103 MOVOU 240(DI), X12
1104 PCLMULQDQ $0x00, X11, X12
1105 PXOR X12, X10
1106 PXOR X8, X10
1107 PXOR X9, X10
1108 MOVOU X10, X11
1109 PSRLDQ $0x08, X10
1110 PSLLDQ $0x08, X11
1111 PXOR X10, X9
1112 PXOR X11, X8
1113 MOVOU X14, X11
1114 PCLMULQDQ $0x01, X8, X11
1115 PSHUFD $0x4e, X8, X8
1116 PXOR X11, X8
1117 MOVOU X14, X11
1118 PCLMULQDQ $0x01, X8, X11
1119 PSHUFD $0x4e, X8, X8
1120 PXOR X11, X8
1121 PXOR X9, X8
1122 TESTQ R9, R9
1123 JE gcmAesEncDone
// Slots 1..7 were advanced ahead of time; from here on only slot 0 is used,
// so rewind R10 to the counter value currently stored in slot 0.
1124 SUBQ $0x07, R10
1125
// Single-block path: keep round keys 1..7 in X1..X7 and the table value at
// offset 224 in X13 across iterations.
1126 gcmAesEncSingles:
1127 MOVOU 16(AX), X1
1128 MOVOU 32(AX), X2
1129 MOVOU 48(AX), X3
1130 MOVOU 64(AX), X4
1131 MOVOU 80(AX), X5
1132 MOVOU 96(AX), X6
1133 MOVOU 112(AX), X7
1134 MOVOU 224(DI), X13
1135
1136 gcmAesEncSinglesLoop:
1137 CMPQ R9, $0x10
1138 JB gcmAesEncTail
1139 SUBQ $0x10, R9
1140 MOVOU 128(SP), X0 // current counter block (already XORed with round key 0)
1141 ADDL $0x01, R10 // advance slot 0 for the next block
1142 MOVL R10, R11
1143 XORL R12, R11
1144 BSWAPL R11
1145 MOVL R11, 140(SP)
1146 AESENC X1, X0
1147 AESENC X2, X0
1148 AESENC X3, X0
1149 AESENC X4, X0
1150 AESENC X5, X0
1151 AESENC X6, X0
1152 AESENC X7, X0
1153 MOVOU 128(AX), X11
1154 AESENC X11, X0
1155 MOVOU 144(AX), X11
1156 AESENC X11, X0
1157 MOVOU 160(AX), X11
1158 CMPQ R13, $0x0c
1159 JB encLast3
1160 AESENC X11, X0
1161 MOVOU 176(AX), X11
1162 AESENC X11, X0
1163 MOVOU 192(AX), X11
1164 JE encLast3 // flags still from the CMPQ above
1165 AESENC X11, X0
1166 MOVOU 208(AX), X11
1167 AESENC X11, X0
1168 MOVOU 224(AX), X11
1169
1170 encLast3:
1171 AESENCLAST X11, X0
1172 MOVOU (SI), X11
1173 PXOR X11, X0 // ciphertext = keystream ^ src
1174 MOVOU X0, (DX)
1175 PSHUFB X15, X0
1176 PXOR X8, X0 // fold in the accumulator, then multiply by X13 (companion at 240(DI))
1177 MOVOU X13, X8
1178 MOVOU X13, X9
1179 MOVOU 240(DI), X10
1180 PSHUFD $0x4e, X0, X11
1181 PXOR X0, X11
1182 PCLMULQDQ $0x00, X0, X8
1183 PCLMULQDQ $0x11, X0, X9
1184 PCLMULQDQ $0x00, X11, X10
1185 PXOR X8, X10
1186 PXOR X9, X10
1187 MOVOU X10, X11
1188 PSRLDQ $0x08, X10
1189 PSLLDQ $0x08, X11
1190 PXOR X10, X9
1191 PXOR X11, X8
1192 MOVOU X14, X11
1193 PCLMULQDQ $0x01, X8, X11
1194 PSHUFD $0x4e, X8, X8
1195 PXOR X11, X8
1196 MOVOU X14, X11
1197 PCLMULQDQ $0x01, X8, X11
1198 PSHUFD $0x4e, X8, X8
1199 PXOR X11, X8
1200 PXOR X9, X8
1201 LEAQ 16(SI), SI
1202 LEAQ 16(DX), DX
1203 JMP gcmAesEncSinglesLoop
1204
// Tail: 1..15 bytes remain (or none, in which case we are done).
1205 gcmAesEncTail:
1206 TESTQ R9, R9
1207 JE gcmAesEncDone
1208 MOVOU 128(SP), X0
1209 AESENC X1, X0
1210 AESENC X2, X0
1211 AESENC X3, X0
1212 AESENC X4, X0
1213 AESENC X5, X0
1214 AESENC X6, X0
1215 AESENC X7, X0
1216 MOVOU 128(AX), X11
1217 AESENC X11, X0
1218 MOVOU 144(AX), X11
1219 AESENC X11, X0
1220 MOVOU 160(AX), X11
1221 CMPQ R13, $0x0c
1222 JB encLast4
1223 AESENC X11, X0
1224 MOVOU 176(AX), X11
1225 AESENC X11, X0
1226 MOVOU 192(AX), X11
1227 JE encLast4 // flags still from the CMPQ above
1228 AESENC X11, X0
1229 MOVOU 208(AX), X11
1230 AESENC X11, X0
1231 MOVOU 224(AX), X11
1232
1233 encLast4:
1234 AESENCLAST X11, X0
1235 MOVOU X0, X11 // X11 = keystream block
1236 LEAQ -1(SI)(R9*1), SI // point at the last remaining src byte
1237 MOVQ R9, R11
1238 SHLQ $0x04, R11 // R11 = remaining * 16
1239 LEAQ andMask<>+0(SB), R10
1240 MOVOU -16(R10)(R11*1), X12 // X12 = andMask entry (remaining-1): low `remaining` bytes set
1241 PXOR X0, X0
1242
// Gather the remaining src bytes back to front into X0 (zero-padded).
1243 ptxLoadLoop:
1244 PSLLDQ $0x01, X0
1245 PINSRB $0x00, (SI), X0
1246 LEAQ -1(SI), SI // LEAQ leaves flags alone, so JNE below tests the DECQ result
1247 DECQ R9
1248 JNE ptxLoadLoop
1249 PXOR X11, X0 // encrypt
1250 PAND X12, X0 // zero the bytes beyond the remaining length
1251 MOVOU X0, (DX) // full 16-byte store to dst
1252 PSHUFB X15, X0
1253 PXOR X8, X0
1254 MOVOU X13, X8
1255 MOVOU X13, X9
1256 MOVOU 240(DI), X10
1257 PSHUFD $0x4e, X0, X11
1258 PXOR X0, X11
1259 PCLMULQDQ $0x00, X0, X8
1260 PCLMULQDQ $0x11, X0, X9
1261 PCLMULQDQ $0x00, X11, X10
1262 PXOR X8, X10
1263 PXOR X9, X10
1264 MOVOU X10, X11
1265 PSRLDQ $0x08, X10
1266 PSLLDQ $0x08, X11
1267 PXOR X10, X9
1268 PXOR X11, X8
1269 MOVOU X14, X11
1270 PCLMULQDQ $0x01, X8, X11
1271 PSHUFD $0x4e, X8, X8
1272 PXOR X11, X8
1273 MOVOU X14, X11
1274 PCLMULQDQ $0x01, X8, X11
1275 PSHUFD $0x4e, X8, X8
1276 PXOR X11, X8
1277 PXOR X9, X8
1278
1279 gcmAesEncDone:
1280 MOVOU X8, (R8) // write the accumulator back to T
1281 RET
1282
// andMask is a read-only table of fifteen 16-byte masks (240 bytes). Entry k
// (0-based, at byte offset 16*k) has its low k+1 bytes set to 0xff and the
// remaining bytes zero. gcmAesEnc indexes it with (remaining*16 - 16) to keep
// only the first `remaining` bytes of the final partial block.
1283 DATA andMask<>+0(SB)/8, $0x00000000000000ff
1284 DATA andMask<>+8(SB)/8, $0x0000000000000000
1285 DATA andMask<>+16(SB)/8, $0x000000000000ffff
1286 DATA andMask<>+24(SB)/8, $0x0000000000000000
1287 DATA andMask<>+32(SB)/8, $0x0000000000ffffff
1288 DATA andMask<>+40(SB)/8, $0x0000000000000000
1289 DATA andMask<>+48(SB)/8, $0x00000000ffffffff
1290 DATA andMask<>+56(SB)/8, $0x0000000000000000
1291 DATA andMask<>+64(SB)/8, $0x000000ffffffffff
1292 DATA andMask<>+72(SB)/8, $0x0000000000000000
1293 DATA andMask<>+80(SB)/8, $0x0000ffffffffffff
1294 DATA andMask<>+88(SB)/8, $0x0000000000000000
1295 DATA andMask<>+96(SB)/8, $0x00ffffffffffffff
1296 DATA andMask<>+104(SB)/8, $0x0000000000000000
1297 DATA andMask<>+112(SB)/8, $0xffffffffffffffff
1298 DATA andMask<>+120(SB)/8, $0x0000000000000000
1299 DATA andMask<>+128(SB)/8, $0xffffffffffffffff
1300 DATA andMask<>+136(SB)/8, $0x00000000000000ff
1301 DATA andMask<>+144(SB)/8, $0xffffffffffffffff
1302 DATA andMask<>+152(SB)/8, $0x000000000000ffff
1303 DATA andMask<>+160(SB)/8, $0xffffffffffffffff
1304 DATA andMask<>+168(SB)/8, $0x0000000000ffffff
1305 DATA andMask<>+176(SB)/8, $0xffffffffffffffff
1306 DATA andMask<>+184(SB)/8, $0x00000000ffffffff
1307 DATA andMask<>+192(SB)/8, $0xffffffffffffffff
1308 DATA andMask<>+200(SB)/8, $0x000000ffffffffff
1309 DATA andMask<>+208(SB)/8, $0xffffffffffffffff
1310 DATA andMask<>+216(SB)/8, $0x0000ffffffffffff
1311 DATA andMask<>+224(SB)/8, $0xffffffffffffffff
1312 DATA andMask<>+232(SB)/8, $0x00ffffffffffffff
1313 GLOBL andMask<>(SB), RODATA|NOPTR, $240
1314
1315 // func gcmAesDec(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
1316 // Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
1317 TEXT ·gcmAesDec(SB), $128-96
1318 MOVQ productTable+0(FP), DI
1319 MOVQ dst_base+8(FP), SI
1320 MOVQ src_base+32(FP), DX
1321 MOVQ src_len+40(FP), R9
1322 MOVQ ctr+56(FP), CX
1323 MOVQ T+64(FP), R8
1324 MOVQ ks_base+72(FP), AX
1325 MOVQ ks_len+80(FP), R13
1326 SHRQ $0x02, R13
1327 DECQ R13
1328 MOVOU bswapMask<>+0(SB), X15
1329 MOVOU gcmPoly<>+0(SB), X14
1330 MOVOU (R8), X8
1331 PXOR X9, X9
1332 PXOR X10, X10
1333 MOVOU (CX), X0
1334 MOVL 12(CX), R10
1335 MOVOU (AX), X11
1336 MOVL 12(AX), R12
1337 BSWAPL R10
1338 BSWAPL R12
1339 PXOR X0, X11
1340 MOVOU X11, (SP)
1341 ADDL $0x01, R10
1342 MOVL R10, R11
1343 XORL R12, R11
1344 BSWAPL R11
1345 MOVL R11, 12(SP)
1346 CMPQ R9, $0x80
1347 JB gcmAesDecSingles
1348 MOVOU X11, 16(SP)
1349 ADDL $0x01, R10
1350 MOVL R10, R11
1351 XORL R12, R11
1352 BSWAPL R11
1353 MOVL R11, 28(SP)
1354 MOVOU X11, 32(SP)
1355 ADDL $0x01, R10
1356 MOVL R10, R11
1357 XORL R12, R11
1358 BSWAPL R11
1359 MOVL R11, 44(SP)
1360 MOVOU X11, 48(SP)
1361 ADDL $0x01, R10
1362 MOVL R10, R11
1363 XORL R12, R11
1364 BSWAPL R11
1365 MOVL R11, 60(SP)
1366 MOVOU X11, 64(SP)
1367 ADDL $0x01, R10
1368 MOVL R10, R11
1369 XORL R12, R11
1370 BSWAPL R11
1371 MOVL R11, 76(SP)
1372 MOVOU X11, 80(SP)
1373 ADDL $0x01, R10
1374 MOVL R10, R11
1375 XORL R12, R11
1376 BSWAPL R11
1377 MOVL R11, 92(SP)
1378 MOVOU X11, 96(SP)
1379 ADDL $0x01, R10
1380 MOVL R10, R11
1381 XORL R12, R11
1382 BSWAPL R11
1383 MOVL R11, 108(SP)
1384 MOVOU X11, 112(SP)
1385 ADDL $0x01, R10
1386 MOVL R10, R11
1387 XORL R12, R11
1388 BSWAPL R11
1389 MOVL R11, 124(SP)
1390
1391 gcmAesDecOctetsLoop:
1392 CMPQ R9, $0x80
1393 JB gcmAesDecEndOctets
1394 SUBQ $0x80, R9
1395 MOVOU (SP), X0
1396 MOVOU 16(SP), X1
1397 MOVOU 32(SP), X2
1398 MOVOU 48(SP), X3
1399 MOVOU 64(SP), X4
1400 MOVOU 80(SP), X5
1401 MOVOU 96(SP), X6
1402 MOVOU 112(SP), X7
1403 MOVOU (DX), X11
1404 PSHUFB X15, X11
1405 PXOR X8, X11
1406 PSHUFD $0x4e, X11, X12
1407 PXOR X11, X12
1408 MOVOU (DI), X8
1409 MOVOU 16(DI), X10
1410 MOVOU X8, X9
1411 PCLMULQDQ $0x00, X12, X10
1412 PCLMULQDQ $0x00, X11, X8
1413 PCLMULQDQ $0x11, X11, X9
1414 MOVOU 16(AX), X11
1415 AESENC X11, X0
1416 AESENC X11, X1
1417 AESENC X11, X2
1418 AESENC X11, X3
1419 MOVOU 32(DI), X12
1420 MOVOU X12, X13
1421 AESENC X11, X4
1422 AESENC X11, X5
1423 AESENC X11, X6
1424 AESENC X11, X7
1425 MOVOU 16(DX), X11
1426 PSHUFB X15, X11
1427 PCLMULQDQ $0x00, X11, X12
1428 PXOR X12, X8
1429 PSHUFD $0x4e, X11, X12
1430 PCLMULQDQ $0x11, X11, X13
1431 PXOR X12, X11
1432 PXOR X13, X9
1433 MOVOU 48(DI), X13
1434 PCLMULQDQ $0x00, X13, X11
1435 PXOR X11, X10
1436 ADDL $0x01, R10
1437 MOVL R10, R11
1438 XORL R12, R11
1439 BSWAPL R11
1440 MOVL R11, 12(SP)
1441 MOVOU 32(AX), X11
1442 AESENC X11, X0
1443 AESENC X11, X1
1444 AESENC X11, X2
1445 AESENC X11, X3
1446 MOVOU 64(DI), X12
1447 MOVOU X12, X13
1448 AESENC X11, X4
1449 AESENC X11, X5
1450 AESENC X11, X6
1451 AESENC X11, X7
1452 MOVOU 32(DX), X11
1453 PSHUFB X15, X11
1454 PCLMULQDQ $0x00, X11, X12
1455 PXOR X12, X8
1456 PSHUFD $0x4e, X11, X12
1457 PCLMULQDQ $0x11, X11, X13
1458 PXOR X12, X11
1459 PXOR X13, X9
1460 MOVOU 80(DI), X13
1461 PCLMULQDQ $0x00, X13, X11
1462 PXOR X11, X10
1463 ADDL $0x01, R10
1464 MOVL R10, R11
1465 XORL R12, R11
1466 BSWAPL R11
1467 MOVL R11, 28(SP)
1468 MOVOU 48(AX), X11
1469 AESENC X11, X0
1470 AESENC X11, X1
1471 AESENC X11, X2
1472 AESENC X11, X3
1473 MOVOU 96(DI), X12
1474 MOVOU X12, X13
1475 AESENC X11, X4
1476 AESENC X11, X5
1477 AESENC X11, X6
1478 AESENC X11, X7
1479 MOVOU 48(DX), X11
1480 PSHUFB X15, X11
1481 PCLMULQDQ $0x00, X11, X12
1482 PXOR X12, X8
1483 PSHUFD $0x4e, X11, X12
1484 PCLMULQDQ $0x11, X11, X13
1485 PXOR X12, X11
1486 PXOR X13, X9
1487 MOVOU 112(DI), X13
1488 PCLMULQDQ $0x00, X13, X11
1489 PXOR X11, X10
1490 ADDL $0x01, R10
1491 MOVL R10, R11
1492 XORL R12, R11
1493 BSWAPL R11
1494 MOVL R11, 44(SP)
1495 MOVOU 64(AX), X11
1496 AESENC X11, X0
1497 AESENC X11, X1
1498 AESENC X11, X2
1499 AESENC X11, X3
1500 MOVOU 128(DI), X12
1501 MOVOU X12, X13
1502 AESENC X11, X4
1503 AESENC X11, X5
1504 AESENC X11, X6
1505 AESENC X11, X7
1506 MOVOU 64(DX), X11
1507 PSHUFB X15, X11
1508 PCLMULQDQ $0x00, X11, X12
1509 PXOR X12, X8
1510 PSHUFD $0x4e, X11, X12
1511 PCLMULQDQ $0x11, X11, X13
1512 PXOR X12, X11
1513 PXOR X13, X9
1514 MOVOU 144(DI), X13
1515 PCLMULQDQ $0x00, X13, X11
1516 PXOR X11, X10
1517 ADDL $0x01, R10
1518 MOVL R10, R11
1519 XORL R12, R11
1520 BSWAPL R11
1521 MOVL R11, 60(SP)
1522 MOVOU 80(AX), X11
1523 AESENC X11, X0
1524 AESENC X11, X1
1525 AESENC X11, X2
1526 AESENC X11, X3
1527 MOVOU 160(DI), X12
1528 MOVOU X12, X13
1529 AESENC X11, X4
1530 AESENC X11, X5
1531 AESENC X11, X6
1532 AESENC X11, X7
1533 MOVOU 80(DX), X11
1534 PSHUFB X15, X11
1535 PCLMULQDQ $0x00, X11, X12
1536 PXOR X12, X8
1537 PSHUFD $0x4e, X11, X12
1538 PCLMULQDQ $0x11, X11, X13
1539 PXOR X12, X11
1540 PXOR X13, X9
1541 MOVOU 176(DI), X13
1542 PCLMULQDQ $0x00, X13, X11
1543 PXOR X11, X10
1544 ADDL $0x01, R10
1545 MOVL R10, R11
1546 XORL R12, R11
1547 BSWAPL R11
1548 MOVL R11, 76(SP)
1549 MOVOU 96(AX), X11
1550 AESENC X11, X0
1551 AESENC X11, X1
1552 AESENC X11, X2
1553 AESENC X11, X3
1554 MOVOU 192(DI), X12
1555 MOVOU X12, X13
1556 AESENC X11, X4
1557 AESENC X11, X5
1558 AESENC X11, X6
1559 AESENC X11, X7
1560 MOVOU 96(DX), X11
1561 PSHUFB X15, X11
1562 PCLMULQDQ $0x00, X11, X12
1563 PXOR X12, X8
1564 PSHUFD $0x4e, X11, X12
1565 PCLMULQDQ $0x11, X11, X13
1566 PXOR X12, X11
1567 PXOR X13, X9
1568 MOVOU 208(DI), X13
1569 PCLMULQDQ $0x00, X13, X11
1570 PXOR X11, X10
1571 ADDL $0x01, R10
1572 MOVL R10, R11
1573 XORL R12, R11
1574 BSWAPL R11
1575 MOVL R11, 92(SP)
1576 MOVOU 112(AX), X11
1577 AESENC X11, X0
1578 AESENC X11, X1
1579 AESENC X11, X2
1580 AESENC X11, X3
1581 MOVOU 224(DI), X12
1582 MOVOU X12, X13
1583 AESENC X11, X4
1584 AESENC X11, X5
1585 AESENC X11, X6
1586 AESENC X11, X7
1587 MOVOU 112(DX), X11
1588 PSHUFB X15, X11
1589 PCLMULQDQ $0x00, X11, X12
1590 PXOR X12, X8
1591 PSHUFD $0x4e, X11, X12
1592 PCLMULQDQ $0x11, X11, X13
1593 PXOR X12, X11
1594 PXOR X13, X9
1595 MOVOU 240(DI), X13
1596 PCLMULQDQ $0x00, X13, X11
1597 PXOR X11, X10
1598 ADDL $0x01, R10
1599 MOVL R10, R11
1600 XORL R12, R11
1601 BSWAPL R11
1602 MOVL R11, 108(SP)
1603 MOVOU 128(AX), X11
1604 AESENC X11, X0
1605 AESENC X11, X1
1606 AESENC X11, X2
1607 AESENC X11, X3
1608 AESENC X11, X4
1609 AESENC X11, X5
1610 AESENC X11, X6
1611 AESENC X11, X7
1612 ADDL $0x01, R10
1613 MOVL R10, R11
1614 XORL R12, R11
1615 BSWAPL R11
1616 MOVL R11, 124(SP)
1617 PXOR X8, X10
1618 PXOR X9, X10
1619 MOVOU X10, X11
1620 PSRLDQ $0x08, X10
1621 PSLLDQ $0x08, X11
1622 PXOR X10, X9
1623 PXOR X11, X8
1624 MOVOU X14, X11
1625 PCLMULQDQ $0x01, X8, X11
1626 PSHUFD $0x4e, X8, X8
1627 PXOR X11, X8
1628 MOVOU 144(AX), X11
1629 AESENC X11, X0
1630 AESENC X11, X1
1631 AESENC X11, X2
1632 AESENC X11, X3
1633 AESENC X11, X4
1634 AESENC X11, X5
1635 AESENC X11, X6
1636 AESENC X11, X7
1637 MOVOU X14, X11
1638 PCLMULQDQ $0x01, X8, X11
1639 PSHUFD $0x4e, X8, X8
1640 PXOR X11, X8
1641 PXOR X9, X8
1642 MOVOU 160(AX), X11
1643 CMPQ R13, $0x0c
1644 JB decLast1
1645 AESENC X11, X0
1646 AESENC X11, X1
1647 AESENC X11, X2
1648 AESENC X11, X3
1649 AESENC X11, X4
1650 AESENC X11, X5
1651 AESENC X11, X6
1652 AESENC X11, X7
1653 MOVOU 176(AX), X11
1654 AESENC X11, X0
1655 AESENC X11, X1
1656 AESENC X11, X2
1657 AESENC X11, X3
1658 AESENC X11, X4
1659 AESENC X11, X5
1660 AESENC X11, X6
1661 AESENC X11, X7
1662 MOVOU 192(AX), X11
1663 JE decLast1
1664 AESENC X11, X0
1665 AESENC X11, X1
1666 AESENC X11, X2
1667 AESENC X11, X3
1668 AESENC X11, X4
1669 AESENC X11, X5
1670 AESENC X11, X6
1671 AESENC X11, X7
1672 MOVOU 208(AX), X11
1673 AESENC X11, X0
1674 AESENC X11, X1
1675 AESENC X11, X2
1676 AESENC X11, X3
1677 AESENC X11, X4
1678 AESENC X11, X5
1679 AESENC X11, X6
1680 AESENC X11, X7
1681 MOVOU 224(AX), X11
1682
1683 decLast1:
1684 AESENCLAST X11, X0
1685 AESENCLAST X11, X1
1686 AESENCLAST X11, X2
1687 AESENCLAST X11, X3
1688 AESENCLAST X11, X4
1689 AESENCLAST X11, X5
1690 AESENCLAST X11, X6
1691 AESENCLAST X11, X7
1692 MOVOU (DX), X11
1693 PXOR X11, X0
1694 MOVOU 16(DX), X11
1695 PXOR X11, X1
1696 MOVOU 32(DX), X11
1697 PXOR X11, X2
1698 MOVOU 48(DX), X11
1699 PXOR X11, X3
1700 MOVOU 64(DX), X11
1701 PXOR X11, X4
1702 MOVOU 80(DX), X11
1703 PXOR X11, X5
1704 MOVOU 96(DX), X11
1705 PXOR X11, X6
1706 MOVOU 112(DX), X11
1707 PXOR X11, X7
1708 MOVOU X0, (SI)
1709 MOVOU X1, 16(SI)
1710 MOVOU X2, 32(SI)
1711 MOVOU X3, 48(SI)
1712 MOVOU X4, 64(SI)
1713 MOVOU X5, 80(SI)
1714 MOVOU X6, 96(SI)
1715 MOVOU X7, 112(SI)
1716 LEAQ 128(SI), SI
1717 LEAQ 128(DX), DX
1718 JMP gcmAesDecOctetsLoop
1719
1720 gcmAesDecEndOctets:
1721 SUBQ $0x07, R10
1722
1723 gcmAesDecSingles:
1724 MOVOU 16(AX), X1
1725 MOVOU 32(AX), X2
1726 MOVOU 48(AX), X3
1727 MOVOU 64(AX), X4
1728 MOVOU 80(AX), X5
1729 MOVOU 96(AX), X6
1730 MOVOU 112(AX), X7
1731 MOVOU 224(DI), X13
1732
1733 gcmAesDecSinglesLoop:
1734 CMPQ R9, $0x10
1735 JB gcmAesDecTail
1736 SUBQ $0x10, R9
1737 MOVOU (DX), X0
1738 MOVOU X0, X12
1739 PSHUFB X15, X0
1740 PXOR X8, X0
1741 MOVOU X13, X8
1742 MOVOU X13, X9
1743 MOVOU 240(DI), X10
1744 PCLMULQDQ $0x00, X0, X8
1745 PCLMULQDQ $0x11, X0, X9
1746 PSHUFD $0x4e, X0, X11
1747 PXOR X0, X11
1748 PCLMULQDQ $0x00, X11, X10
1749 PXOR X8, X10
1750 PXOR X9, X10
1751 MOVOU X10, X11
1752 PSRLDQ $0x08, X10
1753 PSLLDQ $0x08, X11
1754 PXOR X10, X9
1755 PXOR X11, X8
1756 MOVOU X14, X11
1757 PCLMULQDQ $0x01, X8, X11
1758 PSHUFD $0x4e, X8, X8
1759 PXOR X11, X8
1760 MOVOU X14, X11
1761 PCLMULQDQ $0x01, X8, X11
1762 PSHUFD $0x4e, X8, X8
1763 PXOR X11, X8
1764 PXOR X9, X8
1765 MOVOU (SP), X0
1766 ADDL $0x01, R10
1767 MOVL R10, R11
1768 XORL R12, R11
1769 BSWAPL R11
1770 MOVL R11, 12(SP)
1771 AESENC X1, X0
1772 AESENC X2, X0
1773 AESENC X3, X0
1774 AESENC X4, X0
1775 AESENC X5, X0
1776 AESENC X6, X0
1777 AESENC X7, X0
1778 MOVOU 128(AX), X11
1779 AESENC X11, X0
1780 MOVOU 144(AX), X11
1781 AESENC X11, X0
1782 MOVOU 160(AX), X11
1783 CMPQ R13, $0x0c
1784 JB decLast2
1785 AESENC X11, X0
1786 MOVOU 176(AX), X11
1787 AESENC X11, X0
1788 MOVOU 192(AX), X11
1789 JE decLast2
1790 AESENC X11, X0
1791 MOVOU 208(AX), X11
1792 AESENC X11, X0
1793 MOVOU 224(AX), X11
1794
1795 decLast2:
1796 AESENCLAST X11, X0
1797 PXOR X12, X0
1798 MOVOU X0, (SI)
1799 LEAQ 16(SI), SI
1800 LEAQ 16(DX), DX
1801 JMP gcmAesDecSinglesLoop
1802
1803 gcmAesDecTail:
1804 TESTQ R9, R9
1805 JE gcmAesDecDone
1806 MOVQ R9, R11
1807 SHLQ $0x04, R11
1808 LEAQ andMask<>+0(SB), R10
1809 MOVOU -16(R10)(R11*1), X12
1810 MOVOU (DX), X0
1811 PAND X12, X0
1812 MOVOU X0, X12
1813 PSHUFB X15, X0
1814 PXOR X8, X0
1815 MOVOU 224(DI), X8
1816 MOVOU 240(DI), X10
1817 MOVOU X8, X9
1818 PCLMULQDQ $0x00, X0, X8
1819 PCLMULQDQ $0x11, X0, X9
1820 PSHUFD $0x4e, X0, X11
1821 PXOR X0, X11
1822 PCLMULQDQ $0x00, X11, X10
1823 PXOR X8, X10
1824 PXOR X9, X10
1825 MOVOU X10, X11
1826 PSRLDQ $0x08, X10
1827 PSLLDQ $0x08, X11
1828 PXOR X10, X9
1829 PXOR X11, X8
1830 MOVOU X14, X11
1831 PCLMULQDQ $0x01, X8, X11
1832 PSHUFD $0x4e, X8, X8
1833 PXOR X11, X8
1834 MOVOU X14, X11
1835 PCLMULQDQ $0x01, X8, X11
1836 PSHUFD $0x4e, X8, X8
1837 PXOR X11, X8
1838 PXOR X9, X8
1839 MOVOU (SP), X0
1840 ADDL $0x01, R10
1841 MOVL R10, R11
1842 XORL R12, R11
1843 BSWAPL R11
1844 MOVL R11, 12(SP)
1845 AESENC X1, X0
1846 AESENC X2, X0
1847 AESENC X3, X0
1848 AESENC X4, X0
1849 AESENC X5, X0
1850 AESENC X6, X0
1851 AESENC X7, X0
1852 MOVOU 128(AX), X11
1853 AESENC X11, X0
1854 MOVOU 144(AX), X11
1855 AESENC X11, X0
1856 MOVOU 160(AX), X11
1857 CMPQ R13, $0x0c
1858 JB decLast3
1859 AESENC X11, X0
1860 MOVOU 176(AX), X11
1861 AESENC X11, X0
1862 MOVOU 192(AX), X11
1863 JE decLast3
1864 AESENC X11, X0
1865 MOVOU 208(AX), X11
1866 AESENC X11, X0
1867 MOVOU 224(AX), X11
1868
1869 decLast3:
1870 AESENCLAST X11, X0
1871 PXOR X12, X0
1872
1873 ptxStoreLoop:
1874 PEXTRB $0x00, X0, (SI)
1875 PSRLDQ $0x01, X0
1876 LEAQ 1(SI), SI
1877 DECQ R9
1878 JNE ptxStoreLoop
1879
1880 gcmAesDecDone:
1881 MOVOU X8, (R8)
1882 RET
1883
View as plain text