// Code generated by command: go run gcm_amd64_asm.go -out ../../gcm_amd64.s -pkg aes. DO NOT EDIT.

//go:build !purego

#include "textflag.h"

// Register conventions shared by every routine in this file:
//   X15 holds bswapMask, X14 holds gcmPoly.
//   X8 / X9 / X10 are the low / high / middle accumulators of a Karatsuba-style
//   carry-less multiplication; after the two reduction steps the folded result
//   lives in X8.
//   productTable is read and written in 32-byte pairs: a 16-byte value followed
//   by the XOR of its two 64-bit halves (see gcmAesInit).

// gcmAesFinish combines the two length arguments with the running hash in T,
// performs one multiply/reduce against the table pair at offsets 224/240,
// byte-reverses the result, XORs it with tagMask and writes it back to T.
//
// func gcmAesFinish(productTable *[256]byte, tagMask *[16]byte, T *[16]byte, pLen uint64, dLen uint64)
// Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
TEXT ·gcmAesFinish(SB), NOSPLIT, $0-40
	MOVQ productTable+0(FP), DI
	MOVQ tagMask+8(FP), SI
	MOVQ T+16(FP), DX
	MOVQ pLen+24(FP), AX
	MOVQ dLen+32(FP), CX
	MOVOU (DX), X8
	MOVOU (SI), X13
	MOVOU bswapMask<>+0(SB), X15
	MOVOU gcmPoly<>+0(SB), X14

	// Multiply both lengths by 8 (presumably bytes -> bits) and pack them into
	// one block: pLen*8 in the low qword, dLen*8 in the high qword.
	SHLQ $0x03, AX
	SHLQ $0x03, CX
	MOVQ AX, X0
	PINSRQ $0x01, CX, X0

	// Fold the length block into the running hash and multiply.
	PXOR X8, X0
	MOVOU 224(DI), X8
	MOVOU 240(DI), X10
	MOVOU X8, X9
	PCLMULQDQ $0x00, X0, X8
	PCLMULQDQ $0x11, X0, X9
	PSHUFD $0x4e, X0, X11
	PXOR X0, X11
	PCLMULQDQ $0x00, X11, X10
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8

	// Two reduction steps using gcmPoly, then fold in the high half.
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8

	// Byte-reverse, mask with tagMask and store the result over T.
	PSHUFB X15, X8
	PXOR X13, X8
	MOVOU X8, (DX)
	RET

// bswapMask is a PSHUFB control whose byte i selects source byte 15-i, i.e. it
// reverses the byte order of a 16-byte block.
DATA bswapMask<>+0(SB)/8, $0x08090a0b0c0d0e0f
DATA bswapMask<>+8(SB)/8, $0x0001020304050607
GLOBL bswapMask<>(SB), RODATA|NOPTR, $16

// gcmPoly is the constant used by the PCLMULQDQ-based reduction steps.
DATA gcmPoly<>+0(SB)/8, $0x0000000000000001
DATA gcmPoly<>+8(SB)/8, $0xc200000000000000
GLOBL gcmPoly<>(SB), RODATA|NOPTR, $16

// gcmAesInit derives the hash key by encrypting the all-zero block with the
// key schedule ks, and fills productTable with eight 32-byte entries. The
// first entry computed is written at offset 224; each loop iteration writes
// the next product 32 bytes lower, so the last one lands at offset 0.
//
// func gcmAesInit(productTable *[256]byte, ks []uint32)
// Requires: AES, PCLMULQDQ, SSE2, SSSE3
TEXT ·gcmAesInit(SB), NOSPLIT, $0-32
	MOVQ productTable+0(FP), DI
	MOVQ ks_base+8(FP), SI
	MOVQ ks_len+16(FP), DX

	// DX = len(ks)/4 - 1: the number of 16-byte round keys minus one. It is
	// compared with 12 below to choose how many AESENC rounds to run.
	SHRQ $0x02, DX
	DECQ DX
	MOVOU bswapMask<>+0(SB), X15
	MOVOU gcmPoly<>+0(SB), X14

	// Encrypt block 0, with the AES key to generate the hash key H
	// (loading round key 0 directly is the zero block XOR round key 0).
	MOVOU (SI), X0
	MOVOU 16(SI), X11
	AESENC X11, X0
	MOVOU 32(SI), X11
	AESENC X11, X0
	MOVOU 48(SI), X11
	AESENC X11, X0
	MOVOU 64(SI), X11
	AESENC X11, X0
	MOVOU 80(SI), X11
	AESENC X11, X0
	MOVOU 96(SI), X11
	AESENC X11, X0
	MOVOU 112(SI), X11
	AESENC X11, X0
	MOVOU 128(SI), X11
	AESENC X11, X0
	MOVOU 144(SI), X11
	AESENC X11, X0
	MOVOU 160(SI), X11

	// The JE further down still tests the flags of this CMPQ: the AESENC and
	// MOVOU instructions in between do not modify the flags.
	CMPQ DX, $0x0c
	JB initEncLast
	AESENC X11, X0
	MOVOU 176(SI), X11
	AESENC X11, X0
	MOVOU 192(SI), X11
	JE initEncLast
	AESENC X11, X0
	MOVOU 208(SI), X11
	AESENC X11, X0
	MOVOU 224(SI), X11

initEncLast:
	AESENCLAST X11, X0
	PSHUFB X15, X0

	// H * 2
	PSHUFD $0xff, X0, X11
	MOVOU X0, X12
	PSRAL $0x1f, X11
	PAND X14, X11
	PSRLL $0x1f, X12
	PSLLDQ $0x04, X12
	PSLLL $0x01, X0
	PXOR X11, X0
	PXOR X12, X0

	// Karatsuba pre-computations
	// (the value itself at 224, the XOR of its two 64-bit halves at 240).
	MOVOU X0, 224(DI)
	PSHUFD $0x4e, X0, X1
	PXOR X0, X1
	MOVOU X1, 240(DI)
	MOVOU X0, X2
	MOVOU X1, X3

	// Now prepare powers of H and pre-computations for them
	MOVQ $0x00000007, AX

initLoop:
	// Multiply the previous entry (X2, X3) by the first entry (X0, X1).
	MOVOU X2, X11
	MOVOU X2, X12
	MOVOU X3, X13
	PCLMULQDQ $0x00, X0, X11
	PCLMULQDQ $0x11, X0, X12
	PCLMULQDQ $0x00, X1, X13
	PXOR X11, X13
	PXOR X12, X13
	MOVOU X13, X4
	PSLLDQ $0x08, X4
	PSRLDQ $0x08, X13
	PXOR X4, X11
	PXOR X13, X12

	// Reduce; the new entry ends up in X2.
	MOVOU X14, X2
	PCLMULQDQ $0x01, X11, X2
	PSHUFD $0x4e, X11, X11
	PXOR X2, X11
	MOVOU X14, X2
	PCLMULQDQ $0x01, X11, X2
	PSHUFD $0x4e, X11, X11
	PXOR X11, X2
	PXOR X12, X2
	MOVOU X2, 192(DI)
	PSHUFD $0x4e, X2, X3
	PXOR X2, X3
	MOVOU X3, 208(DI)

	// LEAQ leaves the flags alone, so JNE tests the result of DECQ.
	DECQ AX
	LEAQ -32(DI), DI
	JNE initLoop
	RET

// gcmAesData hashes data into an accumulator that starts at zero (T is not
// read) and stores the resulting 16 bytes to T. Input is consumed in 128-byte
// groups, then 16-byte blocks, then a zero-padded partial block. A length of
// exactly 13 bytes takes a dedicated path.
//
// func gcmAesData(productTable *[256]byte, data []byte, T *[16]byte)
// Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
TEXT ·gcmAesData(SB), NOSPLIT, $0-40
	MOVQ productTable+0(FP), DI
	MOVQ data_base+8(FP), SI
	MOVQ data_len+16(FP), DX
	MOVQ T+32(FP), CX
	PXOR X8, X8
	MOVOU bswapMask<>+0(SB), X15
	MOVOU gcmPoly<>+0(SB), X14
	TESTQ DX, DX
	JEQ dataBail
	CMPQ DX, $0x0d
	JE dataTLS
	CMPQ DX, $0x80
	JB startSinglesLoop
	JMP dataOctaLoop

dataTLS:
	// Exactly 13 bytes: assemble them with 8+4+1 byte loads instead of the
	// byte-at-a-time loop. NOTE(review): the label suggests this is the TLS
	// additional-data length - confirm against the caller.
	MOVOU 224(DI), X12
	MOVOU 240(DI), X13
	PXOR X0, X0
	MOVQ (SI), X0
	PINSRD $0x02, 8(SI), X0
	PINSRB $0x0c, 12(SI), X0
	XORQ DX, DX
	JMP dataMul

dataOctaLoop:
	CMPQ DX, $0x80
	JB startSinglesLoop
	SUBQ $0x80, DX

	// Load eight blocks and byte-reverse each of them.
	MOVOU (SI), X0
	MOVOU 16(SI), X1
	MOVOU 32(SI), X2
	MOVOU 48(SI), X3
	MOVOU 64(SI), X4
	MOVOU 80(SI), X5
	MOVOU 96(SI), X6
	MOVOU 112(SI), X7
	LEAQ 128(SI), SI
	PSHUFB X15, X0
	PSHUFB X15, X1
	PSHUFB X15, X2
	PSHUFB X15, X3
	PSHUFB X15, X4
	PSHUFB X15, X5
	PSHUFB X15, X6
	PSHUFB X15, X7

	// Block 0 absorbs the running hash and is multiplied by table entry 0;
	// blocks 1..7 use entries at 32, 64, ..., 224. The eight unreduced products
	// are summed in X8/X9/X10 and reduced once.
	PXOR X8, X0
	MOVOU (DI), X8
	MOVOU 16(DI), X10
	MOVOU X8, X9
	PSHUFD $0x4e, X0, X12
	PXOR X0, X12
	PCLMULQDQ $0x00, X0, X8
	PCLMULQDQ $0x11, X0, X9
	PCLMULQDQ $0x00, X12, X10
	MOVOU 32(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X1, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X1, X13
	PXOR X13, X9
	PSHUFD $0x4e, X1, X12
	PXOR X12, X1
	MOVOU 48(DI), X12
	PCLMULQDQ $0x00, X1, X12
	PXOR X12, X10
	MOVOU 64(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X2, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X2, X13
	PXOR X13, X9
	PSHUFD $0x4e, X2, X12
	PXOR X12, X2
	MOVOU 80(DI), X12
	PCLMULQDQ $0x00, X2, X12
	PXOR X12, X10
	MOVOU 96(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X3, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X3, X13
	PXOR X13, X9
	PSHUFD $0x4e, X3, X12
	PXOR X12, X3
	MOVOU 112(DI), X12
	PCLMULQDQ $0x00, X3, X12
	PXOR X12, X10
	MOVOU 128(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X4, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X4, X13
	PXOR X13, X9
	PSHUFD $0x4e, X4, X12
	PXOR X12, X4
	MOVOU 144(DI), X12
	PCLMULQDQ $0x00, X4, X12
	PXOR X12, X10
	MOVOU 160(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X5, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X5, X13
	PXOR X13, X9
	PSHUFD $0x4e, X5, X12
	PXOR X12, X5
	MOVOU 176(DI), X12
	PCLMULQDQ $0x00, X5, X12
	PXOR X12, X10
	MOVOU 192(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X6, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X6, X13
	PXOR X13, X9
	PSHUFD $0x4e, X6, X12
	PXOR X12, X6
	MOVOU 208(DI), X12
	PCLMULQDQ $0x00, X6, X12
	PXOR X12, X10
	MOVOU 224(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X7, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X7, X13
	PXOR X13, X9
	PSHUFD $0x4e, X7, X12
	PXOR X12, X7
	MOVOU 240(DI), X12
	PCLMULQDQ $0x00, X7, X12
	PXOR X12, X10
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8
	JMP dataOctaLoop

startSinglesLoop:
	// Keep the table pair at 224/240 in X12/X13 for the one-block multiplies.
	MOVOU 224(DI), X12
	MOVOU 240(DI), X13

dataSinglesLoop:
	CMPQ DX, $0x10
	JB dataEnd
	SUBQ $0x10, DX
	MOVOU (SI), X0

dataMul:
	// Shared multiply/reduce for one block held in X0. It is also entered from
	// dataTLS and from the partial-block path, both with DX == 0, so the loop
	// test that follows falls through to dataEnd and then dataBail.
	PSHUFB X15, X0
	PXOR X8, X0
	MOVOU X12, X8
	MOVOU X13, X10
	MOVOU X12, X9
	PSHUFD $0x4e, X0, X11
	PXOR X0, X11
	PCLMULQDQ $0x00, X0, X8
	PCLMULQDQ $0x11, X0, X9
	PCLMULQDQ $0x00, X11, X10
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8
	LEAQ 16(SI), SI
	JMP dataSinglesLoop

dataEnd:
	TESTQ DX, DX
	JEQ dataBail

	// Fewer than 16 bytes remain: build a zero-padded block one byte at a time,
	// walking backwards from the last byte so no read goes past the input.
	PXOR X0, X0
	LEAQ -1(SI)(DX*1), SI

dataLoadLoop:
	PSLLDQ $0x01, X0
	PINSRB $0x00, (SI), X0
	LEAQ -1(SI), SI
	DECQ DX
	JNE dataLoadLoop
	JMP dataMul

dataBail:
	MOVOU X8, (CX)
	RET

// gcmAesEnc encrypts src into dst in counter mode and hashes the produced
// ciphertext into T. The counter block at ctr is read but not written back.
//
// Stack frame (256 bytes):
//   0..127(SP)   eight byte-reversed ciphertext blocks waiting to be hashed
//   128..255(SP) eight counter blocks, already XORed with round key 0
//
// R10 holds the last 32-bit word of the counter, byte-swapped to native order;
// R12 holds the last word of round key 0, likewise byte-swapped. Each
// "ADDL/MOVL/XORL/BSWAPL/MOVL" group increments the counter and patches the
// last four bytes of one stack counter slot.
//
// func gcmAesEnc(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
// Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
TEXT ·gcmAesEnc(SB), $256-96
	MOVQ productTable+0(FP), DI
	MOVQ dst_base+8(FP), DX
	MOVQ src_base+32(FP), SI
	MOVQ src_len+40(FP), R9
	MOVQ ctr+56(FP), CX
	MOVQ T+64(FP), R8
	MOVQ ks_base+72(FP), AX
	MOVQ ks_len+80(FP), R13

	// R13 = len(ks)/4 - 1, compared with 12 to choose the number of rounds.
	SHRQ $0x02, R13
	DECQ R13
	MOVOU bswapMask<>+0(SB), X15
	MOVOU gcmPoly<>+0(SB), X14
	MOVOU (R8), X8
	PXOR X9, X9
	PXOR X10, X10
	MOVOU (CX), X0
	MOVL 12(CX), R10
	MOVOU (AX), X11
	MOVL 12(AX), R12
	BSWAPL R10
	BSWAPL R12

	// X11 = counter block XOR round key 0; every counter slot starts from it.
	PXOR X0, X11
	MOVOU X11, 128(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 140(SP)
	CMPQ R9, $0x80
	JB gcmAesEncSingles
	SUBQ $0x80, R9

	// We have at least 8 blocks to encrypt, prepare the rest of the counters
	MOVOU X11, 144(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 156(SP)
	MOVOU X11, 160(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 172(SP)
	MOVOU X11, 176(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 188(SP)
	MOVOU X11, 192(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 204(SP)
	MOVOU X11, 208(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 220(SP)
	MOVOU X11, 224(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 236(SP)
	MOVOU X11, 240(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 252(SP)

	// First group of eight blocks: AES only (there is no ciphertext to hash
	// yet). After each round one counter slot is refreshed for the next group.
	MOVOU 128(SP), X0
	MOVOU 144(SP), X1
	MOVOU 160(SP), X2
	MOVOU 176(SP), X3
	MOVOU 192(SP), X4
	MOVOU 208(SP), X5
	MOVOU 224(SP), X6
	MOVOU 240(SP), X7
	MOVOU 16(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 140(SP)
	MOVOU 32(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 156(SP)
	MOVOU 48(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 172(SP)
	MOVOU 64(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 188(SP)
	MOVOU 80(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 204(SP)
	MOVOU 96(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 220(SP)
	MOVOU 112(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 236(SP)
	MOVOU 128(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 252(SP)
	MOVOU 144(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 160(AX), X11

	// The JE below reuses the flags of this CMPQ (AESENC/MOVOU leave them).
	CMPQ R13, $0x0c
	JB encLast1
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 176(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 192(AX), X11
	JE encLast1
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 208(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 224(AX), X11

encLast1:
	AESENCLAST X11, X0
	AESENCLAST X11, X1
	AESENCLAST X11, X2
	AESENCLAST X11, X3
	AESENCLAST X11, X4
	AESENCLAST X11, X5
	AESENCLAST X11, X6
	AESENCLAST X11, X7

	// XOR the key stream with the source blocks.
	MOVOU (SI), X11
	PXOR X11, X0
	MOVOU 16(SI), X11
	PXOR X11, X1
	MOVOU 32(SI), X11
	PXOR X11, X2
	MOVOU 48(SI), X11
	PXOR X11, X3
	MOVOU 64(SI), X11
	PXOR X11, X4
	MOVOU 80(SI), X11
	PXOR X11, X5
	MOVOU 96(SI), X11
	PXOR X11, X6
	MOVOU 112(SI), X11
	PXOR X11, X7

	// Write the output, then keep byte-reversed copies on the stack for
	// hashing; the running hash (X8) is folded into the first block.
	MOVOU X0, (DX)
	PSHUFB X15, X0
	PXOR X8, X0
	MOVOU X1, 16(DX)
	PSHUFB X15, X1
	MOVOU X2, 32(DX)
	PSHUFB X15, X2
	MOVOU X3, 48(DX)
	PSHUFB X15, X3
	MOVOU X4, 64(DX)
	PSHUFB X15, X4
	MOVOU X5, 80(DX)
	PSHUFB X15, X5
	MOVOU X6, 96(DX)
	PSHUFB X15, X6
	MOVOU X7, 112(DX)
	PSHUFB X15, X7
	MOVOU X0, (SP)
	MOVOU X1, 16(SP)
	MOVOU X2, 32(SP)
	MOVOU X3, 48(SP)
	MOVOU X4, 64(SP)
	MOVOU X5, 80(SP)
	MOVOU X6, 96(SP)
	MOVOU X7, 112(SP)
	LEAQ 128(SI), SI
	LEAQ 128(DX), DX

gcmAesEncOctetsLoop:
	// Steady state: encrypt the next eight counter blocks while hashing the
	// eight ciphertext blocks saved at 0..127(SP) by the previous group. The AES
	// rounds and the carry-less multiplies are interleaved instruction by
	// instruction.
	CMPQ R9, $0x80
	JB gcmAesEncOctetsEnd
	SUBQ $0x80, R9
	MOVOU 128(SP), X0
	MOVOU 144(SP), X1
	MOVOU 160(SP), X2
	MOVOU 176(SP), X3
	MOVOU 192(SP), X4
	MOVOU 208(SP), X5
	MOVOU 224(SP), X6
	MOVOU 240(SP), X7
	MOVOU (SP), X11
	PSHUFD $0x4e, X11, X12
	PXOR X11, X12
	MOVOU (DI), X8
	MOVOU 16(DI), X10
	MOVOU X8, X9
	PCLMULQDQ $0x00, X12, X10
	PCLMULQDQ $0x00, X11, X8
	PCLMULQDQ $0x11, X11, X9
	MOVOU 16(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 32(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 16(SP), X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 48(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 140(SP)
	MOVOU 32(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 64(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 32(SP), X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 80(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 156(SP)
	MOVOU 48(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 96(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 48(SP), X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 112(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 172(SP)
	MOVOU 64(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 128(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 64(SP), X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 144(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 188(SP)
	MOVOU 80(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 160(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 80(SP), X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 176(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 204(SP)
	MOVOU 96(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 192(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 96(SP), X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 208(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 220(SP)
	MOVOU 112(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 224(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 112(SP), X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 240(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 236(SP)
	MOVOU 128(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 252(SP)

	// Combine the three partial sums; the two reduction steps are split around
	// AES round 9.
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU 144(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8
	MOVOU 160(AX), X11
	CMPQ R13, $0x0c
	JB encLast2
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 176(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 192(AX), X11
	JE encLast2
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 208(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 224(AX), X11

encLast2:
	AESENCLAST X11, X0
	AESENCLAST X11, X1
	AESENCLAST X11, X2
	AESENCLAST X11, X3
	AESENCLAST X11, X4
	AESENCLAST X11, X5
	AESENCLAST X11, X6
	AESENCLAST X11, X7
	MOVOU (SI), X11
	PXOR X11, X0
	MOVOU 16(SI), X11
	PXOR X11, X1
	MOVOU 32(SI), X11
	PXOR X11, X2
	MOVOU 48(SI), X11
	PXOR X11, X3
	MOVOU 64(SI), X11
	PXOR X11, X4
	MOVOU 80(SI), X11
	PXOR X11, X5
	MOVOU 96(SI), X11
	PXOR X11, X6
	MOVOU 112(SI), X11
	PXOR X11, X7
	MOVOU X0, (DX)
	PSHUFB X15, X0
	PXOR X8, X0
	MOVOU X1, 16(DX)
	PSHUFB X15, X1
	MOVOU X2, 32(DX)
	PSHUFB X15, X2
	MOVOU X3, 48(DX)
	PSHUFB X15, X3
	MOVOU X4, 64(DX)
	PSHUFB X15, X4
	MOVOU X5, 80(DX)
	PSHUFB X15, X5
	MOVOU X6, 96(DX)
	PSHUFB X15, X6
	MOVOU X7, 112(DX)
	PSHUFB X15, X7
	MOVOU X0, (SP)
	MOVOU X1, 16(SP)
	MOVOU X2, 32(SP)
	MOVOU X3, 48(SP)
	MOVOU X4, 64(SP)
	MOVOU X5, 80(SP)
	MOVOU X6, 96(SP)
	MOVOU X7, 112(SP)
	LEAQ 128(SI), SI
	LEAQ 128(DX), DX
	JMP gcmAesEncOctetsLoop

gcmAesEncOctetsEnd:
	// Hash the last eight ciphertext blocks still parked at 0..127(SP).
	MOVOU (SP), X11
	MOVOU (DI), X8
	MOVOU 16(DI), X10
	MOVOU X8, X9
	PSHUFD $0x4e, X11, X12
	PXOR X11, X12
	PCLMULQDQ $0x00, X11, X8
	PCLMULQDQ $0x11, X11, X9
	PCLMULQDQ $0x00, X12, X10
	MOVOU 16(SP), X11
	MOVOU 32(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X11, X13
	PXOR X13, X9
	PSHUFD $0x4e, X11, X12
	PXOR X12, X11
	MOVOU 48(DI), X12
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X10
	MOVOU 32(SP), X11
	MOVOU 64(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X11, X13
	PXOR X13, X9
	PSHUFD $0x4e, X11, X12
	PXOR X12, X11
	MOVOU 80(DI), X12
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X10
	MOVOU 48(SP), X11
	MOVOU 96(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X11, X13
	PXOR X13, X9
	PSHUFD $0x4e, X11, X12
	PXOR X12, X11
	MOVOU 112(DI), X12
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X10
	MOVOU 64(SP), X11
	MOVOU 128(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X11, X13
	PXOR X13, X9
	PSHUFD $0x4e, X11, X12
	PXOR X12, X11
	MOVOU 144(DI), X12
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X10
	MOVOU 80(SP), X11
	MOVOU 160(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X11, X13
	PXOR X13, X9
	PSHUFD $0x4e, X11, X12
	PXOR X12, X11
	MOVOU 176(DI), X12
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X10
	MOVOU 96(SP), X11
	MOVOU 192(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X11, X13
	PXOR X13, X9
	PSHUFD $0x4e, X11, X12
	PXOR X12, X11
	MOVOU 208(DI), X12
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X10
	MOVOU 112(SP), X11
	MOVOU 224(DI), X12
	MOVOU X12, X13
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PCLMULQDQ $0x11, X11, X13
	PXOR X13, X9
	PSHUFD $0x4e, X11, X12
	PXOR X12, X11
	MOVOU 240(DI), X12
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X10
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8
	TESTQ R9, R9
	JE gcmAesEncDone

	// Eight counters were prepared ahead for a group that will not run; step
	// the counter back by 7 so the single-block path, which reads the slot at
	// 128(SP), continues with the right value.
	SUBQ $0x07, R10

gcmAesEncSingles:
	// Cache round keys 1..7 and the table value at offset 224 in registers.
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU 64(AX), X4
	MOVOU 80(AX), X5
	MOVOU 96(AX), X6
	MOVOU 112(AX), X7
	MOVOU 224(DI), X13

gcmAesEncSinglesLoop:
	CMPQ R9, $0x10
	JB gcmAesEncTail
	SUBQ $0x10, R9
	MOVOU 128(SP), X0
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 140(SP)
	AESENC X1, X0
	AESENC X2, X0
	AESENC X3, X0
	AESENC X4, X0
	AESENC X5, X0
	AESENC X6, X0
	AESENC X7, X0
	MOVOU 128(AX), X11
	AESENC X11, X0
	MOVOU 144(AX), X11
	AESENC X11, X0
	MOVOU 160(AX), X11
	CMPQ R13, $0x0c
	JB encLast3
	AESENC X11, X0
	MOVOU 176(AX), X11
	AESENC X11, X0
	MOVOU 192(AX), X11
	JE encLast3
	AESENC X11, X0
	MOVOU 208(AX), X11
	AESENC X11, X0
	MOVOU 224(AX), X11

encLast3:
	AESENCLAST X11, X0
	MOVOU (SI), X11
	PXOR X11, X0
	MOVOU X0, (DX)

	// Hash the block just written.
	PSHUFB X15, X0
	PXOR X8, X0
	MOVOU X13, X8
	MOVOU X13, X9
	MOVOU 240(DI), X10
	PSHUFD $0x4e, X0, X11
	PXOR X0, X11
	PCLMULQDQ $0x00, X0, X8
	PCLMULQDQ $0x11, X0, X9
	PCLMULQDQ $0x00, X11, X10
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8
	LEAQ 16(SI), SI
	LEAQ 16(DX), DX
	JMP gcmAesEncSinglesLoop

gcmAesEncTail:
	// Final partial block of R9 (1..15) bytes.
	TESTQ R9, R9
	JE gcmAesEncDone
	MOVOU 128(SP), X0
	AESENC X1, X0
	AESENC X2, X0
	AESENC X3, X0
	AESENC X4, X0
	AESENC X5, X0
	AESENC X6, X0
	AESENC X7, X0
	MOVOU 128(AX), X11
	AESENC X11, X0
	MOVOU 144(AX), X11
	AESENC X11, X0
	MOVOU 160(AX), X11
	CMPQ R13, $0x0c
	JB encLast4
	AESENC X11, X0
	MOVOU 176(AX), X11
	AESENC X11, X0
	MOVOU 192(AX), X11
	JE encLast4
	AESENC X11, X0
	MOVOU 208(AX), X11
	AESENC X11, X0
	MOVOU 224(AX), X11

encLast4:
	AESENCLAST X11, X0
	MOVOU X0, X11

	// Point SI at the last source byte and fetch andMask[R9-1], the mask that
	// keeps the low R9 bytes. R10 is reused as a scratch pointer here; the
	// counter it held is not needed again.
	LEAQ -1(SI)(R9*1), SI
	MOVQ R9, R11
	SHLQ $0x04, R11
	LEAQ andMask<>+0(SB), R10
	MOVOU -16(R10)(R11*1), X12
	PXOR X0, X0

ptxLoadLoop:
	// Read the source bytes one at a time, last byte first, so no load goes
	// past the end of src.
	PSLLDQ $0x01, X0
	PINSRB $0x00, (SI), X0
	LEAQ -1(SI), SI
	DECQ R9
	JNE ptxLoadLoop
	PXOR X11, X0
	PAND X12, X0

	// This is a full 16-byte store even though fewer than 16 bytes of output
	// are meaningful. NOTE(review): it relies on dst having at least 16
	// writable bytes at this position - confirm against the caller.
	MOVOU X0, (DX)
	PSHUFB X15, X0
	PXOR X8, X0
	MOVOU X13, X8
	MOVOU X13, X9
	MOVOU 240(DI), X10
	PSHUFD $0x4e, X0, X11
	PXOR X0, X11
	PCLMULQDQ $0x00, X0, X8
	PCLMULQDQ $0x11, X0, X9
	PCLMULQDQ $0x00, X11, X10
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8

gcmAesEncDone:
	MOVOU X8, (R8)
	RET

// andMask holds fifteen 16-byte masks; entry n-1 (at byte offset 16*(n-1)) has
// its low n bytes set to 0xff and the rest zero, for n = 1..15.
DATA andMask<>+0(SB)/8, $0x00000000000000ff
DATA andMask<>+8(SB)/8, $0x0000000000000000
DATA andMask<>+16(SB)/8, $0x000000000000ffff
DATA andMask<>+24(SB)/8, $0x0000000000000000
DATA andMask<>+32(SB)/8, $0x0000000000ffffff
DATA andMask<>+40(SB)/8, $0x0000000000000000
DATA andMask<>+48(SB)/8, $0x00000000ffffffff
DATA andMask<>+56(SB)/8, $0x0000000000000000
DATA andMask<>+64(SB)/8, $0x000000ffffffffff
DATA andMask<>+72(SB)/8, $0x0000000000000000
DATA andMask<>+80(SB)/8, $0x0000ffffffffffff
DATA andMask<>+88(SB)/8, $0x0000000000000000
DATA andMask<>+96(SB)/8, $0x00ffffffffffffff
DATA andMask<>+104(SB)/8, $0x0000000000000000
DATA andMask<>+112(SB)/8, $0xffffffffffffffff
DATA andMask<>+120(SB)/8, $0x0000000000000000
DATA andMask<>+128(SB)/8, $0xffffffffffffffff
DATA andMask<>+136(SB)/8, $0x00000000000000ff
DATA andMask<>+144(SB)/8, $0xffffffffffffffff
DATA andMask<>+152(SB)/8, $0x000000000000ffff
DATA andMask<>+160(SB)/8, $0xffffffffffffffff
DATA andMask<>+168(SB)/8, $0x0000000000ffffff
DATA andMask<>+176(SB)/8, $0xffffffffffffffff
DATA andMask<>+184(SB)/8, $0x00000000ffffffff
DATA andMask<>+192(SB)/8, $0xffffffffffffffff
DATA andMask<>+200(SB)/8, $0x000000ffffffffff
DATA andMask<>+208(SB)/8, $0xffffffffffffffff
DATA andMask<>+216(SB)/8, $0x0000ffffffffffff
DATA andMask<>+224(SB)/8, $0xffffffffffffffff
DATA andMask<>+232(SB)/8, $0x00ffffffffffffff
GLOBL andMask<>(SB), RODATA|NOPTR, $240

// gcmAesDec hashes the blocks read from src into T and writes src XOR key
// stream to dst. Unlike gcmAesEnc the data to hash is the input itself, so
// hashing and encryption of the same eight blocks are interleaved and no
// staging area is needed: the 128-byte frame only holds the eight counter
// blocks (already XORed with round key 0). This routine does not compare
// tags; it only updates T.
//
// Register use mirrors gcmAesEnc, except that SI is the destination and DX the
// source.
//
// func gcmAesDec(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
// Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
TEXT ·gcmAesDec(SB), $128-96
	MOVQ productTable+0(FP), DI
	MOVQ dst_base+8(FP), SI
	MOVQ src_base+32(FP), DX
	MOVQ src_len+40(FP), R9
	MOVQ ctr+56(FP), CX
	MOVQ T+64(FP), R8
	MOVQ ks_base+72(FP), AX
	MOVQ ks_len+80(FP), R13

	// R13 = len(ks)/4 - 1, compared with 12 to choose the number of rounds.
	SHRQ $0x02, R13
	DECQ R13
	MOVOU bswapMask<>+0(SB), X15
	MOVOU gcmPoly<>+0(SB), X14
	MOVOU (R8), X8
	PXOR X9, X9
	PXOR X10, X10
	MOVOU (CX), X0
	MOVL 12(CX), R10
	MOVOU (AX), X11
	MOVL 12(AX), R12
	BSWAPL R10
	BSWAPL R12

	// X11 = counter block XOR round key 0; every counter slot starts from it.
	PXOR X0, X11
	MOVOU X11, (SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 12(SP)
	CMPQ R9, $0x80
	JB gcmAesDecSingles

	// At least eight blocks: prepare the remaining seven counter slots.
	MOVOU X11, 16(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 28(SP)
	MOVOU X11, 32(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 44(SP)
	MOVOU X11, 48(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 60(SP)
	MOVOU X11, 64(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 76(SP)
	MOVOU X11, 80(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 92(SP)
	MOVOU X11, 96(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 108(SP)
	MOVOU X11, 112(SP)
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 124(SP)

gcmAesDecOctetsLoop:
	CMPQ R9, $0x80
	JB gcmAesDecEndOctets
	SUBQ $0x80, R9
	MOVOU (SP), X0
	MOVOU 16(SP), X1
	MOVOU 32(SP), X2
	MOVOU 48(SP), X3
	MOVOU 64(SP), X4
	MOVOU 80(SP), X5
	MOVOU 96(SP), X6
	MOVOU 112(SP), X7

	// First input block absorbs the running hash before being multiplied.
	MOVOU (DX), X11
	PSHUFB X15, X11
	PXOR X8, X11
	PSHUFD $0x4e, X11, X12
	PXOR X11, X12
	MOVOU (DI), X8
	MOVOU 16(DI), X10
	MOVOU X8, X9
	PCLMULQDQ $0x00, X12, X10
	PCLMULQDQ $0x00, X11, X8
	PCLMULQDQ $0x11, X11, X9
	MOVOU 16(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 32(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 16(DX), X11
	PSHUFB X15, X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 48(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 12(SP)
	MOVOU 32(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 64(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 32(DX), X11
	PSHUFB X15, X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 80(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 28(SP)
	MOVOU 48(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 96(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 48(DX), X11
	PSHUFB X15, X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 112(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 44(SP)
	MOVOU 64(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 128(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 64(DX), X11
	PSHUFB X15, X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 144(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 60(SP)
	MOVOU 80(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 160(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 80(DX), X11
	PSHUFB X15, X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 176(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 76(SP)
	MOVOU 96(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 192(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 96(DX), X11
	PSHUFB X15, X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 208(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 92(SP)
	MOVOU 112(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	MOVOU 224(DI), X12
	MOVOU X12, X13
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 112(DX), X11
	PSHUFB X15, X11
	PCLMULQDQ $0x00, X11, X12
	PXOR X12, X8
	PSHUFD $0x4e, X11, X12
	PCLMULQDQ $0x11, X11, X13
	PXOR X12, X11
	PXOR X13, X9
	MOVOU 240(DI), X13
	PCLMULQDQ $0x00, X13, X11
	PXOR X11, X10
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 108(SP)
	MOVOU 128(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 124(SP)

	// Combine the three partial sums; the two reduction steps are split around
	// AES round 9.
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU 144(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8
	MOVOU 160(AX), X11

	// The JE below reuses the flags of this CMPQ (AESENC/MOVOU leave them).
	CMPQ R13, $0x0c
	JB decLast1
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 176(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 192(AX), X11
	JE decLast1
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 208(AX), X11
	AESENC X11, X0
	AESENC X11, X1
	AESENC X11, X2
	AESENC X11, X3
	AESENC X11, X4
	AESENC X11, X5
	AESENC X11, X6
	AESENC X11, X7
	MOVOU 224(AX), X11

decLast1:
	AESENCLAST X11, X0
	AESENCLAST X11, X1
	AESENCLAST X11, X2
	AESENCLAST X11, X3
	AESENCLAST X11, X4
	AESENCLAST X11, X5
	AESENCLAST X11, X6
	AESENCLAST X11, X7

	// XOR the key stream with the input and store the result.
	MOVOU (DX), X11
	PXOR X11, X0
	MOVOU 16(DX), X11
	PXOR X11, X1
	MOVOU 32(DX), X11
	PXOR X11, X2
	MOVOU 48(DX), X11
	PXOR X11, X3
	MOVOU 64(DX), X11
	PXOR X11, X4
	MOVOU 80(DX), X11
	PXOR X11, X5
	MOVOU 96(DX), X11
	PXOR X11, X6
	MOVOU 112(DX), X11
	PXOR X11, X7
	MOVOU X0, (SI)
	MOVOU X1, 16(SI)
	MOVOU X2, 32(SI)
	MOVOU X3, 48(SI)
	MOVOU X4, 64(SI)
	MOVOU X5, 80(SI)
	MOVOU X6, 96(SI)
	MOVOU X7, 112(SI)
	LEAQ 128(SI), SI
	LEAQ 128(DX), DX
	JMP gcmAesDecOctetsLoop

gcmAesDecEndOctets:
	// Step the counter back by 7: the slot at (SP) already holds the next
	// counter, the other seven prepared slots will not be used.
	SUBQ $0x07, R10

gcmAesDecSingles:
	// Cache round keys 1..7 and the table value at offset 224 in registers.
	MOVOU 16(AX), X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU 64(AX), X4
	MOVOU 80(AX), X5
	MOVOU 96(AX), X6
	MOVOU 112(AX), X7
	MOVOU 224(DI), X13

gcmAesDecSinglesLoop:
	CMPQ R9, $0x10
	JB gcmAesDecTail
	SUBQ $0x10, R9

	// Keep an untouched copy of the input block in X12 for the final XOR.
	MOVOU (DX), X0
	MOVOU X0, X12
	PSHUFB X15, X0
	PXOR X8, X0
	MOVOU X13, X8
	MOVOU X13, X9
	MOVOU 240(DI), X10
	PCLMULQDQ $0x00, X0, X8
	PCLMULQDQ $0x11, X0, X9
	PSHUFD $0x4e, X0, X11
	PXOR X0, X11
	PCLMULQDQ $0x00, X11, X10
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8
	MOVOU (SP), X0
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 12(SP)
	AESENC X1, X0
	AESENC X2, X0
	AESENC X3, X0
	AESENC X4, X0
	AESENC X5, X0
	AESENC X6, X0
	AESENC X7, X0
	MOVOU 128(AX), X11
	AESENC X11, X0
	MOVOU 144(AX), X11
	AESENC X11, X0
	MOVOU 160(AX), X11
	CMPQ R13, $0x0c
	JB decLast2
	AESENC X11, X0
	MOVOU 176(AX), X11
	AESENC X11, X0
	MOVOU 192(AX), X11
	JE decLast2
	AESENC X11, X0
	MOVOU 208(AX), X11
	AESENC X11, X0
	MOVOU 224(AX), X11

decLast2:
	AESENCLAST X11, X0
	PXOR X12, X0
	MOVOU X0, (SI)
	LEAQ 16(SI), SI
	LEAQ 16(DX), DX
	JMP gcmAesDecSinglesLoop

gcmAesDecTail:
	// Final partial block of R9 (1..15) bytes.
	TESTQ R9, R9
	JE gcmAesDecDone

	// Fetch andMask[R9-1]. R10 is reused as a scratch pointer from here on.
	MOVQ R9, R11
	SHLQ $0x04, R11
	LEAQ andMask<>+0(SB), R10
	MOVOU -16(R10)(R11*1), X12

	// This is a full 16-byte load although fewer than 16 input bytes remain;
	// the extra bytes are masked off. NOTE(review): it relies on 16 readable
	// bytes being present at src here - confirm against the caller.
	MOVOU (DX), X0
	PAND X12, X0
	MOVOU X0, X12
	PSHUFB X15, X0
	PXOR X8, X0
	MOVOU 224(DI), X8
	MOVOU 240(DI), X10
	MOVOU X8, X9
	PCLMULQDQ $0x00, X0, X8
	PCLMULQDQ $0x11, X0, X9
	PSHUFD $0x4e, X0, X11
	PXOR X0, X11
	PCLMULQDQ $0x00, X11, X10
	PXOR X8, X10
	PXOR X9, X10
	MOVOU X10, X11
	PSRLDQ $0x08, X10
	PSLLDQ $0x08, X11
	PXOR X10, X9
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	MOVOU X14, X11
	PCLMULQDQ $0x01, X8, X11
	PSHUFD $0x4e, X8, X8
	PXOR X11, X8
	PXOR X9, X8
	MOVOU (SP), X0

	// R10 no longer holds the counter (it was overwritten with the andMask
	// address above), so the word stored to 12(SP) here is not a valid counter.
	// X0 was loaded from (SP) before the store and the slot is not read again
	// before RET.
	ADDL $0x01, R10
	MOVL R10, R11
	XORL R12, R11
	BSWAPL R11
	MOVL R11, 12(SP)
	AESENC X1, X0
	AESENC X2, X0
	AESENC X3, X0
	AESENC X4, X0
	AESENC X5, X0
	AESENC X6, X0
	AESENC X7, X0
	MOVOU 128(AX), X11
	AESENC X11, X0
	MOVOU 144(AX), X11
	AESENC X11, X0
	MOVOU 160(AX), X11
	CMPQ R13, $0x0c
	JB decLast3
	AESENC X11, X0
	MOVOU 176(AX), X11
	AESENC X11, X0
	MOVOU 192(AX), X11
	JE decLast3
	AESENC X11, X0
	MOVOU 208(AX), X11
	AESENC X11, X0
	MOVOU 224(AX), X11

decLast3:
	AESENCLAST X11, X0
	PXOR X12, X0

ptxStoreLoop:
	// Write exactly R9 output bytes, one at a time, so dst is never overrun.
	PEXTRB $0x00, X0, (SI)
	PSRLDQ $0x01, X0
	LEAQ 1(SI), SI
	DECQ R9
	JNE ptxStoreLoop

gcmAesDecDone:
	MOVOU X8, (R8)
	RET