Text file src/crypto/sha1/sha1block_amd64.s

     1  // Code generated by command: go run sha1block_amd64_asm.go -out ../sha1block_amd64.s -pkg sha1. DO NOT EDIT.
     2  
     3  //go:build !purego
     4  
     5  #include "textflag.h"
     6  
     7  // func blockAVX2(dig *digest, p []byte)
     8  // Requires: AVX, AVX2, BMI, BMI2, CMOV
     9  TEXT ·blockAVX2(SB), $1408-32
    10  	MOVQ        dig+0(FP), DI
    11  	MOVQ        p_base+8(FP), SI
    12  	MOVQ        p_len+16(FP), DX
    13  	SHRQ        $0x06, DX
    14  	SHLQ        $0x06, DX
    15  	LEAQ        K_XMM_AR<>+0(SB), R8
    16  	MOVQ        DI, R9
    17  	MOVQ        SI, R10
    18  	LEAQ        64(SI), R13
    19  	ADDQ        SI, DX
    20  	ADDQ        $0x40, DX
    21  	MOVQ        DX, R11
    22  	CMPQ        R13, R11
    23  	CMOVQCC     R8, R13
    24  	VMOVDQU     BSWAP_SHUFB_CTL<>+0(SB), Y10
    25  	MOVL        (R9), CX
    26  	MOVL        4(R9), SI
    27  	MOVL        8(R9), DI
    28  	MOVL        12(R9), AX
    29  	MOVL        16(R9), DX
    30  	MOVQ        SP, R14
    31  	LEAQ        672(SP), R15
    32  	VMOVDQU     (R10), X0
    33  	VINSERTI128 $0x01, (R13), Y0, Y0
    34  	VPSHUFB     Y10, Y0, Y15
    35  	VPADDD      (R8), Y15, Y0
    36  	VMOVDQU     Y0, (R14)
    37  	VMOVDQU     16(R10), X0
    38  	VINSERTI128 $0x01, 16(R13), Y0, Y0
    39  	VPSHUFB     Y10, Y0, Y14
    40  	VPADDD      (R8), Y14, Y0
    41  	VMOVDQU     Y0, 32(R14)
    42  	VMOVDQU     32(R10), X0
    43  	VINSERTI128 $0x01, 32(R13), Y0, Y0
    44  	VPSHUFB     Y10, Y0, Y13
    45  	VPADDD      (R8), Y13, Y0
    46  	VMOVDQU     Y0, 64(R14)
    47  	VMOVDQU     48(R10), X0
    48  	VINSERTI128 $0x01, 48(R13), Y0, Y0
    49  	VPSHUFB     Y10, Y0, Y12
    50  	VPADDD      (R8), Y12, Y0
    51  	VMOVDQU     Y0, 96(R14)
    52  	VPALIGNR    $0x08, Y15, Y14, Y8
    53  	VPSRLDQ     $0x04, Y12, Y0
    54  	VPXOR       Y13, Y8, Y8
    55  	VPXOR       Y15, Y0, Y0
    56  	VPXOR       Y0, Y8, Y8
    57  	VPSLLDQ     $0x0c, Y8, Y9
    58  	VPSLLD      $0x01, Y8, Y0
    59  	VPSRLD      $0x1f, Y8, Y8
    60  	VPOR        Y8, Y0, Y0
    61  	VPSLLD      $0x02, Y9, Y8
    62  	VPSRLD      $0x1e, Y9, Y9
    63  	VPXOR       Y8, Y0, Y0
    64  	VPXOR       Y9, Y0, Y8
    65  	VPADDD      (R8), Y8, Y0
    66  	VMOVDQU     Y0, 128(R14)
    67  	VPALIGNR    $0x08, Y14, Y13, Y7
    68  	VPSRLDQ     $0x04, Y8, Y0
    69  	VPXOR       Y12, Y7, Y7
    70  	VPXOR       Y14, Y0, Y0
    71  	VPXOR       Y0, Y7, Y7
    72  	VPSLLDQ     $0x0c, Y7, Y9
    73  	VPSLLD      $0x01, Y7, Y0
    74  	VPSRLD      $0x1f, Y7, Y7
    75  	VPOR        Y7, Y0, Y0
    76  	VPSLLD      $0x02, Y9, Y7
    77  	VPSRLD      $0x1e, Y9, Y9
    78  	VPXOR       Y7, Y0, Y0
    79  	VPXOR       Y9, Y0, Y7
    80  	VPADDD      32(R8), Y7, Y0
    81  	VMOVDQU     Y0, 160(R14)
    82  	VPALIGNR    $0x08, Y13, Y12, Y5
    83  	VPSRLDQ     $0x04, Y7, Y0
    84  	VPXOR       Y8, Y5, Y5
    85  	VPXOR       Y13, Y0, Y0
    86  	VPXOR       Y0, Y5, Y5
    87  	VPSLLDQ     $0x0c, Y5, Y9
    88  	VPSLLD      $0x01, Y5, Y0
    89  	VPSRLD      $0x1f, Y5, Y5
    90  	VPOR        Y5, Y0, Y0
    91  	VPSLLD      $0x02, Y9, Y5
    92  	VPSRLD      $0x1e, Y9, Y9
    93  	VPXOR       Y5, Y0, Y0
    94  	VPXOR       Y9, Y0, Y5
    95  	VPADDD      32(R8), Y5, Y0
    96  	VMOVDQU     Y0, 192(R14)
    97  	VPALIGNR    $0x08, Y12, Y8, Y3
    98  	VPSRLDQ     $0x04, Y5, Y0
    99  	VPXOR       Y7, Y3, Y3
   100  	VPXOR       Y12, Y0, Y0
   101  	VPXOR       Y0, Y3, Y3
   102  	VPSLLDQ     $0x0c, Y3, Y9
   103  	VPSLLD      $0x01, Y3, Y0
   104  	VPSRLD      $0x1f, Y3, Y3
   105  	VPOR        Y3, Y0, Y0
   106  	VPSLLD      $0x02, Y9, Y3
   107  	VPSRLD      $0x1e, Y9, Y9
   108  	VPXOR       Y3, Y0, Y0
   109  	VPXOR       Y9, Y0, Y3
   110  	VPADDD      32(R8), Y3, Y0
   111  	VMOVDQU     Y0, 224(R14)
   112  	VPALIGNR    $0x08, Y5, Y3, Y0
   113  	VPXOR       Y14, Y15, Y15
   114  	VPXOR       Y8, Y0, Y0
   115  	VPXOR       Y0, Y15, Y15
   116  	VPSLLD      $0x02, Y15, Y0
   117  	VPSRLD      $0x1e, Y15, Y15
   118  	VPOR        Y15, Y0, Y15
   119  	VPADDD      32(R8), Y15, Y0
   120  	VMOVDQU     Y0, 256(R14)
   121  	VPALIGNR    $0x08, Y3, Y15, Y0
   122  	VPXOR       Y13, Y14, Y14
   123  	VPXOR       Y7, Y0, Y0
   124  	VPXOR       Y0, Y14, Y14
   125  	VPSLLD      $0x02, Y14, Y0
   126  	VPSRLD      $0x1e, Y14, Y14
   127  	VPOR        Y14, Y0, Y14
   128  	VPADDD      32(R8), Y14, Y0
   129  	VMOVDQU     Y0, 288(R14)
   130  	VPALIGNR    $0x08, Y15, Y14, Y0
   131  	VPXOR       Y12, Y13, Y13
   132  	VPXOR       Y5, Y0, Y0
   133  	VPXOR       Y0, Y13, Y13
   134  	VPSLLD      $0x02, Y13, Y0
   135  	VPSRLD      $0x1e, Y13, Y13
   136  	VPOR        Y13, Y0, Y13
   137  	VPADDD      64(R8), Y13, Y0
   138  	VMOVDQU     Y0, 320(R14)
   139  	VPALIGNR    $0x08, Y14, Y13, Y0
   140  	VPXOR       Y8, Y12, Y12
   141  	VPXOR       Y3, Y0, Y0
   142  	VPXOR       Y0, Y12, Y12
   143  	VPSLLD      $0x02, Y12, Y0
   144  	VPSRLD      $0x1e, Y12, Y12
   145  	VPOR        Y12, Y0, Y12
   146  	VPADDD      64(R8), Y12, Y0
   147  	VMOVDQU     Y0, 352(R14)
   148  	VPALIGNR    $0x08, Y13, Y12, Y0
   149  	VPXOR       Y7, Y8, Y8
   150  	VPXOR       Y15, Y0, Y0
   151  	VPXOR       Y0, Y8, Y8
   152  	VPSLLD      $0x02, Y8, Y0
   153  	VPSRLD      $0x1e, Y8, Y8
   154  	VPOR        Y8, Y0, Y8
   155  	VPADDD      64(R8), Y8, Y0
   156  	VMOVDQU     Y0, 384(R14)
   157  	VPALIGNR    $0x08, Y12, Y8, Y0
   158  	VPXOR       Y5, Y7, Y7
   159  	VPXOR       Y14, Y0, Y0
   160  	VPXOR       Y0, Y7, Y7
   161  	VPSLLD      $0x02, Y7, Y0
   162  	VPSRLD      $0x1e, Y7, Y7
   163  	VPOR        Y7, Y0, Y7
   164  	VPADDD      64(R8), Y7, Y0
   165  	VMOVDQU     Y0, 416(R14)
   166  	VPALIGNR    $0x08, Y8, Y7, Y0
   167  	VPXOR       Y3, Y5, Y5
   168  	VPXOR       Y13, Y0, Y0
   169  	VPXOR       Y0, Y5, Y5
   170  	VPSLLD      $0x02, Y5, Y0
   171  	VPSRLD      $0x1e, Y5, Y5
   172  	VPOR        Y5, Y0, Y5
   173  	VPADDD      64(R8), Y5, Y0
   174  	VMOVDQU     Y0, 448(R14)
   175  	VPALIGNR    $0x08, Y7, Y5, Y0
   176  	VPXOR       Y15, Y3, Y3
   177  	VPXOR       Y12, Y0, Y0
   178  	VPXOR       Y0, Y3, Y3
   179  	VPSLLD      $0x02, Y3, Y0
   180  	VPSRLD      $0x1e, Y3, Y3
   181  	VPOR        Y3, Y0, Y3
   182  	VPADDD      96(R8), Y3, Y0
   183  	VMOVDQU     Y0, 480(R14)
   184  	VPALIGNR    $0x08, Y5, Y3, Y0
   185  	VPXOR       Y14, Y15, Y15
   186  	VPXOR       Y8, Y0, Y0
   187  	VPXOR       Y0, Y15, Y15
   188  	VPSLLD      $0x02, Y15, Y0
   189  	VPSRLD      $0x1e, Y15, Y15
   190  	VPOR        Y15, Y0, Y15
   191  	VPADDD      96(R8), Y15, Y0
   192  	VMOVDQU     Y0, 512(R14)
   193  	VPALIGNR    $0x08, Y3, Y15, Y0
   194  	VPXOR       Y13, Y14, Y14
   195  	VPXOR       Y7, Y0, Y0
   196  	VPXOR       Y0, Y14, Y14
   197  	VPSLLD      $0x02, Y14, Y0
   198  	VPSRLD      $0x1e, Y14, Y14
   199  	VPOR        Y14, Y0, Y14
   200  	VPADDD      96(R8), Y14, Y0
   201  	VMOVDQU     Y0, 544(R14)
   202  	VPALIGNR    $0x08, Y15, Y14, Y0
   203  	VPXOR       Y12, Y13, Y13
   204  	VPXOR       Y5, Y0, Y0
   205  	VPXOR       Y0, Y13, Y13
   206  	VPSLLD      $0x02, Y13, Y0
   207  	VPSRLD      $0x1e, Y13, Y13
   208  	VPOR        Y13, Y0, Y13
   209  	VPADDD      96(R8), Y13, Y0
   210  	VMOVDQU     Y0, 576(R14)
   211  	VPALIGNR    $0x08, Y14, Y13, Y0
   212  	VPXOR       Y8, Y12, Y12
   213  	VPXOR       Y3, Y0, Y0
   214  	VPXOR       Y0, Y12, Y12
   215  	VPSLLD      $0x02, Y12, Y0
   216  	VPSRLD      $0x1e, Y12, Y12
   217  	VPOR        Y12, Y0, Y12
   218  	VPADDD      96(R8), Y12, Y0
   219  	VMOVDQU     Y0, 608(R14)
   220  	XCHGQ       R15, R14
   221  
   222  loop:
   223  	CMPQ R10, R8
   224  	JNE  begin
   225  	VZEROUPPER
   226  	RET
   227  
   228  begin:
   229  	MOVL        SI, BX
   230  	RORXL       $0x02, SI, SI
   231  	ANDNL       AX, BX, BP
   232  	ANDL        DI, BX
   233  	XORL        BP, BX
   234  	ADDL        (R15), DX
   235  	ANDNL       DI, CX, BP
   236  	LEAL        (DX)(BX*1), DX
   237  	RORXL       $0x1b, CX, R12
   238  	RORXL       $0x02, CX, BX
   239  	VMOVDQU     128(R10), X0
   240  	ANDL        SI, CX
   241  	XORL        BP, CX
   242  	LEAL        (DX)(R12*1), DX
   243  	ADDL        4(R15), AX
   244  	ANDNL       SI, DX, BP
   245  	LEAL        (AX)(CX*1), AX
   246  	RORXL       $0x1b, DX, R12
   247  	RORXL       $0x02, DX, CX
   248  	VINSERTI128 $0x01, 128(R13), Y0, Y0
   249  	ANDL        BX, DX
   250  	XORL        BP, DX
   251  	LEAL        (AX)(R12*1), AX
   252  	ADDL        8(R15), DI
   253  	ANDNL       BX, AX, BP
   254  	LEAL        (DI)(DX*1), DI
   255  	RORXL       $0x1b, AX, R12
   256  	RORXL       $0x02, AX, DX
   257  	VPSHUFB     Y10, Y0, Y15
   258  	ANDL        CX, AX
   259  	XORL        BP, AX
   260  	LEAL        (DI)(R12*1), DI
   261  	ADDL        12(R15), SI
   262  	ANDNL       CX, DI, BP
   263  	LEAL        (SI)(AX*1), SI
   264  	RORXL       $0x1b, DI, R12
   265  	RORXL       $0x02, DI, AX
   266  	ANDL        DX, DI
   267  	XORL        BP, DI
   268  	LEAL        (SI)(R12*1), SI
   269  	ADDL        32(R15), BX
   270  	ANDNL       DX, SI, BP
   271  	LEAL        (BX)(DI*1), BX
   272  	RORXL       $0x1b, SI, R12
   273  	RORXL       $0x02, SI, DI
   274  	VPADDD      (R8), Y15, Y0
   275  	ANDL        AX, SI
   276  	XORL        BP, SI
   277  	LEAL        (BX)(R12*1), BX
   278  	ADDL        36(R15), CX
   279  	ANDNL       AX, BX, BP
   280  	LEAL        (CX)(SI*1), CX
   281  	RORXL       $0x1b, BX, R12
   282  	RORXL       $0x02, BX, SI
   283  	ANDL        DI, BX
   284  	XORL        BP, BX
   285  	LEAL        (CX)(R12*1), CX
   286  	ADDL        40(R15), DX
   287  	ANDNL       DI, CX, BP
   288  	LEAL        (DX)(BX*1), DX
   289  	RORXL       $0x1b, CX, R12
   290  	RORXL       $0x02, CX, BX
   291  	ANDL        SI, CX
   292  	XORL        BP, CX
   293  	LEAL        (DX)(R12*1), DX
   294  	ADDL        44(R15), AX
   295  	ANDNL       SI, DX, BP
   296  	LEAL        (AX)(CX*1), AX
   297  	RORXL       $0x1b, DX, R12
   298  	RORXL       $0x02, DX, CX
   299  	VMOVDQU     Y0, (R14)
   300  	ANDL        BX, DX
   301  	XORL        BP, DX
   302  	LEAL        (AX)(R12*1), AX
   303  	ADDL        64(R15), DI
   304  	ANDNL       BX, AX, BP
   305  	LEAL        (DI)(DX*1), DI
   306  	RORXL       $0x1b, AX, R12
   307  	RORXL       $0x02, AX, DX
   308  	VMOVDQU     144(R10), X0
   309  	ANDL        CX, AX
   310  	XORL        BP, AX
   311  	LEAL        (DI)(R12*1), DI
   312  	ADDL        68(R15), SI
   313  	ANDNL       CX, DI, BP
   314  	LEAL        (SI)(AX*1), SI
   315  	RORXL       $0x1b, DI, R12
   316  	RORXL       $0x02, DI, AX
   317  	VINSERTI128 $0x01, 144(R13), Y0, Y0
   318  	ANDL        DX, DI
   319  	XORL        BP, DI
   320  	LEAL        (SI)(R12*1), SI
   321  	ADDL        72(R15), BX
   322  	ANDNL       DX, SI, BP
   323  	LEAL        (BX)(DI*1), BX
   324  	RORXL       $0x1b, SI, R12
   325  	RORXL       $0x02, SI, DI
   326  	VPSHUFB     Y10, Y0, Y14
   327  	ANDL        AX, SI
   328  	XORL        BP, SI
   329  	LEAL        (BX)(R12*1), BX
   330  	ADDL        76(R15), CX
   331  	ANDNL       AX, BX, BP
   332  	LEAL        (CX)(SI*1), CX
   333  	RORXL       $0x1b, BX, R12
   334  	RORXL       $0x02, BX, SI
   335  	ANDL        DI, BX
   336  	XORL        BP, BX
   337  	LEAL        (CX)(R12*1), CX
   338  	ADDL        96(R15), DX
   339  	ANDNL       DI, CX, BP
   340  	LEAL        (DX)(BX*1), DX
   341  	RORXL       $0x1b, CX, R12
   342  	RORXL       $0x02, CX, BX
   343  	VPADDD      (R8), Y14, Y0
   344  	ANDL        SI, CX
   345  	XORL        BP, CX
   346  	LEAL        (DX)(R12*1), DX
   347  	ADDL        100(R15), AX
   348  	ANDNL       SI, DX, BP
   349  	LEAL        (AX)(CX*1), AX
   350  	RORXL       $0x1b, DX, R12
   351  	RORXL       $0x02, DX, CX
   352  	ANDL        BX, DX
   353  	XORL        BP, DX
   354  	LEAL        (AX)(R12*1), AX
   355  	ADDL        104(R15), DI
   356  	ANDNL       BX, AX, BP
   357  	LEAL        (DI)(DX*1), DI
   358  	RORXL       $0x1b, AX, R12
   359  	RORXL       $0x02, AX, DX
   360  	ANDL        CX, AX
   361  	XORL        BP, AX
   362  	LEAL        (DI)(R12*1), DI
   363  	ADDL        108(R15), SI
   364  	ANDNL       CX, DI, BP
   365  	LEAL        (SI)(AX*1), SI
   366  	RORXL       $0x1b, DI, R12
   367  	RORXL       $0x02, DI, AX
   368  	VMOVDQU     Y0, 32(R14)
   369  	ANDL        DX, DI
   370  	XORL        BP, DI
   371  	LEAL        (SI)(R12*1), SI
   372  	ADDL        128(R15), BX
   373  	ANDNL       DX, SI, BP
   374  	LEAL        (BX)(DI*1), BX
   375  	RORXL       $0x1b, SI, R12
   376  	RORXL       $0x02, SI, DI
   377  	VMOVDQU     160(R10), X0
   378  	ANDL        AX, SI
   379  	XORL        BP, SI
   380  	LEAL        (BX)(R12*1), BX
   381  	ADDL        132(R15), CX
   382  	ANDNL       AX, BX, BP
   383  	LEAL        (CX)(SI*1), CX
   384  	RORXL       $0x1b, BX, R12
   385  	RORXL       $0x02, BX, SI
   386  	VINSERTI128 $0x01, 160(R13), Y0, Y0
   387  	ANDL        DI, BX
   388  	XORL        BP, BX
   389  	LEAL        (CX)(R12*1), CX
   390  	ADDL        136(R15), DX
   391  	ANDNL       DI, CX, BP
   392  	LEAL        (DX)(BX*1), DX
   393  	RORXL       $0x1b, CX, R12
   394  	RORXL       $0x02, CX, BX
   395  	VPSHUFB     Y10, Y0, Y13
   396  	ANDL        SI, CX
   397  	XORL        BP, CX
   398  	LEAL        (DX)(R12*1), DX
   399  	ADDL        140(R15), AX
   400  	LEAL        (AX)(CX*1), AX
   401  	RORXL       $0x1b, DX, R12
   402  	RORXL       $0x02, DX, CX
   403  	XORL        BX, DX
   404  	ADDL        R12, AX
   405  	XORL        SI, DX
   406  	ADDL        160(R15), DI
   407  	LEAL        (DI)(DX*1), DI
   408  	RORXL       $0x1b, AX, R12
   409  	RORXL       $0x02, AX, DX
   410  	VPADDD      (R8), Y13, Y0
   411  	XORL        CX, AX
   412  	ADDL        R12, DI
   413  	XORL        BX, AX
   414  	ADDL        164(R15), SI
   415  	LEAL        (SI)(AX*1), SI
   416  	RORXL       $0x1b, DI, R12
   417  	RORXL       $0x02, DI, AX
   418  	XORL        DX, DI
   419  	ADDL        R12, SI
   420  	XORL        CX, DI
   421  	ADDL        168(R15), BX
   422  	LEAL        (BX)(DI*1), BX
   423  	RORXL       $0x1b, SI, R12
   424  	RORXL       $0x02, SI, DI
   425  	XORL        AX, SI
   426  	ADDL        R12, BX
   427  	XORL        DX, SI
   428  	ADDL        172(R15), CX
   429  	LEAL        (CX)(SI*1), CX
   430  	RORXL       $0x1b, BX, R12
   431  	RORXL       $0x02, BX, SI
   432  	VMOVDQU     Y0, 64(R14)
   433  	XORL        DI, BX
   434  	ADDL        R12, CX
   435  	XORL        AX, BX
   436  	ADDL        192(R15), DX
   437  	LEAL        (DX)(BX*1), DX
   438  	RORXL       $0x1b, CX, R12
   439  	RORXL       $0x02, CX, BX
   440  	VMOVDQU     176(R10), X0
   441  	XORL        SI, CX
   442  	ADDL        R12, DX
   443  	XORL        DI, CX
   444  	ADDL        196(R15), AX
   445  	LEAL        (AX)(CX*1), AX
   446  	RORXL       $0x1b, DX, R12
   447  	RORXL       $0x02, DX, CX
   448  	VINSERTI128 $0x01, 176(R13), Y0, Y0
   449  	XORL        BX, DX
   450  	ADDL        R12, AX
   451  	XORL        SI, DX
   452  	ADDL        200(R15), DI
   453  	LEAL        (DI)(DX*1), DI
   454  	RORXL       $0x1b, AX, R12
   455  	RORXL       $0x02, AX, DX
   456  	VPSHUFB     Y10, Y0, Y12
   457  	XORL        CX, AX
   458  	ADDL        R12, DI
   459  	XORL        BX, AX
   460  	ADDL        204(R15), SI
   461  	LEAL        (SI)(AX*1), SI
   462  	RORXL       $0x1b, DI, R12
   463  	RORXL       $0x02, DI, AX
   464  	XORL        DX, DI
   465  	ADDL        R12, SI
   466  	XORL        CX, DI
   467  	ADDL        224(R15), BX
   468  	LEAL        (BX)(DI*1), BX
   469  	RORXL       $0x1b, SI, R12
   470  	RORXL       $0x02, SI, DI
   471  	VPADDD      (R8), Y12, Y0
   472  	XORL        AX, SI
   473  	ADDL        R12, BX
   474  	XORL        DX, SI
   475  	ADDL        228(R15), CX
   476  	LEAL        (CX)(SI*1), CX
   477  	RORXL       $0x1b, BX, R12
   478  	RORXL       $0x02, BX, SI
   479  	XORL        DI, BX
   480  	ADDL        R12, CX
   481  	XORL        AX, BX
   482  	ADDL        232(R15), DX
   483  	LEAL        (DX)(BX*1), DX
   484  	RORXL       $0x1b, CX, R12
   485  	RORXL       $0x02, CX, BX
   486  	XORL        SI, CX
   487  	ADDL        R12, DX
   488  	XORL        DI, CX
   489  	ADDL        236(R15), AX
   490  	LEAL        (AX)(CX*1), AX
   491  	RORXL       $0x1b, DX, R12
   492  	RORXL       $0x02, DX, CX
   493  	VMOVDQU     Y0, 96(R14)
   494  	XORL        BX, DX
   495  	ADDL        R12, AX
   496  	XORL        SI, DX
   497  	ADDL        256(R15), DI
   498  	LEAL        (DI)(DX*1), DI
   499  	RORXL       $0x1b, AX, R12
   500  	RORXL       $0x02, AX, DX
   501  	VPALIGNR    $0x08, Y15, Y14, Y8
   502  	VPSRLDQ     $0x04, Y12, Y0
   503  	XORL        CX, AX
   504  	ADDL        R12, DI
   505  	XORL        BX, AX
   506  	ADDL        260(R15), SI
   507  	LEAL        (SI)(AX*1), SI
   508  	RORXL       $0x1b, DI, R12
   509  	RORXL       $0x02, DI, AX
   510  	VPXOR       Y13, Y8, Y8
   511  	VPXOR       Y15, Y0, Y0
   512  	XORL        DX, DI
   513  	ADDL        R12, SI
   514  	XORL        CX, DI
   515  	ADDL        264(R15), BX
   516  	LEAL        (BX)(DI*1), BX
   517  	RORXL       $0x1b, SI, R12
   518  	RORXL       $0x02, SI, DI
   519  	VPXOR       Y0, Y8, Y8
   520  	VPSLLDQ     $0x0c, Y8, Y9
   521  	XORL        AX, SI
   522  	ADDL        R12, BX
   523  	XORL        DX, SI
   524  	ADDL        268(R15), CX
   525  	LEAL        (CX)(SI*1), CX
   526  	RORXL       $0x1b, BX, R12
   527  	RORXL       $0x02, BX, SI
   528  	VPSLLD      $0x01, Y8, Y0
   529  	VPSRLD      $0x1f, Y8, Y8
   530  	XORL        DI, BX
   531  	ADDL        R12, CX
   532  	XORL        AX, BX
   533  	ADDL        288(R15), DX
   534  	LEAL        (DX)(BX*1), DX
   535  	RORXL       $0x1b, CX, R12
   536  	RORXL       $0x02, CX, BX
   537  	VPOR        Y8, Y0, Y0
   538  	VPSLLD      $0x02, Y9, Y8
   539  	XORL        SI, CX
   540  	ADDL        R12, DX
   541  	XORL        DI, CX
   542  	ADDL        292(R15), AX
   543  	LEAL        (AX)(CX*1), AX
   544  	RORXL       $0x1b, DX, R12
   545  	RORXL       $0x02, DX, CX
   546  	VPSRLD      $0x1e, Y9, Y9
   547  	VPXOR       Y8, Y0, Y0
   548  	XORL        BX, DX
   549  	ADDL        R12, AX
   550  	XORL        SI, DX
   551  	ADDL        296(R15), DI
   552  	LEAL        (DI)(DX*1), DI
   553  	RORXL       $0x1b, AX, R12
   554  	RORXL       $0x02, AX, DX
   555  	XORL        CX, AX
   556  	ADDL        R12, DI
   557  	XORL        BX, AX
   558  	ADDL        300(R15), SI
   559  	VPXOR       Y9, Y0, Y8
   560  	VPADDD      (R8), Y8, Y0
   561  	VMOVDQU     Y0, 128(R14)
   562  	LEAL        (SI)(AX*1), SI
   563  	MOVL        DX, BP
   564  	ORL         DI, BP
   565  	RORXL       $0x1b, DI, R12
   566  	RORXL       $0x02, DI, AX
   567  	ANDL        CX, BP
   568  	ANDL        DX, DI
   569  	ORL         BP, DI
   570  	ADDL        R12, SI
   571  	ADDL        320(R15), BX
   572  	VPALIGNR    $0x08, Y14, Y13, Y7
   573  	VPSRLDQ     $0x04, Y8, Y0
   574  	LEAL        (BX)(DI*1), BX
   575  	MOVL        AX, BP
   576  	ORL         SI, BP
   577  	RORXL       $0x1b, SI, R12
   578  	RORXL       $0x02, SI, DI
   579  	ANDL        DX, BP
   580  	ANDL        AX, SI
   581  	ORL         BP, SI
   582  	ADDL        R12, BX
   583  	ADDL        324(R15), CX
   584  	VPXOR       Y12, Y7, Y7
   585  	VPXOR       Y14, Y0, Y0
   586  	LEAL        (CX)(SI*1), CX
   587  	MOVL        DI, BP
   588  	ORL         BX, BP
   589  	RORXL       $0x1b, BX, R12
   590  	RORXL       $0x02, BX, SI
   591  	ANDL        AX, BP
   592  	ANDL        DI, BX
   593  	ORL         BP, BX
   594  	ADDL        R12, CX
   595  	ADDL        328(R15), DX
   596  	VPXOR       Y0, Y7, Y7
   597  	VPSLLDQ     $0x0c, Y7, Y9
   598  	LEAL        (DX)(BX*1), DX
   599  	MOVL        SI, BP
   600  	ORL         CX, BP
   601  	RORXL       $0x1b, CX, R12
   602  	RORXL       $0x02, CX, BX
   603  	ANDL        DI, BP
   604  	ANDL        SI, CX
   605  	ORL         BP, CX
   606  	ADDL        R12, DX
   607  	ADDL        332(R15), AX
   608  	VPSLLD      $0x01, Y7, Y0
   609  	VPSRLD      $0x1f, Y7, Y7
   610  	LEAL        (AX)(CX*1), AX
   611  	MOVL        BX, BP
   612  	ORL         DX, BP
   613  	RORXL       $0x1b, DX, R12
   614  	RORXL       $0x02, DX, CX
   615  	ANDL        SI, BP
   616  	ANDL        BX, DX
   617  	ORL         BP, DX
   618  	ADDL        R12, AX
   619  	ADDL        352(R15), DI
   620  	VPOR        Y7, Y0, Y0
   621  	VPSLLD      $0x02, Y9, Y7
   622  	LEAL        (DI)(DX*1), DI
   623  	MOVL        CX, BP
   624  	ORL         AX, BP
   625  	RORXL       $0x1b, AX, R12
   626  	RORXL       $0x02, AX, DX
   627  	ANDL        BX, BP
   628  	ANDL        CX, AX
   629  	ORL         BP, AX
   630  	ADDL        R12, DI
   631  	ADDL        356(R15), SI
   632  	VPSRLD      $0x1e, Y9, Y9
   633  	VPXOR       Y7, Y0, Y0
   634  	LEAL        (SI)(AX*1), SI
   635  	MOVL        DX, BP
   636  	ORL         DI, BP
   637  	RORXL       $0x1b, DI, R12
   638  	RORXL       $0x02, DI, AX
   639  	ANDL        CX, BP
   640  	ANDL        DX, DI
   641  	ORL         BP, DI
   642  	ADDL        R12, SI
   643  	ADDL        360(R15), BX
   644  	LEAL        (BX)(DI*1), BX
   645  	MOVL        AX, BP
   646  	ORL         SI, BP
   647  	RORXL       $0x1b, SI, R12
   648  	RORXL       $0x02, SI, DI
   649  	ANDL        DX, BP
   650  	ANDL        AX, SI
   651  	ORL         BP, SI
   652  	ADDL        R12, BX
   653  	ADDL        364(R15), CX
   654  	VPXOR       Y9, Y0, Y7
   655  	VPADDD      32(R8), Y7, Y0
   656  	VMOVDQU     Y0, 160(R14)
   657  	LEAL        (CX)(SI*1), CX
   658  	MOVL        DI, BP
   659  	ORL         BX, BP
   660  	RORXL       $0x1b, BX, R12
   661  	RORXL       $0x02, BX, SI
   662  	ANDL        AX, BP
   663  	ANDL        DI, BX
   664  	ORL         BP, BX
   665  	ADDL        R12, CX
   666  	ADDL        384(R15), DX
   667  	VPALIGNR    $0x08, Y13, Y12, Y5
   668  	VPSRLDQ     $0x04, Y7, Y0
   669  	LEAL        (DX)(BX*1), DX
   670  	MOVL        SI, BP
   671  	ORL         CX, BP
   672  	RORXL       $0x1b, CX, R12
   673  	RORXL       $0x02, CX, BX
   674  	ANDL        DI, BP
   675  	ANDL        SI, CX
   676  	ORL         BP, CX
   677  	ADDL        R12, DX
   678  	ADDL        388(R15), AX
   679  	VPXOR       Y8, Y5, Y5
   680  	VPXOR       Y13, Y0, Y0
   681  	LEAL        (AX)(CX*1), AX
   682  	MOVL        BX, BP
   683  	ORL         DX, BP
   684  	RORXL       $0x1b, DX, R12
   685  	RORXL       $0x02, DX, CX
   686  	ANDL        SI, BP
   687  	ANDL        BX, DX
   688  	ORL         BP, DX
   689  	ADDL        R12, AX
   690  	ADDL        392(R15), DI
   691  	VPXOR       Y0, Y5, Y5
   692  	VPSLLDQ     $0x0c, Y5, Y9
   693  	LEAL        (DI)(DX*1), DI
   694  	MOVL        CX, BP
   695  	ORL         AX, BP
   696  	RORXL       $0x1b, AX, R12
   697  	RORXL       $0x02, AX, DX
   698  	ANDL        BX, BP
   699  	ANDL        CX, AX
   700  	ORL         BP, AX
   701  	ADDL        R12, DI
   702  	ADDL        396(R15), SI
   703  	VPSLLD      $0x01, Y5, Y0
   704  	VPSRLD      $0x1f, Y5, Y5
   705  	LEAL        (SI)(AX*1), SI
   706  	MOVL        DX, BP
   707  	ORL         DI, BP
   708  	RORXL       $0x1b, DI, R12
   709  	RORXL       $0x02, DI, AX
   710  	ANDL        CX, BP
   711  	ANDL        DX, DI
   712  	ORL         BP, DI
   713  	ADDL        R12, SI
   714  	ADDL        416(R15), BX
   715  	VPOR        Y5, Y0, Y0
   716  	VPSLLD      $0x02, Y9, Y5
   717  	LEAL        (BX)(DI*1), BX
   718  	MOVL        AX, BP
   719  	ORL         SI, BP
   720  	RORXL       $0x1b, SI, R12
   721  	RORXL       $0x02, SI, DI
   722  	ANDL        DX, BP
   723  	ANDL        AX, SI
   724  	ORL         BP, SI
   725  	ADDL        R12, BX
   726  	ADDL        420(R15), CX
   727  	VPSRLD      $0x1e, Y9, Y9
   728  	VPXOR       Y5, Y0, Y0
   729  	LEAL        (CX)(SI*1), CX
   730  	MOVL        DI, BP
   731  	ORL         BX, BP
   732  	RORXL       $0x1b, BX, R12
   733  	RORXL       $0x02, BX, SI
   734  	ANDL        AX, BP
   735  	ANDL        DI, BX
   736  	ORL         BP, BX
   737  	ADDL        R12, CX
   738  	ADDL        424(R15), DX
   739  	LEAL        (DX)(BX*1), DX
   740  	MOVL        SI, BP
   741  	ORL         CX, BP
   742  	RORXL       $0x1b, CX, R12
   743  	RORXL       $0x02, CX, BX
   744  	ANDL        DI, BP
   745  	ANDL        SI, CX
   746  	ORL         BP, CX
   747  	ADDL        R12, DX
   748  	ADDL        428(R15), AX
   749  	VPXOR       Y9, Y0, Y5
   750  	VPADDD      32(R8), Y5, Y0
   751  	VMOVDQU     Y0, 192(R14)
   752  	LEAL        (AX)(CX*1), AX
   753  	MOVL        BX, BP
   754  	ORL         DX, BP
   755  	RORXL       $0x1b, DX, R12
   756  	RORXL       $0x02, DX, CX
   757  	ANDL        SI, BP
   758  	ANDL        BX, DX
   759  	ORL         BP, DX
   760  	ADDL        R12, AX
   761  	ADDL        448(R15), DI
   762  	VPALIGNR    $0x08, Y12, Y8, Y3
   763  	VPSRLDQ     $0x04, Y5, Y0
   764  	LEAL        (DI)(DX*1), DI
   765  	MOVL        CX, BP
   766  	ORL         AX, BP
   767  	RORXL       $0x1b, AX, R12
   768  	RORXL       $0x02, AX, DX
   769  	ANDL        BX, BP
   770  	ANDL        CX, AX
   771  	ORL         BP, AX
   772  	ADDL        R12, DI
   773  	ADDL        452(R15), SI
   774  	VPXOR       Y7, Y3, Y3
   775  	VPXOR       Y12, Y0, Y0
   776  	LEAL        (SI)(AX*1), SI
   777  	MOVL        DX, BP
   778  	ORL         DI, BP
   779  	RORXL       $0x1b, DI, R12
   780  	RORXL       $0x02, DI, AX
   781  	ANDL        CX, BP
   782  	ANDL        DX, DI
   783  	ORL         BP, DI
   784  	ADDL        R12, SI
   785  	ADDL        456(R15), BX
   786  	VPXOR       Y0, Y3, Y3
   787  	VPSLLDQ     $0x0c, Y3, Y9
   788  	LEAL        (BX)(DI*1), BX
   789  	MOVL        AX, BP
   790  	ORL         SI, BP
   791  	RORXL       $0x1b, SI, R12
   792  	RORXL       $0x02, SI, DI
   793  	ANDL        DX, BP
   794  	ANDL        AX, SI
   795  	ORL         BP, SI
   796  	ADDL        R12, BX
   797  	ADDL        460(R15), CX
   798  	LEAL        (CX)(SI*1), CX
   799  	RORXL       $0x1b, BX, R12
   800  	RORXL       $0x02, BX, SI
   801  	VPSLLD      $0x01, Y3, Y0
   802  	VPSRLD      $0x1f, Y3, Y3
   803  	XORL        DI, BX
   804  	ADDL        R12, CX
   805  	XORL        AX, BX
   806  	ADDQ        $0x80, R10
   807  	CMPQ        R10, R11
   808  	CMOVQCC     R8, R10
   809  	ADDL        480(R15), DX
   810  	LEAL        (DX)(BX*1), DX
   811  	RORXL       $0x1b, CX, R12
   812  	RORXL       $0x02, CX, BX
   813  	VPOR        Y3, Y0, Y0
   814  	VPSLLD      $0x02, Y9, Y3
   815  	XORL        SI, CX
   816  	ADDL        R12, DX
   817  	XORL        DI, CX
   818  	ADDL        484(R15), AX
   819  	LEAL        (AX)(CX*1), AX
   820  	RORXL       $0x1b, DX, R12
   821  	RORXL       $0x02, DX, CX
   822  	VPSRLD      $0x1e, Y9, Y9
   823  	VPXOR       Y3, Y0, Y0
   824  	XORL        BX, DX
   825  	ADDL        R12, AX
   826  	XORL        SI, DX
   827  	ADDL        488(R15), DI
   828  	LEAL        (DI)(DX*1), DI
   829  	RORXL       $0x1b, AX, R12
   830  	RORXL       $0x02, AX, DX
   831  	XORL        CX, AX
   832  	ADDL        R12, DI
   833  	XORL        BX, AX
   834  	ADDL        492(R15), SI
   835  	LEAL        (SI)(AX*1), SI
   836  	RORXL       $0x1b, DI, R12
   837  	RORXL       $0x02, DI, AX
   838  	VPXOR       Y9, Y0, Y3
   839  	VPADDD      32(R8), Y3, Y0
   840  	VMOVDQU     Y0, 224(R14)
   841  	XORL        DX, DI
   842  	ADDL        R12, SI
   843  	XORL        CX, DI
   844  	ADDL        512(R15), BX
   845  	LEAL        (BX)(DI*1), BX
   846  	RORXL       $0x1b, SI, R12
   847  	RORXL       $0x02, SI, DI
   848  	VPALIGNR    $0x08, Y5, Y3, Y0
   849  	XORL        AX, SI
   850  	ADDL        R12, BX
   851  	XORL        DX, SI
   852  	ADDL        516(R15), CX
   853  	LEAL        (CX)(SI*1), CX
   854  	RORXL       $0x1b, BX, R12
   855  	RORXL       $0x02, BX, SI
   856  	VPXOR       Y14, Y15, Y15
   857  	XORL        DI, BX
   858  	ADDL        R12, CX
   859  	XORL        AX, BX
   860  	ADDL        520(R15), DX
   861  	LEAL        (DX)(BX*1), DX
   862  	RORXL       $0x1b, CX, R12
   863  	RORXL       $0x02, CX, BX
   864  	VPXOR       Y8, Y0, Y0
   865  	XORL        SI, CX
   866  	ADDL        R12, DX
   867  	XORL        DI, CX
   868  	ADDL        524(R15), AX
   869  	LEAL        (AX)(CX*1), AX
   870  	RORXL       $0x1b, DX, R12
   871  	RORXL       $0x02, DX, CX
   872  	VPXOR       Y0, Y15, Y15
   873  	XORL        BX, DX
   874  	ADDL        R12, AX
   875  	XORL        SI, DX
   876  	ADDL        544(R15), DI
   877  	LEAL        (DI)(DX*1), DI
   878  	RORXL       $0x1b, AX, R12
   879  	RORXL       $0x02, AX, DX
   880  	VPSLLD      $0x02, Y15, Y0
   881  	XORL        CX, AX
   882  	ADDL        R12, DI
   883  	XORL        BX, AX
   884  	ADDL        548(R15), SI
   885  	LEAL        (SI)(AX*1), SI
   886  	RORXL       $0x1b, DI, R12
   887  	RORXL       $0x02, DI, AX
   888  	VPSRLD      $0x1e, Y15, Y15
   889  	VPOR        Y15, Y0, Y15
   890  	XORL        DX, DI
   891  	ADDL        R12, SI
   892  	XORL        CX, DI
   893  	ADDL        552(R15), BX
   894  	LEAL        (BX)(DI*1), BX
   895  	RORXL       $0x1b, SI, R12
   896  	RORXL       $0x02, SI, DI
   897  	XORL        AX, SI
   898  	ADDL        R12, BX
   899  	XORL        DX, SI
   900  	ADDL        556(R15), CX
   901  	LEAL        (CX)(SI*1), CX
   902  	RORXL       $0x1b, BX, R12
   903  	RORXL       $0x02, BX, SI
   904  	VPADDD      32(R8), Y15, Y0
   905  	VMOVDQU     Y0, 256(R14)
   906  	XORL        DI, BX
   907  	ADDL        R12, CX
   908  	XORL        AX, BX
   909  	ADDL        576(R15), DX
   910  	LEAL        (DX)(BX*1), DX
   911  	RORXL       $0x1b, CX, R12
   912  	RORXL       $0x02, CX, BX
   913  	VPALIGNR    $0x08, Y3, Y15, Y0
   914  	XORL        SI, CX
   915  	ADDL        R12, DX
   916  	XORL        DI, CX
   917  	ADDL        580(R15), AX
   918  	LEAL        (AX)(CX*1), AX
   919  	RORXL       $0x1b, DX, R12
   920  	RORXL       $0x02, DX, CX
   921  	VPXOR       Y13, Y14, Y14
   922  	XORL        BX, DX
   923  	ADDL        R12, AX
   924  	XORL        SI, DX
   925  	ADDL        584(R15), DI
   926  	LEAL        (DI)(DX*1), DI
   927  	RORXL       $0x1b, AX, R12
   928  	RORXL       $0x02, AX, DX
   929  	VPXOR       Y7, Y0, Y0
   930  	XORL        CX, AX
   931  	ADDL        R12, DI
   932  	XORL        BX, AX
   933  	ADDL        588(R15), SI
   934  	LEAL        (SI)(AX*1), SI
   935  	RORXL       $0x1b, DI, R12
   936  	RORXL       $0x02, DI, AX
   937  	VPXOR       Y0, Y14, Y14
   938  	XORL        DX, DI
   939  	ADDL        R12, SI
   940  	XORL        CX, DI
   941  	ADDL        608(R15), BX
   942  	LEAL        (BX)(DI*1), BX
   943  	RORXL       $0x1b, SI, R12
   944  	RORXL       $0x02, SI, DI
   945  	VPSLLD      $0x02, Y14, Y0
   946  	XORL        AX, SI
   947  	ADDL        R12, BX
   948  	XORL        DX, SI
   949  	ADDL        612(R15), CX
   950  	LEAL        (CX)(SI*1), CX
   951  	RORXL       $0x1b, BX, R12
   952  	RORXL       $0x02, BX, SI
   953  	VPSRLD      $0x1e, Y14, Y14
   954  	VPOR        Y14, Y0, Y14
   955  	XORL        DI, BX
   956  	ADDL        R12, CX
   957  	XORL        AX, BX
   958  	ADDL        616(R15), DX
   959  	LEAL        (DX)(BX*1), DX
   960  	RORXL       $0x1b, CX, R12
   961  	RORXL       $0x02, CX, BX
   962  	XORL        SI, CX
   963  	ADDL        R12, DX
   964  	XORL        DI, CX
   965  	ADDL        620(R15), AX
   966  	LEAL        (AX)(CX*1), AX
   967  	RORXL       $0x1b, DX, R12
   968  	VPADDD      32(R8), Y14, Y0
   969  	VMOVDQU     Y0, 288(R14)
   970  	ADDL        R12, AX
   971  	ADDL        (R9), AX
   972  	MOVL        AX, (R9)
   973  	ADDL        4(R9), DX
   974  	MOVL        DX, 4(R9)
   975  	ADDL        8(R9), BX
   976  	MOVL        BX, 8(R9)
   977  	ADDL        12(R9), SI
   978  	MOVL        SI, 12(R9)
   979  	ADDL        16(R9), DI
   980  	MOVL        DI, 16(R9)
   981  	CMPQ        R10, R8
   982  	JE          loop
   983  	MOVL        DX, CX
   984  	MOVL        CX, DX
   985  	RORXL       $0x02, CX, CX
   986  	ANDNL       SI, DX, BP
   987  	ANDL        BX, DX
   988  	XORL        BP, DX
   989  	ADDL        16(R15), DI
   990  	ANDNL       BX, AX, BP
   991  	LEAL        (DI)(DX*1), DI
   992  	RORXL       $0x1b, AX, R12
   993  	RORXL       $0x02, AX, DX
   994  	VPALIGNR    $0x08, Y15, Y14, Y0
   995  	ANDL        CX, AX
   996  	XORL        BP, AX
   997  	LEAL        (DI)(R12*1), DI
   998  	ADDL        20(R15), SI
   999  	ANDNL       CX, DI, BP
  1000  	LEAL        (SI)(AX*1), SI
  1001  	RORXL       $0x1b, DI, R12
  1002  	RORXL       $0x02, DI, AX
  1003  	VPXOR       Y12, Y13, Y13
  1004  	ANDL        DX, DI
  1005  	XORL        BP, DI
  1006  	LEAL        (SI)(R12*1), SI
  1007  	ADDL        24(R15), BX
  1008  	ANDNL       DX, SI, BP
  1009  	LEAL        (BX)(DI*1), BX
  1010  	RORXL       $0x1b, SI, R12
  1011  	RORXL       $0x02, SI, DI
  1012  	VPXOR       Y5, Y0, Y0
  1013  	ANDL        AX, SI
  1014  	XORL        BP, SI
  1015  	LEAL        (BX)(R12*1), BX
  1016  	ADDL        28(R15), CX
  1017  	ANDNL       AX, BX, BP
  1018  	LEAL        (CX)(SI*1), CX
  1019  	RORXL       $0x1b, BX, R12
  1020  	RORXL       $0x02, BX, SI
  1021  	VPXOR       Y0, Y13, Y13
  1022  	ANDL        DI, BX
  1023  	XORL        BP, BX
  1024  	LEAL        (CX)(R12*1), CX
  1025  	ADDL        48(R15), DX
  1026  	ANDNL       DI, CX, BP
  1027  	LEAL        (DX)(BX*1), DX
  1028  	RORXL       $0x1b, CX, R12
  1029  	RORXL       $0x02, CX, BX
  1030  	VPSLLD      $0x02, Y13, Y0
  1031  	ANDL        SI, CX
  1032  	XORL        BP, CX
  1033  	LEAL        (DX)(R12*1), DX
  1034  	ADDL        52(R15), AX
  1035  	ANDNL       SI, DX, BP
  1036  	LEAL        (AX)(CX*1), AX
  1037  	RORXL       $0x1b, DX, R12
  1038  	RORXL       $0x02, DX, CX
  1039  	VPSRLD      $0x1e, Y13, Y13
  1040  	VPOR        Y13, Y0, Y13
  1041  	ANDL        BX, DX
  1042  	XORL        BP, DX
  1043  	LEAL        (AX)(R12*1), AX
  1044  	ADDL        56(R15), DI
  1045  	ANDNL       BX, AX, BP
  1046  	LEAL        (DI)(DX*1), DI
  1047  	RORXL       $0x1b, AX, R12
  1048  	RORXL       $0x02, AX, DX
  1049  	ANDL        CX, AX
  1050  	XORL        BP, AX
  1051  	LEAL        (DI)(R12*1), DI
  1052  	ADDL        60(R15), SI
  1053  	ANDNL       CX, DI, BP
  1054  	LEAL        (SI)(AX*1), SI
  1055  	RORXL       $0x1b, DI, R12
  1056  	RORXL       $0x02, DI, AX
  1057  	VPADDD      64(R8), Y13, Y0
  1058  	VMOVDQU     Y0, 320(R14)
  1059  	ANDL        DX, DI
  1060  	XORL        BP, DI
  1061  	LEAL        (SI)(R12*1), SI
  1062  	ADDL        80(R15), BX
  1063  	ANDNL       DX, SI, BP
  1064  	LEAL        (BX)(DI*1), BX
  1065  	RORXL       $0x1b, SI, R12
  1066  	RORXL       $0x02, SI, DI
  1067  	VPALIGNR    $0x08, Y14, Y13, Y0
  1068  	ANDL        AX, SI
  1069  	XORL        BP, SI
  1070  	LEAL        (BX)(R12*1), BX
  1071  	ADDL        84(R15), CX
  1072  	ANDNL       AX, BX, BP
  1073  	LEAL        (CX)(SI*1), CX
  1074  	RORXL       $0x1b, BX, R12
  1075  	RORXL       $0x02, BX, SI
  1076  	VPXOR       Y8, Y12, Y12
  1077  	ANDL        DI, BX
  1078  	XORL        BP, BX
  1079  	LEAL        (CX)(R12*1), CX
  1080  	ADDL        88(R15), DX
  1081  	ANDNL       DI, CX, BP
  1082  	LEAL        (DX)(BX*1), DX
  1083  	RORXL       $0x1b, CX, R12
  1084  	RORXL       $0x02, CX, BX
  1085  	VPXOR       Y3, Y0, Y0
  1086  	ANDL        SI, CX
  1087  	XORL        BP, CX
  1088  	LEAL        (DX)(R12*1), DX
  1089  	ADDL        92(R15), AX
  1090  	ANDNL       SI, DX, BP
  1091  	LEAL        (AX)(CX*1), AX
  1092  	RORXL       $0x1b, DX, R12
  1093  	RORXL       $0x02, DX, CX
  1094  	VPXOR       Y0, Y12, Y12
  1095  	ANDL        BX, DX
  1096  	XORL        BP, DX
  1097  	LEAL        (AX)(R12*1), AX
  1098  	ADDL        112(R15), DI
  1099  	ANDNL       BX, AX, BP
  1100  	LEAL        (DI)(DX*1), DI
  1101  	RORXL       $0x1b, AX, R12
  1102  	RORXL       $0x02, AX, DX
  1103  	VPSLLD      $0x02, Y12, Y0
  1104  	ANDL        CX, AX
  1105  	XORL        BP, AX
  1106  	LEAL        (DI)(R12*1), DI
  1107  	ADDL        116(R15), SI
  1108  	ANDNL       CX, DI, BP
  1109  	LEAL        (SI)(AX*1), SI
  1110  	RORXL       $0x1b, DI, R12
  1111  	RORXL       $0x02, DI, AX
  1112  	VPSRLD      $0x1e, Y12, Y12
  1113  	VPOR        Y12, Y0, Y12
  1114  	ANDL        DX, DI
  1115  	XORL        BP, DI
  1116  	LEAL        (SI)(R12*1), SI
  1117  	ADDL        120(R15), BX
  1118  	ANDNL       DX, SI, BP
  1119  	LEAL        (BX)(DI*1), BX
  1120  	RORXL       $0x1b, SI, R12
  1121  	RORXL       $0x02, SI, DI
  1122  	ANDL        AX, SI
  1123  	XORL        BP, SI
  1124  	LEAL        (BX)(R12*1), BX
  1125  	ADDL        124(R15), CX
  1126  	ANDNL       AX, BX, BP
  1127  	LEAL        (CX)(SI*1), CX
  1128  	RORXL       $0x1b, BX, R12
  1129  	RORXL       $0x02, BX, SI
  1130  	VPADDD      64(R8), Y12, Y0
  1131  	VMOVDQU     Y0, 352(R14)
  1132  	ANDL        DI, BX
  1133  	XORL        BP, BX
  1134  	LEAL        (CX)(R12*1), CX
  1135  	ADDL        144(R15), DX
  1136  	ANDNL       DI, CX, BP
  1137  	LEAL        (DX)(BX*1), DX
  1138  	RORXL       $0x1b, CX, R12
  1139  	RORXL       $0x02, CX, BX
  1140  	VPALIGNR    $0x08, Y13, Y12, Y0
  1141  	ANDL        SI, CX
  1142  	XORL        BP, CX
  1143  	LEAL        (DX)(R12*1), DX
  1144  	ADDL        148(R15), AX
  1145  	ANDNL       SI, DX, BP
  1146  	LEAL        (AX)(CX*1), AX
  1147  	RORXL       $0x1b, DX, R12
  1148  	RORXL       $0x02, DX, CX
  1149  	VPXOR       Y7, Y8, Y8
  1150  	ANDL        BX, DX
  1151  	XORL        BP, DX
  1152  	LEAL        (AX)(R12*1), AX
  1153  	ADDL        152(R15), DI
  1154  	ANDNL       BX, AX, BP
  1155  	LEAL        (DI)(DX*1), DI
  1156  	RORXL       $0x1b, AX, R12
  1157  	RORXL       $0x02, AX, DX
  1158  	VPXOR       Y15, Y0, Y0
  1159  	ANDL        CX, AX
  1160  	XORL        BP, AX
  1161  	LEAL        (DI)(R12*1), DI
  1162  	ADDL        156(R15), SI
  1163  	LEAL        (SI)(AX*1), SI
  1164  	RORXL       $0x1b, DI, R12
  1165  	RORXL       $0x02, DI, AX
  1166  	VPXOR       Y0, Y8, Y8
  1167  	XORL        DX, DI
  1168  	ADDL        R12, SI
  1169  	XORL        CX, DI
  1170  	ADDL        176(R15), BX
  1171  	LEAL        (BX)(DI*1), BX
  1172  	RORXL       $0x1b, SI, R12
  1173  	RORXL       $0x02, SI, DI
  1174  	VPSLLD      $0x02, Y8, Y0
  1175  	XORL        AX, SI
  1176  	ADDL        R12, BX
  1177  	XORL        DX, SI
  1178  	ADDL        180(R15), CX
  1179  	LEAL        (CX)(SI*1), CX
  1180  	RORXL       $0x1b, BX, R12
  1181  	RORXL       $0x02, BX, SI
  1182  	VPSRLD      $0x1e, Y8, Y8
  1183  	VPOR        Y8, Y0, Y8
  1184  	XORL        DI, BX
  1185  	ADDL        R12, CX
  1186  	XORL        AX, BX
  1187  	ADDL        184(R15), DX
  1188  	LEAL        (DX)(BX*1), DX
  1189  	RORXL       $0x1b, CX, R12
  1190  	RORXL       $0x02, CX, BX
  1191  	XORL        SI, CX
  1192  	ADDL        R12, DX
  1193  	XORL        DI, CX
  1194  	ADDL        188(R15), AX
  1195  	LEAL        (AX)(CX*1), AX
  1196  	RORXL       $0x1b, DX, R12
  1197  	RORXL       $0x02, DX, CX
  1198  	VPADDD      64(R8), Y8, Y0
  1199  	VMOVDQU     Y0, 384(R14)
  1200  	XORL        BX, DX
  1201  	ADDL        R12, AX
  1202  	XORL        SI, DX
  1203  	ADDL        208(R15), DI
  1204  	LEAL        (DI)(DX*1), DI
  1205  	RORXL       $0x1b, AX, R12
  1206  	RORXL       $0x02, AX, DX
  1207  	VPALIGNR    $0x08, Y12, Y8, Y0
  1208  	XORL        CX, AX
  1209  	ADDL        R12, DI
  1210  	XORL        BX, AX
  1211  	ADDL        212(R15), SI
  1212  	LEAL        (SI)(AX*1), SI
  1213  	RORXL       $0x1b, DI, R12
  1214  	RORXL       $0x02, DI, AX
  1215  	VPXOR       Y5, Y7, Y7
  1216  	XORL        DX, DI
  1217  	ADDL        R12, SI
  1218  	XORL        CX, DI
  1219  	ADDL        216(R15), BX
  1220  	LEAL        (BX)(DI*1), BX
  1221  	RORXL       $0x1b, SI, R12
  1222  	RORXL       $0x02, SI, DI
  1223  	VPXOR       Y14, Y0, Y0
  1224  	XORL        AX, SI
  1225  	ADDL        R12, BX
  1226  	XORL        DX, SI
  1227  	ADDL        220(R15), CX
  1228  	LEAL        (CX)(SI*1), CX
  1229  	RORXL       $0x1b, BX, R12
  1230  	RORXL       $0x02, BX, SI
  1231  	VPXOR       Y0, Y7, Y7
  1232  	XORL        DI, BX
  1233  	ADDL        R12, CX
  1234  	XORL        AX, BX
  1235  	ADDL        240(R15), DX
  1236  	LEAL        (DX)(BX*1), DX
  1237  	RORXL       $0x1b, CX, R12
  1238  	RORXL       $0x02, CX, BX
  1239  	VPSLLD      $0x02, Y7, Y0
  1240  	XORL        SI, CX
  1241  	ADDL        R12, DX
  1242  	XORL        DI, CX
  1243  	ADDL        244(R15), AX
  1244  	LEAL        (AX)(CX*1), AX
  1245  	RORXL       $0x1b, DX, R12
  1246  	RORXL       $0x02, DX, CX
  1247  	VPSRLD      $0x1e, Y7, Y7
  1248  	VPOR        Y7, Y0, Y7
  1249  	XORL        BX, DX
  1250  	ADDL        R12, AX
  1251  	XORL        SI, DX
  1252  	ADDL        248(R15), DI
  1253  	LEAL        (DI)(DX*1), DI
  1254  	RORXL       $0x1b, AX, R12
  1255  	RORXL       $0x02, AX, DX
  1256  	XORL        CX, AX
  1257  	ADDL        R12, DI
  1258  	XORL        BX, AX
  1259  	ADDL        252(R15), SI
  1260  	LEAL        (SI)(AX*1), SI
  1261  	RORXL       $0x1b, DI, R12
  1262  	RORXL       $0x02, DI, AX
  1263  	VPADDD      64(R8), Y7, Y0
  1264  	VMOVDQU     Y0, 416(R14)
  1265  	XORL        DX, DI
  1266  	ADDL        R12, SI
  1267  	XORL        CX, DI
  1268  	ADDL        272(R15), BX
  1269  	LEAL        (BX)(DI*1), BX
  1270  	RORXL       $0x1b, SI, R12
  1271  	RORXL       $0x02, SI, DI
  1272  	VPALIGNR    $0x08, Y8, Y7, Y0
  1273  	XORL        AX, SI
  1274  	ADDL        R12, BX
  1275  	XORL        DX, SI
  1276  	ADDL        276(R15), CX
  1277  	LEAL        (CX)(SI*1), CX
  1278  	RORXL       $0x1b, BX, R12
  1279  	RORXL       $0x02, BX, SI
  1280  	VPXOR       Y3, Y5, Y5
  1281  	XORL        DI, BX
  1282  	ADDL        R12, CX
  1283  	XORL        AX, BX
  1284  	ADDL        280(R15), DX
  1285  	LEAL        (DX)(BX*1), DX
  1286  	RORXL       $0x1b, CX, R12
  1287  	RORXL       $0x02, CX, BX
  1288  	VPXOR       Y13, Y0, Y0
  1289  	XORL        SI, CX
  1290  	ADDL        R12, DX
  1291  	XORL        DI, CX
  1292  	ADDL        284(R15), AX
  1293  	LEAL        (AX)(CX*1), AX
  1294  	RORXL       $0x1b, DX, R12
  1295  	RORXL       $0x02, DX, CX
  1296  	VPXOR       Y0, Y5, Y5
  1297  	XORL        BX, DX
  1298  	ADDL        R12, AX
  1299  	XORL        SI, DX
  1300  	ADDL        304(R15), DI
  1301  	LEAL        (DI)(DX*1), DI
  1302  	RORXL       $0x1b, AX, R12
  1303  	RORXL       $0x02, AX, DX
  1304  	VPSLLD      $0x02, Y5, Y0
  1305  	XORL        CX, AX
  1306  	ADDL        R12, DI
  1307  	XORL        BX, AX
  1308  	ADDL        308(R15), SI
  1309  	LEAL        (SI)(AX*1), SI
  1310  	RORXL       $0x1b, DI, R12
  1311  	RORXL       $0x02, DI, AX
  1312  	VPSRLD      $0x1e, Y5, Y5
  1313  	VPOR        Y5, Y0, Y5
  1314  	XORL        DX, DI
  1315  	ADDL        R12, SI
  1316  	XORL        CX, DI
  1317  	ADDL        312(R15), BX
  1318  	LEAL        (BX)(DI*1), BX
  1319  	RORXL       $0x1b, SI, R12
  1320  	RORXL       $0x02, SI, DI
  1321  	XORL        AX, SI
  1322  	ADDL        R12, BX
  1323  	XORL        DX, SI
  1324  	ADDL        316(R15), CX
  1325  	VPADDD      64(R8), Y5, Y0
  1326  	VMOVDQU     Y0, 448(R14)
  1327  	LEAL        (CX)(SI*1), CX
  1328  	MOVL        DI, BP
  1329  	ORL         BX, BP
  1330  	RORXL       $0x1b, BX, R12
  1331  	RORXL       $0x02, BX, SI
  1332  	ANDL        AX, BP
  1333  	ANDL        DI, BX
  1334  	ORL         BP, BX
  1335  	ADDL        R12, CX
  1336  	ADDL        336(R15), DX
  1337  	VPALIGNR    $0x08, Y7, Y5, Y0
  1338  	LEAL        (DX)(BX*1), DX
  1339  	MOVL        SI, BP
  1340  	ORL         CX, BP
  1341  	RORXL       $0x1b, CX, R12
  1342  	RORXL       $0x02, CX, BX
  1343  	ANDL        DI, BP
  1344  	ANDL        SI, CX
  1345  	ORL         BP, CX
  1346  	ADDL        R12, DX
  1347  	ADDL        340(R15), AX
  1348  	VPXOR       Y15, Y3, Y3
  1349  	LEAL        (AX)(CX*1), AX
  1350  	MOVL        BX, BP
  1351  	ORL         DX, BP
  1352  	RORXL       $0x1b, DX, R12
  1353  	RORXL       $0x02, DX, CX
  1354  	ANDL        SI, BP
  1355  	ANDL        BX, DX
  1356  	ORL         BP, DX
  1357  	ADDL        R12, AX
  1358  	ADDL        344(R15), DI
  1359  	VPXOR       Y12, Y0, Y0
  1360  	LEAL        (DI)(DX*1), DI
  1361  	MOVL        CX, BP
  1362  	ORL         AX, BP
  1363  	RORXL       $0x1b, AX, R12
  1364  	RORXL       $0x02, AX, DX
  1365  	ANDL        BX, BP
  1366  	ANDL        CX, AX
  1367  	ORL         BP, AX
  1368  	ADDL        R12, DI
  1369  	ADDL        348(R15), SI
  1370  	VPXOR       Y0, Y3, Y3
  1371  	LEAL        (SI)(AX*1), SI
  1372  	MOVL        DX, BP
  1373  	ORL         DI, BP
  1374  	RORXL       $0x1b, DI, R12
  1375  	RORXL       $0x02, DI, AX
  1376  	ANDL        CX, BP
  1377  	ANDL        DX, DI
  1378  	ORL         BP, DI
  1379  	ADDL        R12, SI
  1380  	ADDL        368(R15), BX
  1381  	VPSLLD      $0x02, Y3, Y0
  1382  	LEAL        (BX)(DI*1), BX
  1383  	MOVL        AX, BP
  1384  	ORL         SI, BP
  1385  	RORXL       $0x1b, SI, R12
  1386  	RORXL       $0x02, SI, DI
  1387  	ANDL        DX, BP
  1388  	ANDL        AX, SI
  1389  	ORL         BP, SI
  1390  	ADDL        R12, BX
  1391  	ADDL        372(R15), CX
  1392  	VPSRLD      $0x1e, Y3, Y3
  1393  	VPOR        Y3, Y0, Y3
  1394  	LEAL        (CX)(SI*1), CX
  1395  	MOVL        DI, BP
  1396  	ORL         BX, BP
  1397  	RORXL       $0x1b, BX, R12
  1398  	RORXL       $0x02, BX, SI
  1399  	ANDL        AX, BP
  1400  	ANDL        DI, BX
  1401  	ORL         BP, BX
  1402  	ADDL        R12, CX
  1403  	ADDL        376(R15), DX
  1404  	LEAL        (DX)(BX*1), DX
  1405  	MOVL        SI, BP
  1406  	ORL         CX, BP
  1407  	RORXL       $0x1b, CX, R12
  1408  	RORXL       $0x02, CX, BX
  1409  	ANDL        DI, BP
  1410  	ANDL        SI, CX
  1411  	ORL         BP, CX
  1412  	ADDL        R12, DX
  1413  	ADDL        380(R15), AX
  1414  	VPADDD      96(R8), Y3, Y0
  1415  	VMOVDQU     Y0, 480(R14)
  1416  	LEAL        (AX)(CX*1), AX
  1417  	MOVL        BX, BP
  1418  	ORL         DX, BP
  1419  	RORXL       $0x1b, DX, R12
  1420  	RORXL       $0x02, DX, CX
  1421  	ANDL        SI, BP
  1422  	ANDL        BX, DX
  1423  	ORL         BP, DX
  1424  	ADDL        R12, AX
  1425  	ADDL        400(R15), DI
  1426  	VPALIGNR    $0x08, Y5, Y3, Y0
  1427  	LEAL        (DI)(DX*1), DI
  1428  	MOVL        CX, BP
  1429  	ORL         AX, BP
  1430  	RORXL       $0x1b, AX, R12
  1431  	RORXL       $0x02, AX, DX
  1432  	ANDL        BX, BP
  1433  	ANDL        CX, AX
  1434  	ORL         BP, AX
  1435  	ADDL        R12, DI
  1436  	ADDL        404(R15), SI
  1437  	VPXOR       Y14, Y15, Y15
  1438  	LEAL        (SI)(AX*1), SI
  1439  	MOVL        DX, BP
  1440  	ORL         DI, BP
  1441  	RORXL       $0x1b, DI, R12
  1442  	RORXL       $0x02, DI, AX
  1443  	ANDL        CX, BP
  1444  	ANDL        DX, DI
  1445  	ORL         BP, DI
  1446  	ADDL        R12, SI
  1447  	ADDL        408(R15), BX
  1448  	VPXOR       Y8, Y0, Y0
  1449  	LEAL        (BX)(DI*1), BX
  1450  	MOVL        AX, BP
  1451  	ORL         SI, BP
  1452  	RORXL       $0x1b, SI, R12
  1453  	RORXL       $0x02, SI, DI
  1454  	ANDL        DX, BP
  1455  	ANDL        AX, SI
  1456  	ORL         BP, SI
  1457  	ADDL        R12, BX
  1458  	ADDL        412(R15), CX
  1459  	VPXOR       Y0, Y15, Y15
  1460  	LEAL        (CX)(SI*1), CX
  1461  	MOVL        DI, BP
  1462  	ORL         BX, BP
  1463  	RORXL       $0x1b, BX, R12
  1464  	RORXL       $0x02, BX, SI
  1465  	ANDL        AX, BP
  1466  	ANDL        DI, BX
  1467  	ORL         BP, BX
  1468  	ADDL        R12, CX
  1469  	ADDL        432(R15), DX
  1470  	VPSLLD      $0x02, Y15, Y0
  1471  	LEAL        (DX)(BX*1), DX
  1472  	MOVL        SI, BP
  1473  	ORL         CX, BP
  1474  	RORXL       $0x1b, CX, R12
  1475  	RORXL       $0x02, CX, BX
  1476  	ANDL        DI, BP
  1477  	ANDL        SI, CX
  1478  	ORL         BP, CX
  1479  	ADDL        R12, DX
  1480  	ADDL        436(R15), AX
  1481  	VPSRLD      $0x1e, Y15, Y15
  1482  	VPOR        Y15, Y0, Y15
  1483  	LEAL        (AX)(CX*1), AX
  1484  	MOVL        BX, BP
  1485  	ORL         DX, BP
  1486  	RORXL       $0x1b, DX, R12
  1487  	RORXL       $0x02, DX, CX
  1488  	ANDL        SI, BP
  1489  	ANDL        BX, DX
  1490  	ORL         BP, DX
  1491  	ADDL        R12, AX
  1492  	ADDL        440(R15), DI
  1493  	LEAL        (DI)(DX*1), DI
  1494  	MOVL        CX, BP
  1495  	ORL         AX, BP
  1496  	RORXL       $0x1b, AX, R12
  1497  	RORXL       $0x02, AX, DX
  1498  	ANDL        BX, BP
  1499  	ANDL        CX, AX
  1500  	ORL         BP, AX
  1501  	ADDL        R12, DI
  1502  	ADDL        444(R15), SI
  1503  	VPADDD      96(R8), Y15, Y0
  1504  	VMOVDQU     Y0, 512(R14)
  1505  	LEAL        (SI)(AX*1), SI
  1506  	MOVL        DX, BP
  1507  	ORL         DI, BP
  1508  	RORXL       $0x1b, DI, R12
  1509  	RORXL       $0x02, DI, AX
  1510  	ANDL        CX, BP
  1511  	ANDL        DX, DI
  1512  	ORL         BP, DI
  1513  	ADDL        R12, SI
  1514  	ADDL        464(R15), BX
  1515  	VPALIGNR    $0x08, Y3, Y15, Y0
  1516  	LEAL        (BX)(DI*1), BX
  1517  	MOVL        AX, BP
  1518  	ORL         SI, BP
  1519  	RORXL       $0x1b, SI, R12
  1520  	RORXL       $0x02, SI, DI
  1521  	ANDL        DX, BP
  1522  	ANDL        AX, SI
  1523  	ORL         BP, SI
  1524  	ADDL        R12, BX
  1525  	ADDL        468(R15), CX
  1526  	VPXOR       Y13, Y14, Y14
  1527  	LEAL        (CX)(SI*1), CX
  1528  	MOVL        DI, BP
  1529  	ORL         BX, BP
  1530  	RORXL       $0x1b, BX, R12
  1531  	RORXL       $0x02, BX, SI
  1532  	ANDL        AX, BP
  1533  	ANDL        DI, BX
  1534  	ORL         BP, BX
  1535  	ADDL        R12, CX
  1536  	ADDL        472(R15), DX
  1537  	VPXOR       Y7, Y0, Y0
  1538  	LEAL        (DX)(BX*1), DX
  1539  	MOVL        SI, BP
  1540  	ORL         CX, BP
  1541  	RORXL       $0x1b, CX, R12
  1542  	RORXL       $0x02, CX, BX
  1543  	ANDL        DI, BP
  1544  	ANDL        SI, CX
  1545  	ORL         BP, CX
  1546  	ADDL        R12, DX
  1547  	ADDL        476(R15), AX
  1548  	LEAL        (AX)(CX*1), AX
  1549  	RORXL       $0x1b, DX, R12
  1550  	RORXL       $0x02, DX, CX
  1551  	VPXOR       Y0, Y14, Y14
  1552  	XORL        BX, DX
  1553  	ADDL        R12, AX
  1554  	XORL        SI, DX
  1555  	ADDQ        $0x80, R13
  1556  	CMPQ        R13, R11
  1557  	CMOVQCC     R8, R10
  1558  	ADDL        496(R15), DI
  1559  	LEAL        (DI)(DX*1), DI
  1560  	RORXL       $0x1b, AX, R12
  1561  	RORXL       $0x02, AX, DX
  1562  	VPSLLD      $0x02, Y14, Y0
  1563  	XORL        CX, AX
  1564  	ADDL        R12, DI
  1565  	XORL        BX, AX
  1566  	ADDL        500(R15), SI
  1567  	LEAL        (SI)(AX*1), SI
  1568  	RORXL       $0x1b, DI, R12
  1569  	RORXL       $0x02, DI, AX
  1570  	VPSRLD      $0x1e, Y14, Y14
  1571  	VPOR        Y14, Y0, Y14
  1572  	XORL        DX, DI
  1573  	ADDL        R12, SI
  1574  	XORL        CX, DI
  1575  	ADDL        504(R15), BX
  1576  	LEAL        (BX)(DI*1), BX
  1577  	RORXL       $0x1b, SI, R12
  1578  	RORXL       $0x02, SI, DI
  1579  	XORL        AX, SI
  1580  	ADDL        R12, BX
  1581  	XORL        DX, SI
  1582  	ADDL        508(R15), CX
  1583  	LEAL        (CX)(SI*1), CX
  1584  	RORXL       $0x1b, BX, R12
  1585  	RORXL       $0x02, BX, SI
  1586  	VPADDD      96(R8), Y14, Y0
  1587  	VMOVDQU     Y0, 544(R14)
  1588  	XORL        DI, BX
  1589  	ADDL        R12, CX
  1590  	XORL        AX, BX
  1591  	ADDL        528(R15), DX
  1592  	LEAL        (DX)(BX*1), DX
  1593  	RORXL       $0x1b, CX, R12
  1594  	RORXL       $0x02, CX, BX
  1595  	VPALIGNR    $0x08, Y15, Y14, Y0
  1596  	XORL        SI, CX
  1597  	ADDL        R12, DX
  1598  	XORL        DI, CX
  1599  	ADDL        532(R15), AX
  1600  	LEAL        (AX)(CX*1), AX
  1601  	RORXL       $0x1b, DX, R12
  1602  	RORXL       $0x02, DX, CX
  1603  	VPXOR       Y12, Y13, Y13
  1604  	XORL        BX, DX
  1605  	ADDL        R12, AX
  1606  	XORL        SI, DX
  1607  	ADDL        536(R15), DI
  1608  	LEAL        (DI)(DX*1), DI
  1609  	RORXL       $0x1b, AX, R12
  1610  	RORXL       $0x02, AX, DX
  1611  	VPXOR       Y5, Y0, Y0
  1612  	XORL        CX, AX
  1613  	ADDL        R12, DI
  1614  	XORL        BX, AX
  1615  	ADDL        540(R15), SI
  1616  	LEAL        (SI)(AX*1), SI
  1617  	RORXL       $0x1b, DI, R12
  1618  	RORXL       $0x02, DI, AX
  1619  	VPXOR       Y0, Y13, Y13
  1620  	XORL        DX, DI
  1621  	ADDL        R12, SI
  1622  	XORL        CX, DI
  1623  	ADDL        560(R15), BX
  1624  	LEAL        (BX)(DI*1), BX
  1625  	RORXL       $0x1b, SI, R12
  1626  	RORXL       $0x02, SI, DI
  1627  	VPSLLD      $0x02, Y13, Y0
  1628  	XORL        AX, SI
  1629  	ADDL        R12, BX
  1630  	XORL        DX, SI
  1631  	ADDL        564(R15), CX
  1632  	LEAL        (CX)(SI*1), CX
  1633  	RORXL       $0x1b, BX, R12
  1634  	RORXL       $0x02, BX, SI
  1635  	VPSRLD      $0x1e, Y13, Y13
  1636  	VPOR        Y13, Y0, Y13
  1637  	XORL        DI, BX
  1638  	ADDL        R12, CX
  1639  	XORL        AX, BX
  1640  	ADDL        568(R15), DX
  1641  	LEAL        (DX)(BX*1), DX
  1642  	RORXL       $0x1b, CX, R12
  1643  	RORXL       $0x02, CX, BX
  1644  	XORL        SI, CX
  1645  	ADDL        R12, DX
  1646  	XORL        DI, CX
  1647  	ADDL        572(R15), AX
  1648  	LEAL        (AX)(CX*1), AX
  1649  	RORXL       $0x1b, DX, R12
  1650  	RORXL       $0x02, DX, CX
  1651  	VPADDD      96(R8), Y13, Y0
  1652  	VMOVDQU     Y0, 576(R14)
  1653  	XORL        BX, DX
  1654  	ADDL        R12, AX
  1655  	XORL        SI, DX
  1656  	ADDL        592(R15), DI
  1657  	LEAL        (DI)(DX*1), DI
  1658  	RORXL       $0x1b, AX, R12
  1659  	RORXL       $0x02, AX, DX
  1660  	VPALIGNR    $0x08, Y14, Y13, Y0
  1661  	XORL        CX, AX
  1662  	ADDL        R12, DI
  1663  	XORL        BX, AX
  1664  	ADDL        596(R15), SI
  1665  	LEAL        (SI)(AX*1), SI
  1666  	RORXL       $0x1b, DI, R12
  1667  	RORXL       $0x02, DI, AX
  1668  	VPXOR       Y8, Y12, Y12
  1669  	XORL        DX, DI
  1670  	ADDL        R12, SI
  1671  	XORL        CX, DI
  1672  	ADDL        600(R15), BX
  1673  	LEAL        (BX)(DI*1), BX
  1674  	RORXL       $0x1b, SI, R12
  1675  	RORXL       $0x02, SI, DI
  1676  	VPXOR       Y3, Y0, Y0
  1677  	XORL        AX, SI
  1678  	ADDL        R12, BX
  1679  	XORL        DX, SI
  1680  	ADDL        604(R15), CX
  1681  	LEAL        (CX)(SI*1), CX
  1682  	RORXL       $0x1b, BX, R12
  1683  	RORXL       $0x02, BX, SI
  1684  	VPXOR       Y0, Y12, Y12
  1685  	XORL        DI, BX
  1686  	ADDL        R12, CX
  1687  	XORL        AX, BX
  1688  	ADDL        624(R15), DX
  1689  	LEAL        (DX)(BX*1), DX
  1690  	RORXL       $0x1b, CX, R12
  1691  	RORXL       $0x02, CX, BX
  1692  	VPSLLD      $0x02, Y12, Y0
  1693  	XORL        SI, CX
  1694  	ADDL        R12, DX
  1695  	XORL        DI, CX
  1696  	ADDL        628(R15), AX
  1697  	LEAL        (AX)(CX*1), AX
  1698  	RORXL       $0x1b, DX, R12
  1699  	RORXL       $0x02, DX, CX
  1700  	VPSRLD      $0x1e, Y12, Y12
  1701  	VPOR        Y12, Y0, Y12
  1702  	XORL        BX, DX
  1703  	ADDL        R12, AX
  1704  	XORL        SI, DX
  1705  	ADDL        632(R15), DI
  1706  	LEAL        (DI)(DX*1), DI
  1707  	RORXL       $0x1b, AX, R12
  1708  	RORXL       $0x02, AX, DX
  1709  	XORL        CX, AX
  1710  	ADDL        R12, DI
  1711  	XORL        BX, AX
  1712  	ADDL        636(R15), SI
  1713  	LEAL        (SI)(AX*1), SI
  1714  	RORXL       $0x1b, DI, R12
  1715  	VPADDD      96(R8), Y12, Y0
  1716  	VMOVDQU     Y0, 608(R14)
  1717  	ADDL        R12, SI
  1718  	ADDL        (R9), SI
  1719  	MOVL        SI, (R9)
  1720  	ADDL        4(R9), DI
  1721  	MOVL        DI, 4(R9)
  1722  	ADDL        8(R9), DX
  1723  	MOVL        DX, 8(R9)
  1724  	ADDL        12(R9), CX
  1725  	MOVL        CX, 12(R9)
  1726  	ADDL        16(R9), BX
  1727  	MOVL        BX, 16(R9)
  1728  	MOVL        SI, R12
  1729  	MOVL        DI, SI
  1730  	MOVL        DX, DI
  1731  	MOVL        BX, DX
  1732  	MOVL        CX, AX
  1733  	MOVL        R12, CX
  1734  	XCHGQ       R15, R14
  1735  	JMP         loop
  1736  
  1737  DATA K_XMM_AR<>+0(SB)/4, $0x5a827999 // bytes 0-31: 0x5a827999 in all eight dword lanes (SHA-1 constant K for rounds 0-19, FIPS 180-4)
  1738  DATA K_XMM_AR<>+4(SB)/4, $0x5a827999
  1739  DATA K_XMM_AR<>+8(SB)/4, $0x5a827999
  1740  DATA K_XMM_AR<>+12(SB)/4, $0x5a827999
  1741  DATA K_XMM_AR<>+16(SB)/4, $0x5a827999
  1742  DATA K_XMM_AR<>+20(SB)/4, $0x5a827999
  1743  DATA K_XMM_AR<>+24(SB)/4, $0x5a827999
  1744  DATA K_XMM_AR<>+28(SB)/4, $0x5a827999
  1745  DATA K_XMM_AR<>+32(SB)/4, $0x6ed9eba1 // bytes 32-63: 0x6ed9eba1 in all eight dword lanes (K for rounds 20-39)
  1746  DATA K_XMM_AR<>+36(SB)/4, $0x6ed9eba1
  1747  DATA K_XMM_AR<>+40(SB)/4, $0x6ed9eba1
  1748  DATA K_XMM_AR<>+44(SB)/4, $0x6ed9eba1
  1749  DATA K_XMM_AR<>+48(SB)/4, $0x6ed9eba1
  1750  DATA K_XMM_AR<>+52(SB)/4, $0x6ed9eba1
  1751  DATA K_XMM_AR<>+56(SB)/4, $0x6ed9eba1
  1752  DATA K_XMM_AR<>+60(SB)/4, $0x6ed9eba1
  1753  DATA K_XMM_AR<>+64(SB)/4, $0x8f1bbcdc // bytes 64-95: 0x8f1bbcdc in all eight dword lanes (K for rounds 40-59)
  1754  DATA K_XMM_AR<>+68(SB)/4, $0x8f1bbcdc
  1755  DATA K_XMM_AR<>+72(SB)/4, $0x8f1bbcdc
  1756  DATA K_XMM_AR<>+76(SB)/4, $0x8f1bbcdc
  1757  DATA K_XMM_AR<>+80(SB)/4, $0x8f1bbcdc
  1758  DATA K_XMM_AR<>+84(SB)/4, $0x8f1bbcdc
  1759  DATA K_XMM_AR<>+88(SB)/4, $0x8f1bbcdc
  1760  DATA K_XMM_AR<>+92(SB)/4, $0x8f1bbcdc
  1761  DATA K_XMM_AR<>+96(SB)/4, $0xca62c1d6 // bytes 96-127: 0xca62c1d6 in all eight dword lanes (K for rounds 60-79)
  1762  DATA K_XMM_AR<>+100(SB)/4, $0xca62c1d6
  1763  DATA K_XMM_AR<>+104(SB)/4, $0xca62c1d6
  1764  DATA K_XMM_AR<>+108(SB)/4, $0xca62c1d6
  1765  DATA K_XMM_AR<>+112(SB)/4, $0xca62c1d6
  1766  DATA K_XMM_AR<>+116(SB)/4, $0xca62c1d6
  1767  DATA K_XMM_AR<>+120(SB)/4, $0xca62c1d6
  1768  DATA K_XMM_AR<>+124(SB)/4, $0xca62c1d6
  1769  GLOBL K_XMM_AR<>(SB), RODATA, $128 // file-local read-only table, four 32-byte groups; blockAVX2 keeps its address in R8 and adds a group with VPADDD 0/32/64/96(R8)
  1770  
  1771  DATA BSWAP_SHUFB_CTL<>+0(SB)/4, $0x00010203 // stored little-endian, so control bytes 0-3 are 3,2,1,0: byte-swaps dword 0
  1772  DATA BSWAP_SHUFB_CTL<>+4(SB)/4, $0x04050607 // control bytes 4-7 are 7,6,5,4: byte-swaps dword 1
  1773  DATA BSWAP_SHUFB_CTL<>+8(SB)/4, $0x08090a0b // control bytes 8-11 are 11,10,9,8: byte-swaps dword 2
  1774  DATA BSWAP_SHUFB_CTL<>+12(SB)/4, $0x0c0d0e0f // control bytes 12-15 are 15,14,13,12: byte-swaps dword 3
  1775  DATA BSWAP_SHUFB_CTL<>+16(SB)/4, $0x00010203 // bytes 16-31 repeat the same 16-byte pattern for the upper 128-bit lane
  1776  DATA BSWAP_SHUFB_CTL<>+20(SB)/4, $0x04050607
  1777  DATA BSWAP_SHUFB_CTL<>+24(SB)/4, $0x08090a0b
  1778  DATA BSWAP_SHUFB_CTL<>+28(SB)/4, $0x0c0d0e0f
  1779  GLOBL BSWAP_SHUFB_CTL<>(SB), RODATA, $32 // file-local read-only VPSHUFB control; blockAVX2 loads it into Y10 to byte-swap each dword of the input
  1780  
  1781  // func blockSHANI(dig *digest, p []byte)
  1782  // Requires: AVX, SHA, SSE2, SSE4.1, SSSE3
        //
        // Consumes p in 64-byte steps using the SHA1RNDS4 / SHA1NEXTE / SHA1MSG1 /
        // SHA1MSG2 instructions and updates the 20 bytes of state at dig in place.
        // Returns immediately, leaving dig untouched, when len(p) == 0.
        // NOTE(review): the loop exits only when SI equals p_base+p_len exactly, so
        // len(p) is presumably always a multiple of 64 - confirm against the Go caller.
        //
        // Frame: $48 bytes of locals, 32 bytes of arguments.
        // Register roles as used below:
        //   DI    = dig
        //   SI    = current input pointer, DX = end of input (p_base + p_len)
        //   AX    = 16-byte-aligned scratch area inside the local frame
        //   X0    = ABCD working state
        //   X5/X6 = E operands, alternating every four rounds
        //   X1-X4 = message words, X7 = byte-reversal PSHUFB control
  1783  TEXT ·blockSHANI(SB), $48-32
  1784  	MOVQ dig+0(FP), DI
  1785  	MOVQ p_base+8(FP), SI
  1786  	MOVQ p_len+16(FP), DX
  1787  	CMPQ DX, $0x00
  1788  	JEQ  done // empty input: nothing to hash, dig is not touched
  1789  	ADDQ SI, DX // DX = p_base + p_len, the loop's end pointer
  1790  
  1791  	// Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes
        	// AX = (SP+15) &^ 15. The two 16-byte slots at (AX) and 16(AX) therefore
        	// fit inside the 48-byte frame; they are accessed with VMOVDQA and as
        	// SSE memory operands below.
  1792  	LEAQ 15(SP), AX
  1793  	MOVQ $0x000000000000000f, CX
  1794  	NOTQ CX
  1795  	ANDQ CX, AX
  1796  
  1797  	// Load initial hash state
  1798  	PINSRD  $0x03, 16(DI), X5 // dword at dig+16 -> top dword lane of X5
  1799  	VMOVDQU (DI), X0 // 16 bytes at dig+0
  1800  	PAND    upper_mask<>+0(SB), X5 // keep only the top dword; the lower 96 bits become zero
  1801  	PSHUFD  $0x1b, X0, X0 // reverse the order of the four dwords
  1802  	VMOVDQA shuffle_mask<>+0(SB), X7 // PSHUFB control that reverses all 16 bytes
  1803  
  1804  loop:
  1805  	// Save ABCD and E working values
  1806  	VMOVDQA X5, (AX)
  1807  	VMOVDQA X0, 16(AX)
  1808  
  1809  	// Rounds 0-3
  1810  	VMOVDQU   (SI), X1
  1811  	PSHUFB    X7, X1 // byte-reverse the 16 input bytes just loaded
  1812  	PADDD     X1, X5
  1813  	VMOVDQA   X0, X6 // copy ABCD before it is updated; X6 is the E operand of the next group
  1814  	SHA1RNDS4 $0x00, X5, X0
  1815  
  1816  	// Rounds 4-7
  1817  	VMOVDQU   16(SI), X2
  1818  	PSHUFB    X7, X2
  1819  	SHA1NEXTE X2, X6
  1820  	VMOVDQA   X0, X5
  1821  	SHA1RNDS4 $0x00, X6, X0
  1822  	SHA1MSG1  X2, X1
  1823  
  1824  	// Rounds 8-11
  1825  	VMOVDQU   32(SI), X3
  1826  	PSHUFB    X7, X3
  1827  	SHA1NEXTE X3, X5
  1828  	VMOVDQA   X0, X6
  1829  	SHA1RNDS4 $0x00, X5, X0
  1830  	SHA1MSG1  X3, X2
  1831  	PXOR      X3, X1
  1832  
  1833  	// Rounds 12-15 (last of the four 16-byte input loads; later groups use only X1-X4)
  1834  	VMOVDQU   48(SI), X4
  1835  	PSHUFB    X7, X4
  1836  	SHA1NEXTE X4, X6
  1837  	VMOVDQA   X0, X5
  1838  	SHA1MSG2  X4, X1
  1839  	SHA1RNDS4 $0x00, X6, X0
  1840  	SHA1MSG1  X4, X3
  1841  	PXOR      X4, X2
  1842  
  1843  	// Rounds 16-19
  1844  	SHA1NEXTE X1, X5
  1845  	VMOVDQA   X0, X6
  1846  	SHA1MSG2  X1, X2
  1847  	SHA1RNDS4 $0x00, X5, X0
  1848  	SHA1MSG1  X1, X4
  1849  	PXOR      X1, X3
  1850  
  1851  	// Rounds 20-23 (SHA1RNDS4 immediate is $0x01 from here through rounds 36-39)
  1852  	SHA1NEXTE X2, X6
  1853  	VMOVDQA   X0, X5
  1854  	SHA1MSG2  X2, X3
  1855  	SHA1RNDS4 $0x01, X6, X0
  1856  	SHA1MSG1  X2, X1
  1857  	PXOR      X2, X4
  1858  
  1859  	// Rounds 24-27
  1860  	SHA1NEXTE X3, X5
  1861  	VMOVDQA   X0, X6
  1862  	SHA1MSG2  X3, X4
  1863  	SHA1RNDS4 $0x01, X5, X0
  1864  	SHA1MSG1  X3, X2
  1865  	PXOR      X3, X1
  1866  
  1867  	// Rounds 28-31
  1868  	SHA1NEXTE X4, X6
  1869  	VMOVDQA   X0, X5
  1870  	SHA1MSG2  X4, X1
  1871  	SHA1RNDS4 $0x01, X6, X0
  1872  	SHA1MSG1  X4, X3
  1873  	PXOR      X4, X2
  1874  
  1875  	// Rounds 32-35
  1876  	SHA1NEXTE X1, X5
  1877  	VMOVDQA   X0, X6
  1878  	SHA1MSG2  X1, X2
  1879  	SHA1RNDS4 $0x01, X5, X0
  1880  	SHA1MSG1  X1, X4
  1881  	PXOR      X1, X3
  1882  
  1883  	// Rounds 36-39
  1884  	SHA1NEXTE X2, X6
  1885  	VMOVDQA   X0, X5
  1886  	SHA1MSG2  X2, X3
  1887  	SHA1RNDS4 $0x01, X6, X0
  1888  	SHA1MSG1  X2, X1
  1889  	PXOR      X2, X4
  1890  
  1891  	// Rounds 40-43 (SHA1RNDS4 immediate is $0x02 from here through rounds 56-59)
  1892  	SHA1NEXTE X3, X5
  1893  	VMOVDQA   X0, X6
  1894  	SHA1MSG2  X3, X4
  1895  	SHA1RNDS4 $0x02, X5, X0
  1896  	SHA1MSG1  X3, X2
  1897  	PXOR      X3, X1
  1898  
  1899  	// Rounds 44-47
  1900  	SHA1NEXTE X4, X6
  1901  	VMOVDQA   X0, X5
  1902  	SHA1MSG2  X4, X1
  1903  	SHA1RNDS4 $0x02, X6, X0
  1904  	SHA1MSG1  X4, X3
  1905  	PXOR      X4, X2
  1906  
  1907  	// Rounds 48-51
  1908  	SHA1NEXTE X1, X5
  1909  	VMOVDQA   X0, X6
  1910  	SHA1MSG2  X1, X2
  1911  	SHA1RNDS4 $0x02, X5, X0
  1912  	SHA1MSG1  X1, X4
  1913  	PXOR      X1, X3
  1914  
  1915  	// Rounds 52-55
  1916  	SHA1NEXTE X2, X6
  1917  	VMOVDQA   X0, X5
  1918  	SHA1MSG2  X2, X3
  1919  	SHA1RNDS4 $0x02, X6, X0
  1920  	SHA1MSG1  X2, X1
  1921  	PXOR      X2, X4
  1922  
  1923  	// Rounds 56-59
  1924  	SHA1NEXTE X3, X5
  1925  	VMOVDQA   X0, X6
  1926  	SHA1MSG2  X3, X4
  1927  	SHA1RNDS4 $0x02, X5, X0
  1928  	SHA1MSG1  X3, X2
  1929  	PXOR      X3, X1
  1930  
  1931  	// Rounds 60-63 (SHA1RNDS4 immediate is $0x03 from here through rounds 76-79)
  1932  	SHA1NEXTE X4, X6
  1933  	VMOVDQA   X0, X5
  1934  	SHA1MSG2  X4, X1
  1935  	SHA1RNDS4 $0x03, X6, X0
  1936  	SHA1MSG1  X4, X3
  1937  	PXOR      X4, X2
  1938  
  1939  	// Rounds 64-67
  1940  	SHA1NEXTE X1, X5
  1941  	VMOVDQA   X0, X6
  1942  	SHA1MSG2  X1, X2
  1943  	SHA1RNDS4 $0x03, X5, X0
  1944  	SHA1MSG1  X1, X4
  1945  	PXOR      X1, X3
  1946  
  1947  	// Rounds 68-71 (from here on no SHA1MSG1 is issued)
  1948  	SHA1NEXTE X2, X6
  1949  	VMOVDQA   X0, X5
  1950  	SHA1MSG2  X2, X3
  1951  	SHA1RNDS4 $0x03, X6, X0
  1952  	PXOR      X2, X4
  1953  
  1954  	// Rounds 72-75
  1955  	SHA1NEXTE X3, X5
  1956  	VMOVDQA   X0, X6
  1957  	SHA1MSG2  X3, X4
  1958  	SHA1RNDS4 $0x03, X5, X0
  1959  
  1960  	// Rounds 76-79
  1961  	SHA1NEXTE X4, X6
  1962  	VMOVDQA   X0, X5
  1963  	SHA1RNDS4 $0x03, X6, X0
  1964  
  1965  	// Add saved E and ABCD
  1966  	SHA1NEXTE (AX), X5 // memory operand is the E value saved at the top of this iteration
  1967  	PADDD     16(AX), X0 // memory operand is the ABCD value saved at the top of this iteration
  1968  
  1969  	// Check if we are done, if not return to the loop
  1970  	ADDQ $0x40, SI // advance the input pointer by 64 bytes
  1971  	CMPQ SI, DX
  1972  	JNE  loop
  1973  
  1974  	// Write the hash state back to digest
  1975  	PSHUFD  $0x1b, X0, X0 // reverse the dword order again, undoing the shuffle done on load
  1976  	VMOVDQU X0, (DI)
  1977  	PEXTRD  $0x03, X5, 16(DI) // top dword of X5 -> dig+16
  1978  
  1979  done:
  1980  	RET
  1981  
  1982  DATA upper_mask<>+0(SB)/8, $0x0000000000000000 // low 64 bits: all clear
  1983  DATA upper_mask<>+8(SB)/8, $0xffffffff00000000 // high 64 bits: only the top 32 bits (bits 96-127 of the vector) are set
  1984  GLOBL upper_mask<>(SB), RODATA, $16 // file-local read-only 16-byte mask; blockSHANI PANDs X5 with it so only the top dword survives
  1985  
  1986  DATA shuffle_mask<>+0(SB)/8, $0x08090a0b0c0d0e0f // stored little-endian, so control bytes 0-7 are 15,14,...,8
  1987  DATA shuffle_mask<>+8(SB)/8, $0x0001020304050607 // control bytes 8-15 are 7,6,...,0; overall byte i selects source byte 15-i
  1988  GLOBL shuffle_mask<>(SB), RODATA, $16 // file-local read-only PSHUFB control that reverses all 16 bytes; blockSHANI loads it into X7
  1989  

View as plain text