Text file src/crypto/internal/fips/sha256/sha256block_ppc64x.s

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build (ppc64 || ppc64le) && !purego
     6  
     7  // Based on CRYPTOGAMS code with the following comment:
     8  // # ====================================================================
     9  // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
    10  // # project. The module is, however, dual licensed under OpenSSL and
    11  // # CRYPTOGAMS licenses depending on where you obtain it. For further
    12  // # details see http://www.openssl.org/~appro/cryptogams/.
    13  // # ====================================================================
    14  
    15  #include "textflag.h"
    16  
    17  // SHA256 block routine. See sha256block.go for Go equivalent.
    18  //
    19  // The algorithm is detailed in FIPS 180-4:
    20  //
    21  //  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    22  //
    23  // Wt = Mt; for 0 <= t <= 15
    24  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    25  //
    26  // a = H0
    27  // b = H1
    28  // c = H2
    29  // d = H3
    30  // e = H4
    31  // f = H5
    32  // g = H6
    33  // h = H7
    34  //
    35  // for t = 0 to 63 {
    36  //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    37  //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    38  //    h = g
    39  //    g = f
    40  //    f = e
    41  //    e = d + T1
    42  //    d = c
    43  //    c = b
    44  //    b = a
    45  //    a = T1 + T2
    46  // }
    47  //
    48  // H0 = a + H0
    49  // H1 = b + H1
    50  // H2 = c + H2
    51  // H3 = d + H3
    52  // H4 = e + H4
    53  // H5 = f + H5
    54  // H6 = g + H6
    55  // H7 = h + H7
    56  
    57  #define CTX	R3 // Pointer to the hash state, taken from the dig argument
    58  #define INP	R4 // Current read position in p; advanced by 0x40 per block
    59  #define END	R5 // INP at entry plus the length rounded down to a multiple of 64
    60  #define TBL	R6 // Pointer into kcon table
    61  #define LEN	R9 // p_len, rounded down to a multiple of 64
    62  #define TEMP	R12 // Scratch: LVSL address on ppc64le, then the CTR loop count
    63  
    64  #define TBL_STRT	R7 // Pointer to start of kcon table.
    65  
        // R_xNNN holds the byte offset 0xNNN and is used as the index operand of the
        // vector loads and stores. R_x000 (R0) is never written by this file.
        // NOTE(review): relies on R0 reading as zero — confirm against the Go ppc64
        // register conventions.
    66  #define R_x000	R0
    67  #define R_x010	R8
    68  #define R_x020	R10
    69  #define R_x030	R11
    70  #define R_x040	R14
    71  #define R_x050	R15
    72  #define R_x060	R16
    73  #define R_x070	R17
    74  #define R_x080	R18
    75  #define R_x090	R19
    76  #define R_x0a0	R20
    77  #define R_x0b0	R21
    78  #define R_x0c0	R22
    79  #define R_x0d0	R23
    80  #define R_x0e0	R24
    81  #define R_x0f0	R25
    82  #define R_x100	R26
    83  #define R_x110	R27
    84  
    85  
    86  // V0-V7 are A-H
    87  // V8-V23 are used for the message schedule
    88  #define KI	V24 // Vector most recently loaded from kcon (also a temporary while building LEMASK)
    89  #define FUNC	V25 // Ch(e,f,g), then reused for a^b and Maj(a,b,c) within a round
    90  #define S0	V26 // BIGSIGMA0(a), then BIGSIGMA0(a) + Maj(a,b,c)
    91  #define S1	V27 // BIGSIGMA1(e)
    92  #define s0	V28 // SIGMA0 term of the message schedule (SHA256ROUND1 only)
    93  #define s1	V29 // SIGMA1 term of the message schedule (SHA256ROUND1 only)
    94  #define LEMASK	V31 // Permutation control register for little endian
    95  
    96  // 4 copies of each Kt, to fill all 4 words of a vector register
        //
        // Layout: the entry for Kt (0 <= t <= 63) is 16 bytes at offset t*0x10,
        // written as two 8-byte DATA statements that each repeat the 32-bit
        // constant twice. The 16 zero bytes at 0x400 follow K63: the round macros
        // fetch table entries ahead of the round that consumes them, so the last
        // rounds of a block read past K63, and the value added from 0x400 is zero.
    97  DATA  ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
    98  DATA  ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
    99  DATA  ·kcon+0x010(SB)/8, $0x7137449171374491
   100  DATA  ·kcon+0x018(SB)/8, $0x7137449171374491
   101  DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
   102  DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
   103  DATA  ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
   104  DATA  ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
   105  DATA  ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
   106  DATA  ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
   107  DATA  ·kcon+0x050(SB)/8, $0x59f111f159f111f1
   108  DATA  ·kcon+0x058(SB)/8, $0x59f111f159f111f1
   109  DATA  ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
   110  DATA  ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
   111  DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
   112  DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
   113  DATA  ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
   114  DATA  ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
   115  DATA  ·kcon+0x090(SB)/8, $0x12835b0112835b01
   116  DATA  ·kcon+0x098(SB)/8, $0x12835b0112835b01
   117  DATA  ·kcon+0x0A0(SB)/8, $0x243185be243185be
   118  DATA  ·kcon+0x0A8(SB)/8, $0x243185be243185be
   119  DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
   120  DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
   121  DATA  ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
   122  DATA  ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
   123  DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
   124  DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
   125  DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
   126  DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
   127  DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
   128  DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
   129  DATA  ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
   130  DATA  ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
   131  DATA  ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
   132  DATA  ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
   133  DATA  ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
   134  DATA  ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
   135  DATA  ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
   136  DATA  ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
   137  DATA  ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
   138  DATA  ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
   139  DATA  ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
   140  DATA  ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
   141  DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
   142  DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
   143  DATA  ·kcon+0x170(SB)/8, $0x76f988da76f988da
   144  DATA  ·kcon+0x178(SB)/8, $0x76f988da76f988da
   145  DATA  ·kcon+0x180(SB)/8, $0x983e5152983e5152
   146  DATA  ·kcon+0x188(SB)/8, $0x983e5152983e5152
   147  DATA  ·kcon+0x190(SB)/8, $0xa831c66da831c66d
   148  DATA  ·kcon+0x198(SB)/8, $0xa831c66da831c66d
   149  DATA  ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
   150  DATA  ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
   151  DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
   152  DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
   153  DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
   154  DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
   155  DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
   156  DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
   157  DATA  ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
   158  DATA  ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
   159  DATA  ·kcon+0x1F0(SB)/8, $0x1429296714292967
   160  DATA  ·kcon+0x1F8(SB)/8, $0x1429296714292967
   161  DATA  ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
   162  DATA  ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
   163  DATA  ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
   164  DATA  ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
   165  DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
   166  DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
   167  DATA  ·kcon+0x230(SB)/8, $0x53380d1353380d13
   168  DATA  ·kcon+0x238(SB)/8, $0x53380d1353380d13
   169  DATA  ·kcon+0x240(SB)/8, $0x650a7354650a7354
   170  DATA  ·kcon+0x248(SB)/8, $0x650a7354650a7354
   171  DATA  ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
   172  DATA  ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
   173  DATA  ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
   174  DATA  ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
   175  DATA  ·kcon+0x270(SB)/8, $0x92722c8592722c85
   176  DATA  ·kcon+0x278(SB)/8, $0x92722c8592722c85
   177  DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
   178  DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
   179  DATA  ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
   180  DATA  ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
   181  DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
   182  DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
   183  DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
   184  DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
   185  DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
   186  DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
   187  DATA  ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
   188  DATA  ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
   189  DATA  ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
   190  DATA  ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
   191  DATA  ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
   192  DATA  ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
   193  DATA  ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
   194  DATA  ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
   195  DATA  ·kcon+0x310(SB)/8, $0x1e376c081e376c08
   196  DATA  ·kcon+0x318(SB)/8, $0x1e376c081e376c08
   197  DATA  ·kcon+0x320(SB)/8, $0x2748774c2748774c
   198  DATA  ·kcon+0x328(SB)/8, $0x2748774c2748774c
   199  DATA  ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
   200  DATA  ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
   201  DATA  ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
   202  DATA  ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
   203  DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
   204  DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
   205  DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
   206  DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
   207  DATA  ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
   208  DATA  ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
   209  DATA  ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
   210  DATA  ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
   211  DATA  ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
   212  DATA  ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
   213  DATA  ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
   214  DATA  ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
   215  DATA  ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
   216  DATA  ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
   217  DATA  ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
   218  DATA  ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
   219  DATA  ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
   220  DATA  ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
   221  DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
   222  DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
   223  DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
   224  DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
   225  DATA  ·kcon+0x400(SB)/8, $0x0000000000000000 // zero entry after K63 (see layout note above the table)
   226  DATA  ·kcon+0x408(SB)/8, $0x0000000000000000
   227  
        // Three 16-byte VPERM control vectors at 0x410, 0x420 and 0x430. blockPOWER
        // uses them, in that order, after the last block to gather the eight state
        // words from V0-V3 into V0 and from V4-V7 into V4 before storing them.
        // Index bytes 0x10-0x13 select the leading word of VPERM's second source;
        // bytes 0x00-0x0b select words of the first source.
        // The ppc64le and big-endian tables differ in how the selections are laid
        // out. NOTE(review): the exact lane order depends on LVX/VPERM/STXVD2X byte
        // numbering in each mode — verify against the Power ISA before editing.
   228  #ifdef GOARCH_ppc64le
   229  DATA  ·kcon+0x410(SB)/8, $0x1011121310111213 // permutation control vectors
   230  DATA  ·kcon+0x418(SB)/8, $0x1011121300010203
   231  DATA  ·kcon+0x420(SB)/8, $0x1011121310111213
   232  DATA  ·kcon+0x428(SB)/8, $0x0405060700010203
   233  DATA  ·kcon+0x430(SB)/8, $0x1011121308090a0b
   234  DATA  ·kcon+0x438(SB)/8, $0x0405060700010203
   235  #else
   236  DATA  ·kcon+0x410(SB)/8, $0x1011121300010203
   237  DATA  ·kcon+0x418(SB)/8, $0x1011121310111213 // permutation control vectors
   238  DATA  ·kcon+0x420(SB)/8, $0x0405060700010203
   239  DATA  ·kcon+0x428(SB)/8, $0x1011121310111213
   240  DATA  ·kcon+0x430(SB)/8, $0x0001020304050607
   241  DATA  ·kcon+0x438(SB)/8, $0x08090a0b10111213
   242  #endif
   243  
   244  GLOBL ·kcon(SB), RODATA, $1088 // 1088 = 0x440: 65 16-byte entries (0x000-0x400) plus the 3 vectors above
   245  
        // SHA256ROUND0(a, b, c, d, e, f, g, h, xi, idx) performs one compression
        // round with message word xi and no message-schedule update.
        //
        // On entry h must already include this round's Kt, and KI must hold the
        // constant for the following round. The macro computes:
        //   FUNC = Ch(e,f,g);  S1 = BIGSIGMA1(e);  S0 = BIGSIGMA0(a)
        //   h += xi + Ch + S1         (h is now T1)
        //   g += KI                   (pre-adds K for the round in which g serves as h)
        //   d += h                    (d becomes the new e)
        //   h += S0 + Maj(a,b,c)      (h becomes the new a = T1 + T2)
        //   KI = 16 bytes at TBL+idx  (constant for the round after the next one)
        // Maj is built as VSEL(b, c, a^b). The working variables are never moved
        // between registers; callers rotate the a..h arguments by one each round.
        // Clobbers FUNC, S0, S1 and KI. No comments may be placed inside the body:
        // every line but the last ends in a continuation backslash.
   246  #define SHA256ROUND0(a, b, c, d, e, f, g, h, xi, idx) \
   247  	VSEL		g, f, e, FUNC; \
   248  	VSHASIGMAW	$15, e, $1, S1; \
   249  	VADDUWM		xi, h, h; \
   250  	VSHASIGMAW	$0, a, $1, S0; \
   251  	VADDUWM		FUNC, h, h; \
   252  	VXOR		b, a, FUNC; \
   253  	VADDUWM		S1, h, h; \
   254  	VSEL		b, c, FUNC, FUNC; \
   255  	VADDUWM		KI, g, g; \
   256  	VADDUWM		h, d, d; \
   257  	VADDUWM		FUNC, S0, S0; \
   258  	LVX		(TBL)(idx), KI; \
   259  	VADDUWM		S0, h, h
   260  
        // SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx)
        // performs the same compression round as SHA256ROUND0 on message word xi
        // and, interleaved with it, advances the message schedule in place:
        //   s0 = SIGMA0(xj_1);  s1 = SIGMA1(xj_14)
        //   xj = xj + xj_9 + s0 + s1
        // xj holds the schedule word from 16 rounds before the one being produced
        // and is overwritten with the new word; xj_1, xj_9 and xj_14 are the words
        // 1, 9 and 14 positions after it (Wt-15, Wt-7 and Wt-2 of the new word).
        // The same entry conditions on h and KI apply as for SHA256ROUND0.
        // Clobbers FUNC, S0, S1, s0, s1 and KI.
   261  #define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx) \
   262  	VSHASIGMAW	$0, xj_1, $0, s0; \
   263  	VSEL		g, f, e, FUNC; \
   264  	VSHASIGMAW	$15, e, $1, S1; \
   265  	VADDUWM		xi, h, h; \
   266  	VSHASIGMAW	$0, a, $1, S0; \
   267  	VSHASIGMAW	$15, xj_14, $0, s1; \
   268  	VADDUWM		FUNC, h, h; \
   269  	VXOR		b, a, FUNC; \
   270  	VADDUWM		xj_9, xj, xj; \
   271  	VADDUWM		S1, h, h; \
   272  	VSEL		b, c, FUNC, FUNC; \
   273  	VADDUWM		KI, g, g; \
   274  	VADDUWM		h, d, d; \
   275  	VADDUWM		FUNC, S0, S0; \
   276  	VADDUWM		s0, xj, xj; \
   277  	LVX		(TBL)(idx), KI; \
   278  	VADDUWM		S0, h, h; \
   279  	VADDUWM		s1, xj, xj
   280  
        // VPERMLE(va, vb, vc, vt) expands to VPERM va, vb, vc, vt on ppc64le and to
        // nothing on big-endian ppc64. blockPOWER applies it with LEMASK to each
        // freshly loaded 16 bytes of input, so the permutation only happens on
        // little-endian targets.
   281  #ifdef GOARCH_ppc64le
   282  #define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
   283  #else
   284  #define VPERMLE(va,vb,vc,vt)
   285  #endif
   286  
   287  // func blockPOWER(dig *Digest, p []byte)
        //
        // blockPOWER runs the SHA-256 compression function over every complete
        // 64-byte block of p and writes the updated state back through dig.
        // Bytes after the last complete block are ignored. If p holds no complete
        // block the routine returns without reading or writing the state.
        //
        // Register use: V0-V7 carry the working variables a-h, V8-V23 the sixteen
        // live message-schedule words, and VS24-VS31 a copy of the state at block
        // entry for the final addition. Each block is 15 SHA256ROUND0 rounds, one
        // SHA256ROUND1 round, then 3 passes of 16 SHA256ROUND1 rounds (64 total).
        // NOTE(review): assumes dig points at eight consecutive 32-bit state words
        // (32 bytes are loaded from and stored to it) — confirm against Digest.
   288  TEXT ·blockPOWER(SB),0,$0-32
   289  	MOVD	dig+0(FP), CTX
   290  	MOVD	p_base+8(FP), INP
   291  	MOVD	p_len+16(FP), LEN
   292  
   293  	SRD	$6, LEN	// shift right then left by 6: round LEN down to a multiple of 64
   294  	SLD	$6, LEN
   295  	ADD	INP, LEN, END	// END = first byte after the last complete block
   296  
   297  	CMP	INP, END
   298  	BEQ	end	// no complete block: nothing to do
   299  
   300  	MOVD	$·kcon(SB), TBL_STRT
   301  	MOVD	$0x10, R_x010
   302  
   303  #ifdef GOARCH_ppc64le
        	// Build LEMASK: LVSL for address 8, with every byte XORed with 0x0F.
        	// KI is only a temporary here; it is reloaded at the top of the loop.
        	// NOTE(review): presumably this reverses the bytes within each doubleword
        	// of the LXVD2X result — confirm against the Power ISA.
   304  	MOVWZ	$8, TEMP
   305  	LVSL	(TEMP)(R0), LEMASK
   306  	VSPLTISB	$0x0F, KI
   307  	VXOR	KI, LEMASK, LEMASK
   308  #endif
   309  
   310  	LXVW4X	(CTX)(R_x000), V0	// first 16 bytes of the state
   311  	LXVW4X	(CTX)(R_x010), V4	// second 16 bytes of the state
   312  
   313  	// unpack the state: V1-V3 are V0 rotated left by 1-3 words, V5-V7 likewise from V4
   314  	VSLDOI	$4, V0, V0, V1
   315  	VSLDOI	$8, V0, V0, V2
   316  	VSLDOI	$12, V0, V0, V3
   317  	VSLDOI	$4, V4, V4, V5
   318  	VSLDOI	$8, V4, V4, V6
   319  	VSLDOI	$12, V4, V4, V7
   320  
   321  	MOVD	$0x020, R_x020	// constant offsets used as load/store indexes
   322  	MOVD	$0x030, R_x030
   323  	MOVD	$0x040, R_x040
   324  	MOVD	$0x050, R_x050
   325  	MOVD	$0x060, R_x060
   326  	MOVD	$0x070, R_x070
   327  	MOVD	$0x080, R_x080
   328  	MOVD	$0x090, R_x090
   329  	MOVD	$0x0a0, R_x0a0
   330  	MOVD	$0x0b0, R_x0b0
   331  	MOVD	$0x0c0, R_x0c0
   332  	MOVD	$0x0d0, R_x0d0
   333  	MOVD	$0x0e0, R_x0e0
   334  	MOVD	$0x0f0, R_x0f0
   335  	MOVD	$0x100, R_x100
   336  	MOVD	$0x110, R_x110
   337  
   338  loop:
   339  	MOVD	TBL_STRT, TBL	// restart at the first kcon entry for every block
   340  	LVX	(TBL)(R_x000), KI	// KI = entry at 0x000 (K0)
   341  
   342  	LXVD2X	(INP)(R_x000), V8 // load v8 in advance
   343  
   344  	// Offload to VSR24-31 (aka FPR24-31)
   345  	XXLOR	V0, V0, VS24
   346  	XXLOR	V1, V1, VS25
   347  	XXLOR	V2, V2, VS26
   348  	XXLOR	V3, V3, VS27
   349  	XXLOR	V4, V4, VS28
   350  	XXLOR	V5, V5, VS29
   351  	XXLOR	V6, V6, VS30
   352  	XXLOR	V7, V7, VS31
   353  
   354  	VADDUWM	KI, V7, V7        // h+K[i]
   355  	LVX	(TBL)(R_x010), KI	// KI = entry at 0x010, pre-added to g by the first round
   356  
        	// Rounds 0-14. Each 16-byte load supplies four message words; VSLDOI $4
        	// rotates the vector by one word to produce the register for the next word.
   357  	VPERMLE(V8, V8, LEMASK, V8)
   358  	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8, R_x020)
   359  	VSLDOI	$4, V8, V8, V9
   360  	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9, R_x030)
   361  	VSLDOI	$4, V9, V9, V10
   362  	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10, R_x040)
   363  	LXVD2X	(INP)(R_x010), V12 // load v12 in advance
   364  	VSLDOI	$4, V10, V10, V11
   365  	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11, R_x050)
   366  	VPERMLE(V12, V12, LEMASK, V12)
   367  	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12, R_x060)
   368  	VSLDOI	$4, V12, V12, V13
   369  	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13, R_x070)
   370  	VSLDOI	$4, V13, V13, V14
   371  	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14, R_x080)
   372  	LXVD2X	(INP)(R_x020), V16 // load v16 in advance
   373  	VSLDOI	$4, V14, V14, V15
   374  	SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15, R_x090)
   375  	VPERMLE(V16, V16, LEMASK, V16)
   376  	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16, R_x0a0)
   377  	VSLDOI	$4, V16, V16, V17
   378  	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17, R_x0b0)
   379  	VSLDOI	$4, V17, V17, V18
   380  	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18, R_x0c0)
   381  	VSLDOI	$4, V18, V18, V19
   382  	LXVD2X	(INP)(R_x030), V20 // load v20 in advance
   383  	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19, R_x0d0)
   384  	VPERMLE(V20, V20, LEMASK, V20)
   385  	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20, R_x0e0)
   386  	VSLDOI	$4, V20, V20, V21
   387  	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21, R_x0f0)
   388  	VSLDOI	$4, V21, V21, V22
   389  	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22, R_x100)
   390  	VSLDOI	$4, V22, V22, V23
        	// Round 15: consumes V23 and starts the schedule by replacing V8.
   391  	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x110)
   392  
   393  	MOVD	$3, TEMP	// 3 passes of 16 rounds = rounds 16-63
   394  	MOVD	TEMP, CTR
   395  	ADD	$0x120, TBL	// step past the 0x000-0x110 entries already fetched
   396  	ADD	$0x40, INP	// all 64 input bytes of this block have been loaded
   397  
   398  L16_xx:
   399  	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23, R_x000)
   400  	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8, R_x010)
   401  	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9, R_x020)
   402  	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10, R_x030)
   403  	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11, R_x040)
   404  	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12, R_x050)
   405  	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13, R_x060)
   406  	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14, R_x070)
   407  	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15, R_x080)
   408  	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16, R_x090)
   409  	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17, R_x0a0)
   410  	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18, R_x0b0)
   411  	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19, R_x0c0)
   412  	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20, R_x0d0)
   413  	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21, R_x0e0)
   414  	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x0f0)
   415  	ADD	$0x100, TBL	// 16 entries consumed per pass
   416  
   417  	BDNZ	L16_xx
   418  
        	// Add the state saved at block entry (VS24-VS31) to the working variables.
   419  	XXLOR	VS24, VS24, V10
   420  
   421  	XXLOR	VS25, VS25, V11
   422  	VADDUWM	V10, V0, V0
   423  	XXLOR	VS26, VS26, V12
   424  	VADDUWM	V11, V1, V1
   425  	XXLOR	VS27, VS27, V13
   426  	VADDUWM	V12, V2, V2
   427  	XXLOR	VS28, VS28, V14
   428  	VADDUWM	V13, V3, V3
   429  	XXLOR	VS29, VS29, V15
   430  	VADDUWM	V14, V4, V4
   431  	XXLOR	VS30, VS30, V16
   432  	VADDUWM	V15, V5, V5
   433  	XXLOR	VS31, VS31, V17
   434  	VADDUWM	V16, V6, V6
   435  	VADDUWM	V17, V7, V7
   436  
   437  	CMPU	INP, END
   438  	BLT	loop	// more complete blocks remain
   439  
        	// Pack the state. TBL is now kcon+0x420 (0x120 + 3*0x100), and KI still
        	// holds the vector at kcon+0x410 fetched by the final round, so KI, V8
        	// and V9 are the three permutation control vectors.
   440  	LVX	(TBL)(R_x000), V8
   441  	VPERM	V0, V1, KI, V0
   442  	LVX	(TBL)(R_x010), V9
   443  	VPERM	V4, V5, KI, V4
   444  	VPERM	V0, V2, V8, V0
   445  	VPERM	V4, V6, V8, V4
   446  	VPERM	V0, V3, V9, V0
   447  	VPERM	V4, V7, V9, V4
   448  	STXVD2X	V0, (CTX+R_x000)	// write back the first 16 bytes of the state
   449  	STXVD2X	V4, (CTX+R_x010)	// write back the second 16 bytes of the state
   450  
   451  end:
   452  	RET
   453  
   454  

View as plain text