Text file src/crypto/internal/fips/sha256/sha256block_loong64.s

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // SHA256 block routine. See sha256block.go for Go equivalent.
    10  //
    11  // The algorithm is detailed in FIPS 180-4:
    12  //
    13  //  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    14  //
    15  // W[i] = M[i]; for 0 <= i <= 15
    16  // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
    17  //
    18  // a = H0
    19  // b = H1
    20  // c = H2
    21  // d = H3
    22  // e = H4
    23  // f = H5
    24  // g = H6
    25  // h = H7
    26  //
    27  // for i = 0 to 63 {
    28  //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + K[i] + W[i]
    29  //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    30  //    h = g
    31  //    g = f
    32  //    f = e
    33  //    e = d + T1
    34  //    d = c
    35  //    c = b
    36  //    b = a
    37  //    a = T1 + T2
    38  // }
    39  //
    40  // H0 = a + H0
    41  // H1 = b + H1
    42  // H2 = c + H2
    43  // H3 = d + H3
    44  // H4 = e + H4
    45  // H5 = f + H5
    46  // H6 = g + H6
    47  // H7 = h + H7
    48  
    49  #define REGTMP	R30	// scratch
    50  #define REGTMP1	R16	// scratch; receives T2 in SHA256T2
    51  #define REGTMP2	R17	// scratch
    52  #define REGTMP3	R18	// scratch
    53  #define REGTMP4	R7	// W[i] after LOAD0/LOAD1, then T1 after SHA256T1; LOAD0's raw WORD encoding assumes R7
    54  #define REGTMP5	R6	// scratch; R6 also carries p_len in block before the loop starts
    55  
    56  // W[i] = M[i]; for 0 <= i <= 15
        // Loads the 32-bit word at byte offset index*4 from R5 (the input
        // pointer), reverses its byte order, and stores it at byte offset
        // index*4 from R3 (the W buffer). The swapped word is left in REGTMP4,
        // where SHA256T1 expects to find W[i].
        // REVB2W is emitted as a raw WORD whose encoding fixes R7 as both
        // operands, so it only matches REGTMP4 while REGTMP4 is defined as R7.
    57  #define LOAD0(index) \
    58  	MOVW	(index*4)(R5), REGTMP4; \
    59  	WORD	$0x38e7; \	// REVB2W REGTMP4, REGTMP4 to big-endian
    60  	MOVW	REGTMP4, (index*4)(R3)
    61  
    62  // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
    63  //   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
    64  //   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
        // W is kept as a 16-entry circular buffer of 32-bit words at R3: every
        // index is reduced with &0xf, so the new W[i] overwrites the slot that
        // held W[i-16].
        // Uses REGTMP and REGTMP1-REGTMP5 as scratch. The new W[i] is stored to
        // the buffer and also left in REGTMP4, where SHA256T1 expects it.
    65  #define LOAD1(index) \
    66  	MOVW	(((index-2)&0xf)*4)(R3), REGTMP4; \
    67  	MOVW	(((index-15)&0xf)*4)(R3), REGTMP1; \
    68  	MOVW	(((index-7)&0xf)*4)(R3), REGTMP; \
    69  	MOVW	REGTMP4, REGTMP2; \
    70  	MOVW	REGTMP4, REGTMP3; \
    71  	ROTR	$17, REGTMP4; \
    72  	ROTR	$19, REGTMP2; \
    73  	SRL	$10, REGTMP3; \
    74  	XOR	REGTMP2, REGTMP4; \
    75  	XOR	REGTMP3, REGTMP4; \
    76  	ROTR	$7, REGTMP1, REGTMP5; \
    77  	SRL	$3, REGTMP1, REGTMP3; \
    78  	ROTR	$18, REGTMP1, REGTMP2; \
    79  	ADD	REGTMP, REGTMP4; \
    80  	MOVW	(((index-16)&0xf)*4)(R3), REGTMP; \
    81  	XOR	REGTMP3, REGTMP5; \
    82  	XOR	REGTMP2, REGTMP5; \
    83  	ADD	REGTMP, REGTMP5; \
    84  	ADD	REGTMP5, REGTMP4; \
    85  	MOVW	REGTMP4, ((index&0xf)*4)(R3)
    86  
    87  // T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    88  // BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
    89  // Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    90  // Calculate T1 in REGTMP4
        // On entry REGTMP4 must hold W[i] (left there by LOAD0 or LOAD1); const
        // is the round constant K[i].
        // h is modified: K[i], W[i] and BIGSIGMA1(e) are accumulated into it
        // before Ch(e, f, g) is added to form T1. e, f and g are only read.
        // NOT e is computed as e XOR 0xffffffff.
        // Clobbers REGTMP, REGTMP2, REGTMP3 and REGTMP5.
    91  #define SHA256T1(const, e, f, g, h) \
    92  	ADDV	$const, h; \
    93  	ADD	REGTMP4, h; \
    94  	ROTR	$6, e, REGTMP4; \
    95  	ROTR	$11, e, REGTMP; \
    96  	ROTR	$25, e, REGTMP3; \
    97  	AND	f, e, REGTMP2; \
    98  	XOR	REGTMP, REGTMP4; \
    99  	MOVV	$0xffffffff, REGTMP; \
   100  	XOR	REGTMP4, REGTMP3; \
   101  	XOR	REGTMP, e, REGTMP5; \
   102  	ADD	REGTMP3, h; \
   103  	AND	g, REGTMP5; \
   104  	XOR	REGTMP2, REGTMP5; \
   105  	ADD	h, REGTMP5, REGTMP4
   106  
   107  // T2 = BIGSIGMA0(a) + Maj(a, b, c)
   108  // BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
   109  // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
   110  // Calculate T2 in REGTMP1
        // a, b and c are only read. REGTMP4 is not touched, so a T1 value
        // computed by SHA256T1 survives this macro.
        // Clobbers REGTMP, REGTMP2, REGTMP3 and REGTMP5.
   111  #define SHA256T2(a, b, c) \
   112  	ROTR	$2, a, REGTMP5; \
   113  	AND	b, c, REGTMP1; \
   114  	ROTR	$13, a, REGTMP3; \
   115  	AND	c, a, REGTMP; \
   116  	XOR	REGTMP3, REGTMP5; \
   117  	XOR	REGTMP, REGTMP1; \
   118  	ROTR	$22, a, REGTMP2; \
   119  	AND	a, b, REGTMP3; \
   120  	XOR	REGTMP2, REGTMP5; \
   121  	XOR	REGTMP3, REGTMP1; \
   122  	ADD	REGTMP5, REGTMP1
   123  
   124  // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
   125  // The values for e and a are stored in d and h, ready for rotation.
        // REGTMP4 must hold W[i] on entry (see SHA256T1). No values are moved
        // between the working registers; the caller is expected to shift the
        // argument list by one position for the next round instead.
   126  #define SHA256ROUND(const, a, b, c, d, e, f, g, h) \
   127  	SHA256T1(const, e, f, g, h); \
   128  	SHA256T2(a, b, c); \
   129  	ADD	REGTMP4, d; \
   130  	ADD	REGTMP1, REGTMP4, h
   131  
        // One round for 0 <= index <= 15: W[index] is the byte-swapped message
        // word loaded by LOAD0.
   132  #define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
   133  	LOAD0(index); \
   134  	SHA256ROUND(const, a, b, c, d, e, f, g, h)
   135  
        // One round for 16 <= index <= 63: W[index] is computed from earlier
        // schedule entries by LOAD1.
   136  #define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
   137  	LOAD1(index); \
   138  	SHA256ROUND(const, a, b, c, d, e, f, g, h)
   139  
   140  // A 64-byte stack frame is required here: the message schedule W is kept
   141  // on the stack as a 16-entry circular buffer of 4-byte words, addressed
   142  // relative to R3 by the LOAD0 and LOAD1 macros above.
   143  //
        // Register use:
        //   R4       dig; H0..H7 are read and written as eight 32-bit words at offsets 0..28
        //   R5       current position in p, advanced by 64 after each block
        //   R25      end pointer: p_base + (p_len rounded down to a multiple of 64)
        //   R8..R15  working variables a..h
        //
        // Only whole 64-byte blocks are processed; trailing bytes of p beyond the
        // last full block are ignored, and nothing is done if p_len < 64.
        //
   144  // func block(dig *Digest, p []byte)
   145  TEXT ·block(SB),NOSPLIT,$64-32
   146  	MOVV	p_base+8(FP), R5	// R5 = start of input
   147  	MOVV	p_len+16(FP), R6
   148  	AND	$~63, R6	// round the length down to a multiple of the 64-byte block size
   149  	BEQ	R6, end	// no full block to process
   150  
   151  	// p_len >= 64
   152  	MOVV	dig+0(FP), R4
   153  	ADDV	R5, R6, R25	// R25 = end of the last full block; R6 is reused as REGTMP5 from here on
   154  	MOVW	(0*4)(R4), R8	// a = H0
   155  	MOVW	(1*4)(R4), R9	// b = H1
   156  	MOVW	(2*4)(R4), R10	// c = H2
   157  	MOVW	(3*4)(R4), R11	// d = H3
   158  	MOVW	(4*4)(R4), R12	// e = H4
   159  	MOVW	(5*4)(R4), R13	// f = H5
   160  	MOVW	(6*4)(R4), R14	// g = H6
   161  	MOVW	(7*4)(R4), R15	// h = H7
   162  
   163  loop:
        	// Rounds 0-15: W[i] is the byte-swapped message word M[i] (LOAD0).
        	// Instead of moving values between registers, each round shifts the
        	// register-to-variable assignment by one position.
   164  	SHA256ROUND0(0,  0x428a2f98, R8,  R9,  R10, R11, R12, R13, R14, R15)
   165  	SHA256ROUND0(1,  0x71374491, R15, R8,  R9,  R10, R11, R12, R13, R14)
   166  	SHA256ROUND0(2,  0xb5c0fbcf, R14, R15, R8,  R9,  R10, R11, R12, R13)
   167  	SHA256ROUND0(3,  0xe9b5dba5, R13, R14, R15, R8,  R9,  R10, R11, R12)
   168  	SHA256ROUND0(4,  0x3956c25b, R12, R13, R14, R15, R8,  R9,  R10, R11)
   169  	SHA256ROUND0(5,  0x59f111f1, R11, R12, R13, R14, R15, R8,  R9,  R10)
   170  	SHA256ROUND0(6,  0x923f82a4, R10, R11, R12, R13, R14, R15, R8,  R9)
   171  	SHA256ROUND0(7,  0xab1c5ed5, R9,  R10, R11, R12, R13, R14, R15, R8)
   172  	SHA256ROUND0(8,  0xd807aa98, R8,  R9,  R10, R11, R12, R13, R14, R15)
   173  	SHA256ROUND0(9,  0x12835b01, R15, R8,  R9,  R10, R11, R12, R13, R14)
   174  	SHA256ROUND0(10, 0x243185be, R14, R15, R8,  R9,  R10, R11, R12, R13)
   175  	SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8,  R9,  R10, R11, R12)
   176  	SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8,  R9,  R10, R11)
   177  	SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8,  R9,  R10)
   178  	SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8,  R9)
   179  	SHA256ROUND0(15, 0xc19bf174, R9,  R10, R11, R12, R13, R14, R15, R8)
   180  
        	// Rounds 16-63: W[i] is derived from earlier schedule entries (LOAD1).
   181  	SHA256ROUND1(16, 0xe49b69c1, R8,  R9,  R10, R11, R12, R13, R14, R15)
   182  	SHA256ROUND1(17, 0xefbe4786, R15, R8,  R9,  R10, R11, R12, R13, R14)
   183  	SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8,  R9,  R10, R11, R12, R13)
   184  	SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8,  R9,  R10, R11, R12)
   185  	SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8,  R9,  R10, R11)
   186  	SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8,  R9,  R10)
   187  	SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8,  R9)
   188  	SHA256ROUND1(23, 0x76f988da, R9,  R10, R11, R12, R13, R14, R15, R8)
   189  	SHA256ROUND1(24, 0x983e5152, R8,  R9,  R10, R11, R12, R13, R14, R15)
   190  	SHA256ROUND1(25, 0xa831c66d, R15, R8,  R9,  R10, R11, R12, R13, R14)
   191  	SHA256ROUND1(26, 0xb00327c8, R14, R15, R8,  R9,  R10, R11, R12, R13)
   192  	SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8,  R9,  R10, R11, R12)
   193  	SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8,  R9,  R10, R11)
   194  	SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8,  R9,  R10)
   195  	SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8,  R9)
   196  	SHA256ROUND1(31, 0x14292967, R9,  R10, R11, R12, R13, R14, R15, R8)
   197  	SHA256ROUND1(32, 0x27b70a85, R8,  R9,  R10, R11, R12, R13, R14, R15)
   198  	SHA256ROUND1(33, 0x2e1b2138, R15, R8,  R9,  R10, R11, R12, R13, R14)
   199  	SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8,  R9,  R10, R11, R12, R13)
   200  	SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8,  R9,  R10, R11, R12)
   201  	SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8,  R9,  R10, R11)
   202  	SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   203  	SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8,  R9)
   204  	SHA256ROUND1(39, 0x92722c85, R9,  R10, R11, R12, R13, R14, R15, R8)
   205  	SHA256ROUND1(40, 0xa2bfe8a1, R8,  R9,  R10, R11, R12, R13, R14, R15)
   206  	SHA256ROUND1(41, 0xa81a664b, R15, R8,  R9,  R10, R11, R12, R13, R14)
   207  	SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8,  R9,  R10, R11, R12, R13)
   208  	SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8,  R9,  R10, R11, R12)
   209  	SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8,  R9,  R10, R11)
   210  	SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8,  R9,  R10)
   211  	SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8,  R9)
   212  	SHA256ROUND1(47, 0x106aa070, R9,  R10, R11, R12, R13, R14, R15, R8)
   213  	SHA256ROUND1(48, 0x19a4c116, R8,  R9,  R10, R11, R12, R13, R14, R15)
   214  	SHA256ROUND1(49, 0x1e376c08, R15, R8,  R9,  R10, R11, R12, R13, R14)
   215  	SHA256ROUND1(50, 0x2748774c, R14, R15, R8,  R9,  R10, R11, R12, R13)
   216  	SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8,  R9,  R10, R11, R12)
   217  	SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8,  R9,  R10, R11)
   218  	SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8,  R9,  R10)
   219  	SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8,  R9)
   220  	SHA256ROUND1(55, 0x682e6ff3, R9,  R10, R11, R12, R13, R14, R15, R8)
   221  	SHA256ROUND1(56, 0x748f82ee, R8,  R9,  R10, R11, R12, R13, R14, R15)
   222  	SHA256ROUND1(57, 0x78a5636f, R15, R8,  R9,  R10, R11, R12, R13, R14)
   223  	SHA256ROUND1(58, 0x84c87814, R14, R15, R8,  R9,  R10, R11, R12, R13)
   224  	SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8,  R9,  R10, R11, R12)
   225  	SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8,  R9,  R10, R11)
   226  	SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   227  	SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8,  R9)
   228  	SHA256ROUND1(63, 0xc67178f2, R9,  R10, R11, R12, R13, R14, R15, R8)
   229  
        	// After 64 rounds (a multiple of 8) the register assignment has
        	// rotated back, so R8..R15 hold a..h again. Add the previous hash
        	// value from dig, store the sums back, and keep them in R8..R15 so
        	// they serve as a..h for the next block without being reloaded.
   230  	MOVW	(0*4)(R4), REGTMP
   231  	MOVW	(1*4)(R4), REGTMP1
   232  	MOVW	(2*4)(R4), REGTMP2
   233  	MOVW	(3*4)(R4), REGTMP3
   234  	ADD	REGTMP, R8	// H0 = a + H0
   235  	ADD	REGTMP1, R9	// H1 = b + H1
   236  	ADD	REGTMP2, R10	// H2 = c + H2
   237  	ADD	REGTMP3, R11	// H3 = d + H3
   238  	MOVW	R8, (0*4)(R4)
   239  	MOVW	R9, (1*4)(R4)
   240  	MOVW	R10, (2*4)(R4)
   241  	MOVW	R11, (3*4)(R4)
   242  	MOVW	(4*4)(R4), REGTMP
   243  	MOVW	(5*4)(R4), REGTMP1
   244  	MOVW	(6*4)(R4), REGTMP2
   245  	MOVW	(7*4)(R4), REGTMP3
   246  	ADD	REGTMP, R12	// H4 = e + H4
   247  	ADD	REGTMP1, R13	// H5 = f + H5
   248  	ADD	REGTMP2, R14	// H6 = g + H6
   249  	ADD	REGTMP3, R15	// H7 = h + H7
   250  	MOVW	R12, (4*4)(R4)
   251  	MOVW	R13, (5*4)(R4)
   252  	MOVW	R14, (6*4)(R4)
   253  	MOVW	R15, (7*4)(R4)
   254  
   255  	ADDV	$64, R5	// advance to the next 64-byte block
   256  	BNE	R5, R25, loop	// repeat until the end pointer is reached
   257  
   258  end:
   259  	RET
   260  

View as plain text