Text file src/crypto/internal/fips/sha512/sha512block_loong64.s

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // SHA512 block routine. See sha512block.go for Go equivalent.
    10  
    11  #define REGTMP	R30	// scratch; used by LOAD1, SHA512T1, SHA512T2 and the digest update in ·block
    12  #define REGTMP1	R16	// scratch; SHA512T2 leaves T2 here
    13  #define REGTMP2	R17	// scratch
    14  #define REGTMP3	R18	// scratch
    15  #define REGTMP4	R7	// carries W[i] from LOAD0/LOAD1 into SHA512T1, then T1 out of SHA512T1
    16  #define REGTMP5	R6	// scratch; R6 also holds the rounded p_len in ·block until R25 is computed
    17  
    18  // W[i] = M[i]; for 0 <= i <= 15. Reads the block at R5, stores W[i] at (i*8)(R3) and leaves W[i] in REGTMP4.
    19  #define LOAD0(index) \
    20  	MOVV	(index*8)(R5), REGTMP4; \	// raw 8 bytes of message word M[index]
    21  	WORD	$0x3ce7; \	//REVBV	REGTMP4, REGTMP4 (emitted as a raw instruction word): reverse the 8 bytes so they are read as a big-endian word
    22  	MOVV	REGTMP4, (index*8)(R3)	// W[index] = M[index]; REGTMP4 still holds it for SHA512T1
    23  
    24  // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 79
    25  //   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    26  //   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
    27  #define LOAD1(index) \	// W[] is a 16-entry ring at R3 (slot = i&0xf); the new W[i] is also left in REGTMP4
    28  	MOVV	(((index-2)&0xf)*8)(R3), REGTMP4; \	// REGTMP4 = W[i-2]
    29  	MOVV	(((index-15)&0xf)*8)(R3), REGTMP1; \	// REGTMP1 = W[i-15]
    30  	MOVV	(((index-7)&0xf)*8)(R3), REGTMP; \	// REGTMP = W[i-7]
    31  	MOVV	REGTMP4, REGTMP2; \	// two more copies of W[i-2], one per SIGMA1 term
    32  	MOVV	REGTMP4, REGTMP3; \
    33  	ROTRV	$19, REGTMP4; \	// REGTMP4 = ROTR(19, W[i-2])
    34  	ROTRV	$61, REGTMP2; \	// REGTMP2 = ROTR(61, W[i-2])
    35  	SRLV	$6, REGTMP3; \	// REGTMP3 = SHR(6, W[i-2])
    36  	XOR	REGTMP2, REGTMP4; \
    37  	XOR	REGTMP3, REGTMP4; \	// REGTMP4 = SIGMA1(W[i-2])
    38  	ROTRV	$1, REGTMP1, REGTMP5; \	// REGTMP5 = ROTR(1, W[i-15])
    39  	SRLV	$7, REGTMP1, REGTMP2; \	// REGTMP2 = SHR(7, W[i-15])
    40  	ROTRV	$8, REGTMP1; \	// REGTMP1 = ROTR(8, W[i-15])
    41  	ADDV	REGTMP, REGTMP4; \	// REGTMP4 = SIGMA1(W[i-2]) + W[i-7]
    42  	MOVV	(((index-16)&0xf)*8)(R3), REGTMP; \	// REGTMP = W[i-16]; same ring slot that W[i] overwrites below
    43  	XOR	REGTMP1, REGTMP5; \
    44  	XOR	REGTMP2, REGTMP5; \	// REGTMP5 = SIGMA0(W[i-15])
    45  	ADDV	REGTMP, REGTMP5; \	// REGTMP5 = SIGMA0(W[i-15]) + W[i-16]
    46  	ADDV	REGTMP5, REGTMP4; \	// REGTMP4 = W[i]
    47  	MOVV	REGTMP4, ((index&0xf)*8)(R3)	// store W[i] into ring slot i&0xf
    48  
    49  // h is also used as an accumulator. Wt is passed in REGTMP4.
    50  //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    51  //     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
    52  //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    53  // Calculate T1 in REGTMP4
    54  #define SHA512T1(const, e, f, g, h) \	// const is the round constant K[i]; e, f, g are only read, h is modified
    55  	ADDV	$const, h; \	// h += K[i]
    56  	ADDV	REGTMP4, h; \	// h += W[i]; REGTMP4 is free to reuse from here on
    57  	ROTRV	$14, e, REGTMP4; \	// REGTMP4 = ROTR(14, e)
    58  	ROTRV	$18, e, REGTMP; \	// REGTMP = ROTR(18, e)
    59  	ROTRV	$41, e, REGTMP3; \	// REGTMP3 = ROTR(41, e)
    60  	AND	f, e, REGTMP2; \	// REGTMP2 = e AND f
    61  	XOR	REGTMP, REGTMP4; \	// REGTMP4 = ROTR(14, e) XOR ROTR(18, e)
    62  	MOVV	$0xffffffffffffffff, REGTMP; \	// all-ones mask, used to form NOT e
    63  	XOR	REGTMP4, REGTMP3; \	// REGTMP3 = BIGSIGMA1(e)
    64  	XOR	REGTMP, e, REGTMP5; \	// REGTMP5 = NOT e
    65  	ADDV	REGTMP3, h; \	// h += BIGSIGMA1(e)
    66  	AND	g, REGTMP5; \	// REGTMP5 = NOT e AND g
    67  	XOR	REGTMP2, REGTMP5; \	// REGTMP5 = Ch(e, f, g)
    68  	ADDV	h, REGTMP5, REGTMP4	// REGTMP4 = T1 (h itself is left without the Ch term)
    69  
    70  // T2 = BIGSIGMA0(a) + Maj(a, b, c)
    71  // BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
    72  // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
    73  // Calculate T2 in REGTMP1
    74  #define SHA512T2(a, b, c) \	// a, b, c are only read; REGTMP4 (T1) is not touched
    75  	ROTRV	$28, a, REGTMP5; \	// REGTMP5 = ROTR(28, a)
    76  	AND	b, c, REGTMP1; \	// REGTMP1 = b AND c
    77  	ROTRV	$34, a, REGTMP3; \	// REGTMP3 = ROTR(34, a)
    78  	AND	c, a, REGTMP; \	// REGTMP = a AND c
    79  	XOR	REGTMP3, REGTMP5; \	// REGTMP5 = ROTR(28, a) XOR ROTR(34, a)
    80  	XOR	REGTMP, REGTMP1; \	// REGTMP1 = (b AND c) XOR (a AND c)
    81  	ROTRV	$39, a, REGTMP2; \	// REGTMP2 = ROTR(39, a)
    82  	AND	a, b, REGTMP3; \	// REGTMP3 = a AND b
    83  	XOR	REGTMP3, REGTMP1; \	// REGTMP1 = Maj(a, b, c)
    84  	XOR	REGTMP2, REGTMP5; \	// REGTMP5 = BIGSIGMA0(a)
    85  	ADDV	REGTMP5, REGTMP1	// REGTMP1 = T2
    86  
    87  // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
    88  // The values for e and a are stored in d and h, ready for rotation.
    89  #define SHA512ROUND(const, a, b, c, d, e, f, g, h) \	// expects W[i] in REGTMP4; only d and h are written
    90  	SHA512T1(const, e, f, g, h); \	// REGTMP4 = T1
    91  	SHA512T2(a, b, c); \	// REGTMP1 = T2
    92  	ADDV	REGTMP4, d; \	// d = d + T1, the next round's e
    93  	ADDV	REGTMP1, REGTMP4, h	// h = T1 + T2, the next round's a
    94  
    95  #define SHA512ROUND0(index, const, a, b, c, d, e, f, g, h) \	// rounds 0..15: W[index] comes straight from the message block
    96  	LOAD0(index); \	// REGTMP4 = W[index]
    97  	SHA512ROUND(const, a, b, c, d, e, f, g, h)
    98  
    99  #define SHA512ROUND1(index, const, a, b, c, d, e, f, g, h) \	// rounds 16..79: W[index] is derived from earlier schedule words
   100  	LOAD1(index); \	// REGTMP4 = W[index]
   101  	SHA512ROUND(const, a, b, c, d, e, f, g, h)
   102  
   103  // A stack frame size of 128 bytes is required here, because the message schedule W[] is kept on the
   104  // stack as a ring of 16 entries * 8 bytes, addressed off R3 by the macros LOAD0 and LOAD1 above.
   105  // Registers: R4 = dig, R5 = current 128-byte block, R25 = end pointer, R8..R15 = working variables a..h.
   106  // NOTE(review): W[0] is stored at 0(R3); if the assembler-inserted prologue also saves LR at 0(R3), that slot is overwritten - TODO confirm against the loong64 frame layout.
   107  // func block(dig *Digest, p []byte)
   108  TEXT ·block(SB),NOSPLIT,$128-32
   109  	MOVV	p_len+16(FP), R6	// R6 = len(p)
   110  	MOVV	p_base+8(FP), R5	// R5 = start of p; advanced by 128 after every block
   111  	AND	$~127, R6	// round len(p) down to a whole number of 128-byte blocks
   112  	BEQ	R6, end	// no complete block: return without touching dig
   113  
   114  	// p_len >= 128
   115  	MOVV	dig+0(FP), R4	// R4 = dig; its first 8 doublewords are used as H0..H7
   116  	ADDV	R5, R6, R25	// R25 = end of the last whole block; R6 is free for REGTMP5 after this
   117  	MOVV	(0*8)(R4), R8	// a = H0
   118  	MOVV	(1*8)(R4), R9	// b = H1
   119  	MOVV	(2*8)(R4), R10	// c = H2
   120  	MOVV	(3*8)(R4), R11	// d = H3
   121  	MOVV	(4*8)(R4), R12	// e = H4
   122  	MOVV	(5*8)(R4), R13	// f = H5
   123  	MOVV	(6*8)(R4), R14	// g = H6
   124  	MOVV	(7*8)(R4), R15	// h = H7
   125  
   126  loop:	// one 128-byte block per iteration; each round leaves its new a in h and its new e in d, so the register list rotates by one per round
   127  	SHA512ROUND0( 0, 0x428a2f98d728ae22, R8,  R9,  R10, R11, R12, R13, R14, R15)
   128  	SHA512ROUND0( 1, 0x7137449123ef65cd, R15, R8,  R9,  R10, R11, R12, R13, R14)
   129  	SHA512ROUND0( 2, 0xb5c0fbcfec4d3b2f, R14, R15, R8,  R9,  R10, R11, R12, R13)
   130  	SHA512ROUND0( 3, 0xe9b5dba58189dbbc, R13, R14, R15, R8,  R9,  R10, R11, R12)
   131  	SHA512ROUND0( 4, 0x3956c25bf348b538, R12, R13, R14, R15, R8,  R9,  R10, R11)
   132  	SHA512ROUND0( 5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8,  R9,  R10)
   133  	SHA512ROUND0( 6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8,  R9)
   134  	SHA512ROUND0( 7, 0xab1c5ed5da6d8118, R9,  R10, R11, R12, R13, R14, R15, R8)
   135  	SHA512ROUND0( 8, 0xd807aa98a3030242, R8,  R9,  R10, R11, R12, R13, R14, R15)
   136  	SHA512ROUND0( 9, 0x12835b0145706fbe, R15, R8,  R9,  R10, R11, R12, R13, R14)
   137  	SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8,  R9,  R10, R11, R12, R13)
   138  	SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8,  R9,  R10, R11, R12)
   139  	SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8,  R9,  R10, R11)
   140  	SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8,  R9,  R10)
   141  	SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8,  R9)
   142  	SHA512ROUND0(15, 0xc19bf174cf692694, R9,  R10, R11, R12, R13, R14, R15, R8)
   143  
   144  	SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8,  R9,  R10, R11, R12, R13, R14, R15)
   145  	SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8,  R9,  R10, R11, R12, R13, R14)
   146  	SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8,  R9,  R10, R11, R12, R13)
   147  	SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8,  R9,  R10, R11, R12)
   148  	SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8,  R9,  R10, R11)
   149  	SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8,  R9,  R10)
   150  	SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8,  R9)
   151  	SHA512ROUND1(23, 0x76f988da831153b5, R9,  R10, R11, R12, R13, R14, R15, R8)
   152  	SHA512ROUND1(24, 0x983e5152ee66dfab, R8,  R9,  R10, R11, R12, R13, R14, R15)
   153  	SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8,  R9,  R10, R11, R12, R13, R14)
   154  	SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8,  R9,  R10, R11, R12, R13)
   155  	SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8,  R9,  R10, R11, R12)
   156  	SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8,  R9,  R10, R11)
   157  	SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8,  R9,  R10)
   158  	SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8,  R9)
   159  	SHA512ROUND1(31, 0x142929670a0e6e70, R9,  R10, R11, R12, R13, R14, R15, R8)
   160  	SHA512ROUND1(32, 0x27b70a8546d22ffc, R8,  R9,  R10, R11, R12, R13, R14, R15)
   161  	SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8,  R9,  R10, R11, R12, R13, R14)
   162  	SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8,  R9,  R10, R11, R12, R13)
   163  	SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8,  R9,  R10, R11, R12)
   164  	SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8,  R9,  R10, R11)
   165  	SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8,  R9,  R10)
   166  	SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8,  R9)
   167  	SHA512ROUND1(39, 0x92722c851482353b, R9,  R10, R11, R12, R13, R14, R15, R8)
   168  	SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8,  R9,  R10, R11, R12, R13, R14, R15)
   169  	SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8,  R9,  R10, R11, R12, R13, R14)
   170  	SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8,  R9,  R10, R11, R12, R13)
   171  	SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8,  R9,  R10, R11, R12)
   172  	SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8,  R9,  R10, R11)
   173  	SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8,  R9,  R10)
   174  	SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8,  R9)
   175  	SHA512ROUND1(47, 0x106aa07032bbd1b8, R9,  R10, R11, R12, R13, R14, R15, R8)
   176  	SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8,  R9,  R10, R11, R12, R13, R14, R15)
   177  	SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8,  R9,  R10, R11, R12, R13, R14)
   178  	SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8,  R9,  R10, R11, R12, R13)
   179  	SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8,  R9,  R10, R11, R12)
   180  	SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8,  R9,  R10, R11)
   181  	SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   182  	SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8,  R9)
   183  	SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9,  R10, R11, R12, R13, R14, R15, R8)
   184  	SHA512ROUND1(56, 0x748f82ee5defb2fc, R8,  R9,  R10, R11, R12, R13, R14, R15)
   185  	SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8,  R9,  R10, R11, R12, R13, R14)
   186  	SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8,  R9,  R10, R11, R12, R13)
   187  	SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8,  R9,  R10, R11, R12)
   188  	SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8,  R9,  R10, R11)
   189  	SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8,  R9,  R10)
   190  	SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8,  R9)
   191  	SHA512ROUND1(63, 0xc67178f2e372532b, R9,  R10, R11, R12, R13, R14, R15, R8)
   192  	SHA512ROUND1(64, 0xca273eceea26619c, R8,  R9,  R10, R11, R12, R13, R14, R15)
   193  	SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8,  R9,  R10, R11, R12, R13, R14)
   194  	SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8,  R9,  R10, R11, R12, R13)
   195  	SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8,  R9,  R10, R11, R12)
   196  	SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8,  R9,  R10, R11)
   197  	SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8,  R9,  R10)
   198  	SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8,  R9)
   199  	SHA512ROUND1(71, 0x1b710b35131c471b, R9,  R10, R11, R12, R13, R14, R15, R8)
   200  	SHA512ROUND1(72, 0x28db77f523047d84, R8,  R9,  R10, R11, R12, R13, R14, R15)
   201  	SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8,  R9,  R10, R11, R12, R13, R14)
   202  	SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8,  R9,  R10, R11, R12, R13)
   203  	SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8,  R9,  R10, R11, R12)
   204  	SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8,  R9,  R10, R11)
   205  	SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8,  R9,  R10)
   206  	SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8,  R9)
   207  	SHA512ROUND1(79, 0x6c44198c4a475817, R9,  R10, R11, R12, R13, R14, R15, R8)
   208  
   209  	MOVV	(0*8)(R4), REGTMP	// after 80 rounds a..h are back in R8..R15; add them into the digest, four words at a time
   210  	MOVV	(1*8)(R4), REGTMP1
   211  	MOVV	(2*8)(R4), REGTMP2
   212  	MOVV	(3*8)(R4), REGTMP3
   213  	ADDV	REGTMP, R8	// H0 = a + H0
   214  	ADDV	REGTMP1, R9	// H1 = b + H1
   215  	ADDV	REGTMP2, R10	// H2 = c + H2
   216  	ADDV	REGTMP3, R11	// H3 = d + H3
   217  	MOVV	R8, (0*8)(R4)	// write back; R8..R11 keep the new H0..H3 as a..d for the next block
   218  	MOVV	R9, (1*8)(R4)
   219  	MOVV	R10, (2*8)(R4)
   220  	MOVV	R11, (3*8)(R4)
   221  	MOVV	(4*8)(R4), REGTMP
   222  	MOVV	(5*8)(R4), REGTMP1
   223  	MOVV	(6*8)(R4), REGTMP2
   224  	MOVV	(7*8)(R4), REGTMP3
   225  	ADDV	REGTMP, R12	// H4 = e + H4
   226  	ADDV	REGTMP1, R13	// H5 = f + H5
   227  	ADDV	REGTMP2, R14	// H6 = g + H6
   228  	ADDV	REGTMP3, R15	// H7 = h + H7
   229  	MOVV	R12, (4*8)(R4)	// write back; R12..R15 keep the new H4..H7 as e..h for the next block
   230  	MOVV	R13, (5*8)(R4)
   231  	MOVV	R14, (6*8)(R4)
   232  	MOVV	R15, (7*8)(R4)
   233  
   234  	ADDV	$128, R5	// advance to the next 128-byte block
   235  	BNE	R5, R25, loop	// repeat until R5 reaches the end pointer
   236  
   237  end:
   238  	RET
   239  

View as plain text