Text file src/crypto/sha512/sha512block_riscv64.s

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // SHA512 block routine. See sha512block.go for Go equivalent.
    10  //
    11  // The algorithm is detailed in FIPS 180-4:
    12  //
    13  //  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    14  //
    15  // Wt = Mt; for 0 <= t <= 15
    16  // Wt = SIGMA1(Wt-2) + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    17  //
    18  // a = H0
    19  // b = H1
    20  // c = H2
    21  // d = H3
    22  // e = H4
    23  // f = H5
    24  // g = H6
    25  // h = H7
    26  //
    27  // for t = 0 to 79 {
    28  //    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    29  //    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    30  //    h = g
    31  //    g = f
    32  //    f = e
    33  //    e = d + T1
    34  //    d = c
    35  //    c = b
    36  //    b = a
    37  //    a = T1 + T2
    38  // }
    39  //
    40  // H0 = a + H0
    41  // H1 = b + H1
    42  // H2 = c + H2
    43  // H3 = d + H3
    44  // H4 = e + H4
    45  // H5 = f + H5
    46  // H6 = g + H6
    47  // H7 = h + H7
    48  
    49  // Wt = Mt; for 0 <= t <= 15
    50  #define MSGSCHEDULE0(index) \
    51  	MOVBU	((index*8)+0)(X29), X5; \
    52  	MOVBU	((index*8)+1)(X29), X6; \
    53  	MOVBU	((index*8)+2)(X29), X7; \
    54  	MOVBU	((index*8)+3)(X29), X8; \
    55  	SLL	$56, X5; \
    56  	SLL	$48, X6; \
    57  	OR	X5, X6, X5; \
    58  	SLL	$40, X7; \
    59  	OR	X5, X7, X5; \
    60  	SLL	$32, X8; \
    61  	OR	X5, X8, X5; \
    62  	MOVBU	((index*8)+4)(X29), X9; \
    63  	MOVBU	((index*8)+5)(X29), X6; \
    64  	MOVBU	((index*8)+6)(X29), X7; \
    65  	MOVBU	((index*8)+7)(X29), X8; \
    66  	SLL	$24, X9; \
    67  	OR	X5, X9, X5; \
    68  	SLL	$16, X6; \
    69  	OR	X5, X6, X5; \
    70  	SLL	$8, X7; \
    71  	OR	X5, X7, X5; \
    72  	OR	X5, X8, X5; \
    73  	MOV	X5, (index*8)(X19)
    74  
    75  // Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 79
    76  //   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    77  //   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
    78  #define MSGSCHEDULE1(index) \
    79  	MOV	(((index-2)&0xf)*8)(X19), X5; \
    80  	MOV	(((index-15)&0xf)*8)(X19), X6; \
    81  	MOV	(((index-7)&0xf)*8)(X19), X9; \
    82  	MOV	(((index-16)&0xf)*8)(X19), X21; \
    83  	ROR	$19, X5, X7; \
    84  	ROR	$61, X5, X8; \
    85  	SRL	$6, X5; \
    86  	XOR	X7, X5; \
    87  	XOR	X8, X5; \
    88  	ADD	X9, X5; \
    89  	ROR	$1, X6, X7; \
    90  	ROR	$8, X6, X8; \
    91  	SRL	$7, X6; \
    92  	XOR	X7, X6; \
    93  	XOR	X8, X6; \
    94  	ADD	X6, X5; \
    95  	ADD	X21, X5; \
    96  	MOV	X5, ((index&0xf)*8)(X19)
    97  
    98  // Calculate T1 in X5.
    99  // h is also used as an accumulator. Wt is passed in X5.
   100  //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt
   101  //     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
   102  //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
   103  #define SHA512T1(index, e, f, g, h) \
   104  	MOV	(index*8)(X18), X8; \
   105  	ADD	X5, h; \
   106  	ROR	$14, e, X6; \
   107  	ADD	X8, h; \
   108  	ROR	$18, e, X7; \
   109  	XOR	X7, X6; \
   110  	ROR	$41, e, X8; \
   111  	XOR	X8, X6; \
   112  	ADD	X6, h; \
   113  	AND	e, f, X5; \
   114  	NOT	e, X7; \
   115  	AND	g, X7; \
   116  	XOR	X7, X5; \
   117  	ADD	h, X5
   118  
   119  // Calculate T2 in X6.
   120  //   T2 = BIGSIGMA0(a) + Maj(a, b, c)
   121  //     BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
   122  //     Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
   123  #define SHA512T2(a, b, c) \
   124  	ROR	$28, a, X6; \
   125  	ROR	$34, a, X7; \
   126  	XOR	X7, X6; \
   127  	ROR	$39, a, X8; \
   128  	XOR	X8, X6; \
   129  	AND	a, b, X7; \
   130  	AND	a, c, X8; \
   131  	XOR	X8, X7; \
   132  	AND	b, c, X9; \
   133  	XOR	X9, X7; \
   134  	ADD	X7, X6
   135  
   136  // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
   137  // The values for e and a are stored in d and h, ready for rotation.
   138  #define SHA512ROUND(index, a, b, c, d, e, f, g, h) \
   139  	SHA512T1(index, e, f, g, h); \
   140  	SHA512T2(a, b, c); \
   141  	MOV	X6, h; \
   142  	ADD	X5, d; \
   143  	ADD	X5, h
   144  
   145  #define SHA512ROUND0(index, a, b, c, d, e, f, g, h) \
   146  	MSGSCHEDULE0(index); \
   147  	SHA512ROUND(index, a, b, c, d, e, f, g, h)
   148  
   149  #define SHA512ROUND1(index, a, b, c, d, e, f, g, h) \
   150  	MSGSCHEDULE1(index); \
   151  	SHA512ROUND(index, a, b, c, d, e, f, g, h)
   152  
   153  // func block(dig *digest, p []byte)
   154  TEXT ·block(SB),0,$128-32
   155  	MOV	p_base+8(FP), X29
   156  	MOV	p_len+16(FP), X30
   157  	SRL	$7, X30
   158  	SLL	$7, X30
   159  
   160  	ADD	X29, X30, X28
   161  	BEQ	X28, X29, end
   162  
   163  	MOV	·_K(SB), X18		// const table
   164  	ADD	$8, X2, X19		// message schedule
   165  
   166  	MOV	dig+0(FP), X20
   167  	MOV	(0*8)(X20), X10		// a = H0
   168  	MOV	(1*8)(X20), X11		// b = H1
   169  	MOV	(2*8)(X20), X12		// c = H2
   170  	MOV	(3*8)(X20), X13		// d = H3
   171  	MOV	(4*8)(X20), X14		// e = H4
   172  	MOV	(5*8)(X20), X15		// f = H5
   173  	MOV	(6*8)(X20), X16		// g = H6
   174  	MOV	(7*8)(X20), X17		// h = H7
   175  
   176  loop:
   177  	SHA512ROUND0(0, X10, X11, X12, X13, X14, X15, X16, X17)
   178  	SHA512ROUND0(1, X17, X10, X11, X12, X13, X14, X15, X16)
   179  	SHA512ROUND0(2, X16, X17, X10, X11, X12, X13, X14, X15)
   180  	SHA512ROUND0(3, X15, X16, X17, X10, X11, X12, X13, X14)
   181  	SHA512ROUND0(4, X14, X15, X16, X17, X10, X11, X12, X13)
   182  	SHA512ROUND0(5, X13, X14, X15, X16, X17, X10, X11, X12)
   183  	SHA512ROUND0(6, X12, X13, X14, X15, X16, X17, X10, X11)
   184  	SHA512ROUND0(7, X11, X12, X13, X14, X15, X16, X17, X10)
   185  	SHA512ROUND0(8, X10, X11, X12, X13, X14, X15, X16, X17)
   186  	SHA512ROUND0(9, X17, X10, X11, X12, X13, X14, X15, X16)
   187  	SHA512ROUND0(10, X16, X17, X10, X11, X12, X13, X14, X15)
   188  	SHA512ROUND0(11, X15, X16, X17, X10, X11, X12, X13, X14)
   189  	SHA512ROUND0(12, X14, X15, X16, X17, X10, X11, X12, X13)
   190  	SHA512ROUND0(13, X13, X14, X15, X16, X17, X10, X11, X12)
   191  	SHA512ROUND0(14, X12, X13, X14, X15, X16, X17, X10, X11)
   192  	SHA512ROUND0(15, X11, X12, X13, X14, X15, X16, X17, X10)
   193  
   194  	SHA512ROUND1(16, X10, X11, X12, X13, X14, X15, X16, X17)
   195  	SHA512ROUND1(17, X17, X10, X11, X12, X13, X14, X15, X16)
   196  	SHA512ROUND1(18, X16, X17, X10, X11, X12, X13, X14, X15)
   197  	SHA512ROUND1(19, X15, X16, X17, X10, X11, X12, X13, X14)
   198  	SHA512ROUND1(20, X14, X15, X16, X17, X10, X11, X12, X13)
   199  	SHA512ROUND1(21, X13, X14, X15, X16, X17, X10, X11, X12)
   200  	SHA512ROUND1(22, X12, X13, X14, X15, X16, X17, X10, X11)
   201  	SHA512ROUND1(23, X11, X12, X13, X14, X15, X16, X17, X10)
   202  	SHA512ROUND1(24, X10, X11, X12, X13, X14, X15, X16, X17)
   203  	SHA512ROUND1(25, X17, X10, X11, X12, X13, X14, X15, X16)
   204  	SHA512ROUND1(26, X16, X17, X10, X11, X12, X13, X14, X15)
   205  	SHA512ROUND1(27, X15, X16, X17, X10, X11, X12, X13, X14)
   206  	SHA512ROUND1(28, X14, X15, X16, X17, X10, X11, X12, X13)
   207  	SHA512ROUND1(29, X13, X14, X15, X16, X17, X10, X11, X12)
   208  	SHA512ROUND1(30, X12, X13, X14, X15, X16, X17, X10, X11)
   209  	SHA512ROUND1(31, X11, X12, X13, X14, X15, X16, X17, X10)
   210  	SHA512ROUND1(32, X10, X11, X12, X13, X14, X15, X16, X17)
   211  	SHA512ROUND1(33, X17, X10, X11, X12, X13, X14, X15, X16)
   212  	SHA512ROUND1(34, X16, X17, X10, X11, X12, X13, X14, X15)
   213  	SHA512ROUND1(35, X15, X16, X17, X10, X11, X12, X13, X14)
   214  	SHA512ROUND1(36, X14, X15, X16, X17, X10, X11, X12, X13)
   215  	SHA512ROUND1(37, X13, X14, X15, X16, X17, X10, X11, X12)
   216  	SHA512ROUND1(38, X12, X13, X14, X15, X16, X17, X10, X11)
   217  	SHA512ROUND1(39, X11, X12, X13, X14, X15, X16, X17, X10)
   218  	SHA512ROUND1(40, X10, X11, X12, X13, X14, X15, X16, X17)
   219  	SHA512ROUND1(41, X17, X10, X11, X12, X13, X14, X15, X16)
   220  	SHA512ROUND1(42, X16, X17, X10, X11, X12, X13, X14, X15)
   221  	SHA512ROUND1(43, X15, X16, X17, X10, X11, X12, X13, X14)
   222  	SHA512ROUND1(44, X14, X15, X16, X17, X10, X11, X12, X13)
   223  	SHA512ROUND1(45, X13, X14, X15, X16, X17, X10, X11, X12)
   224  	SHA512ROUND1(46, X12, X13, X14, X15, X16, X17, X10, X11)
   225  	SHA512ROUND1(47, X11, X12, X13, X14, X15, X16, X17, X10)
   226  	SHA512ROUND1(48, X10, X11, X12, X13, X14, X15, X16, X17)
   227  	SHA512ROUND1(49, X17, X10, X11, X12, X13, X14, X15, X16)
   228  	SHA512ROUND1(50, X16, X17, X10, X11, X12, X13, X14, X15)
   229  	SHA512ROUND1(51, X15, X16, X17, X10, X11, X12, X13, X14)
   230  	SHA512ROUND1(52, X14, X15, X16, X17, X10, X11, X12, X13)
   231  	SHA512ROUND1(53, X13, X14, X15, X16, X17, X10, X11, X12)
   232  	SHA512ROUND1(54, X12, X13, X14, X15, X16, X17, X10, X11)
   233  	SHA512ROUND1(55, X11, X12, X13, X14, X15, X16, X17, X10)
   234  	SHA512ROUND1(56, X10, X11, X12, X13, X14, X15, X16, X17)
   235  	SHA512ROUND1(57, X17, X10, X11, X12, X13, X14, X15, X16)
   236  	SHA512ROUND1(58, X16, X17, X10, X11, X12, X13, X14, X15)
   237  	SHA512ROUND1(59, X15, X16, X17, X10, X11, X12, X13, X14)
   238  	SHA512ROUND1(60, X14, X15, X16, X17, X10, X11, X12, X13)
   239  	SHA512ROUND1(61, X13, X14, X15, X16, X17, X10, X11, X12)
   240  	SHA512ROUND1(62, X12, X13, X14, X15, X16, X17, X10, X11)
   241  	SHA512ROUND1(63, X11, X12, X13, X14, X15, X16, X17, X10)
   242  	SHA512ROUND1(64, X10, X11, X12, X13, X14, X15, X16, X17)
   243  	SHA512ROUND1(65, X17, X10, X11, X12, X13, X14, X15, X16)
   244  	SHA512ROUND1(66, X16, X17, X10, X11, X12, X13, X14, X15)
   245  	SHA512ROUND1(67, X15, X16, X17, X10, X11, X12, X13, X14)
   246  	SHA512ROUND1(68, X14, X15, X16, X17, X10, X11, X12, X13)
   247  	SHA512ROUND1(69, X13, X14, X15, X16, X17, X10, X11, X12)
   248  	SHA512ROUND1(70, X12, X13, X14, X15, X16, X17, X10, X11)
   249  	SHA512ROUND1(71, X11, X12, X13, X14, X15, X16, X17, X10)
   250  	SHA512ROUND1(72, X10, X11, X12, X13, X14, X15, X16, X17)
   251  	SHA512ROUND1(73, X17, X10, X11, X12, X13, X14, X15, X16)
   252  	SHA512ROUND1(74, X16, X17, X10, X11, X12, X13, X14, X15)
   253  	SHA512ROUND1(75, X15, X16, X17, X10, X11, X12, X13, X14)
   254  	SHA512ROUND1(76, X14, X15, X16, X17, X10, X11, X12, X13)
   255  	SHA512ROUND1(77, X13, X14, X15, X16, X17, X10, X11, X12)
   256  	SHA512ROUND1(78, X12, X13, X14, X15, X16, X17, X10, X11)
   257  	SHA512ROUND1(79, X11, X12, X13, X14, X15, X16, X17, X10)
   258  
   259  	MOV	(0*8)(X20), X5
   260  	MOV	(1*8)(X20), X6
   261  	MOV	(2*8)(X20), X7
   262  	MOV	(3*8)(X20), X8
   263  	ADD	X5, X10		// H0 = a + H0
   264  	ADD	X6, X11		// H1 = b + H1
   265  	ADD	X7, X12		// H2 = c + H2
   266  	ADD	X8, X13		// H3 = d + H3
   267  	MOV	X10, (0*8)(X20)
   268  	MOV	X11, (1*8)(X20)
   269  	MOV	X12, (2*8)(X20)
   270  	MOV	X13, (3*8)(X20)
   271  	MOV	(4*8)(X20), X5
   272  	MOV	(5*8)(X20), X6
   273  	MOV	(6*8)(X20), X7
   274  	MOV	(7*8)(X20), X8
   275  	ADD	X5, X14		// H4 = e + H4
   276  	ADD	X6, X15		// H5 = f + H5
   277  	ADD	X7, X16		// H6 = g + H6
   278  	ADD	X8, X17		// H7 = h + H7
   279  	MOV	X14, (4*8)(X20)
   280  	MOV	X15, (5*8)(X20)
   281  	MOV	X16, (6*8)(X20)
   282  	MOV	X17, (7*8)(X20)
   283  
   284  	ADD	$128, X29
   285  	BNE	X28, X29, loop
   286  
   287  end:
   288  	RET
   289  

View as plain text