// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// Loong64 version of md5block.go
// derived from crypto/md5/md5block_amd64.s

//go:build !purego

#define REGTMP	R30
#define REGTMP1	R12
#define REGTMP2	R18

#include "textflag.h"

// func block(dig *digest, p []byte)
TEXT	·block(SB),NOSPLIT,$0-32
	MOVV	dig+0(FP), R4
	MOVV	p+8(FP), R5
	MOVV	p_len+16(FP), R6
	AND	$~63, R6
	BEQ	R6, zero
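
// In Go terms, the prologue above is roughly this sketch (&^ is Go's
// AND NOT operator; only whole 64-byte blocks are processed):
//
//	n := len(p) &^ 63
//	if n == 0 {
//		return
//	}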

	// p_len >= 64
	ADDV	R5, R6, R24
	MOVW	(0*4)(R4), R7
	MOVW	(1*4)(R4), R8
	MOVW	(2*4)(R4), R9
	MOVW	(3*4)(R4), R10

loop:
	MOVW	R7, R14
	MOVW	R8, R15
	MOVW	R9, R16
	MOVW	R10, R17

	MOVW	(0*4)(R5), R11
	MOVW	R10, REGTMP1

// F = ((c ^ d) & b) ^ d
#define ROUND1(a, b, c, d, index, const, shift) \
	ADDV	$const, a; \
	ADD	R11, a; \
	MOVW	(index*4)(R5), R11; \
	XOR	c, REGTMP1; \
	AND	b, REGTMP1; \
	XOR	d, REGTMP1; \
	ADD	REGTMP1, a; \
	ROTR	$(32-shift), a; \
	MOVW	c, REGTMP1; \
	ADD	b, a
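
// One ROUND1 step corresponds roughly to this Go sketch, where x[i] is the
// message word already loaded into R11 and the MOVW in the middle only
// prefetches x[index] for the following step (ROTR by 32-shift is the same
// as a left-rotate by shift):
//
//	a += (((c ^ d) & b) ^ d) + x[i] + const	// F, rewritten from (b & c) | (^b & d)
//	a = a<<shift | a>>(32-shift)
//	a += b
//
// REGTMP1 holds this step's d and is then reloaded with c, which becomes
// the next step's d once the arguments rotate.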

	ROUND1(R7,  R8,  R9,  R10,  1, 0xd76aa478,  7);
	ROUND1(R10, R7,  R8,  R9,   2, 0xe8c7b756, 12);
	ROUND1(R9,  R10, R7,  R8,   3, 0x242070db, 17);
	ROUND1(R8,  R9,  R10, R7,   4, 0xc1bdceee, 22);
	ROUND1(R7,  R8,  R9,  R10,  5, 0xf57c0faf,  7);
	ROUND1(R10, R7,  R8,  R9,   6, 0x4787c62a, 12);
	ROUND1(R9,  R10, R7,  R8,   7, 0xa8304613, 17);
	ROUND1(R8,  R9,  R10, R7,   8, 0xfd469501, 22);
	ROUND1(R7,  R8,  R9,  R10,  9, 0x698098d8,  7);
	ROUND1(R10, R7,  R8,  R9,  10, 0x8b44f7af, 12);
	ROUND1(R9,  R10, R7,  R8,  11, 0xffff5bb1, 17);
	ROUND1(R8,  R9,  R10, R7,  12, 0x895cd7be, 22);
	ROUND1(R7,  R8,  R9,  R10, 13, 0x6b901122,  7);
	ROUND1(R10, R7,  R8,  R9,  14, 0xfd987193, 12);
	ROUND1(R9,  R10, R7,  R8,  15, 0xa679438e, 17);
	ROUND1(R8,  R9,  R10, R7,   1, 0x49b40821, 22);

	MOVW	(1*4)(R5), R11

// F = ((b ^ c) & d) ^ c
#define ROUND2(a, b, c, d, index, const, shift) \
	ADDV	$const, a; \
	ADD	R11, a; \
	MOVW	(index*4)(R5), R11; \
	XOR	b, c, REGTMP; \
	AND	REGTMP, d, REGTMP; \
	XOR	REGTMP, c, REGTMP; \
	ADD	REGTMP, a; \
	ROTR	$(32-shift), a; \
	ADD	b, a
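
// One ROUND2 step corresponds roughly to this Go sketch; ((b ^ c) & d) ^ c
// is the usual MD5 G function (b & d) | (c & ^d) rewritten so it needs
// only a single temporary register:
//
//	a += (((b ^ c) & d) ^ c) + x[i] + const
//	a = a<<shift | a>>(32-shift)
//	a += b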

	ROUND2(R7,  R8,  R9,  R10,  6, 0xf61e2562,  5);
	ROUND2(R10, R7,  R8,  R9,  11, 0xc040b340,  9);
	ROUND2(R9,  R10, R7,  R8,   0, 0x265e5a51, 14);
	ROUND2(R8,  R9,  R10, R7,   5, 0xe9b6c7aa, 20);
	ROUND2(R7,  R8,  R9,  R10, 10, 0xd62f105d,  5);
	ROUND2(R10, R7,  R8,  R9,  15,  0x2441453,  9);
	ROUND2(R9,  R10, R7,  R8,   4, 0xd8a1e681, 14);
	ROUND2(R8,  R9,  R10, R7,   9, 0xe7d3fbc8, 20);
	ROUND2(R7,  R8,  R9,  R10, 14, 0x21e1cde6,  5);
	ROUND2(R10, R7,  R8,  R9,   3, 0xc33707d6,  9);
	ROUND2(R9,  R10, R7,  R8,   8, 0xf4d50d87, 14);
	ROUND2(R8,  R9,  R10, R7,  13, 0x455a14ed, 20);
	ROUND2(R7,  R8,  R9,  R10,  2, 0xa9e3e905,  5);
	ROUND2(R10, R7,  R8,  R9,   7, 0xfcefa3f8,  9);
	ROUND2(R9,  R10, R7,  R8,  12, 0x676f02d9, 14);
	ROUND2(R8,  R9,  R10, R7,   5, 0x8d2a4c8a, 20);

	MOVW	(5*4)(R5), R11
	MOVW	R9, REGTMP1

// F = b ^ c ^ d
#define ROUND3(a, b, c, d, index, const, shift) \
	ADDV	$const, a; \
	ADD	R11, a; \
	MOVW	(index*4)(R5), R11; \
	XOR	d, REGTMP1; \
	XOR	b, REGTMP1; \
	ADD	REGTMP1, a; \
	ROTR	$(32-shift), a; \
	MOVW	b, REGTMP1; \
	ADD	b, a
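
// One ROUND3 step corresponds roughly to this Go sketch. REGTMP1 always
// enters a step holding that step's c (it is reloaded with b, which is
// the next step's c), so each step needs only two XORs:
//
//	a += (b ^ c ^ d) + x[i] + const
//	a = a<<shift | a>>(32-shift)
//	a += b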

	ROUND3(R7,  R8,  R9,  R10,  8, 0xfffa3942,  4);
	ROUND3(R10, R7,  R8,  R9,  11, 0x8771f681, 11);
	ROUND3(R9,  R10, R7,  R8,  14, 0x6d9d6122, 16);
	ROUND3(R8,  R9,  R10, R7,   1, 0xfde5380c, 23);
	ROUND3(R7,  R8,  R9,  R10,  4, 0xa4beea44,  4);
	ROUND3(R10, R7,  R8,  R9,   7, 0x4bdecfa9, 11);
	ROUND3(R9,  R10, R7,  R8,  10, 0xf6bb4b60, 16);
	ROUND3(R8,  R9,  R10, R7,  13, 0xbebfbc70, 23);
	ROUND3(R7,  R8,  R9,  R10,  0, 0x289b7ec6,  4);
	ROUND3(R10, R7,  R8,  R9,   3, 0xeaa127fa, 11);
	ROUND3(R9,  R10, R7,  R8,   6, 0xd4ef3085, 16);
	ROUND3(R8,  R9,  R10, R7,   9,  0x4881d05, 23);
	ROUND3(R7,  R8,  R9,  R10, 12, 0xd9d4d039,  4);
	ROUND3(R10, R7,  R8,  R9,  15, 0xe6db99e5, 11);
	ROUND3(R9,  R10, R7,  R8,   2, 0x1fa27cf8, 16);
	ROUND3(R8,  R9,  R10, R7,   0, 0xc4ac5665, 23);

	MOVW	(0*4)(R5), R11
	MOVV	$0xffffffff, REGTMP2
	XOR	R10, REGTMP2, REGTMP1	// REGTMP1 = ~d

// F = c ^ (b | (~d))
#define ROUND4(a, b, c, d, index, const, shift) \
	ADDV	$const, a; \
	ADD	R11, a; \
	MOVW	(index*4)(R5), R11; \
	OR	b, REGTMP1; \
	XOR	c, REGTMP1; \
	ADD	REGTMP1, a; \
	ROTR	$(32-shift), a; \
	MOVV	$0xffffffff, REGTMP2; \
	XOR	c, REGTMP2, REGTMP1; \
	ADD	b, a
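
// One ROUND4 step corresponds roughly to this Go sketch. REGTMP1 enters a
// step holding ^d; the trailing MOVV/XOR pair recomputes it as ^c, which
// is the next step's ^d once the arguments rotate:
//
//	a += (c ^ (b | ^d)) + x[i] + const
//	a = a<<shift | a>>(32-shift)
//	a += b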

	ROUND4(R7,  R8,  R9,  R10,  7, 0xf4292244,  6);
	ROUND4(R10, R7,  R8,  R9,  14, 0x432aff97, 10);
	ROUND4(R9,  R10, R7,  R8,   5, 0xab9423a7, 15);
	ROUND4(R8,  R9,  R10, R7,  12, 0xfc93a039, 21);
	ROUND4(R7,  R8,  R9,  R10,  3, 0x655b59c3,  6);
	ROUND4(R10, R7,  R8,  R9,  10, 0x8f0ccc92, 10);
	ROUND4(R9,  R10, R7,  R8,   1, 0xffeff47d, 15);
	ROUND4(R8,  R9,  R10, R7,   8, 0x85845dd1, 21);
	ROUND4(R7,  R8,  R9,  R10, 15, 0x6fa87e4f,  6);
	ROUND4(R10, R7,  R8,  R9,   6, 0xfe2ce6e0, 10);
	ROUND4(R9,  R10, R7,  R8,  13, 0xa3014314, 15);
	ROUND4(R8,  R9,  R10, R7,   4, 0x4e0811a1, 21);
	ROUND4(R7,  R8,  R9,  R10, 11, 0xf7537e82,  6);
	ROUND4(R10, R7,  R8,  R9,   2, 0xbd3af235, 10);
	ROUND4(R9,  R10, R7,  R8,   9, 0x2ad7d2bb, 15);
	ROUND4(R8,  R9,  R10, R7,   0, 0xeb86d391, 21);

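	// Add this block's output back into the chaining state; in Go terms:
	// a, b, c, d = a+aa, b+bb, c+cc, d+dd, where aa-dd are the values
	// saved in R14-R17 at the top of the loop.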
	ADD	R14, R7
	ADD	R15, R8
	ADD	R16, R9
	ADD	R17, R10

	ADDV	$64, R5
	BNE	R5, R24, loop

	MOVW	R7, (0*4)(R4)
	MOVW	R8, (1*4)(R4)
	MOVW	R9, (2*4)(R4)
	MOVW	R10, (3*4)(R4)
zero:
	RET