src/crypto/md5/md5block_loong64.s
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// Loong64 version of md5block.go
// derived from crypto/md5/md5block_amd64.s

//go:build !purego

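// Scratch registers: REGTMP and REGTMP1 hold intermediate values of the
// round functions; REGTMP2 holds the constant 0xffffffff in round 4.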
#define REGTMP	R30
#define REGTMP1	R12
#define REGTMP2	R18

#include "textflag.h"

// func block(dig *digest, p []byte)
TEXT	·block(SB),NOSPLIT,$0-32
	MOVV	dig+0(FP), R4
	MOVV	p+8(FP), R5
	MOVV	p_len+16(FP), R6
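	// Round the length down to a whole number of 64-byte blocks;
	// if the result is zero there is nothing to do.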
	AND	$~63, R6
	BEQ	R6, zero

	// p_len >= 64
	ADDV	R5, R6, R24
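	// R24 now points just past the last full block. Load the current
	// hash state a, b, c, d into R7-R10.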
	MOVW	(0*4)(R4), R7
	MOVW	(1*4)(R4), R8
	MOVW	(2*4)(R4), R9
	MOVW	(3*4)(R4), R10

loop:
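	// Save the state on entry to the block so it can be added back in
	// after the 64 rounds.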
	MOVW	R7, R14
	MOVW	R8, R15
	MOVW	R9, R16
	MOVW	R10, R17

	MOVW	(0*4)(R5), R11
	MOVW	R10, REGTMP1

// F = ((c ^ d) & b) ^ d
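// equivalent to the standard MD5 F(b, c, d) = (b & c) | (~b & d).
// In every ROUND macro below, R11 already holds this step's message word;
// index names the word prefetched into R11 for the next step, and
// ROTR $(32-shift) performs a left rotation by shift.
// REGTMP1 enters each ROUND1 step holding d and leaves holding c,
// which becomes d in the following step.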
#define ROUND1(a, b, c, d, index, const, shift) \
	ADDV	$const, a; \
	ADD	R11, a; \
	MOVW	(index*4)(R5), R11; \
	XOR	c, REGTMP1; \
	AND	b, REGTMP1; \
	XOR	d, REGTMP1; \
	ADD	REGTMP1, a; \
	ROTR	$(32-shift), a; \
	MOVW	c, REGTMP1; \
	ADD	b, a

	ROUND1(R7, R8, R9, R10, 1, 0xd76aa478, 7);
	ROUND1(R10, R7, R8, R9, 2, 0xe8c7b756, 12);
	ROUND1(R9, R10, R7, R8, 3, 0x242070db, 17);
	ROUND1(R8, R9, R10, R7, 4, 0xc1bdceee, 22);
	ROUND1(R7, R8, R9, R10, 5, 0xf57c0faf, 7);
	ROUND1(R10, R7, R8, R9, 6, 0x4787c62a, 12);
	ROUND1(R9, R10, R7, R8, 7, 0xa8304613, 17);
	ROUND1(R8, R9, R10, R7, 8, 0xfd469501, 22);
	ROUND1(R7, R8, R9, R10, 9, 0x698098d8, 7);
	ROUND1(R10, R7, R8, R9, 10, 0x8b44f7af, 12);
	ROUND1(R9, R10, R7, R8, 11, 0xffff5bb1, 17);
	ROUND1(R8, R9, R10, R7, 12, 0x895cd7be, 22);
	ROUND1(R7, R8, R9, R10, 13, 0x6b901122, 7);
	ROUND1(R10, R7, R8, R9, 14, 0xfd987193, 12);
	ROUND1(R9, R10, R7, R8, 15, 0xa679438e, 17);
	ROUND1(R8, R9, R10, R7, 1, 0x49b40821, 22);

	MOVW	(1*4)(R5), R11

// F = ((b ^ c) & d) ^ c
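// equivalent to the standard MD5 G(b, c, d) = (b & d) | (c & ~d).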
#define ROUND2(a, b, c, d, index, const, shift) \
	ADDV	$const, a; \
	ADD	R11, a; \
	MOVW	(index*4)(R5), R11; \
	XOR	b, c, REGTMP; \
	AND	REGTMP, d, REGTMP; \
	XOR	REGTMP, c, REGTMP; \
	ADD	REGTMP, a; \
	ROTR	$(32-shift), a; \
	ADD	b, a

	ROUND2(R7, R8, R9, R10, 6, 0xf61e2562, 5);
	ROUND2(R10, R7, R8, R9, 11, 0xc040b340, 9);
	ROUND2(R9, R10, R7, R8, 0, 0x265e5a51, 14);
	ROUND2(R8, R9, R10, R7, 5, 0xe9b6c7aa, 20);
	ROUND2(R7, R8, R9, R10, 10, 0xd62f105d, 5);
	ROUND2(R10, R7, R8, R9, 15, 0x2441453, 9);
	ROUND2(R9, R10, R7, R8, 4, 0xd8a1e681, 14);
	ROUND2(R8, R9, R10, R7, 9, 0xe7d3fbc8, 20);
	ROUND2(R7, R8, R9, R10, 14, 0x21e1cde6, 5);
	ROUND2(R10, R7, R8, R9, 3, 0xc33707d6, 9);
	ROUND2(R9, R10, R7, R8, 8, 0xf4d50d87, 14);
	ROUND2(R8, R9, R10, R7, 13, 0x455a14ed, 20);
	ROUND2(R7, R8, R9, R10, 2, 0xa9e3e905, 5);
	ROUND2(R10, R7, R8, R9, 7, 0xfcefa3f8, 9);
	ROUND2(R9, R10, R7, R8, 12, 0x676f02d9, 14);
	ROUND2(R8, R9, R10, R7, 5, 0x8d2a4c8a, 20);

	MOVW	(5*4)(R5), R11
	MOVW	R9, REGTMP1

// F = b ^ c ^ d
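// i.e. the standard MD5 H(b, c, d). REGTMP1 enters each step holding c
// and leaves holding b, which becomes c in the following step.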
#define ROUND3(a, b, c, d, index, const, shift) \
	ADDV	$const, a; \
	ADD	R11, a; \
	MOVW	(index*4)(R5), R11; \
	XOR	d, REGTMP1; \
	XOR	b, REGTMP1; \
	ADD	REGTMP1, a; \
	ROTR	$(32-shift), a; \
	MOVW	b, REGTMP1; \
	ADD	b, a

	ROUND3(R7, R8, R9, R10, 8, 0xfffa3942, 4);
	ROUND3(R10, R7, R8, R9, 11, 0x8771f681, 11);
	ROUND3(R9, R10, R7, R8, 14, 0x6d9d6122, 16);
	ROUND3(R8, R9, R10, R7, 1, 0xfde5380c, 23);
	ROUND3(R7, R8, R9, R10, 4, 0xa4beea44, 4);
	ROUND3(R10, R7, R8, R9, 7, 0x4bdecfa9, 11);
	ROUND3(R9, R10, R7, R8, 10, 0xf6bb4b60, 16);
	ROUND3(R8, R9, R10, R7, 13, 0xbebfbc70, 23);
	ROUND3(R7, R8, R9, R10, 0, 0x289b7ec6, 4);
	ROUND3(R10, R7, R8, R9, 3, 0xeaa127fa, 11);
	ROUND3(R9, R10, R7, R8, 6, 0xd4ef3085, 16);
	ROUND3(R8, R9, R10, R7, 9, 0x4881d05, 23);
	ROUND3(R7, R8, R9, R10, 12, 0xd9d4d039, 4);
	ROUND3(R10, R7, R8, R9, 15, 0xe6db99e5, 11);
	ROUND3(R9, R10, R7, R8, 2, 0x1fa27cf8, 16);
	ROUND3(R8, R9, R10, R7, 0, 0xc4ac5665, 23);

	MOVW	(0*4)(R5), R11
	MOVV	$0xffffffff, REGTMP2
	XOR	R10, REGTMP2, REGTMP1 // REGTMP1 = ~d

// F = c ^ (b | (~d))
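// i.e. the standard MD5 I(b, c, d). REGTMP1 enters each step holding ~d;
// at the end of the step it is recomputed as ~c, since c becomes d in the
// following step.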
#define ROUND4(a, b, c, d, index, const, shift) \
	ADDV	$const, a; \
	ADD	R11, a; \
	MOVW	(index*4)(R5), R11; \
	OR	b, REGTMP1; \
	XOR	c, REGTMP1; \
	ADD	REGTMP1, a; \
	ROTR	$(32-shift), a; \
	MOVV	$0xffffffff, REGTMP2; \
	XOR	c, REGTMP2, REGTMP1; \
	ADD	b, a

	ROUND4(R7, R8, R9, R10, 7, 0xf4292244, 6);
	ROUND4(R10, R7, R8, R9, 14, 0x432aff97, 10);
	ROUND4(R9, R10, R7, R8, 5, 0xab9423a7, 15);
	ROUND4(R8, R9, R10, R7, 12, 0xfc93a039, 21);
	ROUND4(R7, R8, R9, R10, 3, 0x655b59c3, 6);
	ROUND4(R10, R7, R8, R9, 10, 0x8f0ccc92, 10);
	ROUND4(R9, R10, R7, R8, 1, 0xffeff47d, 15);
	ROUND4(R8, R9, R10, R7, 8, 0x85845dd1, 21);
	ROUND4(R7, R8, R9, R10, 15, 0x6fa87e4f, 6);
	ROUND4(R10, R7, R8, R9, 6, 0xfe2ce6e0, 10);
	ROUND4(R9, R10, R7, R8, 13, 0xa3014314, 15);
	ROUND4(R8, R9, R10, R7, 4, 0x4e0811a1, 21);
	ROUND4(R7, R8, R9, R10, 11, 0xf7537e82, 6);
	ROUND4(R10, R7, R8, R9, 2, 0xbd3af235, 10);
	ROUND4(R9, R10, R7, R8, 9, 0x2ad7d2bb, 15);
	ROUND4(R8, R9, R10, R7, 0, 0xeb86d391, 21);

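	// Add the state saved at the top of the loop back into a, b, c, d.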
	ADD	R14, R7
	ADD	R15, R8
	ADD	R16, R9
	ADD	R17, R10

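	// Advance to the next block; loop until all full blocks are done.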
	ADDV	$64, R5
	BNE	R5, R24, loop

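	// Write the updated state back to the digest.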
	MOVW	R7, (0*4)(R4)
	MOVW	R8, (1*4)(R4)
	MOVW	R9, (2*4)(R4)
	MOVW	R10, (3*4)(R4)
zero:
	RET