Text file
src/crypto/md5/md5block_arm.s
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 //
5 // ARM version of md5block.go
6
7 //go:build !purego
8
9 #include "textflag.h"
10
11 // Register definitions
// Symbolic names for the ARM registers used by ·block below.
// Some of them are briefly reused as scratch before the rounds start:
// Rtable and Rc0 stage the memmove arguments, and Rc0 holds the digest
// pointer while the accumulators are loaded.
12 #define Rtable R0 // Pointer into the MD5 constants table; advanced by every MOVM.IA.W load
13 #define Rdata R1 // Pointer to the 64-byte block being hashed (caller data or the aligned copy in buf)
14 #define Ra R2 // MD5 accumulator a (1st word loaded from *dig)
15 #define Rb R3 // MD5 accumulator b (2nd word loaded from *dig)
16 #define Rc R4 // MD5 accumulator c (3rd word loaded from *dig)
17 #define Rd R5 // MD5 accumulator d (4th word loaded from *dig)
18 #define Rc0 R6 // MD5 constant for the 1st step of each group of four; reused for the saved state at the end
19 #define Rc1 R7 // MD5 constant for the 2nd step of each group of four; reused for the saved state at the end
20 #define Rc2 R8 // MD5 constant for the 3rd step of each group of four; reused for the saved state at the end
21 // r9, r10 are forbidden (presumably reserved by the Go toolchain/runtime - see the Go assembler guide)
22 // r11 is OK provided you check the assembler that no synthetic instructions use it
23 #define Rc3 R11 // MD5 constant for the 4th step of each group of four; reused for the saved state at the end
24 #define Rt0 R12 // temporary: round-function value and running sum; end pointer at the loop test
25 #define Rt1 R14 // temporary: message word X[index] loaded in each step
26
27 // func block(dig *digest, p []byte)
// Argument area is 16 bytes (the "-16" on the TEXT line): one pointer
// plus the three words of the slice header.
28 // 0(FP) is *digest
29 // 4(FP) is p.array (struct Slice)
30 // 8(FP) is p.len
31 //12(FP) is p.cap
32 //
33 // Stack frame
// The local frame is 84 bytes (the "$84" on the TEXT line):
//   8 bytes  p_end + p_data
//  64 bytes  buf
//  12 bytes  outgoing arguments for runtime·memmove
34 #define p_end end-4(SP) // pointer to the end of data (p.array + p.len), computed once on entry
35 #define p_data data-8(SP) // current data pointer, saved at the top of every loop iteration
36 #define buf buffer-(8+4*16)(SP) // 16 words (64 bytes): aligned copy of one block, used when the data pointer is not 4-byte aligned
37 // 3 words at 4..12(R13) for called routine parameters
38
// ·block folds the bytes of p into the four 32-bit state words stored at
// *dig, consuming 64 bytes per pass of `loop`. Each pass:
//   1. saves the data pointer and, if it is not 4-byte aligned, copies the
//      64-byte block into buf with runtime·memmove;
//   2. reloads the table pointer and the four state words from *dig;
//   3. runs 4 rounds of 16 steps (ROUND1..ROUND4), loading the constants
//      four at a time from ·table;
//   4. adds the previous state back in and stores the result to *dig.
// NOTE(review): the length test is at the bottom of the loop, so one block is
// always processed, and the loop only ends exactly at p_end when p.len is a
// multiple of 64 - presumably guaranteed by the Go caller; confirm.
39 TEXT ·block(SB), NOSPLIT, $84-16
40 MOVW p+4(FP), Rdata // pointer to the data
41 MOVW p_len+8(FP), Rt0 // number of bytes
42 ADD Rdata, Rt0 // Rt0 = data + len
43 MOVW Rt0, p_end // pointer to end of data
44
45 loop:
46 MOVW Rdata, p_data // Save Rdata (it may be redirected to buf below)
47 AND.S $3, Rdata, Rt0 // TST $3, Rdata not working see issue 5921
48 BEQ aligned // aligned detected - skip copy
49
50 // Copy the unaligned source data into the aligned temporary buffer
51 // memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
52 MOVW $buf, Rtable // to
53 MOVW $64, Rc0 // n
54 MOVM.IB [Rtable,Rdata,Rc0], (R13) // store to, from, n at 4(R13), 8(R13), 12(R13)
55 BL runtime·memmove(SB)
56
57 // Point to the local aligned copy of the data
58 MOVW $buf, Rdata
59
60 aligned:
61 // Point to the table of constants
62 // A PC relative add would be cheaper than this
63 MOVW $·table(SB), Rtable
64
65 // Load up initial MD5 accumulator
66 MOVW dig+0(FP), Rc0
67 MOVM.IA (Rc0), [Ra,Rb,Rc,Rd]
68
// Round macros. The macro parameters Ra..Rd shadow the register #defines so
// that each invocation can rotate which register plays which role.
// In every macro Rt0 accumulates the round function plus X[index] plus the
// constant, and Rt1 holds the message word X[index] read from Rdata.
// The last ADD uses a rotate-right by (32-shift) on its shifted operand (@>),
// which is the same as a rotate-left by shift.
69 // a += (((c^d)&b)^d) + X[index] + const
70 // a = (a<<shift | a>>(32-shift)) + b
71 #define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
72 EOR Rc, Rd, Rt0 ; \
73 AND Rb, Rt0 ; \
74 EOR Rd, Rt0 ; \
75 MOVW (index<<2)(Rdata), Rt1 ; \
76 ADD Rt1, Rt0 ; \
77 ADD Rconst, Rt0 ; \
78 ADD Rt0, Ra ; \
79 ADD Ra@>(32-shift), Rb, Ra ;
80
// Each MOVM.IA.W below loads the next four table constants into Rc0..Rc3 and,
// because of the .W (writeback) suffix, advances Rtable past them.
81 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
82 ROUND1(Ra, Rb, Rc, Rd, 0, 7, Rc0)
83 ROUND1(Rd, Ra, Rb, Rc, 1, 12, Rc1)
84 ROUND1(Rc, Rd, Ra, Rb, 2, 17, Rc2)
85 ROUND1(Rb, Rc, Rd, Ra, 3, 22, Rc3)
86
87 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
88 ROUND1(Ra, Rb, Rc, Rd, 4, 7, Rc0)
89 ROUND1(Rd, Ra, Rb, Rc, 5, 12, Rc1)
90 ROUND1(Rc, Rd, Ra, Rb, 6, 17, Rc2)
91 ROUND1(Rb, Rc, Rd, Ra, 7, 22, Rc3)
92
93 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
94 ROUND1(Ra, Rb, Rc, Rd, 8, 7, Rc0)
95 ROUND1(Rd, Ra, Rb, Rc, 9, 12, Rc1)
96 ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
97 ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)
98
99 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
100 ROUND1(Ra, Rb, Rc, Rd, 12, 7, Rc0)
101 ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
102 ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
103 ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)
104
105 // a += (((b^c)&d)^c) + X[index] + const
106 // a = (a<<shift | a>>(32-shift)) + b
107 #define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
108 EOR Rb, Rc, Rt0 ; \
109 AND Rd, Rt0 ; \
110 EOR Rc, Rt0 ; \
111 MOVW (index<<2)(Rdata), Rt1 ; \
112 ADD Rt1, Rt0 ; \
113 ADD Rconst, Rt0 ; \
114 ADD Rt0, Ra ; \
115 ADD Ra@>(32-shift), Rb, Ra ;
116
117 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
118 ROUND2(Ra, Rb, Rc, Rd, 1, 5, Rc0)
119 ROUND2(Rd, Ra, Rb, Rc, 6, 9, Rc1)
120 ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
121 ROUND2(Rb, Rc, Rd, Ra, 0, 20, Rc3)
122
123 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
124 ROUND2(Ra, Rb, Rc, Rd, 5, 5, Rc0)
125 ROUND2(Rd, Ra, Rb, Rc, 10, 9, Rc1)
126 ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
127 ROUND2(Rb, Rc, Rd, Ra, 4, 20, Rc3)
128
129 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
130 ROUND2(Ra, Rb, Rc, Rd, 9, 5, Rc0)
131 ROUND2(Rd, Ra, Rb, Rc, 14, 9, Rc1)
132 ROUND2(Rc, Rd, Ra, Rb, 3, 14, Rc2)
133 ROUND2(Rb, Rc, Rd, Ra, 8, 20, Rc3)
134
135 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
136 ROUND2(Ra, Rb, Rc, Rd, 13, 5, Rc0)
137 ROUND2(Rd, Ra, Rb, Rc, 2, 9, Rc1)
138 ROUND2(Rc, Rd, Ra, Rb, 7, 14, Rc2)
139 ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)
140
141 // a += (b^c^d) + X[index] + const
142 // a = (a<<shift | a>>(32-shift)) + b
143 #define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
144 EOR Rb, Rc, Rt0 ; \
145 EOR Rd, Rt0 ; \
146 MOVW (index<<2)(Rdata), Rt1 ; \
147 ADD Rt1, Rt0 ; \
148 ADD Rconst, Rt0 ; \
149 ADD Rt0, Ra ; \
150 ADD Ra@>(32-shift), Rb, Ra ;
151
152 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
153 ROUND3(Ra, Rb, Rc, Rd, 5, 4, Rc0)
154 ROUND3(Rd, Ra, Rb, Rc, 8, 11, Rc1)
155 ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
156 ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)
157
158 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
159 ROUND3(Ra, Rb, Rc, Rd, 1, 4, Rc0)
160 ROUND3(Rd, Ra, Rb, Rc, 4, 11, Rc1)
161 ROUND3(Rc, Rd, Ra, Rb, 7, 16, Rc2)
162 ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)
163
164 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
165 ROUND3(Ra, Rb, Rc, Rd, 13, 4, Rc0)
166 ROUND3(Rd, Ra, Rb, Rc, 0, 11, Rc1)
167 ROUND3(Rc, Rd, Ra, Rb, 3, 16, Rc2)
168 ROUND3(Rb, Rc, Rd, Ra, 6, 23, Rc3)
169
170 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
171 ROUND3(Ra, Rb, Rc, Rd, 9, 4, Rc0)
172 ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
173 ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
174 ROUND3(Rb, Rc, Rd, Ra, 2, 23, Rc3)
175
176 // a += (c^(b|^d)) + X[index] + const
177 // a = (a<<shift | a>>(32-shift)) + b
178 #define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
179 MVN Rd, Rt0 ; \
180 ORR Rb, Rt0 ; \
181 EOR Rc, Rt0 ; \
182 MOVW (index<<2)(Rdata), Rt1 ; \
183 ADD Rt1, Rt0 ; \
184 ADD Rconst, Rt0 ; \
185 ADD Rt0, Ra ; \
186 ADD Ra@>(32-shift), Rb, Ra ;
187
188 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
189 ROUND4(Ra, Rb, Rc, Rd, 0, 6, Rc0)
190 ROUND4(Rd, Ra, Rb, Rc, 7, 10, Rc1)
191 ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
192 ROUND4(Rb, Rc, Rd, Ra, 5, 21, Rc3)
193
194 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
195 ROUND4(Ra, Rb, Rc, Rd, 12, 6, Rc0)
196 ROUND4(Rd, Ra, Rb, Rc, 3, 10, Rc1)
197 ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
198 ROUND4(Rb, Rc, Rd, Ra, 1, 21, Rc3)
199
200 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
201 ROUND4(Ra, Rb, Rc, Rd, 8, 6, Rc0)
202 ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
203 ROUND4(Rc, Rd, Ra, Rb, 6, 15, Rc2)
204 ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)
205
206 MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
207 ROUND4(Ra, Rb, Rc, Rd, 4, 6, Rc0)
208 ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
209 ROUND4(Rc, Rd, Ra, Rb, 2, 15, Rc2)
210 ROUND4(Rb, Rc, Rd, Ra, 9, 21, Rc3)
211
// Add the state this block started from (re-read from *dig into the now-free
// constant registers) to the working values and store the sums back to *dig.
212 MOVW dig+0(FP), Rt0
213 MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3]
214
215 ADD Rc0, Ra
216 ADD Rc1, Rb
217 ADD Rc2, Rc
218 ADD Rc3, Rd
219
220 MOVM.IA [Ra,Rb,Rc,Rd], (Rt0)
221
// Advance the saved data pointer (not Rdata, which may point at buf) by one
// block and loop while it is still below p_end (BLO is an unsigned compare).
222 MOVW p_data, Rdata
223 MOVW p_end, Rt0
224 ADD $64, Rdata
225 CMP Rt0, Rdata
226 BLO loop
227
228 RET
229
230 // MD5 constants table
// 64 32-bit words (256 bytes), one additive constant per MD5 step, stored in
// the order the steps execute. ·block walks the table sequentially, loading
// four words at a time with MOVM.IA.W, so entries must stay in this order
// and each round must keep exactly 16 entries.
231
232 // Round 1
233 DATA ·table+0x00(SB)/4, $0xd76aa478
234 DATA ·table+0x04(SB)/4, $0xe8c7b756
235 DATA ·table+0x08(SB)/4, $0x242070db
236 DATA ·table+0x0c(SB)/4, $0xc1bdceee
237 DATA ·table+0x10(SB)/4, $0xf57c0faf
238 DATA ·table+0x14(SB)/4, $0x4787c62a
239 DATA ·table+0x18(SB)/4, $0xa8304613
240 DATA ·table+0x1c(SB)/4, $0xfd469501
241 DATA ·table+0x20(SB)/4, $0x698098d8
242 DATA ·table+0x24(SB)/4, $0x8b44f7af
243 DATA ·table+0x28(SB)/4, $0xffff5bb1
244 DATA ·table+0x2c(SB)/4, $0x895cd7be
245 DATA ·table+0x30(SB)/4, $0x6b901122
246 DATA ·table+0x34(SB)/4, $0xfd987193
247 DATA ·table+0x38(SB)/4, $0xa679438e
248 DATA ·table+0x3c(SB)/4, $0x49b40821
249 // Round 2
250 DATA ·table+0x40(SB)/4, $0xf61e2562
251 DATA ·table+0x44(SB)/4, $0xc040b340
252 DATA ·table+0x48(SB)/4, $0x265e5a51
253 DATA ·table+0x4c(SB)/4, $0xe9b6c7aa
254 DATA ·table+0x50(SB)/4, $0xd62f105d
255 DATA ·table+0x54(SB)/4, $0x02441453
256 DATA ·table+0x58(SB)/4, $0xd8a1e681
257 DATA ·table+0x5c(SB)/4, $0xe7d3fbc8
258 DATA ·table+0x60(SB)/4, $0x21e1cde6
259 DATA ·table+0x64(SB)/4, $0xc33707d6
260 DATA ·table+0x68(SB)/4, $0xf4d50d87
261 DATA ·table+0x6c(SB)/4, $0x455a14ed
262 DATA ·table+0x70(SB)/4, $0xa9e3e905
263 DATA ·table+0x74(SB)/4, $0xfcefa3f8
264 DATA ·table+0x78(SB)/4, $0x676f02d9
265 DATA ·table+0x7c(SB)/4, $0x8d2a4c8a
266 // Round 3
267 DATA ·table+0x80(SB)/4, $0xfffa3942
268 DATA ·table+0x84(SB)/4, $0x8771f681
269 DATA ·table+0x88(SB)/4, $0x6d9d6122
270 DATA ·table+0x8c(SB)/4, $0xfde5380c
271 DATA ·table+0x90(SB)/4, $0xa4beea44
272 DATA ·table+0x94(SB)/4, $0x4bdecfa9
273 DATA ·table+0x98(SB)/4, $0xf6bb4b60
274 DATA ·table+0x9c(SB)/4, $0xbebfbc70
275 DATA ·table+0xa0(SB)/4, $0x289b7ec6
276 DATA ·table+0xa4(SB)/4, $0xeaa127fa
277 DATA ·table+0xa8(SB)/4, $0xd4ef3085
278 DATA ·table+0xac(SB)/4, $0x04881d05
279 DATA ·table+0xb0(SB)/4, $0xd9d4d039
280 DATA ·table+0xb4(SB)/4, $0xe6db99e5
281 DATA ·table+0xb8(SB)/4, $0x1fa27cf8
282 DATA ·table+0xbc(SB)/4, $0xc4ac5665
283 // Round 4
284 DATA ·table+0xc0(SB)/4, $0xf4292244
285 DATA ·table+0xc4(SB)/4, $0x432aff97
286 DATA ·table+0xc8(SB)/4, $0xab9423a7
287 DATA ·table+0xcc(SB)/4, $0xfc93a039
288 DATA ·table+0xd0(SB)/4, $0x655b59c3
289 DATA ·table+0xd4(SB)/4, $0x8f0ccc92
290 DATA ·table+0xd8(SB)/4, $0xffeff47d
291 DATA ·table+0xdc(SB)/4, $0x85845dd1
292 DATA ·table+0xe0(SB)/4, $0x6fa87e4f
293 DATA ·table+0xe4(SB)/4, $0xfe2ce6e0
294 DATA ·table+0xe8(SB)/4, $0xa3014314
295 DATA ·table+0xec(SB)/4, $0x4e0811a1
296 DATA ·table+0xf0(SB)/4, $0xf7537e82
297 DATA ·table+0xf4(SB)/4, $0xbd3af235
298 DATA ·table+0xf8(SB)/4, $0x2ad7d2bb
299 DATA ·table+0xfc(SB)/4, $0xeb86d391
300 // Global definition
// RODATA (from textflag.h, value 8) places the table in read-only data;
// the size is 64 entries * 4 bytes = 256 bytes.
301 GLOBL ·table(SB), RODATA, $256
302
View as plain text