Text file
src/crypto/sha1/sha1block_arm.s
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 //
5 // ARM version of sha1block.go
6
7 //go:build !purego
8
9 #include "textflag.h"
10
11 // SHA-1 block routine. See sha1block.go for Go equivalent.
12 //
13 // There are 80 rounds of 4 types:
14 // - rounds 0-15 are type 1 and load data (ROUND1 macro).
15 // - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
16 // - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
17 // - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
18 // - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
19 //
20 // Each round loads or shuffles the data, then computes a per-round
21 // function of b, c, d, and then mixes the result into and rotates the
22 // five registers a, b, c, d, e holding the intermediate results.
23 //
24 // The register rotation is implemented by rotating the arguments to
25 // the round macros instead of by explicit move instructions.
26
27 // Register definitions: symbolic names for the ARM registers used by block and its macros
28 #define Rdata R0 // Pointer to the next input bytes; advanced by 4 in every LOAD
29 #define Rconst R1 // Current constant for SHA round; added to e in MIX, reloaded before each group of 20 rounds
30 #define Ra R2 // SHA-1 accumulator a (loaded from and stored back to the digest by block)
31 #define Rb R3 // SHA-1 accumulator b
32 #define Rc R4 // SHA-1 accumulator c
33 #define Rd R5 // SHA-1 accumulator d
34 #define Re R6 // SHA-1 accumulator e
35 #define Rt0 R7 // Temporary; block also uses it for the digest pointer and for p_end
36 #define Rt1 R8 // Temporary; FUNC1..FUNC4 leave their result here and MIX consumes it
37 // r9, r10 are forbidden
38 // r11 is OK provided you check that the assembler emits no synthetic instructions that use it
39 #define Rt2 R11 // Temporary
40 #define Rctr R12 // loop counter for the 5-round unrolled loops; reused as scratch in the final accumulate
41 #define Rw R14 // points to the next free word of the w buffer; reused as scratch in the final accumulate
42
43 // func block(dig *digest, p []byte)
44 // 0(FP) is *digest
45 // 4(FP) is p.array (struct Slice)
46 // 8(FP) is p.len
47 //12(FP) is p.cap
48 //
49 // Stack frame: named slots addressed as negative offsets from SP
50 #define p_end end-4(SP) // pointer to the end of data; written once at entry to block and reloaded after every 64-byte chunk
51 #define p_data data-8(SP) // current data pointer; not referenced by name in the code shown here
52 #define w_buf buf-(8+4*80)(SP) // 80 words temporary buffer w uint32[80]; filled in order by LOAD and SHUFFLE
53 #define saved abcde-(8+4*80+4*5)(SP) // saved sha1 registers a,b,c,d,e - these must be last; not referenced by name, block saves them with MOVM.IB on R13 instead
54 // Total size +4 for saved LR is 352 (8 + 4*80 + 4*5 + 4)
55
56 // LOAD(Re): w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3], assembled from four byte loads; Rdata is post-incremented by 4
57 // e += w[i]; w[i] is stored through Rw, which is post-incremented by 4. Clobbers Rt0, Rt1, Rt2.
58 #define LOAD(Re) \
59 MOVBU 2(Rdata), Rt0 ; \
60 MOVBU 3(Rdata), Rt1 ; \
61 MOVBU 1(Rdata), Rt2 ; \
62 ORR Rt0<<8, Rt1, Rt0 ; \
63 MOVBU.P 4(Rdata), Rt1 ; \
64 ORR Rt2<<16, Rt0, Rt0 ; \
65 ORR Rt1<<24, Rt0, Rt0 ; \
66 MOVW.P Rt0, 4(Rw) ; \
67 ADD Rt0, Re, Re
68
69 // SHUFFLE(Re): tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], read at fixed negative word offsets from Rw (w is indexed linearly here, so there is no &0xf masking)
70 // w[i] = tmp<<1 | tmp>>(32-1), done as a rotate right by 31 and stored through Rw, which is post-incremented by 4
71 // e += w[i]. Clobbers Rt0, Rt1, Rt2.
72 #define SHUFFLE(Re) \
73 MOVW (-16*4)(Rw), Rt0 ; \
74 MOVW (-14*4)(Rw), Rt1 ; \
75 MOVW (-8*4)(Rw), Rt2 ; \
76 EOR Rt0, Rt1, Rt0 ; \
77 MOVW (-3*4)(Rw), Rt1 ; \
78 EOR Rt2, Rt0, Rt0 ; \
79 EOR Rt0, Rt1, Rt0 ; \
80 MOVW Rt0@>(32-1), Rt0 ; \
81 MOVW.P Rt0, 4(Rw) ; \
82 ADD Rt0, Re, Re
83
84 // FUNC1: t1 = (b & c) | ((~b) & d); the result is left in Rt1 for MIX, Rt0 is clobbered, Ra and Re are not used
85 #define FUNC1(Ra, Rb, Rc, Rd, Re) \
86 MVN Rb, Rt1 ; \
87 AND Rb, Rc, Rt0 ; \
88 AND Rd, Rt1, Rt1 ; \
89 ORR Rt0, Rt1, Rt1
90
91 // FUNC2: t1 = b ^ c ^ d; the result is left in Rt1 for MIX and no other register is written
92 #define FUNC2(Ra, Rb, Rc, Rd, Re) \
93 EOR Rb, Rc, Rt1 ; \
94 EOR Rd, Rt1, Rt1
95
96 // FUNC3: t1 = (b & c) | (b & d) | (c & d) =
97 // t1 = (b & c) | ((b | c) & d); the result is left in Rt1 for MIX, Rt0 is clobbered
98 #define FUNC3(Ra, Rb, Rc, Rd, Re) \
99 ORR Rb, Rc, Rt0 ; \
100 AND Rb, Rc, Rt1 ; \
101 AND Rd, Rt0, Rt0 ; \
102 ORR Rt0, Rt1, Rt1
103
104 #define FUNC4 FUNC2 // type 4 rounds reuse the b ^ c ^ d function of the type 2 rounds
105
106 // MIX: a5 := a<<5 | a>>(32-5), applied as a rotated operand of the ADD, so Ra itself is not modified
107 // b = b<<30 | b>>(32-30), i.e. Rb is rotated right by 2 in place
108 // e = a5 + t1 + e + const, with t1 read from Rt1 and const read from Rconst
109 #define MIX(Ra, Rb, Rc, Rd, Re) \
110 ADD Rt1, Re, Re ; \
111 MOVW Rb@>(32-30), Rb ; \
112 ADD Ra@>(32-5), Re, Re ; \
113 ADD Rconst, Re, Re
114 // ROUND1 (rounds 0-15): load the next big-endian message word, add it to e, then apply FUNC1 and MIX
115 #define ROUND1(Ra, Rb, Rc, Rd, Re) \
116 LOAD(Re) ; \
117 FUNC1(Ra, Rb, Rc, Rd, Re) ; \
118 MIX(Ra, Rb, Rc, Rd, Re)
119 // ROUND1x (rounds 16-19): same as ROUND1, but the word comes from SHUFFLE instead of the input data
120 #define ROUND1x(Ra, Rb, Rc, Rd, Re) \
121 SHUFFLE(Re) ; \
122 FUNC1(Ra, Rb, Rc, Rd, Re) ; \
123 MIX(Ra, Rb, Rc, Rd, Re)
124 // ROUND2 (rounds 20-39): SHUFFLE a new word into w and e, then apply FUNC2 and MIX
125 #define ROUND2(Ra, Rb, Rc, Rd, Re) \
126 SHUFFLE(Re) ; \
127 FUNC2(Ra, Rb, Rc, Rd, Re) ; \
128 MIX(Ra, Rb, Rc, Rd, Re)
129 // ROUND3 (rounds 40-59): SHUFFLE a new word into w and e, then apply FUNC3 and MIX
130 #define ROUND3(Ra, Rb, Rc, Rd, Re) \
131 SHUFFLE(Re) ; \
132 FUNC3(Ra, Rb, Rc, Rd, Re) ; \
133 MIX(Ra, Rb, Rc, Rd, Re)
134 // ROUND4 (rounds 60-79): SHUFFLE a new word into w and e, then apply FUNC4 (same as FUNC2) and MIX
135 #define ROUND4(Ra, Rb, Rc, Rd, Re) \
136 SHUFFLE(Re) ; \
137 FUNC4(Ra, Rb, Rc, Rd, Re) ; \
138 MIX(Ra, Rb, Rc, Rd, Re)
139
140
141 // func block(dig *digest, p []byte): folds p into the five state words at dig, 64 bytes per pass of the outer loop. The loop test is at the bottom, so one chunk is always processed; p is presumably a non-empty multiple of 64 bytes (not checked here - confirm against callers).
142 TEXT ·block(SB), 0, $352-16
143 MOVW p+4(FP), Rdata // pointer to the data
144 MOVW p_len+8(FP), Rt0 // number of bytes
145 ADD Rdata, Rt0 // Rt0 = data pointer + number of bytes
146 MOVW Rt0, p_end // pointer to end of data
147
148 // Load up initial SHA-1 accumulator
149 MOVW dig+0(FP), Rt0
150 MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re] // a, b, c, d, e = the five words starting at dig
151
152 loop:
153 // Save registers at SP+4 onwards, so the pre-chunk a..e can be added back after the 80 rounds
154 MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13)
155
156 MOVW $w_buf, Rw // Rw = address of w[0]; LOAD and SHUFFLE advance it one word per round
157 MOVW $0x5A827999, Rconst // constant for rounds 0-19
158 MOVW $3, Rctr // 3 passes x 5 rounds = rounds 0-14
159 loop1: ROUND1(Ra, Rb, Rc, Rd, Re)
160 ROUND1(Re, Ra, Rb, Rc, Rd)
161 ROUND1(Rd, Re, Ra, Rb, Rc)
162 ROUND1(Rc, Rd, Re, Ra, Rb)
163 ROUND1(Rb, Rc, Rd, Re, Ra)
164 SUB.S $1, Rctr // .S updates the flags tested by BNE
165 BNE loop1
166
167 ROUND1(Ra, Rb, Rc, Rd, Re) // round 15: the last round that loads input data
168 ROUND1x(Re, Ra, Rb, Rc, Rd) // rounds 16-19: type 1 function on shuffled words
169 ROUND1x(Rd, Re, Ra, Rb, Rc)
170 ROUND1x(Rc, Rd, Re, Ra, Rb)
171 ROUND1x(Rb, Rc, Rd, Re, Ra)
172
173 MOVW $0x6ED9EBA1, Rconst // constant for rounds 20-39
174 MOVW $4, Rctr // 4 passes x 5 rounds = 20 rounds
175 loop2: ROUND2(Ra, Rb, Rc, Rd, Re)
176 ROUND2(Re, Ra, Rb, Rc, Rd)
177 ROUND2(Rd, Re, Ra, Rb, Rc)
178 ROUND2(Rc, Rd, Re, Ra, Rb)
179 ROUND2(Rb, Rc, Rd, Re, Ra)
180 SUB.S $1, Rctr
181 BNE loop2
182
183 MOVW $0x8F1BBCDC, Rconst // constant for rounds 40-59
184 MOVW $4, Rctr // 4 passes x 5 rounds = 20 rounds
185 loop3: ROUND3(Ra, Rb, Rc, Rd, Re)
186 ROUND3(Re, Ra, Rb, Rc, Rd)
187 ROUND3(Rd, Re, Ra, Rb, Rc)
188 ROUND3(Rc, Rd, Re, Ra, Rb)
189 ROUND3(Rb, Rc, Rd, Re, Ra)
190 SUB.S $1, Rctr
191 BNE loop3
192
193 MOVW $0xCA62C1D6, Rconst // constant for rounds 60-79
194 MOVW $4, Rctr // 4 passes x 5 rounds = 20 rounds
195 loop4: ROUND4(Ra, Rb, Rc, Rd, Re)
196 ROUND4(Re, Ra, Rb, Rc, Rd)
197 ROUND4(Rd, Re, Ra, Rb, Rc)
198 ROUND4(Rc, Rd, Re, Ra, Rb)
199 ROUND4(Rb, Rc, Rd, Re, Ra)
200 SUB.S $1, Rctr
201 BNE loop4
202
203 // Accumulate - restoring registers from SP+4 into scratch registers (Rctr and Rw are free once the rounds are done) and adding them to the new a..e
204 MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw]
205 ADD Rt0, Ra
206 ADD Rt1, Rb
207 ADD Rt2, Rc
208 ADD Rctr, Rd
209 ADD Rw, Re
210
211 MOVW p_end, Rt0
212 CMP Rt0, Rdata
213 BLO loop // process another chunk while Rdata is still below p_end (unsigned compare)
214
215 // Save final SHA-1 accumulator
216 MOVW dig+0(FP), Rt0
217 MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0) // write a, b, c, d, e back to the five words at dig
218
219 RET
220
View as plain text