1
2
3
4
5 package main
6
7 import (
8 "os"
9
10 . "github.com/mmcloughlin/avo/build"
11 . "github.com/mmcloughlin/avo/operand"
12 . "github.com/mmcloughlin/avo/reg"
13 )
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57 func main() {
58
59 os.Setenv("GOOS", "linux")
60 os.Setenv("GOARCH", "amd64")
61
62 Package("crypto/internal/fips/sha256")
63 ConstraintExpr("!purego")
64 blockAMD64()
65 blockAVX2()
66 blockSHANI()
67 Generate()
68 }
69
70
71 func msgSchedule0(index int) {
72 MOVL(Mem{Base: SI}.Offset(index*4), EAX)
73 BSWAPL(EAX)
74 MOVL(EAX, Mem{Base: BP}.Offset(index*4))
75 }
76
77
78
79
80
81 func msgSchedule1(index int) {
82 MOVL(Mem{Base: BP}.Offset((index-2)*4), EAX)
83 MOVL(EAX, ECX)
84 RORL(Imm(17), EAX)
85 MOVL(ECX, EDX)
86 RORL(Imm(19), ECX)
87 SHRL(Imm(10), EDX)
88 MOVL(Mem{Base: BP}.Offset((index-15)*4), EBX)
89 XORL(ECX, EAX)
90 MOVL(EBX, ECX)
91 XORL(EDX, EAX)
92 RORL(Imm(7), EBX)
93 MOVL(ECX, EDX)
94 SHRL(Imm(3), EDX)
95 RORL(Imm(18), ECX)
96 ADDL(Mem{Base: BP}.Offset((index-7)*4), EAX)
97 XORL(ECX, EBX)
98 XORL(EDX, EBX)
99 ADDL(Mem{Base: BP}.Offset((index-16)*4), EBX)
100 ADDL(EBX, EAX)
101 MOVL(EAX, Mem{Base: BP}.Offset((index)*4))
102 }
103
104
105
106
107
108
109
110 func sha256T1(konst uint32, e, f, g, h GPPhysical) {
111 ADDL(EAX, h)
112 MOVL(e, EAX)
113 ADDL(U32(konst), h)
114 MOVL(e, ECX)
115 RORL(U8(6), EAX)
116 MOVL(e, EDX)
117 RORL(U8(11), ECX)
118 XORL(ECX, EAX)
119 MOVL(e, ECX)
120 RORL(U8(25), EDX)
121 ANDL(f, ECX)
122 XORL(EAX, EDX)
123 MOVL(e, EAX)
124 NOTL(EAX)
125 ADDL(EDX, h)
126 ANDL(g, EAX)
127 XORL(ECX, EAX)
128 ADDL(h, EAX)
129 }
130
131
132
133
134
135
136 func sha256T2(a, b, c GPPhysical) {
137 MOVL(a, EDI)
138 MOVL(c, EBX)
139 RORL(U8(2), EDI)
140 MOVL(a, EDX)
141 ANDL(b, EBX)
142 RORL(U8(13), EDX)
143 MOVL(a, ECX)
144 ANDL(c, ECX)
145 XORL(EDX, EDI)
146 XORL(ECX, EBX)
147 MOVL(a, EDX)
148 MOVL(b, ECX)
149 RORL(U8(22), EDX)
150 ANDL(a, ECX)
151 XORL(ECX, EBX)
152 XORL(EDX, EDI)
153 ADDL(EDI, EBX)
154 }
155
156
157
158 func sha256Round(index int, konst uint32, a, b, c, d, e, f, g, h GPPhysical) {
159 sha256T1(konst, e, f, g, h)
160 sha256T2(a, b, c)
161 MOVL(EBX, h)
162 ADDL(EAX, d)
163 ADDL(EAX, h)
164 }
165
166 func sha256Round0(index int, konst uint32, a, b, c, d, e, f, g, h GPPhysical) {
167 msgSchedule0(index)
168 sha256Round(index, konst, a, b, c, d, e, f, g, h)
169 }
170
171 func sha256Round1(index int, konst uint32, a, b, c, d, e, f, g, h GPPhysical) {
172 msgSchedule1(index)
173 sha256Round(index, konst, a, b, c, d, e, f, g, h)
174 }
175
176 func blockAMD64() {
177 Implement("blockAMD64")
178 AllocLocal(256 + 8)
179
180 Load(Param("p").Base(), RSI)
181 Load(Param("p").Len(), RDX)
182 SHRQ(Imm(6), RDX)
183 SHLQ(Imm(6), RDX)
184
185
186 LEAQ(Mem{Base: RSI, Index: RDX, Scale: 1}, RDI)
187 MOVQ(RDI, Mem{Base: SP}.Offset(256))
188 CMPQ(RSI, RDI)
189 JEQ(LabelRef("end"))
190
191 BP := Mem{Base: BP}
192 Load(Param("dig"), RBP)
193 MOVL(BP.Offset(0*4), R8L)
194 MOVL(BP.Offset(1*4), R9L)
195 MOVL(BP.Offset(2*4), R10L)
196 MOVL(BP.Offset(3*4), R11L)
197 MOVL(BP.Offset(4*4), R12L)
198 MOVL(BP.Offset(5*4), R13L)
199 MOVL(BP.Offset(6*4), R14L)
200 MOVL(BP.Offset(7*4), R15L)
201
202 loop()
203 end()
204 }
205
206 func rotateRight(slice *[]GPPhysical) []GPPhysical {
207 n := len(*slice)
208 new := make([]GPPhysical, n)
209 for i, reg := range *slice {
210 new[(i+1)%n] = reg
211 }
212 return new
213 }
214
215 func loop() {
216 Label("loop")
217 MOVQ(RSP, RBP)
218
219 regs := []GPPhysical{R8L, R9L, R10L, R11L, R12L, R13L, R14L, R15L}
220 n := len(_K)
221
222 for i := 0; i < 16; i++ {
223 sha256Round0(i, _K[i], regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7])
224 regs = rotateRight(®s)
225 }
226
227 for i := 16; i < n; i++ {
228 sha256Round1(i, _K[i], regs[0], regs[1], regs[2], regs[3], regs[4], regs[5], regs[6], regs[7])
229 regs = rotateRight(®s)
230 }
231
232 Load(Param("dig"), RBP)
233 BP := Mem{Base: BP}
234 ADDL(BP.Offset(0*4), R8L)
235 MOVL(R8L, BP.Offset(0*4))
236 ADDL(BP.Offset(1*4), R9L)
237 MOVL(R9L, BP.Offset(1*4))
238 ADDL(BP.Offset(2*4), R10L)
239 MOVL(R10L, BP.Offset(2*4))
240 ADDL(BP.Offset(3*4), R11L)
241 MOVL(R11L, BP.Offset(3*4))
242 ADDL(BP.Offset(4*4), R12L)
243 MOVL(R12L, BP.Offset(4*4))
244 ADDL(BP.Offset(5*4), R13L)
245 MOVL(R13L, BP.Offset(5*4))
246 ADDL(BP.Offset(6*4), R14L)
247 MOVL(R14L, BP.Offset(6*4))
248 ADDL(BP.Offset(7*4), R15L)
249 MOVL(R15L, BP.Offset(7*4))
250
251 ADDQ(Imm(64), RSI)
252 CMPQ(RSI, Mem{Base: SP}.Offset(256))
253 JB(LabelRef("loop"))
254 }
255
256 func end() {
257 Label("end")
258 RET()
259 }
260
// _K holds the 64 per-round constants, indexed by round number; loop() passes
// _K[i] to round i. The values match the SHA-256 constants listed in
// FIPS 180-4, section 4.2.2.
var _K = []uint32{
	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, // 0..3
	0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, // 4..7
	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, // 8..11
	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, // 12..15
	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, // 16..19
	0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, // 20..23
	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, // 24..27
	0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, // 28..31
	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, // 32..35
	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, // 36..39
	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, // 40..43
	0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, // 44..47
	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, // 48..51
	0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, // 52..55
	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, // 56..59
	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, // 60..63
}
327
View as plain text