Source file
src/crypto/md5/_asm/md5block_amd64_asm.go
1
2
3
4
5
6
7
8
9
10
11
12 package main
13
14 import (
15 . "github.com/mmcloughlin/avo/build"
16 . "github.com/mmcloughlin/avo/operand"
17 . "github.com/mmcloughlin/avo/reg"
18 )
19
20
21
22 func main() {
23 Package("crypto/md5")
24 ConstraintExpr("!purego")
25 block()
26 Generate()
27 }
28
29
30
31
32
33
34 func block() {
35 Implement("block")
36 Attributes(NOSPLIT)
37 AllocLocal(8)
38
39 Load(Param("dig"), RBP)
40 Load(Param("p").Base(), RSI)
41 Load(Param("p").Len(), RDX)
42 SHRQ(Imm(6), RDX)
43 SHLQ(Imm(6), RDX)
44
45 LEAQ(Mem{Base: SI, Index: DX, Scale: 1}, RDI)
46 MOVL(Mem{Base: BP}.Offset(0*4), EAX)
47 MOVL(Mem{Base: BP}.Offset(1*4), EBX)
48 MOVL(Mem{Base: BP}.Offset(2*4), ECX)
49 MOVL(Mem{Base: BP}.Offset(3*4), EDX)
50 MOVL(Imm(0xffffffff), R11L)
51
52 CMPQ(RSI, RDI)
53 JEQ(LabelRef("end"))
54
55 loop()
56 end()
57 }
58
59 func loop() {
60 Label("loop")
61 MOVL(EAX, R12L)
62 MOVL(EBX, R13L)
63 MOVL(ECX, R14L)
64 MOVL(EDX, R15L)
65
66 MOVL(Mem{Base: SI}.Offset(0*4), R8L)
67 MOVL(EDX, R9L)
68
69 ROUND1(EAX, EBX, ECX, EDX, 1, 0xd76aa478, 7)
70 ROUND1(EDX, EAX, EBX, ECX, 2, 0xe8c7b756, 12)
71 ROUND1(ECX, EDX, EAX, EBX, 3, 0x242070db, 17)
72 ROUND1(EBX, ECX, EDX, EAX, 4, 0xc1bdceee, 22)
73 ROUND1(EAX, EBX, ECX, EDX, 5, 0xf57c0faf, 7)
74 ROUND1(EDX, EAX, EBX, ECX, 6, 0x4787c62a, 12)
75 ROUND1(ECX, EDX, EAX, EBX, 7, 0xa8304613, 17)
76 ROUND1(EBX, ECX, EDX, EAX, 8, 0xfd469501, 22)
77 ROUND1(EAX, EBX, ECX, EDX, 9, 0x698098d8, 7)
78 ROUND1(EDX, EAX, EBX, ECX, 10, 0x8b44f7af, 12)
79 ROUND1(ECX, EDX, EAX, EBX, 11, 0xffff5bb1, 17)
80 ROUND1(EBX, ECX, EDX, EAX, 12, 0x895cd7be, 22)
81 ROUND1(EAX, EBX, ECX, EDX, 13, 0x6b901122, 7)
82 ROUND1(EDX, EAX, EBX, ECX, 14, 0xfd987193, 12)
83 ROUND1(ECX, EDX, EAX, EBX, 15, 0xa679438e, 17)
84 ROUND1(EBX, ECX, EDX, EAX, 1, 0x49b40821, 22)
85
86 MOVL(EDX, R9L)
87 MOVL(EDX, R10L)
88
89 ROUND2(EAX, EBX, ECX, EDX, 6, 0xf61e2562, 5)
90 ROUND2(EDX, EAX, EBX, ECX, 11, 0xc040b340, 9)
91 ROUND2(ECX, EDX, EAX, EBX, 0, 0x265e5a51, 14)
92 ROUND2(EBX, ECX, EDX, EAX, 5, 0xe9b6c7aa, 20)
93 ROUND2(EAX, EBX, ECX, EDX, 10, 0xd62f105d, 5)
94 ROUND2(EDX, EAX, EBX, ECX, 15, 0x2441453, 9)
95 ROUND2(ECX, EDX, EAX, EBX, 4, 0xd8a1e681, 14)
96 ROUND2(EBX, ECX, EDX, EAX, 9, 0xe7d3fbc8, 20)
97 ROUND2(EAX, EBX, ECX, EDX, 14, 0x21e1cde6, 5)
98 ROUND2(EDX, EAX, EBX, ECX, 3, 0xc33707d6, 9)
99 ROUND2(ECX, EDX, EAX, EBX, 8, 0xf4d50d87, 14)
100 ROUND2(EBX, ECX, EDX, EAX, 13, 0x455a14ed, 20)
101 ROUND2(EAX, EBX, ECX, EDX, 2, 0xa9e3e905, 5)
102 ROUND2(EDX, EAX, EBX, ECX, 7, 0xfcefa3f8, 9)
103 ROUND2(ECX, EDX, EAX, EBX, 12, 0x676f02d9, 14)
104 ROUND2(EBX, ECX, EDX, EAX, 5, 0x8d2a4c8a, 20)
105
106 MOVL(ECX, R9L)
107
108 ROUND3FIRST(EAX, EBX, ECX, EDX, 8, 0xfffa3942, 4)
109 ROUND3(EDX, EAX, EBX, ECX, 11, 0x8771f681, 11)
110 ROUND3(ECX, EDX, EAX, EBX, 14, 0x6d9d6122, 16)
111 ROUND3(EBX, ECX, EDX, EAX, 1, 0xfde5380c, 23)
112 ROUND3(EAX, EBX, ECX, EDX, 4, 0xa4beea44, 4)
113 ROUND3(EDX, EAX, EBX, ECX, 7, 0x4bdecfa9, 11)
114 ROUND3(ECX, EDX, EAX, EBX, 10, 0xf6bb4b60, 16)
115 ROUND3(EBX, ECX, EDX, EAX, 13, 0xbebfbc70, 23)
116 ROUND3(EAX, EBX, ECX, EDX, 0, 0x289b7ec6, 4)
117 ROUND3(EDX, EAX, EBX, ECX, 3, 0xeaa127fa, 11)
118 ROUND3(ECX, EDX, EAX, EBX, 6, 0xd4ef3085, 16)
119 ROUND3(EBX, ECX, EDX, EAX, 9, 0x4881d05, 23)
120 ROUND3(EAX, EBX, ECX, EDX, 12, 0xd9d4d039, 4)
121 ROUND3(EDX, EAX, EBX, ECX, 15, 0xe6db99e5, 11)
122 ROUND3(ECX, EDX, EAX, EBX, 2, 0x1fa27cf8, 16)
123 ROUND3(EBX, ECX, EDX, EAX, 0, 0xc4ac5665, 23)
124
125 MOVL(R11L, R9L)
126 XORL(EDX, R9L)
127
128 ROUND4(EAX, EBX, ECX, EDX, 7, 0xf4292244, 6)
129 ROUND4(EDX, EAX, EBX, ECX, 14, 0x432aff97, 10)
130 ROUND4(ECX, EDX, EAX, EBX, 5, 0xab9423a7, 15)
131 ROUND4(EBX, ECX, EDX, EAX, 12, 0xfc93a039, 21)
132 ROUND4(EAX, EBX, ECX, EDX, 3, 0x655b59c3, 6)
133 ROUND4(EDX, EAX, EBX, ECX, 10, 0x8f0ccc92, 10)
134 ROUND4(ECX, EDX, EAX, EBX, 1, 0xffeff47d, 15)
135 ROUND4(EBX, ECX, EDX, EAX, 8, 0x85845dd1, 21)
136 ROUND4(EAX, EBX, ECX, EDX, 15, 0x6fa87e4f, 6)
137 ROUND4(EDX, EAX, EBX, ECX, 6, 0xfe2ce6e0, 10)
138 ROUND4(ECX, EDX, EAX, EBX, 13, 0xa3014314, 15)
139 ROUND4(EBX, ECX, EDX, EAX, 4, 0x4e0811a1, 21)
140 ROUND4(EAX, EBX, ECX, EDX, 11, 0xf7537e82, 6)
141 ROUND4(EDX, EAX, EBX, ECX, 2, 0xbd3af235, 10)
142 ROUND4(ECX, EDX, EAX, EBX, 9, 0x2ad7d2bb, 15)
143 ROUND4(EBX, ECX, EDX, EAX, 0, 0xeb86d391, 21)
144
145 ADDL(R12L, EAX)
146 ADDL(R13L, EBX)
147 ADDL(R14L, ECX)
148 ADDL(R15L, EDX)
149
150 ADDQ(Imm(64), RSI)
151 CMPQ(RSI, RDI)
152 JB(LabelRef("loop"))
153 }
154
155 func end() {
156 Label("end")
157 MOVL(EAX, Mem{Base: BP}.Offset(0*4))
158 MOVL(EBX, Mem{Base: BP}.Offset(1*4))
159 MOVL(ECX, Mem{Base: BP}.Offset(2*4))
160 MOVL(EDX, Mem{Base: BP}.Offset(3*4))
161 RET()
162 }
163
164 func ROUND1(a, b, c, d GPPhysical, index int, konst, shift uint64) {
165 XORL(c, R9L)
166 ADDL(Imm(konst), a)
167 ADDL(R8L, a)
168 ANDL(b, R9L)
169 XORL(d, R9L)
170 MOVL(Mem{Base: SI}.Offset(index*4), R8L)
171 ADDL(R9L, a)
172 ROLL(Imm(shift), a)
173 MOVL(c, R9L)
174 ADDL(b, a)
175 }
176
177
178 func ROUND2(a, b, c, d GPPhysical, index int, konst, shift uint64) {
179 XORL(R11L, R9L)
180 ADDL(Imm(konst), a)
181 ADDL(R8L, a)
182 ANDL(b, R10L)
183 ANDL(c, R9L)
184 MOVL(Mem{Base: SI}.Offset(index*4), R8L)
185 ADDL(R9L, a)
186 ADDL(R10L, a)
187 MOVL(c, R9L)
188 MOVL(c, R10L)
189 ROLL(Imm(shift), a)
190 ADDL(b, a)
191 }
192
193
194 func ROUND3FIRST(a, b, c, d GPPhysical, index int, konst, shift uint64) {
195 MOVL(d, R9L)
196 XORL(c, R9L)
197 XORL(b, R9L)
198 ADDL(Imm(konst), a)
199 ADDL(R8L, a)
200 MOVL(Mem{Base: SI}.Offset(index*4), R8L)
201 ADDL(R9L, a)
202 ROLL(Imm(shift), a)
203 ADDL(b, a)
204 }
205
206 func ROUND3(a, b, c, d GPPhysical, index int, konst, shift uint64) {
207 XORL(a, R9L)
208 XORL(b, R9L)
209 ADDL(Imm(konst), a)
210 ADDL(R8L, a)
211 MOVL(Mem{Base: SI}.Offset(index*4), R8L)
212 ADDL(R9L, a)
213 ROLL(Imm(shift), a)
214 ADDL(b, a)
215 }
216
217 func ROUND4(a, b, c, d GPPhysical, index int, konst, shift uint64) {
218 ADDL(Imm(konst), a)
219 ADDL(R8L, a)
220 ORL(b, R9L)
221 XORL(c, R9L)
222 ADDL(R9L, a)
223 MOVL(Mem{Base: SI}.Offset(index*4), R8L)
224 MOVL(Imm(0xffffffff), R9L)
225 ROLL(Imm(shift), a)
226 XORL(c, R9L)
227 ADDL(b, a)
228 }
229
View as plain text