1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// encryptBlockAsm runs the 16-byte block at src through the AES-NI
// encryption rounds and stores the 16-byte result at dst.
//
// xk is read as consecutive 16-byte round keys, beginning with the key
// that is XORed into the input before the first round. nr selects the
// entry point into the unrolled round sequence: nr == 12 executes 12
// rounds, nr below 12 executes 10, and any other value executes 14.
// For nr in {10, 12, 14} that consumes nr+1 round keys.
//
// Registers: AX = round-key cursor, BX = src, DX = dst, CX = nr,
// X0 = block state, X1 = current round key.
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVQ	nr+0(FP), CX
	MOVQ	xk+8(FP), AX
	MOVQ	dst+16(FP), DX
	MOVQ	src+24(FP), BX
	MOVUPS	(BX), X0	// X0 = input block
	MOVUPS	(AX), X1	// X1 = first round key
	ADDQ	$16, AX
	PXOR	X1, X0		// whitening: state ^= first round key
	CMPQ	CX, $12		// only the flags are needed; CX is not read again
	JE	Lenc_rounds12
	JB	Lenc_rounds10
Lenc_rounds14:
	// Two leading rounds that only the 14-round path executes.
	MOVUPS	(AX), X1
	AESENC	X1, X0
	MOVUPS	16(AX), X1
	AESENC	X1, X0
	ADDQ	$32, AX
Lenc_rounds12:
	// Two rounds shared by the 14- and 12-round paths.
	MOVUPS	(AX), X1
	AESENC	X1, X0
	MOVUPS	16(AX), X1
	AESENC	X1, X0
	ADDQ	$32, AX
Lenc_rounds10:
	// Common tail: nine full rounds followed by the final round.
	MOVUPS	(AX), X1
	AESENC	X1, X0
	MOVUPS	16(AX), X1
	AESENC	X1, X0
	MOVUPS	32(AX), X1
	AESENC	X1, X0
	MOVUPS	48(AX), X1
	AESENC	X1, X0
	MOVUPS	64(AX), X1
	AESENC	X1, X0
	MOVUPS	80(AX), X1
	AESENC	X1, X0
	MOVUPS	96(AX), X1
	AESENC	X1, X0
	MOVUPS	112(AX), X1
	AESENC	X1, X0
	MOVUPS	128(AX), X1
	AESENC	X1, X0
	MOVUPS	144(AX), X1
	AESENCLAST	X1, X0
	MOVUPS	X0, (DX)	// write the encrypted block to dst
	RET
57
// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
// decryptBlockAsm runs the 16-byte block at src through the AES-NI
// decryption rounds and stores the 16-byte result at dst.
//
// xk is read as consecutive 16-byte round keys in ascending address
// order, beginning with the key that is XORed into the input before the
// first round (presumably the dec schedule written by expandKeyAsm;
// confirm against the caller). nr selects the entry point into the
// unrolled round sequence: nr == 12 executes 12 rounds, nr below 12
// executes 10, and any other value executes 14.
//
// Registers: AX = round-key cursor, BX = src, DX = dst, CX = nr,
// X0 = block state, X1 = current round key.
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVQ	nr+0(FP), CX
	MOVQ	xk+8(FP), AX
	MOVQ	dst+16(FP), DX
	MOVQ	src+24(FP), BX
	MOVUPS	(BX), X0	// X0 = input block
	MOVUPS	(AX), X1	// X1 = first round key
	ADDQ	$16, AX
	PXOR	X1, X0		// whitening: state ^= first round key
	CMPQ	CX, $12		// only the flags are needed; CX is not read again
	JE	Ldec_rounds12
	JB	Ldec_rounds10
Ldec_rounds14:
	// Two leading rounds that only the 14-round path executes.
	MOVUPS	(AX), X1
	AESDEC	X1, X0
	MOVUPS	16(AX), X1
	AESDEC	X1, X0
	ADDQ	$32, AX
Ldec_rounds12:
	// Two rounds shared by the 14- and 12-round paths.
	MOVUPS	(AX), X1
	AESDEC	X1, X0
	MOVUPS	16(AX), X1
	AESDEC	X1, X0
	ADDQ	$32, AX
Ldec_rounds10:
	// Common tail: nine full rounds followed by the final round.
	MOVUPS	(AX), X1
	AESDEC	X1, X0
	MOVUPS	16(AX), X1
	AESDEC	X1, X0
	MOVUPS	32(AX), X1
	AESDEC	X1, X0
	MOVUPS	48(AX), X1
	AESDEC	X1, X0
	MOVUPS	64(AX), X1
	AESDEC	X1, X0
	MOVUPS	80(AX), X1
	AESDEC	X1, X0
	MOVUPS	96(AX), X1
	AESDEC	X1, X0
	MOVUPS	112(AX), X1
	AESDEC	X1, X0
	MOVUPS	128(AX), X1
	AESDEC	X1, X0
	MOVUPS	144(AX), X1
	AESDECLAST	X1, X0
	MOVUPS	X0, (DX)	// write the decrypted block to dst
	RET
106
// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
//
// expandKeyAsm expands the cipher key at key into two schedules of
// 16-byte round keys:
//
//   - enc receives the round keys in forward order;
//   - dec receives the same keys in reverse order, with AESIMC applied
//     to every key except the first and the last one written.
//
// nr selects the expansion: nr == 12 reads a 24-byte key, nr below 12 a
// 16-byte key, and any other value a 32-byte key. For nr in {10, 12, 14}
// each schedule holds nr+1 round keys.
//
// Note that round keys are stored in uint128 format, not uint32.
//
// Register contract with the _expand_key_* helpers (they take no stack
// arguments):
//
//	X0, X2	key material carried from one helper call to the next
//	X1	AESKEYGENASSIST result consumed by the helper
//	X4	zeroed once below and relied upon by the helpers
//	BX	write cursor into enc, advanced by each helper
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVQ	nr+0(FP), CX
	MOVQ	key+8(FP), AX
	MOVQ	enc+16(FP), BX
	MOVQ	dec+24(FP), DX
	MOVUPS	(AX), X0
	// enc
	MOVUPS	X0, (BX)	// first round key = first 16 key bytes
	ADDQ	$16, BX
	PXOR	X4, X4		// _expand_key_* expect X4 to be zero
	// nr is a 64-bit int (loaded with MOVQ, counted down with DECQ
	// below), so dispatch on all 64 bits as well.
	CMPQ	CX, $12
	JE	Lexp_enc192
	JB	Lexp_enc128
Lexp_enc256:
	// 32-byte key: the second 16 key bytes form the second round key.
	MOVUPS	16(AX), X2
	MOVUPS	X2, (BX)
	ADDQ	$16, BX
	// Each a/b pair shares one round constant and emits two round keys.
	AESKEYGENASSIST	$0x01, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x01, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x02, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x02, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x04, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x04, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x08, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x08, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x10, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x10, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x20, X2, X1
	CALL	_expand_key_256a<>(SB)
	AESKEYGENASSIST	$0x20, X0, X1
	CALL	_expand_key_256b<>(SB)
	AESKEYGENASSIST	$0x40, X2, X1
	CALL	_expand_key_256a<>(SB)
	JMP	Lexp_dec
Lexp_enc192:
	// 24-byte key: key bytes 16..23 go into the low quadword of X2.
	MOVQ	16(AX), X2
	// Each a/b pair stores 32+16 bytes, i.e. three round keys.
	AESKEYGENASSIST	$0x01, X2, X1
	CALL	_expand_key_192a<>(SB)
	AESKEYGENASSIST	$0x02, X2, X1
	CALL	_expand_key_192b<>(SB)
	AESKEYGENASSIST	$0x04, X2, X1
	CALL	_expand_key_192a<>(SB)
	AESKEYGENASSIST	$0x08, X2, X1
	CALL	_expand_key_192b<>(SB)
	AESKEYGENASSIST	$0x10, X2, X1
	CALL	_expand_key_192a<>(SB)
	AESKEYGENASSIST	$0x20, X2, X1
	CALL	_expand_key_192b<>(SB)
	AESKEYGENASSIST	$0x40, X2, X1
	CALL	_expand_key_192a<>(SB)
	AESKEYGENASSIST	$0x80, X2, X1
	CALL	_expand_key_192b<>(SB)
	JMP	Lexp_dec
Lexp_enc128:
	// 16-byte key: one helper call per remaining round key.
	AESKEYGENASSIST	$0x01, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x02, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x04, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x08, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x10, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x20, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x40, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x80, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x1b, X0, X1
	CALL	_expand_key_128<>(SB)
	AESKEYGENASSIST	$0x36, X0, X1
	CALL	_expand_key_128<>(SB)
Lexp_dec:
	// dec
	// BX is one past the last enc round key; step back onto it and copy
	// it unchanged to the start of dec.
	SUBQ	$16, BX
	MOVUPS	(BX), X1
	MOVUPS	X1, (DX)
	DECQ	CX		// CX = nr-1 keys to transform in the loop
Lexp_dec_loop:
	// Walk enc backwards and dec forwards, applying AESIMC on the way.
	MOVUPS	-16(BX), X1
	AESIMC	X1, X0
	MOVUPS	X0, 16(DX)
	SUBQ	$16, BX
	ADDQ	$16, DX
	DECQ	CX
	JNZ	Lexp_dec_loop
	// The first enc round key becomes the last dec round key, unchanged.
	MOVUPS	-16(BX), X0
	MOVUPS	X0, 16(DX)
	RET
210
// _expand_key_128 derives one 16-byte round key from the previous one
// and appends it at BX. It uses a private register convention instead
// of stack arguments:
//
//	X0	in: previous round key; out: new round key
//	X1	in: AESKEYGENASSIST result; clobbered
//	X4	in: dword 0 must be zero; clobbered, dword 0 remains zero
//	BX	in: store address; out: advanced by 16
TEXT _expand_key_128<>(SB),NOSPLIT,$0
	// Turn X0 = (a, b, c, d) into its running XOR
	// (a, a^b, a^b^c, a^b^c^d) using X4 as the shifted copy.
	SHUFPS	$0x10, X0, X4
	PXOR	X4, X0
	SHUFPS	$0x8c, X0, X4
	PXOR	X4, X0
	// Broadcast dword 3 of the key-gen assist result and fold it in.
	PSHUFD	$0xff, X1, X1
	PXOR	X1, X0
	MOVUPS	X0, (BX)
	ADDQ	$16, BX
	RET
221
// _expand_key_192a advances the 192-bit key state by one step and
// stores 32 bytes (two round keys) at BX. It uses a private register
// convention instead of stack arguments:
//
//	X0	in/out: four key words
//	X1	in: AESKEYGENASSIST result; clobbered
//	X2	in/out: two further key words in dwords 0-1
//	X3, X5, X6	scratch
//	X4	in: dword 0 must be zero; clobbered, dword 0 remains zero
//	BX	in: store address; out: advanced by 32
TEXT _expand_key_192a<>(SB),NOSPLIT,$0
	// X0 = running XOR of its own dwords, then ^ broadcast(X1 dword 1).
	SHUFPS	$0x10, X0, X4
	PXOR	X4, X0
	SHUFPS	$0x8c, X0, X4
	PXOR	X4, X0
	PSHUFD	$0x55, X1, X1
	PXOR	X1, X0

	// X6 keeps the incoming X2 for the first store;
	// X2 ^= (X2 << 32 bits) ^ broadcast(new X0 dword 3).
	MOVAPS	X2, X6
	MOVAPS	X2, X5
	PSLLDQ	$4, X5
	PSHUFD	$0xff, X0, X3
	PXOR	X3, X2
	PXOR	X5, X2

	// (BX)   = old X2 dwords 0-1 followed by new X0 dwords 0-1.
	SHUFPS	$0x44, X0, X6
	MOVUPS	X6, (BX)
	// 16(BX) = new X0 dwords 2-3 followed by new X2 dwords 0-1.
	MOVAPS	X0, X1
	SHUFPS	$0x4e, X2, X1
	MOVUPS	X1, 16(BX)
	ADDQ	$32, BX
	RET
244
// _expand_key_192b advances the 192-bit key state by one step and
// stores 16 bytes (one round key, the new X0) at BX. It uses a private
// register convention instead of stack arguments:
//
//	X0	in/out: four key words
//	X1	in: AESKEYGENASSIST result; clobbered
//	X2	in/out: two further key words in dwords 0-1
//	X3, X5	scratch
//	X4	in: dword 0 must be zero; clobbered, dword 0 remains zero
//	BX	in: store address; out: advanced by 16
TEXT _expand_key_192b<>(SB),NOSPLIT,$0
	// X0 = running XOR of its own dwords, then ^ broadcast(X1 dword 1).
	SHUFPS	$0x10, X0, X4
	PXOR	X4, X0
	SHUFPS	$0x8c, X0, X4
	PXOR	X4, X0
	PSHUFD	$0x55, X1, X1
	PXOR	X1, X0

	// X2 ^= (X2 << 32 bits) ^ broadcast(new X0 dword 3).
	MOVAPS	X2, X5
	PSLLDQ	$4, X5
	PSHUFD	$0xff, X0, X3
	PXOR	X3, X2
	PXOR	X5, X2

	MOVUPS	X0, (BX)
	ADDQ	$16, BX
	RET
262
// _expand_key_256a is an alias for _expand_key_128: it tail-jumps
// there, so the RET in _expand_key_128 returns straight to this
// function's caller. Register inputs and outputs are exactly those of
// _expand_key_128 (X0, X1, X4, BX).
TEXT _expand_key_256a<>(SB),NOSPLIT,$0
	JMP	_expand_key_128<>(SB)
265
// _expand_key_256b derives one 16-byte round key in X2 and appends it
// at BX. It uses a private register convention instead of stack
// arguments:
//
//	X1	in: AESKEYGENASSIST result; clobbered
//	X2	in: round key from two steps back; out: new round key
//	X4	in: dword 0 must be zero; clobbered, dword 0 remains zero
//	BX	in: store address; out: advanced by 16
TEXT _expand_key_256b<>(SB),NOSPLIT,$0
	// Turn X2 = (a, b, c, d) into its running XOR
	// (a, a^b, a^b^c, a^b^c^d) using X4 as the shifted copy.
	SHUFPS	$0x10, X2, X4
	PXOR	X4, X2
	SHUFPS	$0x8c, X2, X4
	PXOR	X4, X2
	// Broadcast dword 2 of the key-gen assist result and fold it in.
	PSHUFD	$0xaa, X1, X1
	PXOR	X1, X2

	MOVUPS	X2, (BX)
	ADDQ	$16, BX
	RET
277
View as plain text