// Code generation for the amd64 backend: this file lowers SSA values
// (ssa.Value) into machine instructions (obj.Prog).

5 package amd64
6
7 import (
8 "fmt"
9 "math"
10
11 "cmd/compile/internal/base"
12 "cmd/compile/internal/ir"
13 "cmd/compile/internal/logopt"
14 "cmd/compile/internal/objw"
15 "cmd/compile/internal/ssa"
16 "cmd/compile/internal/ssagen"
17 "cmd/compile/internal/types"
18 "cmd/internal/obj"
19 "cmd/internal/obj/x86"
20 "internal/abi"
21 "internal/buildcfg"
22 )
23
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
25 func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
26 flive := b.FlagsLiveAtEnd
27 for _, c := range b.ControlValues() {
28 flive = c.Type.IsFlags() || flive
29 }
30 for i := len(b.Values) - 1; i >= 0; i-- {
31 v := b.Values[i]
32 if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
34 v.Aux = ssa.AuxMark
35 }
36 if v.Type.IsFlags() {
37 flive = false
38 }
39 for _, a := range v.Args {
40 if a.Type.IsFlags() {
41 flive = true
42 }
43 }
44 }
45 }
46
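// isFPReg reports whether r is a floating-point/vector register (X0 through Z31).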
47 func isFPReg(r int16) bool {
48 return x86.REG_X0 <= r && r <= x86.REG_Z31
49 }
50
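// isKReg reports whether r is an AVX-512 mask register (K0 through K7).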
51 func isKReg(r int16) bool {
52 return x86.REG_K0 <= r && r <= x86.REG_K7
53 }
54
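// isLowFPReg reports whether r is one of the low 16 vector registers
// (X0 through X15), which are encodable without AVX-512.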
55 func isLowFPReg(r int16) bool {
56 return x86.REG_X0 <= r && r <= x86.REG_X15
57 }
58
// loadByRegWidth returns the load instruction of the given width
// for a load into register r.
60 func loadByRegWidth(r int16, width int64) obj.As {
	// Avoid partial register write.
62 if !isFPReg(r) && !isKReg(r) {
63 switch width {
64 case 1:
65 return x86.AMOVBLZX
66 case 2:
67 return x86.AMOVWLZX
68 }
69 }
	// Otherwise, there's no difference between load and store opcodes.
71 return storeByRegWidth(r, width)
72 }
73
// storeByRegWidth returns the store instruction of the given width
// for a store from register r.
76 func storeByRegWidth(r int16, width int64) obj.As {
77 if isFPReg(r) {
78 switch width {
79 case 4:
80 return x86.AMOVSS
81 case 8:
82 return x86.AMOVSD
83 case 16:
			// Use the shorter non-VEX MOVUPS for the low 16 registers;
			// X16 and above require a VEX/EVEX-encoded move.
85 if isLowFPReg(r) {
86 return x86.AMOVUPS
87 } else {
88 return x86.AVMOVDQU
89 }
90 case 32:
91 return x86.AVMOVDQU
92 case 64:
93 return x86.AVMOVDQU64
94 }
95 }
96 if isKReg(r) {
97 return x86.AKMOVQ
98 }
99
100 switch width {
101 case 1:
102 return x86.AMOVB
103 case 2:
104 return x86.AMOVW
105 case 4:
106 return x86.AMOVL
107 case 8:
108 return x86.AMOVQ
109 }
110 panic(fmt.Sprintf("bad store reg=%v, width=%d", r, width))
111 }
112
// moveByRegsWidth returns the register-to-register move instruction
// of the given width for the given destination and source registers.
114 func moveByRegsWidth(dest, src int16, width int64) obj.As {
115
116 if isFPReg(dest) && isFPReg(src) {
		// Moving the whole SSE register is faster than moving just the
		// correct low portion of it, and there is no xmm->xmm move with
		// a 1-byte opcode, so use MOVUPS (2-byte opcode) when both
		// registers are low and the value fits in 128 bits.
121 if isLowFPReg(dest) && isLowFPReg(src) && width <= 16 {
122 return x86.AMOVUPS
123 }
124 if width <= 32 {
125 return x86.AVMOVDQU
126 }
127 return x86.AVMOVDQU64
128 }
129
130 if isKReg(dest) || isKReg(src) {
131 if isFPReg(dest) || isFPReg(src) {
132 panic(fmt.Sprintf("bad move, src=%v, dest=%v, width=%d", src, dest, width))
133 }
134 return x86.AKMOVQ
135 }
136
137 switch width {
138 case 1:
		// Avoids partial register write.
140 return x86.AMOVL
141 case 2:
142 return x86.AMOVL
143 case 4:
144 return x86.AMOVL
145 case 8:
146 return x86.AMOVQ
147 case 16:
148 if isLowFPReg(dest) && isLowFPReg(src) {
			// Prefer the shorter non-VEX MOVUPS when both registers are low.
150 return x86.AMOVUPS
151 } else {
152 return x86.AVMOVDQU
153 }
154 case 32:
155 return x86.AVMOVDQU
156 case 64:
157 return x86.AVMOVDQU64
158 }
159 panic(fmt.Sprintf("bad move, src=%v, dest=%v, width=%d", src, dest, width))
160 }
161
// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// can be modified (for example, to set a constant).
168 func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
169 p := s.Prog(op)
170 p.From.Type = obj.TYPE_REG
171 p.To.Type = obj.TYPE_REG
172 p.To.Reg = dest
173 p.From.Reg = src
174 return p
175 }
176
// memIdx fills out a as an indexed memory reference for v.
// It assumes that the base register and the index register
// are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
// The caller must still use ssagen.AddAux/AddAux2 to handle v.Aux as necessary.
181 func memIdx(a *obj.Addr, v *ssa.Value) {
182 r, i := v.Args[0].Reg(), v.Args[1].Reg()
183 a.Type = obj.TYPE_MEM
184 a.Scale = v.Op.Scale()
185 if a.Scale == 1 && i == x86.REG_SP {
186 r, i = i, r
187 }
188 a.Reg = r
189 a.Index = i
190 }
191
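// getgFromTLS loads the g pointer from thread-local storage into register r.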
192 func getgFromTLS(s *ssagen.State, r int16) {
	// See the comments in cmd/internal/obj/x86/obj6.go
	// near CanUse1InsnTLS for a detailed explanation of these instructions.
195 if x86.CanUse1InsnTLS(base.Ctxt) {
		// MOVQ (TLS), r
197 p := s.Prog(x86.AMOVQ)
198 p.From.Type = obj.TYPE_MEM
199 p.From.Reg = x86.REG_TLS
200 p.To.Type = obj.TYPE_REG
201 p.To.Reg = r
202 } else {
		// MOVQ TLS, r
		// MOVQ (r)(TLS*1), r
205 p := s.Prog(x86.AMOVQ)
206 p.From.Type = obj.TYPE_REG
207 p.From.Reg = x86.REG_TLS
208 p.To.Type = obj.TYPE_REG
209 p.To.Reg = r
210 q := s.Prog(x86.AMOVQ)
211 q.From.Type = obj.TYPE_MEM
212 q.From.Reg = r
213 q.From.Index = x86.REG_TLS
214 q.From.Scale = 1
215 q.To.Type = obj.TYPE_REG
216 q.To.Reg = r
217 }
218 }
219
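// ssaGenValue emits the machine instructions for a single ssa.Value.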
220 func ssaGenValue(s *ssagen.State, v *ssa.Value) {
221 switch v.Op {
222 case ssa.OpAMD64VFMADD231SD, ssa.OpAMD64VFMADD231SS:
223 p := s.Prog(v.Op.Asm())
224 p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
225 p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
226 p.AddRestSourceReg(v.Args[1].Reg())
227 case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
228 r := v.Reg()
229 r1 := v.Args[0].Reg()
230 r2 := v.Args[1].Reg()
231 switch {
232 case r == r1:
233 p := s.Prog(v.Op.Asm())
234 p.From.Type = obj.TYPE_REG
235 p.From.Reg = r2
236 p.To.Type = obj.TYPE_REG
237 p.To.Reg = r
238 case r == r2:
239 p := s.Prog(v.Op.Asm())
240 p.From.Type = obj.TYPE_REG
241 p.From.Reg = r1
242 p.To.Type = obj.TYPE_REG
243 p.To.Reg = r
244 default:
245 var asm obj.As
246 if v.Op == ssa.OpAMD64ADDQ {
247 asm = x86.ALEAQ
248 } else {
249 asm = x86.ALEAL
250 }
251 p := s.Prog(asm)
252 p.From.Type = obj.TYPE_MEM
253 p.From.Reg = r1
254 p.From.Scale = 1
255 p.From.Index = r2
256 p.To.Type = obj.TYPE_REG
257 p.To.Reg = r
258 }
259
260 case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
261 ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
262 ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
263 ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
264 ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
265 ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
266 ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
267 ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
268 ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
269 ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
270 ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
271 ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
272 ssa.OpAMD64MINSS, ssa.OpAMD64MINSD,
273 ssa.OpAMD64POR, ssa.OpAMD64PXOR,
274 ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
275 ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
276 ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ,
277 ssa.OpAMD64PCMPEQB, ssa.OpAMD64PSIGNB,
278 ssa.OpAMD64PUNPCKLBW:
279 opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
280
281 case ssa.OpAMD64PSHUFLW:
282 p := s.Prog(v.Op.Asm())
283 imm := v.AuxInt
284 if imm < 0 || imm > 255 {
285 v.Fatalf("Invalid source selection immediate")
286 }
287 p.From.Offset = imm
288 p.From.Type = obj.TYPE_CONST
289 p.AddRestSourceReg(v.Args[0].Reg())
290 p.To.Type = obj.TYPE_REG
291 p.To.Reg = v.Reg()
292
293 case ssa.OpAMD64PSHUFBbroadcast:
		// PSHUFB with a control mask of zero copies byte 0 to all
		// bytes in the register.
		//
		// X15 is always zero with ABIInternal.
298 if s.ABI != obj.ABIInternal {
			// zero X15 manually
300 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
301 }
302
303 p := s.Prog(v.Op.Asm())
304 p.From.Type = obj.TYPE_REG
305 p.To.Type = obj.TYPE_REG
306 p.To.Reg = v.Reg()
307 p.From.Reg = x86.REG_X15
308
309 case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
310 p := s.Prog(v.Op.Asm())
311 lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
312 p.From.Type = obj.TYPE_REG
313 p.From.Reg = bits
314 p.To.Type = obj.TYPE_REG
315 p.To.Reg = lo
316 p.AddRestSourceReg(hi)
317
318 case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
319 ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
320 ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
321 p := s.Prog(v.Op.Asm())
322 p.From.Type = obj.TYPE_REG
323 p.From.Reg = v.Args[0].Reg()
324 p.To.Type = obj.TYPE_REG
325 switch v.Op {
326 case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
327 p.To.Reg = v.Reg0()
328 default:
329 p.To.Reg = v.Reg()
330 }
331
332 case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
333 p := s.Prog(v.Op.Asm())
334 p.From.Type = obj.TYPE_REG
335 p.From.Reg = v.Args[0].Reg()
336 p.To.Type = obj.TYPE_REG
337 p.To.Reg = v.Reg()
338 p.AddRestSourceReg(v.Args[1].Reg())
339
340 case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
341 ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
342 ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
343 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
344 p.AddRestSourceReg(v.Args[0].Reg())
345
346 case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
347 ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
348 ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
349 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
350 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
351 ssagen.AddAux(&m, v)
352 p.AddRestSource(m)
353
354 case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
355 ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
356 ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
357 ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
358 ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
359 ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
360 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
361 m := obj.Addr{Type: obj.TYPE_MEM}
362 memIdx(&m, v)
363 ssagen.AddAux(&m, v)
364 p.AddRestSource(m)
365
366 case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
371 r := v.Args[1].Reg()

		// Zero extend dividend.
374 opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

		// Issue divide.
377 p := s.Prog(v.Op.Asm())
378 p.From.Type = obj.TYPE_REG
379 p.From.Reg = r
380
381 case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
386 r := v.Args[1].Reg()
387
388 var opCMP, opNEG, opSXD obj.As
389 switch v.Op {
390 case ssa.OpAMD64DIVQ:
391 opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO
392 case ssa.OpAMD64DIVL:
393 opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ
394 case ssa.OpAMD64DIVW:
395 opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD
396 }

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
400 var j1, j2 *obj.Prog
401 if ssa.DivisionNeedsFixUp(v) {
402 c := s.Prog(opCMP)
403 c.From.Type = obj.TYPE_REG
404 c.From.Reg = r
405 c.To.Type = obj.TYPE_CONST
406 c.To.Offset = -1

			// If the divisor is not -1, skip over the fixup code below.
409 j1 = s.Prog(x86.AJNE)
410 j1.To.Type = obj.TYPE_BRANCH

			// Divisor is -1: fix up the results manually.
			// n / -1 = -n
414 n1 := s.Prog(opNEG)
415 n1.To.Type = obj.TYPE_REG
416 n1.To.Reg = x86.REG_AX

			// n % -1 == 0
419 opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)

			// Skip over the normal division path; the fixed-up
			// quotient and remainder are already in AX and DX.
425 j2 = s.Prog(obj.AJMP)
426 j2.To.Type = obj.TYPE_BRANCH
427 }

		// Sign extend dividend and perform division.
430 p := s.Prog(opSXD)
431 if j1 != nil {
432 j1.To.SetTarget(p)
433 }
434 p = s.Prog(v.Op.Asm())
435 p.From.Type = obj.TYPE_REG
436 p.From.Reg = r
437
438 if j2 != nil {
439 j2.To.SetTarget(s.Pc())
440 }
441
442 case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// The frontend rewrites constant division by 8/16/32 bit integers
		// into HMUL by a constant; SSA rewrites generate the 64 bit versions.
		//
		// Arg[0] is already in AX as it's the only register we allow,
		// and DX is the only output we care about (the high bits).
449 p := s.Prog(v.Op.Asm())
450 p.From.Type = obj.TYPE_REG
451 p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency.
455 if v.Type.Size() == 1 {
456 m := s.Prog(x86.AMOVB)
457 m.From.Type = obj.TYPE_REG
458 m.From.Reg = x86.REG_AH
459 m.To.Type = obj.TYPE_REG
460 m.To.Reg = x86.REG_DX
461 }
462
463 case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow;
		// the low result ends up in AX, the high result in DX.
466 p := s.Prog(v.Op.Asm())
467 p.From.Type = obj.TYPE_REG
468 p.From.Reg = v.Args[1].Reg()
469
470 case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow;
		// results: hi in DX, lo in AX.
473 p := s.Prog(v.Op.Asm())
474 p.From.Type = obj.TYPE_REG
475 p.From.Reg = v.Args[1].Reg()
476
477 case ssa.OpAMD64DIVQU2:
		// Arg[0] and Arg[1] are already in DX and AX, the only registers we allow;
		// results: quotient in AX, remainder in DX.
480 p := s.Prog(v.Op.Asm())
481 p.From.Type = obj.TYPE_REG
482 p.From.Reg = v.Args[2].Reg()
483
484 case ssa.OpAMD64AVGQU:
		// Compute (x+y)/2 unsigned: ADDQ, then rotate the carry back in with RCR.
		// Note that the rotate must be the next instruction after the add,
		// so that the carry flag is still live.
488 p := s.Prog(x86.AADDQ)
489 p.From.Type = obj.TYPE_REG
490 p.To.Type = obj.TYPE_REG
491 p.To.Reg = v.Reg()
492 p.From.Reg = v.Args[1].Reg()
493 p = s.Prog(x86.ARCRQ)
494 p.From.Type = obj.TYPE_CONST
495 p.From.Offset = 1
496 p.To.Type = obj.TYPE_REG
497 p.To.Reg = v.Reg()
498
499 case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
500 r := v.Reg0()
501 r0 := v.Args[0].Reg()
502 r1 := v.Args[1].Reg()
503 switch r {
504 case r0:
505 p := s.Prog(v.Op.Asm())
506 p.From.Type = obj.TYPE_REG
507 p.From.Reg = r1
508 p.To.Type = obj.TYPE_REG
509 p.To.Reg = r
510 case r1:
511 p := s.Prog(v.Op.Asm())
512 p.From.Type = obj.TYPE_REG
513 p.From.Reg = r0
514 p.To.Type = obj.TYPE_REG
515 p.To.Reg = r
516 default:
517 v.Fatalf("output not in same register as an input %s", v.LongString())
518 }
519
520 case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
521 p := s.Prog(v.Op.Asm())
522 p.From.Type = obj.TYPE_REG
523 p.From.Reg = v.Args[1].Reg()
524 p.To.Type = obj.TYPE_REG
525 p.To.Reg = v.Reg0()
526
527 case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
528 p := s.Prog(v.Op.Asm())
529 p.From.Type = obj.TYPE_CONST
530 p.From.Offset = v.AuxInt
531 p.To.Type = obj.TYPE_REG
532 p.To.Reg = v.Reg0()
533
534 case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
535 r := v.Reg()
536 a := v.Args[0].Reg()
537 if r == a {
538 switch v.AuxInt {
539 case 1:
540 var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc.
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes a binary a little smaller.
546 if v.Op == ssa.OpAMD64ADDQconst {
547 asm = x86.AINCQ
548 } else {
549 asm = x86.AINCL
550 }
551 p := s.Prog(asm)
552 p.To.Type = obj.TYPE_REG
553 p.To.Reg = r
554 return
555 case -1:
556 var asm obj.As
557 if v.Op == ssa.OpAMD64ADDQconst {
558 asm = x86.ADECQ
559 } else {
560 asm = x86.ADECL
561 }
562 p := s.Prog(asm)
563 p.To.Type = obj.TYPE_REG
564 p.To.Reg = r
565 return
566 case 0x80:
				// 'SUBQ $-0x80' is shorter to encode than,
				// and equivalent to, 'ADDQ $0x80'.
569 asm := x86.ASUBL
570 if v.Op == ssa.OpAMD64ADDQconst {
571 asm = x86.ASUBQ
572 }
573 p := s.Prog(asm)
574 p.From.Type = obj.TYPE_CONST
575 p.From.Offset = -0x80
576 p.To.Type = obj.TYPE_REG
577 p.To.Reg = r
578 return
579
580 }
581 p := s.Prog(v.Op.Asm())
582 p.From.Type = obj.TYPE_CONST
583 p.From.Offset = v.AuxInt
584 p.To.Type = obj.TYPE_REG
585 p.To.Reg = r
586 return
587 }
588 var asm obj.As
589 if v.Op == ssa.OpAMD64ADDQconst {
590 asm = x86.ALEAQ
591 } else {
592 asm = x86.ALEAL
593 }
594 p := s.Prog(asm)
595 p.From.Type = obj.TYPE_MEM
596 p.From.Reg = a
597 p.From.Offset = v.AuxInt
598 p.To.Type = obj.TYPE_REG
599 p.To.Reg = r
600
601 case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
602 ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
603 ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
604 ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
605 ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
606 ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
607 ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
608 ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
609 ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
610 ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
611 ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
612 ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
613 p := s.Prog(v.Op.Asm())
614 p.From.Type = obj.TYPE_REG
615 p.From.Reg = v.Args[1].Reg()
616 p.To.Type = obj.TYPE_REG
617 p.To.Reg = v.Reg()
618
619 case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		// Flag condition: ^ZERO || PARITY
		// Generate:
		//   CMOV*NE  SRC,DST
		//   CMOV*PS  SRC,DST
624 p := s.Prog(v.Op.Asm())
625 p.From.Type = obj.TYPE_REG
626 p.From.Reg = v.Args[1].Reg()
627 p.To.Type = obj.TYPE_REG
628 p.To.Reg = v.Reg()
629 var q *obj.Prog
630 if v.Op == ssa.OpAMD64CMOVQNEF {
631 q = s.Prog(x86.ACMOVQPS)
632 } else if v.Op == ssa.OpAMD64CMOVLNEF {
633 q = s.Prog(x86.ACMOVLPS)
634 } else {
635 q = s.Prog(x86.ACMOVWPS)
636 }
637 q.From.Type = obj.TYPE_REG
638 q.From.Reg = v.Args[1].Reg()
639 q.To.Type = obj.TYPE_REG
640 q.To.Reg = v.Reg()
641
642 case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		// Flag condition: ZERO && !PARITY
		// Generate:
		//   MOV      SRC,TMP
		//   CMOV*NE  DST,TMP
		//   CMOV*PC  TMP,DST
		// A plain CMOV*EQ is not enough because it would also move when
		// the comparison was unordered (ZERO and PARITY both set), so
		// route the value through a temporary and only commit it to DST
		// when PARITY is clear.
654 t := v.RegTmp()
655 opregreg(s, moveByRegsWidth(t, v.Args[1].Reg(), v.Type.Size()), t, v.Args[1].Reg())
656
657 p := s.Prog(v.Op.Asm())
658 p.From.Type = obj.TYPE_REG
659 p.From.Reg = v.Reg()
660 p.To.Type = obj.TYPE_REG
661 p.To.Reg = t
662 var q *obj.Prog
663 if v.Op == ssa.OpAMD64CMOVQEQF {
664 q = s.Prog(x86.ACMOVQPC)
665 } else if v.Op == ssa.OpAMD64CMOVLEQF {
666 q = s.Prog(x86.ACMOVLPC)
667 } else {
668 q = s.Prog(x86.ACMOVWPC)
669 }
670 q.From.Type = obj.TYPE_REG
671 q.From.Reg = t
672 q.To.Type = obj.TYPE_REG
673 q.To.Reg = v.Reg()
674
675 case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
676 r := v.Reg()
677 p := s.Prog(v.Op.Asm())
678 p.From.Type = obj.TYPE_CONST
679 p.From.Offset = v.AuxInt
680 p.To.Type = obj.TYPE_REG
681 p.To.Reg = r
682 p.AddRestSourceReg(v.Args[0].Reg())
683
684 case ssa.OpAMD64ANDQconst:
685 asm := v.Op.Asm()
		// If the constant is positive and fits into 32 bits, use ANDL.
		// This saves a few bytes of encoding.
688 if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
689 asm = x86.AANDL
690 }
691 p := s.Prog(asm)
692 p.From.Type = obj.TYPE_CONST
693 p.From.Offset = v.AuxInt
694 p.To.Type = obj.TYPE_REG
695 p.To.Reg = v.Reg()
696
697 case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
698 ssa.OpAMD64ANDLconst,
699 ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
700 ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
701 ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
702 ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
703 ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
704 ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
705 p := s.Prog(v.Op.Asm())
706 p.From.Type = obj.TYPE_CONST
707 p.From.Offset = v.AuxInt
708 p.To.Type = obj.TYPE_REG
709 p.To.Reg = v.Reg()
710 case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
711 r := v.Reg()
712 p := s.Prog(v.Op.Asm())
713 p.From.Type = obj.TYPE_REG
714 p.From.Reg = r
715 p.To.Type = obj.TYPE_REG
716 p.To.Reg = r
717 case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
718 ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
719 ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
720 p := s.Prog(v.Op.Asm())
721 memIdx(&p.From, v)
722 o := v.Reg()
723 p.To.Type = obj.TYPE_REG
724 p.To.Reg = o
725 if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of
			// creating a slow three-operand LEA.
727 switch v.Op {
728 case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
729 p = s.Prog(x86.ALEAQ)
730 case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
731 p = s.Prog(x86.ALEAL)
732 case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
733 p = s.Prog(x86.ALEAW)
734 }
735 p.From.Type = obj.TYPE_MEM
736 p.From.Reg = o
737 p.To.Type = obj.TYPE_REG
738 p.To.Reg = o
739 }
740 ssagen.AddAux(&p.From, v)
741 case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
742 p := s.Prog(v.Op.Asm())
743 p.From.Type = obj.TYPE_MEM
744 p.From.Reg = v.Args[0].Reg()
745 ssagen.AddAux(&p.From, v)
746 p.To.Type = obj.TYPE_REG
747 p.To.Reg = v.Reg()
748 case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
749 ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
750 ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
751 opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
752 case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
755 opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
756 case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
757 p := s.Prog(v.Op.Asm())
758 p.From.Type = obj.TYPE_REG
759 p.From.Reg = v.Args[0].Reg()
760 p.To.Type = obj.TYPE_CONST
761 p.To.Offset = v.AuxInt
762 case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
763 ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
764 ssa.OpAMD64BTSQconst,
765 ssa.OpAMD64BTCQconst,
766 ssa.OpAMD64BTRQconst:
767 op := v.Op
768 if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit the 32-bit version because it's shorter.
770 op = ssa.OpAMD64BTLconst
771 }
772 p := s.Prog(op.Asm())
773 p.From.Type = obj.TYPE_CONST
774 p.From.Offset = v.AuxInt
775 p.To.Type = obj.TYPE_REG
776 p.To.Reg = v.Args[0].Reg()
777 case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
778 p := s.Prog(v.Op.Asm())
779 p.From.Type = obj.TYPE_MEM
780 p.From.Reg = v.Args[0].Reg()
781 ssagen.AddAux(&p.From, v)
782 p.To.Type = obj.TYPE_REG
783 p.To.Reg = v.Args[1].Reg()
784 case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
785 sc := v.AuxValAndOff()
786 p := s.Prog(v.Op.Asm())
787 p.From.Type = obj.TYPE_MEM
788 p.From.Reg = v.Args[0].Reg()
789 ssagen.AddAux2(&p.From, v, sc.Off64())
790 p.To.Type = obj.TYPE_CONST
791 p.To.Offset = sc.Val64()
792 case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
793 p := s.Prog(v.Op.Asm())
794 memIdx(&p.From, v)
795 ssagen.AddAux(&p.From, v)
796 p.To.Type = obj.TYPE_REG
797 p.To.Reg = v.Args[2].Reg()
798 case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
799 sc := v.AuxValAndOff()
800 p := s.Prog(v.Op.Asm())
801 memIdx(&p.From, v)
802 ssagen.AddAux2(&p.From, v, sc.Off64())
803 p.To.Type = obj.TYPE_CONST
804 p.To.Offset = sc.Val64()
805 case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
806 x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
810 if v.AuxInt == 0 && v.Aux == nil {
811 opregreg(s, x86.AXORL, x, x)
812 break
813 }
814
815 asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits in 32 bits.
818 if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
820 asm = x86.AMOVL
821 }
822 p := s.Prog(asm)
823 p.From.Type = obj.TYPE_CONST
824 p.From.Offset = v.AuxInt
825 p.To.Type = obj.TYPE_REG
826 p.To.Reg = x
827
828 case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
829 x := v.Reg()
830 if !isFPReg(x) && v.AuxInt == 0 && v.Aux == nil {
831 opregreg(s, x86.AXORL, x, x)
832 break
833 }
834 p := s.Prog(storeByRegWidth(x, v.Type.Size()))
835 p.From.Type = obj.TYPE_FCONST
836 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
837 p.To.Type = obj.TYPE_REG
838 p.To.Reg = x
839 case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
840 ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
841 ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
842 p := s.Prog(v.Op.Asm())
843 p.From.Type = obj.TYPE_MEM
844 p.From.Reg = v.Args[0].Reg()
845 ssagen.AddAux(&p.From, v)
846 p.To.Type = obj.TYPE_REG
847 p.To.Reg = v.Reg()
848 case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
849 ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
850 ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
851 p := s.Prog(v.Op.Asm())
852 memIdx(&p.From, v)
853 ssagen.AddAux(&p.From, v)
854 p.To.Type = obj.TYPE_REG
855 p.To.Reg = v.Reg()
856 case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
857 ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
858 ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
859 ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
860 p := s.Prog(v.Op.Asm())
861 p.From.Type = obj.TYPE_REG
862 p.From.Reg = v.Args[1].Reg()
863 p.To.Type = obj.TYPE_MEM
864 p.To.Reg = v.Args[0].Reg()
865 ssagen.AddAux(&p.To, v)
866 case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
867 ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
868 ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
869 ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
870 ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
871 ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
872 ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
873 ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
874 p := s.Prog(v.Op.Asm())
875 p.From.Type = obj.TYPE_REG
876 p.From.Reg = v.Args[2].Reg()
877 memIdx(&p.To, v)
878 ssagen.AddAux(&p.To, v)
879 case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
880 sc := v.AuxValAndOff()
881 off := sc.Off64()
882 val := sc.Val()
883 if val == 1 || val == -1 {
884 var asm obj.As
885 if v.Op == ssa.OpAMD64ADDQconstmodify {
886 if val == 1 {
887 asm = x86.AINCQ
888 } else {
889 asm = x86.ADECQ
890 }
891 } else {
892 if val == 1 {
893 asm = x86.AINCL
894 } else {
895 asm = x86.ADECL
896 }
897 }
898 p := s.Prog(asm)
899 p.To.Type = obj.TYPE_MEM
900 p.To.Reg = v.Args[0].Reg()
901 ssagen.AddAux2(&p.To, v, off)
902 break
903 }
904 fallthrough
905 case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
906 ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
907 ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
908 sc := v.AuxValAndOff()
909 off := sc.Off64()
910 val := sc.Val64()
911 p := s.Prog(v.Op.Asm())
912 p.From.Type = obj.TYPE_CONST
913 p.From.Offset = val
914 p.To.Type = obj.TYPE_MEM
915 p.To.Reg = v.Args[0].Reg()
916 ssagen.AddAux2(&p.To, v, off)
917
918 case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
919 p := s.Prog(v.Op.Asm())
920 p.From.Type = obj.TYPE_CONST
921 sc := v.AuxValAndOff()
922 p.From.Offset = sc.Val64()
923 p.To.Type = obj.TYPE_MEM
924 p.To.Reg = v.Args[0].Reg()
925 ssagen.AddAux2(&p.To, v, sc.Off64())
926 case ssa.OpAMD64MOVOstoreconst:
927 sc := v.AuxValAndOff()
928 if sc.Val() != 0 {
929 v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString())
930 }
931
932 if s.ABI != obj.ABIInternal {
			// zero X15 manually
934 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
935 }
936 p := s.Prog(v.Op.Asm())
937 p.From.Type = obj.TYPE_REG
938 p.From.Reg = x86.REG_X15
939 p.To.Type = obj.TYPE_MEM
940 p.To.Reg = v.Args[0].Reg()
941 ssagen.AddAux2(&p.To, v, sc.Off64())
942
943 case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
944 ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
945 ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
946 ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
947 ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
948 p := s.Prog(v.Op.Asm())
949 p.From.Type = obj.TYPE_CONST
950 sc := v.AuxValAndOff()
951 p.From.Offset = sc.Val64()
952 switch {
953 case p.As == x86.AADDQ && p.From.Offset == 1:
954 p.As = x86.AINCQ
955 p.From.Type = obj.TYPE_NONE
956 case p.As == x86.AADDQ && p.From.Offset == -1:
957 p.As = x86.ADECQ
958 p.From.Type = obj.TYPE_NONE
959 case p.As == x86.AADDL && p.From.Offset == 1:
960 p.As = x86.AINCL
961 p.From.Type = obj.TYPE_NONE
962 case p.As == x86.AADDL && p.From.Offset == -1:
963 p.As = x86.ADECL
964 p.From.Type = obj.TYPE_NONE
965 }
966 memIdx(&p.To, v)
967 ssagen.AddAux2(&p.To, v, sc.Off64())
968 case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
969 ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
970 ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS, ssa.OpAMD64VPBROADCASTB, ssa.OpAMD64PMOVMSKB:
971 opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
972 case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
973 r := v.Reg()
		// Break false dependency on destination register.
975 opregreg(s, x86.AXORPS, r, r)
976 opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
977 case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
978 var p *obj.Prog
979 switch v.Op {
980 case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
981 p = s.Prog(x86.AMOVQ)
982 case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
983 p = s.Prog(x86.AMOVL)
984 }
985 p.From.Type = obj.TYPE_REG
986 p.From.Reg = v.Args[0].Reg()
987 p.To.Type = obj.TYPE_REG
988 p.To.Reg = v.Reg()
989 case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
990 ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
991 ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
992 ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
993 ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
994 p := s.Prog(v.Op.Asm())
995 p.From.Type = obj.TYPE_MEM
996 p.From.Reg = v.Args[1].Reg()
997 ssagen.AddAux(&p.From, v)
998 p.To.Type = obj.TYPE_REG
999 p.To.Reg = v.Reg()
1000 case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
1001 ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
1002 ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
1003 ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
1004 ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
1005 ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
1006 ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
1007 ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
1008 ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
1009 p := s.Prog(v.Op.Asm())
1010
1011 r, i := v.Args[1].Reg(), v.Args[2].Reg()
1012 p.From.Type = obj.TYPE_MEM
1013 p.From.Scale = v.Op.Scale()
1014 if p.From.Scale == 1 && i == x86.REG_SP {
1015 r, i = i, r
1016 }
1017 p.From.Reg = r
1018 p.From.Index = i
1019
1020 ssagen.AddAux(&p.From, v)
1021 p.To.Type = obj.TYPE_REG
1022 p.To.Reg = v.Reg()
1023
1024 case ssa.OpAMD64LoweredZero:
1025 if s.ABI != obj.ABIInternal {
			// zero X15 manually
1027 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
1028 }
1029 ptrReg := v.Args[0].Reg()
1030 n := v.AuxInt
1031 if n < 16 {
1032 v.Fatalf("Zero too small %d", n)
1033 }
1034 zero16 := func(off int64) {
1035 zero16(s, ptrReg, off)
1036 }

		// Zero the buffer 16 bytes at a time.
1039 var off int64
1040 for n >= 16 {
1041 zero16(off)
1042 off += 16
1043 n -= 16
1044 }
1045 if n != 0 {
			// Zero the last 16 bytes, overlapping with the previously
			// zeroed bytes if n is not a multiple of 16.
1048 zero16(off + n - 16)
1049 }
1050
1051 case ssa.OpAMD64LoweredZeroLoop:
1052 if s.ABI != obj.ABIInternal {
			// zero X15 manually
1054 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
1055 }
1056 ptrReg := v.Args[0].Reg()
1057 countReg := v.RegTmp()
1058 n := v.AuxInt
1059 loopSize := int64(64)
1060 if n < 3*loopSize {
			// A loop count of 0 won't work, a count of 1 is useless, and at
			// 2 iterations the loop is roughly a code-size tie with
			// straight-line code, so sizes below 3*loopSize are not expected here.
1069 v.Fatalf("ZeroLoop size too small %d", n)
1070 }
1071 zero16 := func(off int64) {
1072 zero16(s, ptrReg, off)
1073 }

		// Put the loop iteration count in a register.
1077 p := s.Prog(x86.AMOVL)
1078 p.From.Type = obj.TYPE_CONST
1079 p.From.Offset = n / loopSize
1080 p.To.Type = obj.TYPE_REG
1081 p.To.Reg = countReg
1082 cntInit := p

		// Loop body: zero loopSize bytes starting at ptrReg.
1085 for i := range loopSize / 16 {
1086 zero16(i * 16)
1087 }
1088
1089 p = s.Prog(x86.AADDQ)
1090 p.From.Type = obj.TYPE_CONST
1091 p.From.Offset = loopSize
1092 p.To.Type = obj.TYPE_REG
1093 p.To.Reg = ptrReg
1094
1095 p = s.Prog(x86.ADECL)
1096 p.To.Type = obj.TYPE_REG
1097 p.To.Reg = countReg

		// If countReg != 0, jump back to the top of the loop body.
1100 p = s.Prog(x86.AJNE)
1101 p.To.Type = obj.TYPE_BRANCH
1102 p.To.SetTarget(cntInit.Link)

		// Multiples of loopSize are now done; handle the remainder below.
1105 n %= loopSize

		// Zero any remaining tail, 16 bytes at a time.
1108 var off int64
1109 for n >= 16 {
1110 zero16(off)
1111 off += 16
1112 n -= 16
1113 }
1114 if n != 0 {
			// Zero the last 16 bytes, overlapping with the previous chunk
			// if n is not a multiple of 16.
1117 zero16(off + n - 16)
1118 }
1119
1120 case ssa.OpAMD64LoweredMove:
1121 dstReg := v.Args[0].Reg()
1122 srcReg := v.Args[1].Reg()
1123 if dstReg == srcReg {
1124 break
1125 }
1126 tmpReg := int16(x86.REG_X14)
1127 n := v.AuxInt
1128 if n < 16 {
1129 v.Fatalf("Move too small %d", n)
1130 }
1131
1132 move16 := func(off int64) {
1133 move16(s, srcReg, dstReg, tmpReg, off)
1134 }

		// Copy the buffer 16 bytes at a time.
1137 var off int64
1138 for n >= 16 {
1139 move16(off)
1140 off += 16
1141 n -= 16
1142 }
1143 if n != 0 {
			// Copy the last 16 bytes, overlapping with the previously
			// copied bytes if n is not a multiple of 16.
1146 move16(off + n - 16)
1147 }
1148
1149 case ssa.OpAMD64LoweredMoveLoop:
1150 dstReg := v.Args[0].Reg()
1151 srcReg := v.Args[1].Reg()
1152 if dstReg == srcReg {
1153 break
1154 }
1155 countReg := v.RegTmp()
1156 tmpReg := int16(x86.REG_X14)
1157 n := v.AuxInt
1158 loopSize := int64(64)
1159 if n < 3*loopSize {
			// A loop count of 0 won't work, a count of 1 is useless, and at
			// 2 iterations the loop is roughly a code-size tie with
			// straight-line code, so sizes below 3*loopSize are not expected here.
			v.Fatalf("MoveLoop size too small %d", n)
1169 }
1170
1171 move16 := func(off int64) {
1172 move16(s, srcReg, dstReg, tmpReg, off)
1173 }

		// Put the loop iteration count in a register.
1177 p := s.Prog(x86.AMOVL)
1178 p.From.Type = obj.TYPE_CONST
1179 p.From.Offset = n / loopSize
1180 p.To.Type = obj.TYPE_REG
1181 p.To.Reg = countReg
1182 cntInit := p

		// Loop body: copy loopSize bytes from srcReg to dstReg.
1185 for i := range loopSize / 16 {
1186 move16(i * 16)
1187 }
1188
1189 p = s.Prog(x86.AADDQ)
1190 p.From.Type = obj.TYPE_CONST
1191 p.From.Offset = loopSize
1192 p.To.Type = obj.TYPE_REG
1193 p.To.Reg = srcReg
1194
1195 p = s.Prog(x86.AADDQ)
1196 p.From.Type = obj.TYPE_CONST
1197 p.From.Offset = loopSize
1198 p.To.Type = obj.TYPE_REG
1199 p.To.Reg = dstReg
1200
1201 p = s.Prog(x86.ADECL)
1202 p.To.Type = obj.TYPE_REG
1203 p.To.Reg = countReg

		// If countReg != 0, jump back to the top of the loop body.
1206 p = s.Prog(x86.AJNE)
1207 p.To.Type = obj.TYPE_BRANCH
1208 p.To.SetTarget(cntInit.Link)

		// Multiples of loopSize are now done; handle the remainder below.
1211 n %= loopSize

		// Copy any remaining tail, 16 bytes at a time.
1214 var off int64
1215 for n >= 16 {
1216 move16(off)
1217 off += 16
1218 n -= 16
1219 }
1220 if n != 0 {
			// Copy the last 16 bytes, overlapping with the previous chunk
			// if n is not a multiple of 16.
1222 move16(off + n - 16)
1223 }
1224
1225 case ssa.OpCopy:
1226 if v.Type.IsMemory() {
1227 return
1228 }
1229 x := v.Args[0].Reg()
1230 y := v.Reg()
1231 if v.Type.IsSIMD() {
1232 x = simdOrMaskReg(v.Args[0])
1233 y = simdOrMaskReg(v)
1234 }
1235 if x != y {
1236 opregreg(s, moveByRegsWidth(y, x, v.Type.Size()), y, x)
1237 }
1238 case ssa.OpLoadReg:
1239 if v.Type.IsFlags() {
1240 v.Fatalf("load flags not implemented: %v", v.LongString())
1241 return
1242 }
1243 r := v.Reg()
1244 p := s.Prog(loadByRegWidth(r, v.Type.Size()))
1245 ssagen.AddrAuto(&p.From, v.Args[0])
1246 p.To.Type = obj.TYPE_REG
1247 if v.Type.IsSIMD() {
1248 r = simdOrMaskReg(v)
1249 }
1250 p.To.Reg = r
1251
1252 case ssa.OpStoreReg:
1253 if v.Type.IsFlags() {
1254 v.Fatalf("store flags not implemented: %v", v.LongString())
1255 return
1256 }
1257 r := v.Args[0].Reg()
1258 if v.Type.IsSIMD() {
1259 r = simdOrMaskReg(v.Args[0])
1260 }
1261 p := s.Prog(storeByRegWidth(r, v.Type.Size()))
1262 p.From.Type = obj.TYPE_REG
1263 p.From.Reg = r
1264 ssagen.AddrAuto(&p.To, v)
1265 case ssa.OpAMD64LoweredHasCPUFeature:
1266 p := s.Prog(x86.AMOVBLZX)
1267 p.From.Type = obj.TYPE_MEM
1268 ssagen.AddAux(&p.From, v)
1269 p.To.Type = obj.TYPE_REG
1270 p.To.Reg = v.Reg()
1271 case ssa.OpArgIntReg, ssa.OpArgFloatReg:
		// The assembler needs to wrap the entry safepoint/stack growth code
		// with spill/unspill. The loop only runs once.
1274 for _, ap := range v.Block.Func.RegArgs {
			// Pass the spill/unspill information along to the assembler,
			// offset by the size of the return PC pushed on the stack.
1276 addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
1277 reg := ap.Reg
1278 t := ap.Type
1279 sz := t.Size()
1280 if t.IsSIMD() {
1281 reg = simdRegBySize(reg, sz)
1282 }
1283 s.FuncInfo().AddSpill(
1284 obj.RegSpill{Reg: reg, Addr: addr, Unspill: loadByRegWidth(reg, sz), Spill: storeByRegWidth(reg, sz)})
1285 }
1286 v.Block.Func.RegArgs = nil
1287 ssagen.CheckArgReg(v)
1288 case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
1290 ssagen.CheckLoweredGetClosurePtr(v)
1291 case ssa.OpAMD64LoweredGetG:
1292 if s.ABI == obj.ABIInternal {
1293 v.Fatalf("LoweredGetG should not appear in ABIInternal")
1294 }
1295 r := v.Reg()
1296 getgFromTLS(s, r)
1297 case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
1298 if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
			// zero X15 when entering ABIInternal from ABI0
1300 zeroX15(s)
			// set G register from TLS
1302 getgFromTLS(s, x86.REG_R14)
1303 }
1304 if v.Op == ssa.OpAMD64CALLtail {
1305 s.TailCall(v)
1306 break
1307 }
1308 s.Call(v)
1309 if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
			// zero X15 when returning from ABI0 to ABIInternal
1311 zeroX15(s)
			// set G register from TLS
1313 getgFromTLS(s, x86.REG_R14)
1314 }
1315 case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
1316 s.Call(v)
1317
1318 case ssa.OpAMD64LoweredGetCallerPC:
1319 p := s.Prog(x86.AMOVQ)
1320 p.From.Type = obj.TYPE_MEM
1321 p.From.Offset = -8
1322 p.From.Name = obj.NAME_PARAM
1323 p.To.Type = obj.TYPE_REG
1324 p.To.Reg = v.Reg()
1325
1326 case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
1328 mov := x86.AMOVQ
1329 if types.PtrSize == 4 {
1330 mov = x86.AMOVL
1331 }
1332 p := s.Prog(mov)
1333 p.From.Type = obj.TYPE_ADDR
1334 p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
1335 p.From.Name = obj.NAME_PARAM
1336 p.To.Type = obj.TYPE_REG
1337 p.To.Reg = v.Reg()
1338
1339 case ssa.OpAMD64LoweredWB:
1340 p := s.Prog(obj.ACALL)
1341 p.To.Type = obj.TYPE_MEM
1342 p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
1344 p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
1345
1346 case ssa.OpAMD64LoweredPanicBoundsRR, ssa.OpAMD64LoweredPanicBoundsRC, ssa.OpAMD64LoweredPanicBoundsCR, ssa.OpAMD64LoweredPanicBoundsCC:
		// Compute the constant we put in the PCData entry for this call.
1348 code, signed := ssa.BoundsKind(v.AuxInt).Code()
1349 xIsReg := false
1350 yIsReg := false
1351 xVal := 0
1352 yVal := 0
1353 switch v.Op {
1354 case ssa.OpAMD64LoweredPanicBoundsRR:
1355 xIsReg = true
1356 xVal = int(v.Args[0].Reg() - x86.REG_AX)
1357 yIsReg = true
1358 yVal = int(v.Args[1].Reg() - x86.REG_AX)
1359 case ssa.OpAMD64LoweredPanicBoundsRC:
1360 xIsReg = true
1361 xVal = int(v.Args[0].Reg() - x86.REG_AX)
1362 c := v.Aux.(ssa.PanicBoundsC).C
1363 if c >= 0 && c <= abi.BoundsMaxConst {
1364 yVal = int(c)
1365 } else {
1366
1367 yIsReg = true
1368 if yVal == xVal {
1369 yVal = 1
1370 }
1371 p := s.Prog(x86.AMOVQ)
1372 p.From.Type = obj.TYPE_CONST
1373 p.From.Offset = c
1374 p.To.Type = obj.TYPE_REG
1375 p.To.Reg = x86.REG_AX + int16(yVal)
1376 }
1377 case ssa.OpAMD64LoweredPanicBoundsCR:
1378 yIsReg = true
1379 yVal = int(v.Args[0].Reg() - x86.REG_AX)
1380 c := v.Aux.(ssa.PanicBoundsC).C
1381 if c >= 0 && c <= abi.BoundsMaxConst {
1382 xVal = int(c)
1383 } else {
1384
1385 xIsReg = true
1386 if xVal == yVal {
1387 xVal = 1
1388 }
1389 p := s.Prog(x86.AMOVQ)
1390 p.From.Type = obj.TYPE_CONST
1391 p.From.Offset = c
1392 p.To.Type = obj.TYPE_REG
1393 p.To.Reg = x86.REG_AX + int16(xVal)
1394 }
1395 case ssa.OpAMD64LoweredPanicBoundsCC:
1396 c := v.Aux.(ssa.PanicBoundsCC).Cx
1397 if c >= 0 && c <= abi.BoundsMaxConst {
1398 xVal = int(c)
1399 } else {
1400
1401 xIsReg = true
1402 p := s.Prog(x86.AMOVQ)
1403 p.From.Type = obj.TYPE_CONST
1404 p.From.Offset = c
1405 p.To.Type = obj.TYPE_REG
1406 p.To.Reg = x86.REG_AX + int16(xVal)
1407 }
1408 c = v.Aux.(ssa.PanicBoundsCC).Cy
1409 if c >= 0 && c <= abi.BoundsMaxConst {
1410 yVal = int(c)
1411 } else {
1412
1413 yIsReg = true
1414 yVal = 1
1415 p := s.Prog(x86.AMOVQ)
1416 p.From.Type = obj.TYPE_CONST
1417 p.From.Offset = c
1418 p.To.Type = obj.TYPE_REG
1419 p.To.Reg = x86.REG_AX + int16(yVal)
1420 }
1421 }
1422 c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
1423
1424 p := s.Prog(obj.APCDATA)
1425 p.From.SetConst(abi.PCDATA_PanicBounds)
1426 p.To.SetConst(int64(c))
1427 p = s.Prog(obj.ACALL)
1428 p.To.Type = obj.TYPE_MEM
1429 p.To.Name = obj.NAME_EXTERN
1430 p.To.Sym = ir.Syms.PanicBounds
1431
1432 case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
1433 ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
1434 ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
1435 p := s.Prog(v.Op.Asm())
1436 p.To.Type = obj.TYPE_REG
1437 p.To.Reg = v.Reg()
1438
1439 case ssa.OpAMD64NEGLflags:
1440 p := s.Prog(v.Op.Asm())
1441 p.To.Type = obj.TYPE_REG
1442 p.To.Reg = v.Reg0()
1443
1444 case ssa.OpAMD64ADDQconstflags, ssa.OpAMD64ADDLconstflags:
1445 p := s.Prog(v.Op.Asm())
1446 p.From.Type = obj.TYPE_CONST
1447 p.From.Offset = v.AuxInt
		// Note: the inc/dec instructions do not modify
		// the carry flag like add $1 / sub $1 do.
		// We currently never use the CF/OF flags from
		// these instructions, so that is ok.
1452 switch {
1453 case p.As == x86.AADDQ && p.From.Offset == 1:
1454 p.As = x86.AINCQ
1455 p.From.Type = obj.TYPE_NONE
1456 case p.As == x86.AADDQ && p.From.Offset == -1:
1457 p.As = x86.ADECQ
1458 p.From.Type = obj.TYPE_NONE
1459 case p.As == x86.AADDL && p.From.Offset == 1:
1460 p.As = x86.AINCL
1461 p.From.Type = obj.TYPE_NONE
1462 case p.As == x86.AADDL && p.From.Offset == -1:
1463 p.As = x86.ADECL
1464 p.From.Type = obj.TYPE_NONE
1465 }
1466 p.To.Type = obj.TYPE_REG
1467 p.To.Reg = v.Reg0()
1468
1469 case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
1470 p := s.Prog(v.Op.Asm())
1471 p.From.Type = obj.TYPE_REG
1472 p.From.Reg = v.Args[0].Reg()
1473 p.To.Type = obj.TYPE_REG
1474 switch v.Op {
1475 case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
1476 p.To.Reg = v.Reg0()
1477 case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
1478 p.To.Reg = v.Reg()
1479 }
1480 case ssa.OpAMD64LoweredRound32F, ssa.OpAMD64LoweredRound64F:
		// input is already rounded
1482 case ssa.OpAMD64ROUNDSD:
1483 p := s.Prog(v.Op.Asm())
1484 val := v.AuxInt
		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
1486 if val < 0 || val > 3 {
1487 v.Fatalf("Invalid rounding mode")
1488 }
1489 p.From.Offset = val
1490 p.From.Type = obj.TYPE_CONST
1491 p.AddRestSourceReg(v.Args[0].Reg())
1492 p.To.Type = obj.TYPE_REG
1493 p.To.Reg = v.Reg()
1494 case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL,
1495 ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL,
1496 ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL:
1497 if v.Args[0].Reg() != v.Reg() {
			// POPCNT/TZCNT/LZCNT have a false dependency on the destination
			// register on some Intel CPUs. Xor the register with itself to
			// break the dependency.
1501 opregreg(s, x86.AXORL, v.Reg(), v.Reg())
1502 }
1503 p := s.Prog(v.Op.Asm())
1504 p.From.Type = obj.TYPE_REG
1505 p.From.Reg = v.Args[0].Reg()
1506 p.To.Type = obj.TYPE_REG
1507 p.To.Reg = v.Reg()
1508
1509 case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
1510 ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
1511 ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
1512 ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
1513 ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
1514 ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
1515 ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
1516 ssa.OpAMD64SETO:
1517 p := s.Prog(v.Op.Asm())
1518 p.To.Type = obj.TYPE_REG
1519 p.To.Reg = v.Reg()
1520
1521 case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
1522 ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
1523 ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
1524 ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
1525 ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
1526 p := s.Prog(v.Op.Asm())
1527 p.To.Type = obj.TYPE_MEM
1528 p.To.Reg = v.Args[0].Reg()
1529 ssagen.AddAux(&p.To, v)
1530
1531 case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1,
1532 ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1,
1533 ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1,
1534 ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1,
1535 ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1:
1536 p := s.Prog(v.Op.Asm())
1537 memIdx(&p.To, v)
1538 ssagen.AddAux(&p.To, v)
1539
1540 case ssa.OpAMD64SETNEF:
1541 t := v.RegTmp()
1542 p := s.Prog(v.Op.Asm())
1543 p.To.Type = obj.TYPE_REG
1544 p.To.Reg = v.Reg()
1545 q := s.Prog(x86.ASETPS)
1546 q.To.Type = obj.TYPE_REG
1547 q.To.Reg = t
		// ORL avoids a partial register write and is smaller than ORQ.
1549 opregreg(s, x86.AORL, v.Reg(), t)
1550
1551 case ssa.OpAMD64SETEQF:
1552 t := v.RegTmp()
1553 p := s.Prog(v.Op.Asm())
1554 p.To.Type = obj.TYPE_REG
1555 p.To.Reg = v.Reg()
1556 q := s.Prog(x86.ASETPC)
1557 q.To.Type = obj.TYPE_REG
1558 q.To.Reg = t
		// ANDL avoids a partial register write and is smaller than ANDQ.
1560 opregreg(s, x86.AANDL, v.Reg(), t)
1561
1562 case ssa.OpAMD64InvertFlags:
1563 v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
1564 case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
1565 v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
1566 case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
1567 v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
1568 case ssa.OpAMD64REPSTOSQ:
1569 s.Prog(x86.AREP)
1570 s.Prog(x86.ASTOSQ)
1571 case ssa.OpAMD64REPMOVSQ:
1572 s.Prog(x86.AREP)
1573 s.Prog(x86.AMOVSQ)
1574 case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
1581 p := s.Prog(x86.ATESTB)
1582 p.From.Type = obj.TYPE_REG
1583 p.From.Reg = x86.REG_AX
1584 p.To.Type = obj.TYPE_MEM
1585 p.To.Reg = v.Args[0].Reg()
1586 if logopt.Enabled() {
1587 logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
1588 }
1589 if base.Debug.Nil != 0 && v.Pos.Line() > 1 {
1590 base.WarnfAt(v.Pos, "generated nil check")
1591 }
1592 case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
1593 p := s.Prog(v.Op.Asm())
1594 p.From.Type = obj.TYPE_MEM
1595 p.From.Reg = v.Args[0].Reg()
1596 ssagen.AddAux(&p.From, v)
1597 p.To.Type = obj.TYPE_REG
1598 p.To.Reg = v.Reg0()
1599 case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
1600 p := s.Prog(v.Op.Asm())
1601 p.From.Type = obj.TYPE_REG
1602 p.From.Reg = v.Reg0()
1603 p.To.Type = obj.TYPE_MEM
1604 p.To.Reg = v.Args[1].Reg()
1605 ssagen.AddAux(&p.To, v)
1606 case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
1607 s.Prog(x86.ALOCK)
1608 p := s.Prog(v.Op.Asm())
1609 p.From.Type = obj.TYPE_REG
1610 p.From.Reg = v.Reg0()
1611 p.To.Type = obj.TYPE_MEM
1612 p.To.Reg = v.Args[1].Reg()
1613 ssagen.AddAux(&p.To, v)
1614 case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
1615 if v.Args[1].Reg() != x86.REG_AX {
1616 v.Fatalf("input[1] not in AX %s", v.LongString())
1617 }
1618 s.Prog(x86.ALOCK)
1619 p := s.Prog(v.Op.Asm())
1620 p.From.Type = obj.TYPE_REG
1621 p.From.Reg = v.Args[2].Reg()
1622 p.To.Type = obj.TYPE_MEM
1623 p.To.Reg = v.Args[0].Reg()
1624 ssagen.AddAux(&p.To, v)
1625 p = s.Prog(x86.ASETEQ)
1626 p.To.Type = obj.TYPE_REG
1627 p.To.Reg = v.Reg0()
1628 case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ANDQlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock, ssa.OpAMD64ORQlock:
		// Atomic memory operations that don't need to return the old value.
1630 s.Prog(x86.ALOCK)
1631 p := s.Prog(v.Op.Asm())
1632 p.From.Type = obj.TYPE_REG
1633 p.From.Reg = v.Args[1].Reg()
1634 p.To.Type = obj.TYPE_MEM
1635 p.To.Reg = v.Args[0].Reg()
1636 ssagen.AddAux(&p.To, v)
1637 case ssa.OpAMD64LoweredAtomicAnd64, ssa.OpAMD64LoweredAtomicOr64, ssa.OpAMD64LoweredAtomicAnd32, ssa.OpAMD64LoweredAtomicOr32:
		// Atomic memory operations that need to return the old value.
		// We need to do these with compare-and-exchange to get access to the old value.
		// loop:
		// MOVQ mask, tmp
		// MOVQ (addr), AX
		// ANDQ AX, tmp
		// LOCK CMPXCHGQ tmp, (addr) : note that AX is the implicit old value to compare against
		// JNE loop
		// : result in AX
1647 mov := x86.AMOVQ
1648 op := x86.AANDQ
1649 cmpxchg := x86.ACMPXCHGQ
1650 switch v.Op {
1651 case ssa.OpAMD64LoweredAtomicOr64:
1652 op = x86.AORQ
1653 case ssa.OpAMD64LoweredAtomicAnd32:
1654 mov = x86.AMOVL
1655 op = x86.AANDL
1656 cmpxchg = x86.ACMPXCHGL
1657 case ssa.OpAMD64LoweredAtomicOr32:
1658 mov = x86.AMOVL
1659 op = x86.AORL
1660 cmpxchg = x86.ACMPXCHGL
1661 }
1662 addr := v.Args[0].Reg()
1663 mask := v.Args[1].Reg()
1664 tmp := v.RegTmp()
1665 p1 := s.Prog(mov)
1666 p1.From.Type = obj.TYPE_REG
1667 p1.From.Reg = mask
1668 p1.To.Type = obj.TYPE_REG
1669 p1.To.Reg = tmp
1670 p2 := s.Prog(mov)
1671 p2.From.Type = obj.TYPE_MEM
1672 p2.From.Reg = addr
1673 ssagen.AddAux(&p2.From, v)
1674 p2.To.Type = obj.TYPE_REG
1675 p2.To.Reg = x86.REG_AX
1676 p3 := s.Prog(op)
1677 p3.From.Type = obj.TYPE_REG
1678 p3.From.Reg = x86.REG_AX
1679 p3.To.Type = obj.TYPE_REG
1680 p3.To.Reg = tmp
1681 s.Prog(x86.ALOCK)
1682 p5 := s.Prog(cmpxchg)
1683 p5.From.Type = obj.TYPE_REG
1684 p5.From.Reg = tmp
1685 p5.To.Type = obj.TYPE_MEM
1686 p5.To.Reg = addr
1687 ssagen.AddAux(&p5.To, v)
1688 p6 := s.Prog(x86.AJNE)
1689 p6.To.Type = obj.TYPE_BRANCH
1690 p6.To.SetTarget(p1)
1691 case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA:
1692 p := s.Prog(v.Op.Asm())
1693 p.From.Type = obj.TYPE_MEM
1694 p.From.Reg = v.Args[0].Reg()
1695 case ssa.OpClobber:
1696 p := s.Prog(x86.AMOVL)
1697 p.From.Type = obj.TYPE_CONST
1698 p.From.Offset = 0xdeaddead
1699 p.To.Type = obj.TYPE_MEM
1700 p.To.Reg = x86.REG_SP
1701 ssagen.AddAux(&p.To, v)
1702 p = s.Prog(x86.AMOVL)
1703 p.From.Type = obj.TYPE_CONST
1704 p.From.Offset = 0xdeaddead
1705 p.To.Type = obj.TYPE_MEM
1706 p.To.Reg = x86.REG_SP
1707 ssagen.AddAux(&p.To, v)
1708 p.To.Offset += 4
1709 case ssa.OpClobberReg:
1710 x := uint64(0xdeaddeaddeaddead)
1711 p := s.Prog(x86.AMOVQ)
1712 p.From.Type = obj.TYPE_CONST
1713 p.From.Offset = int64(x)
1714 p.To.Type = obj.TYPE_REG
1715 p.To.Reg = v.Reg()
1716
1717
1718 case ssa.OpAMD64VZEROUPPER, ssa.OpAMD64VZEROALL:
1719 s.Prog(v.Op.Asm())
1720
1721 case ssa.OpAMD64Zero128, ssa.OpAMD64Zero256, ssa.OpAMD64Zero512:
1722
1723 case ssa.OpAMD64VMOVSSf2v, ssa.OpAMD64VMOVSDf2v:
		// Merge the scalar into X15 (always zero under ABIInternal),
		// so the upper lanes of the resulting vector are zero.
1725 p := s.Prog(v.Op.Asm())
1726 p.From.Type = obj.TYPE_REG
1727 p.From.Reg = v.Args[0].Reg()
1728 p.AddRestSourceReg(x86.REG_X15)
1729 p.To.Type = obj.TYPE_REG
1730 p.To.Reg = simdReg(v)
1731
1732 case ssa.OpAMD64VMOVQload, ssa.OpAMD64VMOVDload,
1733 ssa.OpAMD64VMOVSSload, ssa.OpAMD64VMOVSDload:
1734 p := s.Prog(v.Op.Asm())
1735 p.From.Type = obj.TYPE_MEM
1736 p.From.Reg = v.Args[0].Reg()
1737 ssagen.AddAux(&p.From, v)
1738 p.To.Type = obj.TYPE_REG
1739 p.To.Reg = simdReg(v)
1740
1741 case ssa.OpAMD64VMOVSSconst, ssa.OpAMD64VMOVSDconst:
1742
1743 x := simdReg(v)
1744 p := s.Prog(v.Op.Asm())
1745 p.From.Type = obj.TYPE_FCONST
1746 p.From.Val = math.Float64frombits(uint64(v.AuxInt))
1747 p.To.Type = obj.TYPE_REG
1748 p.To.Reg = x
1749
1750 case ssa.OpAMD64VMOVD, ssa.OpAMD64VMOVQ:
1751
1752 p := s.Prog(v.Op.Asm())
1753 p.From.Type = obj.TYPE_REG
1754 p.From.Reg = v.Args[0].Reg()
1755 p.To.Type = obj.TYPE_REG
1756 p.To.Reg = simdReg(v)
1757
1758 case ssa.OpAMD64VMOVDQUload128, ssa.OpAMD64VMOVDQUload256, ssa.OpAMD64VMOVDQUload512,
1759 ssa.OpAMD64KMOVBload, ssa.OpAMD64KMOVWload, ssa.OpAMD64KMOVDload, ssa.OpAMD64KMOVQload:
1760 p := s.Prog(v.Op.Asm())
1761 p.From.Type = obj.TYPE_MEM
1762 p.From.Reg = v.Args[0].Reg()
1763 ssagen.AddAux(&p.From, v)
1764 p.To.Type = obj.TYPE_REG
1765 p.To.Reg = simdOrMaskReg(v)
1766 case ssa.OpAMD64VMOVDQUstore128, ssa.OpAMD64VMOVDQUstore256, ssa.OpAMD64VMOVDQUstore512,
1767 ssa.OpAMD64KMOVBstore, ssa.OpAMD64KMOVWstore, ssa.OpAMD64KMOVDstore, ssa.OpAMD64KMOVQstore:
1768 p := s.Prog(v.Op.Asm())
1769 p.From.Type = obj.TYPE_REG
1770 p.From.Reg = simdOrMaskReg(v.Args[1])
1771 p.To.Type = obj.TYPE_MEM
1772 p.To.Reg = v.Args[0].Reg()
1773 ssagen.AddAux(&p.To, v)
1774
1775 case ssa.OpAMD64VPMASK32load128, ssa.OpAMD64VPMASK64load128, ssa.OpAMD64VPMASK32load256, ssa.OpAMD64VPMASK64load256:
1776 p := s.Prog(v.Op.Asm())
1777 p.From.Type = obj.TYPE_MEM
1778 p.From.Reg = v.Args[0].Reg()
1779 ssagen.AddAux(&p.From, v)
1780 p.To.Type = obj.TYPE_REG
1781 p.To.Reg = simdReg(v)
1782 p.AddRestSourceReg(simdReg(v.Args[1]))
1783
1784 case ssa.OpAMD64VPMASK32store128, ssa.OpAMD64VPMASK64store128, ssa.OpAMD64VPMASK32store256, ssa.OpAMD64VPMASK64store256:
1785 p := s.Prog(v.Op.Asm())
1786 p.From.Type = obj.TYPE_REG
1787 p.From.Reg = simdReg(v.Args[2])
1788 p.To.Type = obj.TYPE_MEM
1789 p.To.Reg = v.Args[0].Reg()
1790 ssagen.AddAux(&p.To, v)
1791 p.AddRestSourceReg(simdReg(v.Args[1]))
1792
1793 case ssa.OpAMD64VPMASK64load512, ssa.OpAMD64VPMASK32load512, ssa.OpAMD64VPMASK16load512, ssa.OpAMD64VPMASK8load512:
1794 p := s.Prog(v.Op.Asm())
1795 p.From.Type = obj.TYPE_MEM
1796 p.From.Reg = v.Args[0].Reg()
1797 ssagen.AddAux(&p.From, v)
1798 p.To.Type = obj.TYPE_REG
1799 p.To.Reg = simdReg(v)
1800 p.AddRestSourceReg(v.Args[1].Reg())
1801 x86.ParseSuffix(p, "Z")
1802
1803 case ssa.OpAMD64VPMASK64store512, ssa.OpAMD64VPMASK32store512, ssa.OpAMD64VPMASK16store512, ssa.OpAMD64VPMASK8store512:
1804 p := s.Prog(v.Op.Asm())
1805 p.From.Type = obj.TYPE_REG
1806 p.From.Reg = simdReg(v.Args[2])
1807 p.To.Type = obj.TYPE_MEM
1808 p.To.Reg = v.Args[0].Reg()
1809 ssagen.AddAux(&p.To, v)
1810 p.AddRestSourceReg(v.Args[1].Reg())
1811
1812 case ssa.OpAMD64VPMOVMToVec8x16,
1813 ssa.OpAMD64VPMOVMToVec8x32,
1814 ssa.OpAMD64VPMOVMToVec8x64,
1815 ssa.OpAMD64VPMOVMToVec16x8,
1816 ssa.OpAMD64VPMOVMToVec16x16,
1817 ssa.OpAMD64VPMOVMToVec16x32,
1818 ssa.OpAMD64VPMOVMToVec32x4,
1819 ssa.OpAMD64VPMOVMToVec32x8,
1820 ssa.OpAMD64VPMOVMToVec32x16,
1821 ssa.OpAMD64VPMOVMToVec64x2,
1822 ssa.OpAMD64VPMOVMToVec64x4,
1823 ssa.OpAMD64VPMOVMToVec64x8:
1824 p := s.Prog(v.Op.Asm())
1825 p.From.Type = obj.TYPE_REG
1826 p.From.Reg = v.Args[0].Reg()
1827 p.To.Type = obj.TYPE_REG
1828 p.To.Reg = simdReg(v)
1829
1830 case ssa.OpAMD64VPMOVVec8x16ToM,
1831 ssa.OpAMD64VPMOVVec8x32ToM,
1832 ssa.OpAMD64VPMOVVec8x64ToM,
1833 ssa.OpAMD64VPMOVVec16x8ToM,
1834 ssa.OpAMD64VPMOVVec16x16ToM,
1835 ssa.OpAMD64VPMOVVec16x32ToM,
1836 ssa.OpAMD64VPMOVVec32x4ToM,
1837 ssa.OpAMD64VPMOVVec32x8ToM,
1838 ssa.OpAMD64VPMOVVec32x16ToM,
1839 ssa.OpAMD64VPMOVVec64x2ToM,
1840 ssa.OpAMD64VPMOVVec64x4ToM,
1841 ssa.OpAMD64VPMOVVec64x8ToM:
1842 p := s.Prog(v.Op.Asm())
1843 p.From.Type = obj.TYPE_REG
1844 p.From.Reg = simdReg(v.Args[0])
1845 p.To.Type = obj.TYPE_REG
1846 p.To.Reg = v.Reg()
1847
1848 case ssa.OpAMD64KMOVQk, ssa.OpAMD64KMOVDk, ssa.OpAMD64KMOVWk, ssa.OpAMD64KMOVBk,
1849 ssa.OpAMD64KMOVQi, ssa.OpAMD64KMOVDi, ssa.OpAMD64KMOVWi, ssa.OpAMD64KMOVBi:
		// Moves between mask (K) registers and general-purpose registers.
1851 p := s.Prog(v.Op.Asm())
1852 p.From.Type = obj.TYPE_REG
1853 p.From.Reg = v.Args[0].Reg()
1854 p.To.Type = obj.TYPE_REG
1855 p.To.Reg = v.Reg()
1856 case ssa.OpAMD64VPTEST:
		// VPTEST only sets flags; both vector arguments are inputs.
1859 p := s.Prog(v.Op.Asm())
1860 p.From.Type = obj.TYPE_REG
1861 p.From.Reg = simdReg(v.Args[0])
1862 p.To.Type = obj.TYPE_REG
1863 p.To.Reg = simdReg(v.Args[1])
1864
1865 default:
1866 if !ssaGenSIMDValue(s, v) {
1867 v.Fatalf("genValue not implemented: %s", v.LongString())
1868 }
1869 }
1870 }
1871
1872
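// zeroX15 zeroes the X15 register, which ABIInternal requires to hold zero.
// Without the SIMD experiment only the low 128 bits matter, so XORPS suffices.
// With SIMD enabled the upper bits must be cleared as well, which needs the
// VEX-encoded VXORPS; if AVX is not guaranteed by GOAMD64, check at run time.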
1873 func zeroX15(s *ssagen.State) {
1874 if !buildcfg.Experiment.SIMD {
1875 opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
1876 return
1877 }
1878 vxorps := func(s *ssagen.State) {
1879 p := s.Prog(x86.AVXORPS)
1880 p.From.Type = obj.TYPE_REG
1881 p.From.Reg = x86.REG_X15
1882 p.AddRestSourceReg(x86.REG_X15)
1883 p.To.Type = obj.TYPE_REG
1884 p.To.Reg = x86.REG_X15
1885 }
1886 if buildcfg.GOAMD64 >= 3 {
1887 vxorps(s)
1888 return
1889 }

	// AVX is not guaranteed at this GOAMD64 level; check at run time
	// and use VXORPS if available, otherwise fall back to XORPS.
1891 p := s.Prog(x86.ACMPB)
1892 p.From.Type = obj.TYPE_MEM
1893 p.From.Name = obj.NAME_EXTERN
1894 p.From.Sym = ir.Syms.X86HasAVX
1895 p.To.Type = obj.TYPE_CONST
1896 p.To.Offset = 1
1897 jmp := s.Prog(x86.AJNE)
1898 jmp.To.Type = obj.TYPE_BRANCH
1899 vxorps(s)
1900 sse := opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
1901 jmp.To.SetTarget(sse)
1902 }
1903
1904
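// simdV11 emits a SIMD instruction with one vector input and one vector output.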
1905 func simdV11(s *ssagen.State, v *ssa.Value) *obj.Prog {
1906 p := s.Prog(v.Op.Asm())
1907 p.From.Type = obj.TYPE_REG
1908 p.From.Reg = simdReg(v.Args[0])
1909 p.To.Type = obj.TYPE_REG
1910 p.To.Reg = simdReg(v)
1911 return p
1912 }
1913
1914
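// simdV21 emits a SIMD instruction with two vector inputs and one vector output.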
1915 func simdV21(s *ssagen.State, v *ssa.Value) *obj.Prog {
1916 p := s.Prog(v.Op.Asm())
1917 p.From.Type = obj.TYPE_REG
	// Vector register operands follow a right-to-left order:
	// e.g. VPSUBD X1, X2, X3 means X3 = X2 - X1.
1920 p.From.Reg = simdReg(v.Args[1])
1921 p.AddRestSourceReg(simdReg(v.Args[0]))
1922 p.To.Type = obj.TYPE_REG
1923 p.To.Reg = simdReg(v)
1924 return p
1925 }
1926
1927
1928
1929
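// simdVfpv is like simdV21, but the second input is a scalar floating-point
// value rather than a SIMD-typed one.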
1930 func simdVfpv(s *ssagen.State, v *ssa.Value) *obj.Prog {
1931 p := s.Prog(v.Op.Asm())
1932 p.From.Type = obj.TYPE_REG
	// Args[1] is a scalar floating-point value rather than a SIMD-typed
	// one, so take its register directly instead of through simdReg.
1935 p.From.Reg = v.Args[1].Reg()
1936 p.AddRestSourceReg(simdReg(v.Args[0]))
1937 p.To.Type = obj.TYPE_REG
1938 p.To.Reg = simdReg(v)
1939 return p
1940 }
1941
1942
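// simdV2k emits a SIMD instruction with two vector inputs and a mask (K register) output.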
1943 func simdV2k(s *ssagen.State, v *ssa.Value) *obj.Prog {
1944 p := s.Prog(v.Op.Asm())
1945 p.From.Type = obj.TYPE_REG
1946 p.From.Reg = simdReg(v.Args[1])
1947 p.AddRestSourceReg(simdReg(v.Args[0]))
1948 p.To.Type = obj.TYPE_REG
1949 p.To.Reg = maskReg(v)
1950 return p
1951 }
1952
1953
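// simdV2kv emits a masked SIMD instruction with two vector inputs, a mask
// input, and a vector output.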
1954 func simdV2kv(s *ssagen.State, v *ssa.Value) *obj.Prog {
1955 p := s.Prog(v.Op.Asm())
1956 p.From.Type = obj.TYPE_REG
1957 p.From.Reg = simdReg(v.Args[1])
1958 p.AddRestSourceReg(simdReg(v.Args[0]))
	// The mask (K register) that predicates the operation is
	// always the last SSA argument.
1964 p.AddRestSourceReg(maskReg(v.Args[2]))
1965 p.To.Type = obj.TYPE_REG
1966 p.To.Reg = simdReg(v)
1967 return p
1968 }
1969
1970
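// simdV2kvResultInArg0 is like simdV2kv, but the result register must be the
// same as the first input (v.Args[0]).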
1971 func simdV2kvResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
1972 p := s.Prog(v.Op.Asm())
1973 p.From.Type = obj.TYPE_REG
1974 p.From.Reg = simdReg(v.Args[1])
	// Args[0] is both an input and the result (the result must be in
	// arg0); the predicating mask (K register) is the last SSA argument.
1980 p.AddRestSourceReg(maskReg(v.Args[2]))
1981 p.To.Type = obj.TYPE_REG
1982 p.To.Reg = simdReg(v)
1983 return p
1984 }
1985
1986
1987
1988
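// simdVfpkv is like simdV2kv, but the second input is a scalar floating-point
// value rather than a SIMD-typed one.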
1989 func simdVfpkv(s *ssagen.State, v *ssa.Value) *obj.Prog {
1990 p := s.Prog(v.Op.Asm())
1991 p.From.Type = obj.TYPE_REG
1992 p.From.Reg = v.Args[1].Reg()
1993 p.AddRestSourceReg(simdReg(v.Args[0]))
1994 p.AddRestSourceReg(maskReg(v.Args[2]))
1995 p.To.Type = obj.TYPE_REG
1996 p.To.Reg = simdReg(v)
1997 return p
1998 }
1999
2000
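// simdV2kk emits a masked SIMD instruction with two vector inputs, a mask
// input, and a mask output.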
2001 func simdV2kk(s *ssagen.State, v *ssa.Value) *obj.Prog {
2002 p := s.Prog(v.Op.Asm())
2003 p.From.Type = obj.TYPE_REG
2004 p.From.Reg = simdReg(v.Args[1])
2005 p.AddRestSourceReg(simdReg(v.Args[0]))
2006 p.AddRestSourceReg(maskReg(v.Args[2]))
2007 p.To.Type = obj.TYPE_REG
2008 p.To.Reg = maskReg(v)
2009 return p
2010 }
2011
2012
2013 func simdVkv(s *ssagen.State, v *ssa.Value) *obj.Prog {
2014 p := s.Prog(v.Op.Asm())
2015 p.From.Type = obj.TYPE_REG
2016 p.From.Reg = simdReg(v.Args[0])
2017 p.AddRestSourceReg(maskReg(v.Args[1]))
2018 p.To.Type = obj.TYPE_REG
2019 p.To.Reg = simdReg(v)
2020 return p
2021 }
2022
2023
2024 func simdV11Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2025 p := s.Prog(v.Op.Asm())
2026 p.From.Offset = int64(v.AuxUInt8())
2027 p.From.Type = obj.TYPE_CONST
2028 p.AddRestSourceReg(simdReg(v.Args[0]))
2029 p.To.Type = obj.TYPE_REG
2030 p.To.Reg = simdReg(v)
2031 return p
2032 }
2033
2034
2035 func simdVkvImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2036 p := s.Prog(v.Op.Asm())
2037 p.From.Offset = int64(v.AuxUInt8())
2038 p.From.Type = obj.TYPE_CONST
2039 p.AddRestSourceReg(simdReg(v.Args[0]))
2040 p.AddRestSourceReg(maskReg(v.Args[1]))
2041 p.To.Type = obj.TYPE_REG
2042 p.To.Reg = simdReg(v)
2043 return p
2044 }
2045
2046
2047 func simdV21Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2048 p := s.Prog(v.Op.Asm())
2049 p.From.Offset = int64(v.AuxUInt8())
2050 p.From.Type = obj.TYPE_CONST
2051 p.AddRestSourceReg(simdReg(v.Args[1]))
2052 p.AddRestSourceReg(simdReg(v.Args[0]))
2053 p.To.Type = obj.TYPE_REG
2054 p.To.Reg = simdReg(v)
2055 return p
2056 }
2057
2058
2059 func simdVgpvImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2060 p := s.Prog(v.Op.Asm())
2061 p.From.Offset = int64(v.AuxUInt8())
2062 p.From.Type = obj.TYPE_CONST
2063 p.AddRestSourceReg(v.Args[1].Reg())
2064 p.AddRestSourceReg(simdReg(v.Args[0]))
2065 p.To.Type = obj.TYPE_REG
2066 p.To.Reg = simdReg(v)
2067 return p
2068 }
2069
2070
2071 func simdV2kImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2072 p := s.Prog(v.Op.Asm())
2073 p.From.Offset = int64(v.AuxUInt8())
2074 p.From.Type = obj.TYPE_CONST
2075 p.AddRestSourceReg(simdReg(v.Args[1]))
2076 p.AddRestSourceReg(simdReg(v.Args[0]))
2077 p.To.Type = obj.TYPE_REG
2078 p.To.Reg = maskReg(v)
2079 return p
2080 }
2081
2082
2083 func simdV2kkImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2084 p := s.Prog(v.Op.Asm())
2085 p.From.Offset = int64(v.AuxUInt8())
2086 p.From.Type = obj.TYPE_CONST
2087 p.AddRestSourceReg(simdReg(v.Args[1]))
2088 p.AddRestSourceReg(simdReg(v.Args[0]))
2089 p.AddRestSourceReg(maskReg(v.Args[2]))
2090 p.To.Type = obj.TYPE_REG
2091 p.To.Reg = maskReg(v)
2092 return p
2093 }
2094
2095 func simdV2kvImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2096 p := s.Prog(v.Op.Asm())
2097 p.From.Offset = int64(v.AuxUInt8())
2098 p.From.Type = obj.TYPE_CONST
2099 p.AddRestSourceReg(simdReg(v.Args[1]))
2100 p.AddRestSourceReg(simdReg(v.Args[0]))
2101 p.AddRestSourceReg(maskReg(v.Args[2]))
2102 p.To.Type = obj.TYPE_REG
2103 p.To.Reg = simdReg(v)
2104 return p
2105 }
2106
2107
2108 func simdV31ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
2109 p := s.Prog(v.Op.Asm())
2110 p.From.Type = obj.TYPE_REG
2111 p.From.Reg = simdReg(v.Args[2])
2112 p.AddRestSourceReg(simdReg(v.Args[1]))
2113 p.To.Type = obj.TYPE_REG
2114 p.To.Reg = simdReg(v)
2115 return p
2116 }
2117
2118 func simdV31ResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2119 p := s.Prog(v.Op.Asm())
2120 p.From.Offset = int64(v.AuxUInt8())
2121 p.From.Type = obj.TYPE_CONST
2122
2123 p.AddRestSourceReg(simdReg(v.Args[2]))
2124 p.AddRestSourceReg(simdReg(v.Args[1]))
2125
2126 p.To.Type = obj.TYPE_REG
2127 p.To.Reg = simdReg(v)
2128 return p
2129 }
2130
2131
2132
2133
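// simdV31loadResultInArg0Imm8 emits an instruction taking an 8-bit immediate
// (from AuxValAndOff), a memory source addressed by Args[2], and the vector
// Args[1]; the result is written to Args[0]'s register, which is also v's.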
2134 func simdV31loadResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2135 sc := v.AuxValAndOff()
2136 p := s.Prog(v.Op.Asm())
2137
2138 p.From.Type = obj.TYPE_CONST
2139 p.From.Offset = sc.Val64()
2140
2141 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[2].Reg()}
2142 ssagen.AddAux2(&m, v, sc.Off64())
2143 p.AddRestSource(m)
2144
2145 p.AddRestSourceReg(simdReg(v.Args[1]))
// Destination register (the same register as Args[0] for ResultInArg0 ops).
p.To.Type = obj.TYPE_REG
p.To.Reg = simdReg(v)
2146 return p
2147 }
2148
2149
2150 func simdV3kvResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
2151 p := s.Prog(v.Op.Asm())
2152 p.From.Type = obj.TYPE_REG
2153 p.From.Reg = simdReg(v.Args[2])
2154 p.AddRestSourceReg(simdReg(v.Args[1]))
2155 p.AddRestSourceReg(maskReg(v.Args[3]))
2156 p.To.Type = obj.TYPE_REG
2157 p.To.Reg = simdReg(v)
2158 return p
2159 }
2160
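// simdVgpImm8 emits an instruction taking an 8-bit immediate and a vector
// source, writing its result to a general-purpose register (v.Reg()).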
2161 func simdVgpImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2162 p := s.Prog(v.Op.Asm())
2163 p.From.Offset = int64(v.AuxUInt8())
2164 p.From.Type = obj.TYPE_CONST
2165 p.AddRestSourceReg(simdReg(v.Args[0]))
2166 p.To.Type = obj.TYPE_REG
2167 p.To.Reg = v.Reg()
2168 return p
2169 }
2170
2171
2172 func simdV31(s *ssagen.State, v *ssa.Value) *obj.Prog {
2173 p := s.Prog(v.Op.Asm())
2174 p.From.Type = obj.TYPE_REG
2175 p.From.Reg = simdReg(v.Args[2])
2176 p.AddRestSourceReg(simdReg(v.Args[1]))
2177 p.AddRestSourceReg(simdReg(v.Args[0]))
2178 p.To.Type = obj.TYPE_REG
2179 p.To.Reg = simdReg(v)
2180 return p
2181 }
2182
2183
2184 func simdV3kv(s *ssagen.State, v *ssa.Value) *obj.Prog {
2185 p := s.Prog(v.Op.Asm())
2186 p.From.Type = obj.TYPE_REG
2187 p.From.Reg = simdReg(v.Args[2])
2188 p.AddRestSourceReg(simdReg(v.Args[1]))
2189 p.AddRestSourceReg(simdReg(v.Args[0]))
2190 p.AddRestSourceReg(maskReg(v.Args[3]))
2191 p.To.Type = obj.TYPE_REG
2192 p.To.Reg = simdReg(v)
2193 return p
2194 }
2195
2196
2197 func simdVkvload(s *ssagen.State, v *ssa.Value) *obj.Prog {
2198 p := s.Prog(v.Op.Asm())
2199 p.From.Type = obj.TYPE_MEM
2200 p.From.Reg = v.Args[0].Reg()
2201 ssagen.AddAux(&p.From, v)
2202 p.AddRestSourceReg(maskReg(v.Args[1]))
2203 p.To.Type = obj.TYPE_REG
2204 p.To.Reg = simdReg(v)
2205 return p
2206 }
2207
2208
2209 func simdV21load(s *ssagen.State, v *ssa.Value) *obj.Prog {
2210 p := s.Prog(v.Op.Asm())
2211 p.From.Type = obj.TYPE_MEM
2212 p.From.Reg = v.Args[1].Reg()
2213 ssagen.AddAux(&p.From, v)
2214 p.AddRestSourceReg(simdReg(v.Args[0]))
2215 p.To.Type = obj.TYPE_REG
2216 p.To.Reg = simdReg(v)
2217 return p
2218 }
2219
2220
2221 func simdV31loadResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
2222 p := s.Prog(v.Op.Asm())
2223 p.From.Type = obj.TYPE_MEM
2224 p.From.Reg = v.Args[2].Reg()
2225 ssagen.AddAux(&p.From, v)
2226 p.AddRestSourceReg(simdReg(v.Args[1]))
2227 p.To.Type = obj.TYPE_REG
2228 p.To.Reg = simdReg(v)
2229 return p
2230 }
2231
2232
2233 func simdV3kvloadResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
2234 p := s.Prog(v.Op.Asm())
2235 p.From.Type = obj.TYPE_MEM
2236 p.From.Reg = v.Args[2].Reg()
2237 ssagen.AddAux(&p.From, v)
2238 p.AddRestSourceReg(simdReg(v.Args[1]))
2239 p.AddRestSourceReg(maskReg(v.Args[3]))
2240 p.To.Type = obj.TYPE_REG
2241 p.To.Reg = simdReg(v)
2242 return p
2243 }
2244
2245
2246 func simdV2kvload(s *ssagen.State, v *ssa.Value) *obj.Prog {
2247 p := s.Prog(v.Op.Asm())
2248 p.From.Type = obj.TYPE_MEM
2249 p.From.Reg = v.Args[1].Reg()
2250 ssagen.AddAux(&p.From, v)
2251 p.AddRestSourceReg(simdReg(v.Args[0]))
2252 p.AddRestSourceReg(maskReg(v.Args[2]))
2253 p.To.Type = obj.TYPE_REG
2254 p.To.Reg = simdReg(v)
2255 return p
2256 }
2257
2258
2259 func simdV2kload(s *ssagen.State, v *ssa.Value) *obj.Prog {
2260 p := s.Prog(v.Op.Asm())
2261 p.From.Type = obj.TYPE_MEM
2262 p.From.Reg = v.Args[1].Reg()
2263 ssagen.AddAux(&p.From, v)
2264 p.AddRestSourceReg(simdReg(v.Args[0]))
2265 p.To.Type = obj.TYPE_REG
2266 p.To.Reg = maskReg(v)
2267 return p
2268 }
2269
2270
2271 func simdV11load(s *ssagen.State, v *ssa.Value) *obj.Prog {
2272 p := s.Prog(v.Op.Asm())
2273 p.From.Type = obj.TYPE_MEM
2274 p.From.Reg = v.Args[0].Reg()
2275 ssagen.AddAux(&p.From, v)
2276 p.To.Type = obj.TYPE_REG
2277 p.To.Reg = simdReg(v)
2278 return p
2279 }
2280
2281
2282 func simdV11loadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2283 sc := v.AuxValAndOff()
2284 p := s.Prog(v.Op.Asm())
2285 p.From.Type = obj.TYPE_CONST
2286 p.From.Offset = sc.Val64()
2287 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
2288 ssagen.AddAux2(&m, v, sc.Off64())
2289 p.AddRestSource(m)
2290 p.To.Type = obj.TYPE_REG
2291 p.To.Reg = simdReg(v)
2292 return p
2293 }
2294
2295
2296 func simdVkvloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2297 sc := v.AuxValAndOff()
2298 p := s.Prog(v.Op.Asm())
2299 p.From.Type = obj.TYPE_CONST
2300 p.From.Offset = sc.Val64()
2301 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
2302 ssagen.AddAux2(&m, v, sc.Off64())
2303 p.AddRestSource(m)
2304 p.AddRestSourceReg(maskReg(v.Args[1]))
2305 p.To.Type = obj.TYPE_REG
2306 p.To.Reg = simdReg(v)
2307 return p
2308 }
2309
2310
2311 func simdV21loadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2312 sc := v.AuxValAndOff()
2313 p := s.Prog(v.Op.Asm())
2314 p.From.Type = obj.TYPE_CONST
2315 p.From.Offset = sc.Val64()
2316 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
2317 ssagen.AddAux2(&m, v, sc.Off64())
2318 p.AddRestSource(m)
2319 p.AddRestSourceReg(simdReg(v.Args[0]))
2320 p.To.Type = obj.TYPE_REG
2321 p.To.Reg = simdReg(v)
2322 return p
2323 }
2324
2325
2326 func simdV2kloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2327 sc := v.AuxValAndOff()
2328 p := s.Prog(v.Op.Asm())
2329 p.From.Type = obj.TYPE_CONST
2330 p.From.Offset = sc.Val64()
2331 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
2332 ssagen.AddAux2(&m, v, sc.Off64())
2333 p.AddRestSource(m)
2334 p.AddRestSourceReg(simdReg(v.Args[0]))
2335 p.To.Type = obj.TYPE_REG
2336 p.To.Reg = maskReg(v)
2337 return p
2338 }
2339
2340
2341 func simdV2kkloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2342 sc := v.AuxValAndOff()
2343 p := s.Prog(v.Op.Asm())
2344 p.From.Type = obj.TYPE_CONST
2345 p.From.Offset = sc.Val64()
2346 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
2347 ssagen.AddAux2(&m, v, sc.Off64())
2348 p.AddRestSource(m)
2349 p.AddRestSourceReg(simdReg(v.Args[0]))
2350 p.AddRestSourceReg(maskReg(v.Args[2]))
2351 p.To.Type = obj.TYPE_REG
2352 p.To.Reg = maskReg(v)
2353 return p
2354 }
2355
2356
2357 func simdV2kvloadImm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2358 sc := v.AuxValAndOff()
2359 p := s.Prog(v.Op.Asm())
2360 p.From.Type = obj.TYPE_CONST
2361 p.From.Offset = sc.Val64()
2362 m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[1].Reg()}
2363 ssagen.AddAux2(&m, v, sc.Off64())
2364 p.AddRestSource(m)
2365 p.AddRestSourceReg(simdReg(v.Args[0]))
2366 p.AddRestSourceReg(maskReg(v.Args[2]))
2367 p.To.Type = obj.TYPE_REG
2368 p.To.Reg = simdReg(v)
2369 return p
2370 }
2371
2372
2373 func simdV21ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
2374 p := s.Prog(v.Op.Asm())
2375 p.From.Type = obj.TYPE_REG
2376 p.From.Reg = simdReg(v.Args[1])
2377 p.To.Type = obj.TYPE_REG
2378 p.To.Reg = simdReg(v)
2379 return p
2380 }
2381
2382
2383 func simdV21ResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
2384 p := s.Prog(v.Op.Asm())
2385 p.From.Offset = int64(v.AuxUInt8())
2386 p.From.Type = obj.TYPE_CONST
2387 p.AddRestSourceReg(simdReg(v.Args[1]))
2388 p.To.Type = obj.TYPE_REG
2389 p.To.Reg = simdReg(v)
2390 return p
2391 }
2392
2393
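// simdV31x0AtIn2ResultInArg0 currently lowers exactly like
// simdV31ResultInArg0; the separate name only records the intended operand
// layout.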
2394 func simdV31x0AtIn2ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
2395 return simdV31ResultInArg0(s, v)
2396 }
2397
2398 var blockJump = [...]struct {
2399 asm, invasm obj.As
2400 }{
2401 ssa.BlockAMD64EQ: {x86.AJEQ, x86.AJNE},
2402 ssa.BlockAMD64NE: {x86.AJNE, x86.AJEQ},
2403 ssa.BlockAMD64LT: {x86.AJLT, x86.AJGE},
2404 ssa.BlockAMD64GE: {x86.AJGE, x86.AJLT},
2405 ssa.BlockAMD64LE: {x86.AJLE, x86.AJGT},
2406 ssa.BlockAMD64GT: {x86.AJGT, x86.AJLE},
2407 ssa.BlockAMD64OS: {x86.AJOS, x86.AJOC},
2408 ssa.BlockAMD64OC: {x86.AJOC, x86.AJOS},
2409 ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
2410 ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
2411 ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
2412 ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
2413 ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
2414 ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
2415 }
2416
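// eqfJumps and nefJumps describe the two-jump sequences used for floating-point
// equality and inequality blocks. UCOMIS* sets ZF=1 both for "equal" and for
// "unordered" (NaN) and sets PF=1 only for "unordered", so one conditional jump
// is not enough: for EQF the false successor is reached on JNE or JPS, and for
// NEF the true successor is reached on JNE or JPS. The two rows choose the
// variant based on which successor is laid out next (see ssagen.State.CombJump).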
2417 var eqfJumps = [2][2]ssagen.IndexJump{
2418 {{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}},
2419 {{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}},
2420 }
2421 var nefJumps = [2][2]ssagen.IndexJump{
2422 {{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}},
2423 {{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}},
2424 }
2425
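// ssaGenBlock emits the control flow that terminates block b. next is the block
// laid out immediately after b, so a jump to it can be omitted; for example, a
// BlockAMD64LT whose true successor is laid out next emits only the inverted
// JGE to the false successor.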
2426 func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
2427 switch b.Kind {
2428 case ssa.BlockPlain, ssa.BlockDefer:
2429 if b.Succs[0].Block() != next {
2430 p := s.Prog(obj.AJMP)
2431 p.To.Type = obj.TYPE_BRANCH
2432 s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
2433 }
2434 case ssa.BlockExit, ssa.BlockRetJmp:
2435 case ssa.BlockRet:
2436 s.Prog(obj.ARET)
2437
2438 case ssa.BlockAMD64EQF:
2439 s.CombJump(b, next, &eqfJumps)
2440
2441 case ssa.BlockAMD64NEF:
2442 s.CombJump(b, next, &nefJumps)
2443
2444 case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
2445 ssa.BlockAMD64LT, ssa.BlockAMD64GE,
2446 ssa.BlockAMD64LE, ssa.BlockAMD64GT,
2447 ssa.BlockAMD64OS, ssa.BlockAMD64OC,
2448 ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
2449 ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
2450 jmp := blockJump[b.Kind]
2451 switch next {
2452 case b.Succs[0].Block():
2453 s.Br(jmp.invasm, b.Succs[1].Block())
2454 case b.Succs[1].Block():
2455 s.Br(jmp.asm, b.Succs[0].Block())
2456 default:
2457 if b.Likely != ssa.BranchUnlikely {
2458 s.Br(jmp.asm, b.Succs[0].Block())
2459 s.Br(obj.AJMP, b.Succs[1].Block())
2460 } else {
2461 s.Br(jmp.invasm, b.Succs[1].Block())
2462 s.Br(obj.AJMP, b.Succs[0].Block())
2463 }
2464 }
2465
2466 case ssa.BlockAMD64JUMPTABLE:
2467
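// Indirect jump through the table: JMP (table)(index*8), with the table base
// in Controls[1] and the index in Controls[0].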
2468 p := s.Prog(obj.AJMP)
2469 p.To.Type = obj.TYPE_MEM
2470 p.To.Reg = b.Controls[1].Reg()
2471 p.To.Index = b.Controls[0].Reg()
2472 p.To.Scale = 8
2473
2474 s.JumpTables = append(s.JumpTables, b)
2475
2476 default:
2477 b.Fatalf("branch not implemented: %s", b.LongString())
2478 }
2479 }
2480
2481 func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2482 p := s.Prog(loadByRegWidth(reg, t.Size()))
2483 p.From.Type = obj.TYPE_MEM
2484 p.From.Name = obj.NAME_AUTO
2485 p.From.Sym = n.Linksym()
2486 p.From.Offset = n.FrameOffset() + off
2487 p.To.Type = obj.TYPE_REG
2488 p.To.Reg = reg
2489 return p
2490 }
2491
2492 func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
2493 p = pp.Append(p, storeByRegWidth(reg, t.Size()), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
2494 p.To.Name = obj.NAME_PARAM
2495 p.To.Sym = n.Linksym()
2496 p.Pos = p.Pos.WithNotStmt()
2497 return p
2498 }
2499
2500
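// zero16 zeroes 16 bytes of memory at off(reg) by storing X15, the fixed zero
// register, with MOVUPS.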
2501 func zero16(s *ssagen.State, reg int16, off int64) {
2502
2503 p := s.Prog(x86.AMOVUPS)
2504 p.From.Type = obj.TYPE_REG
2505 p.From.Reg = x86.REG_X15
2506 p.To.Type = obj.TYPE_MEM
2507 p.To.Reg = reg
2508 p.To.Offset = off
2509 }
2510
2511
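// move16 copies 16 bytes from off(src) to off(dst), staging the data through
// the SIMD register tmp with a MOVUPS load/store pair.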
2512 func move16(s *ssagen.State, src, dst, tmp int16, off int64) {
2513
2514
2515 p := s.Prog(x86.AMOVUPS)
2516 p.From.Type = obj.TYPE_MEM
2517 p.From.Reg = src
2518 p.From.Offset = off
2519 p.To.Type = obj.TYPE_REG
2520 p.To.Reg = tmp
2521 p = s.Prog(x86.AMOVUPS)
2522 p.From.Type = obj.TYPE_REG
2523 p.From.Reg = tmp
2524 p.To.Type = obj.TYPE_MEM
2525 p.To.Reg = dst
2526 p.To.Offset = off
2527 }
2528
2529
2530
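// simdReg returns the assembler register for SIMD value v, widening the
// allocated X register to its Y or Z form to match the value's size.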
2531 func simdReg(v *ssa.Value) int16 {
2532 t := v.Type
2533 if !t.IsSIMD() {
2534 base.Fatalf("simdReg: not a simd type; v=%s, b=b%d, f=%s", v.LongString(), v.Block.ID, v.Block.Func.Name)
2535 }
2536 return simdRegBySize(v.Reg(), t.Size())
2537 }
2538
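// simdRegBySize maps an X register to the X, Y or Z register of the given size
// in bytes; for example, simdRegBySize(x86.REG_X3, 32) is x86.REG_Y3.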
2539 func simdRegBySize(reg int16, size int64) int16 {
2540 switch size {
2541 case 16:
2542 return reg
2543 case 32:
2544 return reg + (x86.REG_Y0 - x86.REG_X0)
2545 case 64:
2546 return reg + (x86.REG_Z0 - x86.REG_X0)
2547 }
2548 panic("simdRegBySize: bad size")
2549 }
2550
2551
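// maskReg returns the (K) register holding SIMD mask value v; mask values are
// 8 bytes wide.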
2552 func maskReg(v *ssa.Value) int16 {
2553 t := v.Type
2554 if !t.IsSIMD() {
2555 base.Fatalf("maskReg: not a simd type; v=%s, b=b%d, f=%s", v.LongString(), v.Block.ID, v.Block.Func.Name)
2556 }
2557 switch t.Size() {
2558 case 8:
2559 return v.Reg()
2560 }
2561 panic("unreachable")
2562 }
2563
2564
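// simdOrMaskReg returns the mask register for values of size <= 8 bytes (masks)
// and the vector register for larger SIMD values.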
2565 func simdOrMaskReg(v *ssa.Value) int16 {
2566 t := v.Type
2567 if t.Size() <= 8 {
2568 return maskReg(v)
2569 }
2570 return simdReg(v)
2571 }
2572
2573
2574
2575
2576
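// simdCheckRegOnly returns v's register, panicking if it lies outside
// [regStart, regEnd]; it is intended for operands that must be allocated to a
// restricted register range.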
2577 func simdCheckRegOnly(v *ssa.Value, regStart, regEnd int16) int16 {
2578 if v.Reg() > regEnd || v.Reg() < regStart {
2579 panic("simdCheckRegOnly: not the desired register")
2580 }
2581 return v.Reg()
2582 }
2583