1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "fmt"
123 "internal/buildcfg"
124 "math"
125 "math/bits"
126 "unsafe"
127 )
128
// Debug-output thresholds for this pass: a category's diagnostics are
// printed when s.f.pass.debug is strictly greater than the constant
// (see the "s.f.pass.debug > regDebug" checks below).
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
135
136
137
// Distance weights used when estimating how far away a use is across
// block boundaries, scaled by how likely the connecting edge is to be
// taken. Smaller means "sooner", so likely successors keep values in
// registers.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
143
144
145
// regalloc performs register allocation on f. It wraps the three phases
// of the allocator: state setup (init), the main allocation pass
// (regAllocState.regalloc), and cleanup (close).
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
	s.close()
}
152
// register is the index of a machine register within the current
// architecture's register set.
type register uint8

// noRegister is the sentinel register index meaning "no register
// assigned".
const noRegister register = 255

// noRegisters is a 32-entry array with every slot initialized to
// noRegister, convenient for resetting fixed-size register tables.
var noRegisters [32]register = func() (a [32]register) {
	for i := range a {
		a[i] = noRegister
	}
	return a
}()

// regMask is a bit set over registers: bit r is set when register r is
// a member of the set.
type regMask uint64

// String renders m as space-separated register indices, lowest first,
// e.g. "r0 r2". The empty mask renders as "".
func (m regMask) String() string {
	var out string
	for m != 0 {
		r := register(bits.TrailingZeros64(uint64(m)))
		m &^= regMask(1) << r
		if out != "" {
			out += " "
		}
		out += fmt.Sprintf("r%d", r)
	}
	return out
}

// contains reports whether register r is a member of the set m.
func (m regMask) contains(r register) bool {
	return m&(regMask(1)<<r) != 0
}
187
188 func (s *regAllocState) RegMaskString(m regMask) string {
189 str := ""
190 for r := register(0); m != 0; r++ {
191 if m>>r&1 == 0 {
192 continue
193 }
194 m &^= regMask(1) << r
195 if str != "" {
196 str += " "
197 }
198 str += s.registers[r].String()
199 }
200 return str
201 }
202
203
// countRegs returns the number of registers in the mask r.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
207
208
// pickReg picks an arbitrary register from the register mask r:
// specifically the lowest-numbered one. It panics if r is empty.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}

	return register(bits.TrailingZeros64(uint64(r)))
}
216
// use records a single upcoming use of a value within the current
// block. Use records form a linked list per value, sorted by
// increasing distance (addUse enforces the ordering), and are recycled
// through regAllocState.freeUseRecords.
type use struct {
	// dist is the index (within the current block's schedule) of the
	// instruction that uses the value; uses beyond the end of the
	// block are recorded at len(b.Values) plus a cross-block distance.
	dist int32
	// pos is the source position of the use.
	pos src.XPos
	// next is the next (farther or equally distant) use of the same
	// value, or nil.
	next *use
}
227
228
// valState holds the register allocator's per-value state, indexed by
// value ID in regAllocState.values.
type valState struct {
	regs  regMask // registers currently holding a cached copy of this value
	uses  *use    // remaining uses in the current block, nearest first
	spill *Value  // spilled copy (OpStoreReg) of the value, if any
	// restoreMin/restoreMax bound, in dominator-tree entry/exit
	// numbering, the region in which restores of the spill may occur
	// (set and widened by makeSpill).
	restoreMin        int32
	restoreMax        int32
	needReg           bool // value must be allocated a register (see needRegister)
	rematerializeable bool // cached result of v.rematerializeable()
}
238
// regState records which value currently occupies a machine register.
type regState struct {
	v *Value // original (pre-regalloc) value held in this register
	c *Value // cached copy of v that physically sits in the register (may be v itself)

}
244
// regAllocState is the working state of the register allocator for one
// function.
type regAllocState struct {
	f *Func // function being allocated

	sdom        SparseTree // dominator tree of f (f.Sdom())
	registers   []Register // machine registers, from the config
	numRegs     register   // len(registers)
	SPReg       register   // stack pointer register (located by name "SP" in init)
	SBReg       register   // static base register ("SB")
	GReg        register   // goroutine register ("g"), if the arch has one
	ZeroIntReg  register   // hardwired zero register ("ZERO"), if any
	allocatable regMask    // set of registers the allocator may hand out

	// live[b.ID] lists the values live at the end of block b, each with
	// the distance to and position of its next use (see computeLive).
	live [][]liveInfo

	// desired[b.ID] records preferred register assignments for block b.
	desired []desiredState

	// per-value allocation state, indexed by value ID
	values []valState

	// IDs of the SP and SB values
	sp, sb ID

	// orig[i] is the original (pre-regalloc) value that value i is a
	// cached copy / spill / restore of (maintained by setOrig).
	orig []*Value

	// per-register occupancy, indexed by register number
	regs []regState

	// registers that may not be spilled while the current instruction
	// is being processed
	nospill regMask

	// registers currently holding a value
	used regMask

	// registers used at any point since the start of the current block
	usedSinceBlockStart regMask

	// registers reserved as temporaries for the current instruction
	tmpused regMask

	// block currently being allocated
	curBlock *Block

	// free list of recycled use records (see addUse / advanceUses)
	freeUseRecords *use

	// endRegs[b.ID] is the register assignment at the end of block b,
	// consulted when merging state into successors.
	endRegs [][]endReg

	// startRegs[b.ID] lists the registers (and the values in them) that
	// block b expects to be live on entry.
	startRegs [][]startReg

	// mask form of startRegs for the block being processed; entries may
	// be dropped by allocReg when an unused register is evicted.
	startRegsMask regMask

	// spillLive[b.ID] holds spill value IDs associated with block b;
	// its length is used when choosing a merge predecessor.
	spillLive [][]ID

	// copies tracks OpCopy values inserted by the allocator; each is
	// recorded with false when created. NOTE(review): presumably the
	// flag marks copies that turn out to be needed — confirm against
	// the cleanup pass outside this chunk.
	copies map[*Value]bool

	loopnest *loopnest // loop structure of f

	// order in which blocks are visited for allocation
	visitOrder []*Block

	// blockOrder[b.ID] is b's index in visitOrder; used to distinguish
	// forward from backward edges.
	blockOrder []int32

	// whether to insert OpClobberReg over dead registers at call sites
	// (clobberdeadreg debugging mode)
	doClobber bool

	// nextCall[i], for the current block, is the index of the next call
	// at or after instruction i (math.MaxInt32 when there is none)
	nextCall []int32

	// index of the instruction currently being processed within the
	// current block
	curIdx int
}
341
// endReg records that register r holds value v (cached as c) at the
// end of a block; saved per block in regAllocState.endRegs.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register
	c *Value // cached version of the value that physically occupies r
}
347
// startReg records that register r must hold value v (cached as c) at
// the start of a block; saved per block in regAllocState.startRegs.
type startReg struct {
	r   register
	v   *Value   // pre-regalloc value expected in this register
	c   *Value   // cached version of the value
	pos src.XPos // source position of v's next use at block start
}
354
355
// freeReg frees up register r. Any value currently cached in r is no
// longer considered to be in that register.
func (s *regAllocState) freeReg(r register) {
	// Non-allocatable registers (other than g) carry no tracked state,
	// so there is nothing to free.
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused: clear the register slot, the value's register
	// bit, and the used bit.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}
373
374
375 func (s *regAllocState) freeRegs(m regMask) {
376 for m&s.used != 0 {
377 s.freeReg(pickReg(m & s.used))
378 }
379 }
380
381
// clobberRegs inserts OpClobberReg instructions for every allocatable
// general-purpose register in m, so that dead register contents are
// visibly destroyed (used when doClobber is set, i.e. the
// clobberdeadreg debugging mode).
func (s *regAllocState) clobberRegs(m regMask) {
	m &= s.allocatable & s.f.Config.gpRegMask // only clobber allocatable GP registers
	for m != 0 {
		r := pickReg(m)
		m &^= 1 << r
		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
		s.f.setHome(x, &s.registers[r])
	}
}
391
392
393
// setOrig records that c is derived from v (a copy, spill, or restore),
// so that s.orig[c.ID] resolves to the same original value as v. The
// orig table is grown as needed; a value's orig may only be set once.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	if int(c.ID) >= cap(s.orig) {
		// Grow the cache-allocated backing slice to fit c.ID.
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	// v itself may already be a derived value, so propagate v's
	// original rather than v.
	s.orig[c.ID] = s.orig[v.ID]
}
409
410
411
// assignReg assigns register r to hold value v, where c is the cached
// copy of v that physically occupies r (c may be v itself).
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	// Record the assignment in the value's register set and set c's home.
	s.values[v.ID].regs |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])

	// Non-allocatable registers (other than g) do not track occupancy.
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}
	s.regs[r] = regState{v, c}
	s.used |= regMask(1) << r
}
430
431
432
433
434 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
435 if v.OnWasmStack {
436 return noRegister
437 }
438
439 mask &= s.allocatable
440 mask &^= s.nospill
441 if mask == 0 {
442 s.f.Fatalf("no register available for %s", v.LongString())
443 }
444
445
446 if mask&^s.used != 0 {
447 r := pickReg(mask &^ s.used)
448 s.usedSinceBlockStart |= regMask(1) << r
449 return r
450 }
451
452
453
454
455
456
457
458
459
460
461
462 var r register
463 maxuse := int32(-1)
464 for t := register(0); t < s.numRegs; t++ {
465 if mask>>t&1 == 0 {
466 continue
467 }
468 v := s.regs[t].v
469 if n := s.values[v.ID].uses.dist; n > maxuse {
470
471
472 r = t
473 maxuse = n
474 }
475 }
476 if maxuse == -1 {
477 s.f.Fatalf("couldn't find register to spill")
478 }
479
480 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
481
482
483
484 s.freeReg(r)
485 return r
486 }
487
488
489
490 v2 := s.regs[r].v
491 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
492 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
493 s.usedSinceBlockStart |= regMask(1) << r
494 r2 := pickReg(m)
495 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
496 s.copies[c] = false
497 if s.f.pass.debug > regDebug {
498 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
499 }
500 s.setOrig(c, v2)
501 s.assignReg(r2, v2, c)
502 }
503
504
505
506
507 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
508 if s.startRegsMask&(regMask(1)<<r) == 1 {
509 if s.f.pass.debug > regDebug {
510 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
511 }
512 s.startRegsMask &^= regMask(1) << r
513 }
514 }
515
516 s.freeReg(r)
517 s.usedSinceBlockStart |= regMask(1) << r
518 return r
519 }
520
521
522
// makeSpill returns a spill (OpStoreReg) of value v, creating one if it
// does not exist yet. The spill is not assigned to a block here; the
// restoreMin/restoreMax dominator-tree interval is widened to cover b
// so a later phase can place the spill where it dominates all restores.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// We already have a spill; just widen the interval of blocks
		// from which it may be restored.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}

	// Make a new spill for v. We don't know where to place it yet, so
	// create it without a block.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)

	// Record it as v's spill and start the restore interval at b.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
542
543
544
545
546
547
548
// allocValToReg allocates a register from the set allowed by mask to
// hold v, moving v into it (by copy, rematerialization, or reload from
// spill) if it is not there already, and returns the *Value that lives
// in the register. If nospill is set, the chosen register is marked
// unspillable while the current instruction is processed. pos is the
// source position to attribute any generated instruction to.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		// On Wasm, rematerializable values are emitted directly onto
		// the Wasm stack instead of occupying a register.
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		mask &= vi.regs
		r := pickReg(mask)
		if mask.contains(s.SPReg) {
			// Prefer SP when it is an option; SP is non-allocatable,
			// so this routes to the fixed-register fast path below.
			r = s.SPReg
		}
		if !s.allocatable.contains(r) {
			// Fixed (non-allocatable) registers hold the value itself.
			return v
		}
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register
	// On Wasm, a nospill use is consumed immediately, so the value can
	// live on the Wasm stack instead of in a register.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register.
		r = s.allocReg(mask, v)
	}

	// Produce a copy of v in the allocated register.
	var c *Value
	if vi.regs != 0 {
		// Copy from a register that v is already in.
		r2 := pickReg(vi.regs)
		var current *Value
		if !s.allocatable.contains(r2) {
			// v is in a fixed register; copy from v itself.
			current = v
		} else {
			if s.regs[r2].v != v {
				panic("bad register state")
			}
			current = s.regs[r2].c
		}
		s.usedSinceBlockStart |= regMask(1) << r2
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
637
638
// isLeaf reports whether f contains no non-tail calls, and therefore
// performs no call that returns to f.
func isLeaf(f *Func) bool {
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Op.IsCall() && !v.Op.IsTailCall() {
				// tail call is not counted as it does not return to f
				return false
			}
		}
	}
	return true
}
650
651
652 func (v *Value) needRegister() bool {
653 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
654 }
655
// init sets up the allocator state for function f: it locates the
// special registers, computes the allocatable register set, decides the
// block visit order, and builds the per-value tables (including
// liveness). It must be called before regalloc.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// Sanity-check the register count: it must fit in a register index
	// (noRegister is the sentinel) and in a regMask bit per register.
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, g, and zero registers by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	s.ZeroIntReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		case "ZERO":
			s.ZeroIntReg = r
		}
	}
	// Make sure the required special registers were found.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we may allocate: everything in the GP,
	// FP, and special files, minus the reserved/special registers.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.ZeroIntReg != noRegister {
		s.allocatable &^= 1 << s.ZeroIntReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		// Reserve the frame pointer register.
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// When dynamically linking, some architectures reserve a
		// register for accessing global data.
		switch s.f.Config.arch {
		case "386":
			// nothing reserved here
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing reserved here
		case "loong64":
			// nothing reserved here
		case "ppc64le":
			// nothing reserved here
		case "riscv64":
			// nothing reserved here
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Decide the order in which blocks are visited for allocation,
	// decoupled from the generated block order.
	s.visitOrder = layoutRegallocOrder(f)

	// blockOrder maps a block ID to its position in visitOrder; it lets
	// us distinguish forward edges from backward edges.
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	// Allocate the per-register and per-value tables, reusing cached
	// storage where possible.
	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needRegister is false for tuple/flags/void/memory
			// values, so those never get needReg set here.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// On Wasm, mark values whose results can live on the WebAssembly
	// operand stack rather than in (virtual) registers.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block: reset the candidate set.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walk the block backward, propagating candidacy to args.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Value cannot live on the stack; stack values may
					// not be reordered past it, so reset candidates.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// An argument can live on the Wasm stack if:
					// - it is used exactly once,
					// - the use is in the same block,
					// - it is not a memory value, and
					// - it is a machine (non-generic) op.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// The clobberdeadreg mode clobbers dead registers at call sites.
	// Skip very large functions to bound the extra work.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		s.doClobber = true
	}
}
823
// close returns cache-allocated storage to the function's cache.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
827
828
829
// addUse records a use of value id at distance dist (with source
// position pos) by prepending to the value's use list. Because records
// are prepended, uses must be added in order of decreasing distance
// (i.e. while scanning the block backward), which keeps each list
// sorted nearest-use-first. Records are recycled via freeUseRecords.
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
	r := s.freeUseRecords
	if r != nil {
		s.freeUseRecords = r.next
	} else {
		r = &use{}
	}
	r.dist = dist
	r.pos = pos
	r.next = s.values[id].uses
	s.values[id].uses = r
	if r.next != nil && dist > r.next.dist {
		s.f.Fatalf("uses added in wrong order")
	}
}
845
846
847
// advanceUses pops the nearest use record of each of v's arguments,
// freeing an argument's registers when it is dead or not used again
// before the next call (fixed-register values are exempt). Popped
// records are recycled onto the free list. Called as each instruction
// is processed.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
			// Value is dead (or won't be used again until after a
			// call), so free all registers that hold it.
			s.freeRegs(ai.regs)
		}
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
865
866
867
// dropIfUnused frees v's registers if v has no remaining uses, or none
// before the next call (fixed-register values are exempt).
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
		s.freeRegs(vi.regs)
	}
}
878
879
880
881
// liveAfterCurrentInstruction reports whether v is used after the
// current instruction, i.e. whether it has a use at a strictly greater
// distance than its nearest use(s).
func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
	u := s.values[v.ID].uses
	if u == nil {
		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
	}
	d := u.dist
	// Skip past all uses at the nearest distance (the current
	// instruction may use v more than once).
	for u != nil && u.dist == d {
		u = u.next
	}
	return u != nil && u.dist > d
}
893
894
// setState resets the register tracking state to exactly the
// assignments listed in regs (typically a predecessor block's endRegs).
func (s *regAllocState) setState(regs []endReg) {
	s.freeRegs(s.used)
	for _, x := range regs {
		s.assignReg(x.r, x.v, x.c)
	}
}
901
902
// compatRegs returns the set of allocatable registers that can hold a
// value of type t. Tuples and flags get no registers; floats and
// 128-bit ints use the FP register file (with width-specific masks when
// the target provides them); everything else uses the GP registers.
func (s *regAllocState) compatRegs(t *types.Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsFloat() || t == types.TypeInt128 {
		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
			m = s.f.Config.fp32RegMask
		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
			m = s.f.Config.fp64RegMask
		} else {
			m = s.f.Config.fpRegMask
		}
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}
921
922
// regspec returns the register constraints (inputs, outputs, clobbers)
// for v, handling the ops whose constraints are not fully described by
// the static opcode table.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert has no entry in the static table; it may use any
		// allocatable general-purpose register for both its input and
		// its output.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		// The output register is fixed by the argument's ABI slot.
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		// Calls with register-ABI information carry their own regInfo.
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		// Results of this function may use the register ABI too.
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
950
951 func (s *regAllocState) isGReg(r register) bool {
952 return s.f.Config.hasGReg && s.GReg == r
953 }
954
955
956 var tmpVal Value
957
958 func (s *regAllocState) regalloc(f *Func) {
959 regValLiveSet := f.newSparseSet(f.NumValues())
960 defer f.retSparseSet(regValLiveSet)
961 var oldSched []*Value
962 var phis []*Value
963 var phiRegs []register
964 var args []*Value
965
966
967 var desired desiredState
968 desiredSecondReg := map[ID][4]register{}
969
970
971 type dentry struct {
972 out [4]register
973 in [3][4]register
974 }
975 var dinfo []dentry
976
977 if f.Entry != f.Blocks[0] {
978 f.Fatalf("entry block must be first")
979 }
980
981 for _, b := range s.visitOrder {
982 if s.f.pass.debug > regDebug {
983 fmt.Printf("Begin processing block %v\n", b)
984 }
985 s.curBlock = b
986 s.startRegsMask = 0
987 s.usedSinceBlockStart = 0
988 clear(desiredSecondReg)
989
990
991
992 regValLiveSet.clear()
993 for _, e := range s.live[b.ID] {
994 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
995 regValLiveSet.add(e.ID)
996 }
997 for _, v := range b.ControlValues() {
998 if s.values[v.ID].needReg {
999 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
1000 regValLiveSet.add(v.ID)
1001 }
1002 }
1003 if len(s.nextCall) < len(b.Values) {
1004 s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
1005 }
1006 var nextCall int32 = math.MaxInt32
1007 for i := len(b.Values) - 1; i >= 0; i-- {
1008 v := b.Values[i]
1009 regValLiveSet.remove(v.ID)
1010 if v.Op == OpPhi {
1011
1012
1013
1014 s.nextCall[i] = nextCall
1015 continue
1016 }
1017 if opcodeTable[v.Op].call {
1018
1019 regValLiveSet.clear()
1020 if s.sp != 0 && s.values[s.sp].uses != nil {
1021 regValLiveSet.add(s.sp)
1022 }
1023 if s.sb != 0 && s.values[s.sb].uses != nil {
1024 regValLiveSet.add(s.sb)
1025 }
1026 nextCall = int32(i)
1027 }
1028 for _, a := range v.Args {
1029 if !s.values[a.ID].needReg {
1030 continue
1031 }
1032 s.addUse(a.ID, int32(i), v.Pos)
1033 regValLiveSet.add(a.ID)
1034 }
1035 s.nextCall[i] = nextCall
1036 }
1037 if s.f.pass.debug > regDebug {
1038 fmt.Printf("use distances for %s\n", b)
1039 for i := range s.values {
1040 vi := &s.values[i]
1041 u := vi.uses
1042 if u == nil {
1043 continue
1044 }
1045 fmt.Printf(" v%d:", i)
1046 for u != nil {
1047 fmt.Printf(" %d", u.dist)
1048 u = u.next
1049 }
1050 fmt.Println()
1051 }
1052 }
1053
1054
1055
1056 nphi := 0
1057 for _, v := range b.Values {
1058 if v.Op != OpPhi {
1059 break
1060 }
1061 nphi++
1062 }
1063 phis = append(phis[:0], b.Values[:nphi]...)
1064 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1065 b.Values = b.Values[:0]
1066
1067
1068 if b == f.Entry {
1069
1070 if nphi > 0 {
1071 f.Fatalf("phis in entry block")
1072 }
1073 } else if len(b.Preds) == 1 {
1074
1075 s.setState(s.endRegs[b.Preds[0].b.ID])
1076 if nphi > 0 {
1077 f.Fatalf("phis in single-predecessor block")
1078 }
1079
1080
1081
1082 for r := register(0); r < s.numRegs; r++ {
1083 v := s.regs[r].v
1084 if v != nil && !regValLiveSet.contains(v.ID) {
1085 s.freeReg(r)
1086 }
1087 }
1088 } else {
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102 idx := -1
1103 for i, p := range b.Preds {
1104
1105
1106 pb := p.b
1107 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1108 continue
1109 }
1110 if idx == -1 {
1111 idx = i
1112 continue
1113 }
1114 pSel := b.Preds[idx].b
1115 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1116 idx = i
1117 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1129 idx = i
1130 }
1131 }
1132 }
1133 if idx < 0 {
1134 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1135 }
1136 p := b.Preds[idx].b
1137 s.setState(s.endRegs[p.ID])
1138
1139 if s.f.pass.debug > regDebug {
1140 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1141 for _, x := range s.endRegs[p.ID] {
1142 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1143 }
1144 }
1145
1146
1147
1148
1149
1150 phiRegs = phiRegs[:0]
1151 var phiUsed regMask
1152
1153 for _, v := range phis {
1154 if !s.values[v.ID].needReg {
1155 phiRegs = append(phiRegs, noRegister)
1156 continue
1157 }
1158 a := v.Args[idx]
1159
1160
1161 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1162 if m != 0 {
1163 r := pickReg(m)
1164 phiUsed |= regMask(1) << r
1165 phiRegs = append(phiRegs, r)
1166 } else {
1167 phiRegs = append(phiRegs, noRegister)
1168 }
1169 }
1170
1171
1172 for i, v := range phis {
1173 if !s.values[v.ID].needReg {
1174 continue
1175 }
1176 a := v.Args[idx]
1177 r := phiRegs[i]
1178 if r == noRegister {
1179 continue
1180 }
1181 if regValLiveSet.contains(a.ID) {
1182
1183
1184
1185
1186
1187
1188
1189
1190 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1191 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1192 r2 := pickReg(m)
1193 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1194 s.copies[c] = false
1195 if s.f.pass.debug > regDebug {
1196 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1197 }
1198 s.setOrig(c, a)
1199 s.assignReg(r2, a, c)
1200 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1201 }
1202 }
1203 s.freeReg(r)
1204 }
1205
1206
1207 b.Values = append(b.Values, phis...)
1208
1209
1210
1211 for i, v := range phis {
1212 if !s.values[v.ID].needReg {
1213 continue
1214 }
1215 if phiRegs[i] != noRegister {
1216 continue
1217 }
1218 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1219
1220
1221 for i, pe := range b.Preds {
1222 if i == idx {
1223 continue
1224 }
1225 ri := noRegister
1226 for _, er := range s.endRegs[pe.b.ID] {
1227 if er.v == s.orig[v.Args[i].ID] {
1228 ri = er.r
1229 break
1230 }
1231 }
1232 if ri != noRegister && m>>ri&1 != 0 {
1233 m = regMask(1) << ri
1234 break
1235 }
1236 }
1237 if m != 0 {
1238 r := pickReg(m)
1239 phiRegs[i] = r
1240 phiUsed |= regMask(1) << r
1241 }
1242 }
1243
1244
1245 for i, v := range phis {
1246 if !s.values[v.ID].needReg {
1247 continue
1248 }
1249 r := phiRegs[i]
1250 if r == noRegister {
1251
1252
1253 s.values[v.ID].spill = v
1254 continue
1255 }
1256
1257 s.assignReg(r, v, v)
1258 }
1259
1260
1261 for r := register(0); r < s.numRegs; r++ {
1262 if phiUsed>>r&1 != 0 {
1263 continue
1264 }
1265 v := s.regs[r].v
1266 if v != nil && !regValLiveSet.contains(v.ID) {
1267 s.freeReg(r)
1268 }
1269 }
1270
1271
1272
1273
1274
1275 regList := make([]startReg, 0, 32)
1276 for r := register(0); r < s.numRegs; r++ {
1277 v := s.regs[r].v
1278 if v == nil {
1279 continue
1280 }
1281 if phiUsed>>r&1 != 0 {
1282
1283
1284 continue
1285 }
1286 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1287 s.startRegsMask |= regMask(1) << r
1288 }
1289 s.startRegs[b.ID] = make([]startReg, len(regList))
1290 copy(s.startRegs[b.ID], regList)
1291
1292 if s.f.pass.debug > regDebug {
1293 fmt.Printf("after phis\n")
1294 for _, x := range s.startRegs[b.ID] {
1295 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1296 }
1297 }
1298 }
1299
1300
1301 for i, v := range phis {
1302 s.curIdx = i
1303 s.dropIfUnused(v)
1304 }
1305
1306
1307 if l := len(oldSched); cap(dinfo) < l {
1308 dinfo = make([]dentry, l)
1309 } else {
1310 dinfo = dinfo[:l]
1311 clear(dinfo)
1312 }
1313
1314
1315 desired.copy(&s.desired[b.ID])
1316
1317
1318
1319
1320
1321
1322 for _, e := range b.Succs {
1323 succ := e.b
1324
1325 for _, x := range s.startRegs[succ.ID] {
1326 desired.add(x.v.ID, x.r)
1327 }
1328
1329 pidx := e.i
1330 for _, v := range succ.Values {
1331 if v.Op != OpPhi {
1332 break
1333 }
1334 if !s.values[v.ID].needReg {
1335 continue
1336 }
1337 rp, ok := s.f.getHome(v.ID).(*Register)
1338 if !ok {
1339
1340
1341
1342
1343 for _, a := range v.Args {
1344 rp, ok = s.f.getHome(a.ID).(*Register)
1345 if ok {
1346 break
1347 }
1348 }
1349 if !ok {
1350 continue
1351 }
1352 }
1353 desired.add(v.Args[pidx].ID, register(rp.num))
1354 }
1355 }
1356
1357
1358 for i := len(oldSched) - 1; i >= 0; i-- {
1359 v := oldSched[i]
1360 prefs := desired.remove(v.ID)
1361 regspec := s.regspec(v)
1362 desired.clobber(regspec.clobbers)
1363 for _, j := range regspec.inputs {
1364 if countRegs(j.regs) != 1 {
1365 continue
1366 }
1367 desired.clobber(j.regs)
1368 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1369 }
1370 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1371 if opcodeTable[v.Op].commutative {
1372 desired.addList(v.Args[1].ID, prefs)
1373 }
1374 desired.addList(v.Args[0].ID, prefs)
1375 }
1376
1377 dinfo[i].out = prefs
1378 for j, a := range v.Args {
1379 if j >= len(dinfo[i].in) {
1380 break
1381 }
1382 dinfo[i].in[j] = desired.get(a.ID)
1383 }
1384 if v.Op == OpSelect1 && prefs[0] != noRegister {
1385
1386
1387 desiredSecondReg[v.Args[0].ID] = prefs
1388 }
1389 }
1390
1391
1392 for idx, v := range oldSched {
1393 s.curIdx = nphi + idx
1394 tmpReg := noRegister
1395 if s.f.pass.debug > regDebug {
1396 fmt.Printf(" processing %s\n", v.LongString())
1397 }
1398 regspec := s.regspec(v)
1399 if v.Op == OpPhi {
1400 f.Fatalf("phi %s not at start of block", v)
1401 }
1402 if opcodeTable[v.Op].fixedReg {
1403 switch v.Op {
1404 case OpSP:
1405 s.assignReg(s.SPReg, v, v)
1406 s.sp = v.ID
1407 case OpSB:
1408 s.assignReg(s.SBReg, v, v)
1409 s.sb = v.ID
1410 case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
1411 s.assignReg(s.ZeroIntReg, v, v)
1412 default:
1413 f.Fatalf("unknown fixed-register op %s", v)
1414 }
1415 b.Values = append(b.Values, v)
1416 s.advanceUses(v)
1417 continue
1418 }
1419 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1420 if s.values[v.ID].needReg {
1421 if v.Op == OpSelectN {
1422 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1423 } else {
1424 var i = 0
1425 if v.Op == OpSelect1 {
1426 i = 1
1427 }
1428 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1429 }
1430 }
1431 b.Values = append(b.Values, v)
1432 s.advanceUses(v)
1433 continue
1434 }
1435 if v.Op == OpGetG && s.f.Config.hasGReg {
1436
1437 if s.regs[s.GReg].v != nil {
1438 s.freeReg(s.GReg)
1439 }
1440 s.assignReg(s.GReg, v, v)
1441 b.Values = append(b.Values, v)
1442 s.advanceUses(v)
1443 continue
1444 }
1445 if v.Op == OpArg {
1446
1447
1448
1449 s.values[v.ID].spill = v
1450 b.Values = append(b.Values, v)
1451 s.advanceUses(v)
1452 continue
1453 }
1454 if v.Op == OpKeepAlive {
1455
1456 s.advanceUses(v)
1457 a := v.Args[0]
1458 vi := &s.values[a.ID]
1459 if vi.regs == 0 && !vi.rematerializeable {
1460
1461
1462
1463 v.SetArg(0, s.makeSpill(a, b))
1464 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1465
1466
1467
1468 v.Op = OpVarLive
1469 v.SetArgs1(v.Args[1])
1470 v.Aux = a.Aux
1471 } else {
1472
1473
1474
1475 v.Op = OpCopy
1476 v.SetArgs1(v.Args[1])
1477 }
1478 b.Values = append(b.Values, v)
1479 continue
1480 }
1481 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1482
1483 if s.doClobber && v.Op.IsCall() {
1484 s.clobberRegs(regspec.clobbers)
1485 }
1486 s.freeRegs(regspec.clobbers)
1487 b.Values = append(b.Values, v)
1488 s.advanceUses(v)
1489 continue
1490 }
1491
1492 if s.values[v.ID].rematerializeable {
1493
1494
1495
1496 for _, a := range v.Args {
1497 a.Uses--
1498 }
1499 s.advanceUses(v)
1500 continue
1501 }
1502
1503 if s.f.pass.debug > regDebug {
1504 fmt.Printf("value %s\n", v.LongString())
1505 fmt.Printf(" out:")
1506 for _, r := range dinfo[idx].out {
1507 if r != noRegister {
1508 fmt.Printf(" %s", &s.registers[r])
1509 }
1510 }
1511 fmt.Println()
1512 for i := 0; i < len(v.Args) && i < 3; i++ {
1513 fmt.Printf(" in%d:", i)
1514 for _, r := range dinfo[idx].in[i] {
1515 if r != noRegister {
1516 fmt.Printf(" %s", &s.registers[r])
1517 }
1518 }
1519 fmt.Println()
1520 }
1521 }
1522
1523
1524
1525
1526 args = append(args[:0], make([]*Value, len(v.Args))...)
1527 for i, a := range v.Args {
1528 if !s.values[a.ID].needReg {
1529 args[i] = a
1530 }
1531 }
1532 for _, i := range regspec.inputs {
1533 mask := i.regs
1534 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1535 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1536 }
1537 }
1538
1539
1540
1541
1542
1543
1544 for {
1545 freed := false
1546 for _, i := range regspec.inputs {
1547 if args[i.idx] != nil {
1548 continue
1549 }
1550 mask := i.regs
1551 if countRegs(mask) == 1 && mask&^s.used != 0 {
1552 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1553
1554
1555
1556 oldregs := s.values[v.Args[i.idx].ID].regs
1557 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1558 s.freeRegs(oldregs &^ mask &^ s.nospill)
1559 freed = true
1560 }
1561 }
1562 }
1563 if !freed {
1564 break
1565 }
1566 }
1567
1568
1569 for _, i := range regspec.inputs {
1570 if args[i.idx] != nil {
1571 continue
1572 }
1573 mask := i.regs
1574 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1575
1576 mask &= s.allocatable
1577 mask &^= s.nospill
1578
1579 if i.idx < 3 {
1580 for _, r := range dinfo[idx].in[i.idx] {
1581 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1582
1583 mask = regMask(1) << r
1584 break
1585 }
1586 }
1587 }
1588
1589 if mask&^desired.avoid != 0 {
1590 mask &^= desired.avoid
1591 }
1592 }
1593 if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
1594
1595
1596
1597 mask = 1 << s.SPReg
1598 }
1599 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1600 }
1601
1602
1603
1604
1605 if opcodeTable[v.Op].resultInArg0 {
1606 var m regMask
1607 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1608
1609 goto ok
1610 }
1611 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1612 args[0], args[1] = args[1], args[0]
1613 goto ok
1614 }
1615 if s.values[v.Args[0].ID].rematerializeable {
1616
1617 goto ok
1618 }
1619 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1620 args[0], args[1] = args[1], args[0]
1621 goto ok
1622 }
1623 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1624
1625 goto ok
1626 }
1627 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1628 args[0], args[1] = args[1], args[0]
1629 goto ok
1630 }
1631
1632
1633
1634
1635
1636 m = s.compatRegs(v.Args[0].Type) &^ s.used
1637 if m == 0 {
1638
1639
1640
1641
1642 goto ok
1643 }
1644
1645
1646 for _, r := range dinfo[idx].out {
1647 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1648 m = regMask(1) << r
1649 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1650
1651
1652 goto ok
1653 }
1654 }
1655
1656
1657 for _, r := range dinfo[idx].in[0] {
1658 if r != noRegister && m>>r&1 != 0 {
1659 m = regMask(1) << r
1660 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1661 s.copies[c] = false
1662
1663
1664 goto ok
1665 }
1666 }
1667 if opcodeTable[v.Op].commutative {
1668 for _, r := range dinfo[idx].in[1] {
1669 if r != noRegister && m>>r&1 != 0 {
1670 m = regMask(1) << r
1671 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1672 s.copies[c] = false
1673 args[0], args[1] = args[1], args[0]
1674 goto ok
1675 }
1676 }
1677 }
1678
1679
1680 if m&^desired.avoid != 0 {
1681 m &^= desired.avoid
1682 }
1683
1684 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1685 s.copies[c] = false
1686
1687
1688
1689
1690 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1691 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1692 r := register(rp.num)
1693 for _, r2 := range dinfo[idx].in[0] {
1694 if r == r2 {
1695 args[0] = c
1696 break
1697 }
1698 }
1699 }
1700 }
1701 }
1702 ok:
1703 for i := 0; i < 2; i++ {
1704 if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
1705 continue
1706 }
1707 if !s.liveAfterCurrentInstruction(v.Args[i]) {
1708
1709 continue
1710 }
1711 if s.values[v.Args[i].ID].rematerializeable {
1712
1713 continue
1714 }
1715 if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
1716
1717 continue
1718 }
1719
1720 m := s.compatRegs(v.Args[i].Type) &^ s.used
1721 if m == 0 {
1722
1723
1724
1725
1726 continue
1727 }
1728
1729 c := s.allocValToReg(v.Args[i], m, true, v.Pos)
1730 s.copies[c] = false
1731 }
1732
1733
1734
1735
1736
1737
1738
1739 if opcodeTable[v.Op].needIntTemp {
1740 m := s.allocatable & s.f.Config.gpRegMask
1741 for _, out := range regspec.outputs {
1742 if countRegs(out.regs) == 1 {
1743 m &^= out.regs
1744 }
1745 }
1746 if m&^desired.avoid&^s.nospill != 0 {
1747 m &^= desired.avoid
1748 }
1749 tmpReg = s.allocReg(m, &tmpVal)
1750 s.nospill |= regMask(1) << tmpReg
1751 s.tmpused |= regMask(1) << tmpReg
1752 }
1753
1754 if regspec.clobbersArg0 {
1755 s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
1756 }
1757 if regspec.clobbersArg1 {
1758 s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
1759 }
1760
1761
1762
1763
1764
1765 if !opcodeTable[v.Op].resultNotInArgs {
1766 s.tmpused = s.nospill
1767 s.nospill = 0
1768 s.advanceUses(v)
1769 }
1770
1771
1772 if s.doClobber && v.Op.IsCall() {
1773
1774
1775 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1776 }
1777 s.freeRegs(regspec.clobbers)
1778 s.tmpused |= regspec.clobbers
1779
1780
1781 {
1782 outRegs := noRegisters
1783 maxOutIdx := -1
1784 var used regMask
1785 if tmpReg != noRegister {
1786
1787
1788 used |= regMask(1) << tmpReg
1789 }
1790 for _, out := range regspec.outputs {
1791 if out.regs == 0 {
1792 continue
1793 }
1794 mask := out.regs & s.allocatable &^ used
1795 if mask == 0 {
1796 s.f.Fatalf("can't find any output register %s", v.LongString())
1797 }
1798 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1799 if !opcodeTable[v.Op].commutative {
1800
1801 r := register(s.f.getHome(args[0].ID).(*Register).num)
1802 if mask>>r&1 == 0 {
1803 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1804 }
1805 mask = regMask(1) << r
1806 } else {
1807
1808 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1809 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1810
1811 found := false
1812 for _, r := range dinfo[idx].out {
1813 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1814 mask = regMask(1) << r
1815 found = true
1816 if r == r1 {
1817 args[0], args[1] = args[1], args[0]
1818 }
1819 break
1820 }
1821 }
1822 if !found {
1823
1824 mask = regMask(1) << r0
1825 }
1826 }
1827 }
1828 if out.idx == 0 {
1829 for _, r := range dinfo[idx].out {
1830 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1831
1832 mask = regMask(1) << r
1833 break
1834 }
1835 }
1836 }
1837 if out.idx == 1 {
1838 if prefs, ok := desiredSecondReg[v.ID]; ok {
1839 for _, r := range prefs {
1840 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1841
1842 mask = regMask(1) << r
1843 break
1844 }
1845 }
1846 }
1847 }
1848
1849 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1850 mask &^= desired.avoid
1851 }
1852 r := s.allocReg(mask, v)
1853 if out.idx > maxOutIdx {
1854 maxOutIdx = out.idx
1855 }
1856 outRegs[out.idx] = r
1857 used |= regMask(1) << r
1858 s.tmpused |= regMask(1) << r
1859 }
1860
1861 if v.Type.IsTuple() {
1862 var outLocs LocPair
1863 if r := outRegs[0]; r != noRegister {
1864 outLocs[0] = &s.registers[r]
1865 }
1866 if r := outRegs[1]; r != noRegister {
1867 outLocs[1] = &s.registers[r]
1868 }
1869 s.f.setHome(v, outLocs)
1870
1871 } else if v.Type.IsResults() {
1872
1873 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1874 for i := 0; i <= maxOutIdx; i++ {
1875 if r := outRegs[i]; r != noRegister {
1876 outLocs[i] = &s.registers[r]
1877 }
1878 }
1879 s.f.setHome(v, outLocs)
1880 } else {
1881 if r := outRegs[0]; r != noRegister {
1882 s.assignReg(r, v, v)
1883 }
1884 }
1885 if tmpReg != noRegister {
1886
1887 if s.f.tempRegs == nil {
1888 s.f.tempRegs = map[ID]*Register{}
1889 }
1890 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1891 }
1892 }
1893
1894
1895 if opcodeTable[v.Op].resultNotInArgs {
1896 s.nospill = 0
1897 s.advanceUses(v)
1898 }
1899 s.tmpused = 0
1900
1901
1902 for i, a := range args {
1903 v.SetArg(i, a)
1904 }
1905 b.Values = append(b.Values, v)
1906 s.dropIfUnused(v)
1907 }
1908
1909
1910
1911 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1912
1913
1914 for i, v := range b.ControlValues() {
1915 if !s.values[v.ID].needReg {
1916 continue
1917 }
1918 if s.f.pass.debug > regDebug {
1919 fmt.Printf(" processing control %s\n", v.LongString())
1920 }
1921
1922
1923
1924 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1925 }
1926
1927
1928
1929 for _, v := range controls {
1930 vi := &s.values[v.ID]
1931 if !vi.needReg {
1932 continue
1933 }
1934
1935 u := vi.uses
1936 vi.uses = u.next
1937 if u.next == nil {
1938 s.freeRegs(vi.regs)
1939 }
1940 u.next = s.freeUseRecords
1941 s.freeUseRecords = u
1942 }
1943
1944
1945
1946
1947 if len(b.Succs) == 1 {
1948 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1949 s.freeReg(s.GReg)
1950 }
1951 if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
1952
1953 goto badloop
1954 }
1955
1956 top := b.Succs[0].b
1957 loop := s.loopnest.b2l[top.ID]
1958 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1959 goto badloop
1960 }
1961
1962
1963 for _, live := range s.live[b.ID] {
1964 if live.dist >= unlikelyDistance {
1965
1966 continue
1967 }
1968 vid := live.ID
1969 vi := &s.values[vid]
1970 if vi.regs != 0 {
1971 continue
1972 }
1973 if vi.rematerializeable {
1974 continue
1975 }
1976 v := s.orig[vid]
1977 m := s.compatRegs(v.Type) &^ s.used
1978
1979 outerloop:
1980 for _, e := range desired.entries {
1981 if e.ID != v.ID {
1982 continue
1983 }
1984 for _, r := range e.regs {
1985 if r != noRegister && m>>r&1 != 0 {
1986 m = regMask(1) << r
1987 break outerloop
1988 }
1989 }
1990 }
1991 if m&^desired.avoid != 0 {
1992 m &^= desired.avoid
1993 }
1994 if m != 0 {
1995 s.allocValToReg(v, m, false, b.Pos)
1996 }
1997 }
1998 }
1999 badloop:
2000 ;
2001
2002
2003
2004 k := 0
2005 for r := register(0); r < s.numRegs; r++ {
2006 v := s.regs[r].v
2007 if v == nil {
2008 continue
2009 }
2010 k++
2011 }
2012 regList := make([]endReg, 0, k)
2013 for r := register(0); r < s.numRegs; r++ {
2014 v := s.regs[r].v
2015 if v == nil {
2016 continue
2017 }
2018 regList = append(regList, endReg{r, v, s.regs[r].c})
2019 }
2020 s.endRegs[b.ID] = regList
2021
2022 if checkEnabled {
2023 regValLiveSet.clear()
2024 for _, x := range s.live[b.ID] {
2025 regValLiveSet.add(x.ID)
2026 }
2027 for r := register(0); r < s.numRegs; r++ {
2028 v := s.regs[r].v
2029 if v == nil {
2030 continue
2031 }
2032 if !regValLiveSet.contains(v.ID) {
2033 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
2034 }
2035 }
2036 }
2037
2038
2039
2040
2041
2042 for _, e := range s.live[b.ID] {
2043 vi := &s.values[e.ID]
2044 if vi.regs != 0 {
2045
2046 continue
2047 }
2048 if vi.rematerializeable {
2049
2050 continue
2051 }
2052 if s.f.pass.debug > regDebug {
2053 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
2054 }
2055 spill := s.makeSpill(s.orig[e.ID], b)
2056 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
2057 }
2058
2059
2060
2061
2062 for _, e := range s.live[b.ID] {
2063 u := s.values[e.ID].uses
2064 if u == nil {
2065 f.Fatalf("live at end, no uses v%d", e.ID)
2066 }
2067 if u.next != nil {
2068 f.Fatalf("live at end, too many uses v%d", e.ID)
2069 }
2070 s.values[e.ID].uses = nil
2071 u.next = s.freeUseRecords
2072 s.freeUseRecords = u
2073 }
2074
2075
2076
2077
2078
2079
2080
2081 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
2082 regs := make([]startReg, 0, c)
2083 for _, sr := range s.startRegs[b.ID] {
2084 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
2085 continue
2086 }
2087 regs = append(regs, sr)
2088 }
2089 s.startRegs[b.ID] = regs
2090 }
2091 }
2092
2093
2094 s.placeSpills()
2095
2096
2097
2098 stacklive := stackalloc(s.f, s.spillLive)
2099
2100
2101 s.shuffle(stacklive)
2102
2103
2104
2105
2106 for {
2107 progress := false
2108 for c, used := range s.copies {
2109 if !used && c.Uses == 0 {
2110 if s.f.pass.debug > regDebug {
2111 fmt.Printf("delete copied value %s\n", c.LongString())
2112 }
2113 c.resetArgs()
2114 f.freeValue(c)
2115 delete(s.copies, c)
2116 progress = true
2117 }
2118 }
2119 if !progress {
2120 break
2121 }
2122 }
2123
2124 for _, b := range s.visitOrder {
2125 i := 0
2126 for _, v := range b.Values {
2127 if v.Op == OpInvalid {
2128 continue
2129 }
2130 b.Values[i] = v
2131 i++
2132 }
2133 b.Values = b.Values[:i]
2134 }
2135 }
2136
// placeSpills picks a block for each pending spill (created by makeSpill
// with no Block assigned) and then splices all spills into their blocks'
// value lists at valid positions.
func (s *regAllocState) placeSpills() {
	// mustBeFirst reports whether op must appear in the entry group of a
	// block (before any spill we might insert there).
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// start maps block IDs to the list of spills
	// that go at the start of the block (but after any phis).
	start := map[ID][]*Value{}
	// after maps value IDs to the list of spills
	// that go immediately after that value ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up
			// (their block was chosen when they were created).
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v. best is the best place found so far:
		// it must dominate all of v's restores (restoreMin/restoreMax
		// bracket them in dominator-tree entry/exit numbering).
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		// Cap the search so this stays cheap even in huge functions.
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after the phis).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	// oldSched is reused scratch space across blocks.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		// Keep the mandatory entry group (phis, closure ptr, arg regs) first.
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2259
2260
2261 func (s *regAllocState) shuffle(stacklive [][]ID) {
2262 var e edgeState
2263 e.s = s
2264 e.cache = map[ID][]*Value{}
2265 e.contents = map[Location]contentRecord{}
2266 if s.f.pass.debug > regDebug {
2267 fmt.Printf("shuffle %s\n", s.f.Name)
2268 fmt.Println(s.f.String())
2269 }
2270
2271 for _, b := range s.visitOrder {
2272 if len(b.Preds) <= 1 {
2273 continue
2274 }
2275 e.b = b
2276 for i, edge := range b.Preds {
2277 p := edge.b
2278 e.p = p
2279 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2280 e.process()
2281 }
2282 }
2283
2284 if s.f.pass.debug > regDebug {
2285 fmt.Printf("post shuffle %s\n", s.f.Name)
2286 fmt.Println(s.f.String())
2287 }
2288 }
2289
// edgeState tracks the progress of copy insertion on a single merge
// edge p->b while shuffle runs.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p to b

	// cache maps each pre-regalloc value ID to the list of cached
	// copies of that value currently available on this edge.
	cache      map[ID][]*Value
	cachedVals []ID // keys of cache, kept for deterministic iteration order

	// contents maps each location (register or stack slot) to a
	// record of the value it currently holds.
	contents map[Location]contentRecord

	// destinations still to be satisfied; extra collects destinations
	// generated while processing (e.g. by erase of a final location).
	destinations []dstRecord
	extra        []dstRecord

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only copy of some value
	finalRegs             regMask // registers holding a satisfied destination value
	rematerializeableRegs regMask // registers holding a rematerializeable value
}
2310
// contentRecord records what a location holds during shuffle.
type contentRecord struct {
	vid   ID       // pre-regalloc value ID
	c     *Value   // cached copy of the value at this location
	final bool     // this location is a satisfied destination
	pos   src.XPos // source position of use of the value
}
2317
// dstRecord names a location that must hold a particular value by
// the time the edge has been fully processed.
type dstRecord struct {
	loc    Location // register or stack slot to fill
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // place to write the actual destination value (may be nil)
	pos    src.XPos // source position of the use of this location
}
2324
2325
// setup initializes the edge state for processing the idx'th predecessor
// edge into e.b. srcReg lists which registers hold which values at the end
// of the predecessor, dstReg lists which registers must hold which values
// at the start of e.b, and stacklive lists the spill slots live at the end
// of the predecessor.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state left over from the previous edge.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers at the end of the predecessor can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care about the position of the source
	}
	// So can all of the live spill locations.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// We can't use a spill location if the spill doesn't
			// dominate this edge — the spilled value might not have
			// been written yet on paths reaching e.p. A value whose
			// spill doesn't dominate here must be available in a
			// register instead (the register sources above cover it).
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care about the position of the source
	}

	// Figure out all the destinations we need to fill.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis need their idx'th argument to end up in the phi's home location.
	// Phis are scheduled first in a block, so stop at the first non-phi.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2392
2393
// process generates code to move all values into their destination
// locations for the current edge.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations generated by erase()
			// while processing (final values that were evicted).
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress. That means the remaining unsatisfied
		// moves form cycles, e.g. A -> B -> C -> D -> A: each move's
		// destination is the last copy of another move's source, so
		// no individual move can proceed.
		//
		// Break a cycle by copying one of its entries (the value
		// currently in the first remaining destination) into a free
		// temporary register. The duplicated location may then be
		// overwritten, and the rest of the cycle unwinds on the next
		// pass of the outer loop.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		// Copy into the temp from a register (OpCopy) or from a
		// stack slot (OpLoadReg), depending on where it lives now.
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2460
2461
2462
// processDest generates code to put value vid into location loc.
// It returns true if progress was made (the destination is now satisfied),
// and false if the move must be retried later (the occupant of loc is the
// last copy of a value that still needs to survive).
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	pos = pos.WithNotStmt()
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the correct place; just mark it final.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// Note: if splice == nil, occupant.c can look dead. This is
		// non-SSA-formed code, so don't run deadcode after this pass.
		if _, ok := e.s.copies[occupant.c]; ok {
			// The copy at occupant.c ended up being used; record that
			// so the copy-elimination loop in regalloc keeps it.
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location.
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
		// We can't overwrite the last copy of a value that still
		// needs to survive (it can't be rematerialized or re-fetched
		// from a fixed register).
		return false
	}

	// Copy from a source of v, preferring a register source over a
	// stack source.
	v := e.s.orig[vid]
	var c *Value
	var src Location
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc)
		fmt.Printf("sources of v%d:", vid)
	}
	if opcodeTable[v.Op].fixedReg {
		// Fixed-register values live in exactly one place.
		c = v
		src = e.s.f.getHome(v.ID)
	} else {
		for _, w := range e.cache[vid] {
			h := e.s.f.getHome(w.ID)
			if e.s.f.pass.debug > regDebug {
				fmt.Printf(" %s:%s", h, w)
			}
			_, isreg := h.(*Register)
			if src == nil || isreg {
				c = w
				src = h
			}
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src)
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)

	// Pre-clobber the destination. This avoids the following situation:
	//   - v is currently held in R0 and stacktmp0.
	//   - We want to copy stacktmp1 to stacktmp0.
	//   - We choose R0 as the temporary register.
	// During the copy, both R0 and stacktmp0 would be clobbered,
	// losing both copies of v. Erasing the destination early means R0
	// will not be chosen as the temp register, as it would then be
	// the last copy of v.
	e.erase(loc)
	var x *Value
	if c == nil || e.s.values[vid].rematerializeable {
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyInto(e.p)
		} else {
			// Rematerialize into a stack slot. Need a free register
			// to hold the value before storing it.
			r := e.findRegFor(v.Type)
			e.erase(r)
			x = v.copyIntoWithXPos(e.p, pos)
			e.set(r, vid, x, false, pos)
			// Spill with the type of the slot, not the type of x
			// (which may differ, e.g. due to dropped narrowing).
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit a move from src to loc; the op depends on whether each
		// side is a register or a stack slot.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem: route through a temporary register.
				r := e.findRegFor(c.Type)
				e.erase(r)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
	}
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}
2587
2588
// set records that location loc now holds cached copy c of value vid,
// updating the contents/cache maps and all the register bookkeeping masks.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First copy of vid: remember the key for deterministic iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			// Only copy of vid, and it's in a register.
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			// vid now has two copies; the earlier register copy
			// (if any) is no longer unique.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
2623
2624
// erase removes the current occupant of location loc, updating the cache
// and register masks. If the evicted value was a satisfied destination,
// a new destination record is queued so it gets restored later.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// It goes on e.extra (the tail of the queue) so we make
		// progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove the copy at loc from vid's list of cached values
	// (unordered swap-remove).
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks for the freed register, if loc is one.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		// Exactly one copy of vid remains; if it's in a register,
		// that register now holds a unique copy.
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2666
2667
// findRegFor finds a register that can be used as a temporary for a copy
// of type typ, spilling an existing occupant to the stack if necessary.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Which registers can hold a value of this type.
	// Note: "types" here deliberately shadows the package name; only
	// Config.Types is needed below.
	types := &e.s.f.Config.Types
	m := e.s.compatRegs(typ)

	// Pick a register, in priority order:
	// 1) an unused register,
	// 2) a non-unique register that isn't holding a final value,
	// 3) a non-unique register,
	// 4) a register holding a rematerializeable value (cheap to recreate).
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is free. Spill some cached register copy to a fresh
	// stack slot to make room.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a temp location to spill the register to.
					// Int64 is used as a slot type big enough to hold
					// any register. (The value won't be live across a
					// safepoint in this form.)
					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r can now be overwritten by the caller. The saved
				// value will be moved back to its original location
				// later (via the destination records).
				return r
			}
		}
	}

	// Nothing worked: dump state and die.
	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
2731
2732
2733
2734 func (v *Value) rematerializeable() bool {
2735 if !opcodeTable[v.Op].rematerializeable {
2736 return false
2737 }
2738 for _, a := range v.Args {
2739
2740
2741 if !opcodeTable[a.Op].fixedReg {
2742 return false
2743 }
2744 }
2745 return true
2746 }
2747
// liveInfo records a future use of a value.
type liveInfo struct {
	ID   ID       // ID of the value
	dist int32    // # of instructions before the next use
	pos  src.XPos // source position of the next use
}
2753
2754
2755
2756
2757
2758
2759
2760
2761
// computeLive computes, for each block, the list of value IDs live at the
// end of that block, together with a count of how many instructions remain
// until each value's next use. The result is stored in s.live.
// It also computes the desired-register information at the end of each
// block, stored in s.desired.
// Liveness flows backward, so blocks are iterated in postorder until a
// fixed point is reached.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	// Keep track of which value we want in each register.
	var desired desiredState

	// Instead of iterating over f.Blocks, iterate over their postordering.
	// Liveness information flows backward, so starting at the end
	// increases the probability that we will stabilize quickly.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.computeUnavoidableCalls()
	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			// Add len(b.Values) to adjust from end-of-block distance
			// to beginning-of-block distance.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Mark control values as live.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Propagate backwards to the start of the block.
			// Assumes Values have been scheduled.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phi ops for later; their liveness belongs
					// to the predecessor edges, handled below.
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					// Values live across a call get their distance
					// bumped, making them look far away (they will
					// presumably need to be spilled anyway).
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}
			// Propagate desired registers backwards through the block.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// TODO: if v is a phi, save desired register for its
					// inputs. For now desired registers are not
					// propagated back through phi nodes.
					continue
				}
				regspec := s.regspec(v)
				// Cancel desired registers if they get clobbered.
				desired.clobber(regspec.clobbers)
				// Update desired registers if there are any
				// fixed (single-choice) register inputs.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Propagate the result's desired registers to input 0
				// for 2-operand (resultInArg0) instructions, and a few
				// ops treated similarly for preference purposes.
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its list of
			// live-at-end values using what's live at the start of b.
			for i, e := range b.Preds {
				p := e.b

				// Compute the additional distance for the edge,
				// weighted by branch likelihood.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Merge any desired registers into the end of p.
				s.desired[p.ID].merge(&desired)

				// Start t off with the previously known live values
				// at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values from scanning this block.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}

				// Also add the i'th arg of each saved phi: it is
				// live along this edge even though the phi itself
				// was removed from the live set above.
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// Copy the updated live set back, reusing storage
				// where possible.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.pos})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf(" %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d(%d)", x.ID, x.dist)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(&s.registers[r])
						first = false
					}
					fmt.Printf("]")
				}
			}
			if avoid := s.desired[b.ID].avoid; avoid != 0 {
				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
			}
			fmt.Println()
		}
	}
}
2966
2967
// A desiredState represents desired register assignments for a set of
// (pre-regalloc) values.
type desiredState struct {
	// Desired assignments are expected to be few, so a flat list of
	// valueID+registers entries is used rather than a map.
	entries []desiredStateEntry

	// Registers that other values want to be in. This mask contains
	// at least the union of the regs fields of entries, but may also
	// keep bits for values that have since been removed (it is only
	// an over-approximation used for avoidance, see clobber).
	avoid regMask
}
type desiredStateEntry struct {
	// (pre-regalloc) value ID this entry is for.
	ID ID

	// Registers the value would like to be in, in priority order.
	// Unused slots are filled with noRegister.
	regs [4]register
}
2988
2989
2990 func (d *desiredState) get(vid ID) [4]register {
2991 for _, e := range d.entries {
2992 if e.ID == vid {
2993 return e.regs
2994 }
2995 }
2996 return [4]register{noRegister, noRegister, noRegister, noRegister}
2997 }
2998
2999
// add records that value vid wants to be in register r. r is moved to
// (or kept at) the head of vid's priority list; existing preferences
// shift down one slot and the lowest-priority one may fall off the end.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and already highest priority.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Move from lower priority to top priority.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		// Not present: shift everything down and insert at the front.
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	// No entry for vid yet; create one.
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
3025
3026 func (d *desiredState) addList(vid ID, regs [4]register) {
3027
3028 for i := len(regs) - 1; i >= 0; i-- {
3029 r := regs[i]
3030 if r != noRegister {
3031 d.add(vid, r)
3032 }
3033 }
3034 }
3035
3036
// clobber erases all desired registers in the set m, dropping any entry
// whose register list becomes empty and clearing m from the avoid mask.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		// Compact the surviving registers to the front of e.regs.
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No more desired registers for this value:
			// swap-remove the whole entry (don't advance i).
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		// Pad the tail with noRegister.
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}
3060
3061
3062 func (d *desiredState) copy(x *desiredState) {
3063 d.entries = append(d.entries[:0], x.entries...)
3064 d.avoid = x.avoid
3065 }
3066
3067
3068 func (d *desiredState) remove(vid ID) [4]register {
3069 for i := range d.entries {
3070 if d.entries[i].ID == vid {
3071 regs := d.entries[i].regs
3072 d.entries[i] = d.entries[len(d.entries)-1]
3073 d.entries = d.entries[:len(d.entries)-1]
3074 return regs
3075 }
3076 }
3077 return [4]register{noRegister, noRegister, noRegister, noRegister}
3078 }
3079
3080
3081 func (d *desiredState) merge(x *desiredState) {
3082 d.avoid |= x.avoid
3083
3084
3085 for _, e := range x.entries {
3086 d.addList(e.ID, e.regs)
3087 }
3088 }
3089
3090
// computeUnavoidableCalls sets containsUnavoidableCall on each loop for
// which every path from the loop header back to itself passes through a
// call. It runs a DFS from each loop header through call-free blocks of
// the loop; reaching the header again means the loop can iterate without
// calling.
func (loopnest *loopnest) computeUnavoidableCalls() {
	f := loopnest.f

	// hasCall[b] is true for blocks that contain a call.
	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
	defer f.Cache.freeBoolSlice(hasCall)
	for _, b := range f.Blocks {
		if b.containsCall() {
			hasCall[b.ID] = true
		}
	}
	// found tracks blocks already pushed onto the DFS worklist for the
	// current loop, so each block is visited at most once per loop.
	found := f.Cache.allocSparseSet(f.NumBlocks())
	defer f.Cache.freeSparseSet(found)

loopLoop:
	for _, l := range loopnest.loops {
		found.clear()
		tovisit := make([]*Block, 0, 8)
		tovisit = append(tovisit, l.header)
		for len(tovisit) > 0 {
			cur := tovisit[len(tovisit)-1]
			tovisit = tovisit[:len(tovisit)-1]
			if hasCall[cur.ID] {
				// Every path through cur hits the call; don't extend it.
				continue
			}
			for _, s := range cur.Succs {
				nb := s.Block()
				if nb == l.header {
					// Found a call-free path around the loop:
					// this loop's calls are avoidable.
					continue loopLoop
				}
				if found.contains(nb.ID) {
					// Already queued via another path.
					continue
				}
				nl := loopnest.b2l[nb.ID]
				if nl == nil || (nl.depth <= l.depth && nl != l) {
					// nb is outside this loop (and not in a nested
					// loop of it); don't follow the exit.
					continue
				}
				tovisit = append(tovisit, nb)
				found.add(nb.ID)
			}
		}
		// DFS never got back to the header without passing a call.
		l.containsUnavoidableCall = true
	}
}
3146
3147 func (b *Block) containsCall() bool {
3148 if b.Kind == BlockDefer {
3149 return true
3150 }
3151 for _, v := range b.Values {
3152 if opcodeTable[v.Op].call {
3153 return true
3154 }
3155 }
3156 return false
3157 }
3158