1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "fmt"
123 "internal/buildcfg"
124 "math"
125 "math/bits"
126 "unsafe"
127 )
128
// Debug-level thresholds for this pass. Code compares s.f.pass.debug
// against these constants to decide how much diagnostic output to emit
// (e.g. "> logSpills" logs spill loads, "> regDebug" traces register
// assignment decisions).
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
135
136
137
// Distance weights for use-distance computation across block edges,
// scaled by branch likelihood (smaller = "sooner", so uses reachable
// through likely branches look closer than those through unlikely ones).
// Presumably consumed by computeLive — not visible in this chunk.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
143
144
145
146 func regalloc(f *Func) {
147 var s regAllocState
148 s.init(f)
149 s.regalloc(f)
150 s.close()
151 }
152
// register is the index of a machine register within Config.registers.
type register uint8

// noRegister is the sentinel "no register assigned" value.
const noRegister register = 255

// noRegisters is a ready-made array filled with noRegister, handy for
// initializing fixed-size register lists.
var noRegisters = [32]register{
	noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister,
}

// A regMask encodes a set of machine registers, one bit per register
// number (bit r set means register r is in the set).
type regMask uint64

// String renders the mask as space-separated generic register names
// ("r0 r2 r5"), in ascending register order. The empty mask renders
// as "".
func (m regMask) String() string {
	out := ""
	for m != 0 {
		r := bits.TrailingZeros64(uint64(m))
		m &= m - 1 // clear lowest set bit
		if out == "" {
			out = fmt.Sprintf("r%d", r)
		} else {
			out += fmt.Sprintf(" r%d", r)
		}
	}
	return out
}

// contains reports whether register r is a member of the set.
func (m regMask) contains(r register) bool {
	return m&(regMask(1)<<r) != 0
}
187
188 func (s *regAllocState) RegMaskString(m regMask) string {
189 str := ""
190 for r := register(0); m != 0; r++ {
191 if m>>r&1 == 0 {
192 continue
193 }
194 m &^= regMask(1) << r
195 if str != "" {
196 str += " "
197 }
198 str += s.registers[r].String()
199 }
200 return str
201 }
202
203
// countRegs returns the number of registers in (i.e. set bits of) the mask.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
207
208
// pickReg picks the lowest-numbered register in the mask.
// It panics if the mask is empty.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	// Index of the lowest set bit == smallest register number in the set.
	return register(bits.TrailingZeros64(uint64(r)))
}
216
// A use records a single upcoming use of a value. Uses of a value form
// a linked list sorted by nondecreasing distance (see addUse).
type use struct {
	// dist is the distance, in instructions from the start of the
	// current block, at which this use occurs.
	dist int32
	// pos is the source position of the use.
	pos src.XPos
	// next is the next (further-in-the-future) use of the same value.
	next *use
}
227
228
// valState records the allocation state of a single value.
type valState struct {
	regs              regMask // registers currently holding a cached copy of this value
	uses              *use    // upcoming uses, nearest first
	spill             *Value  // spilled copy of the value (OpStoreReg), if any
	restoreMin        int32   // min sdom entry over blocks where a restore was requested (see makeSpill)
	restoreMax        int32   // max sdom exit over blocks where a restore was requested
	needReg           bool    // value result must live in a register (not memory/void/flags/tuple)
	rematerializeable bool    // value can be recomputed instead of spilled/restored
}
238
// regState records what a single machine register currently holds.
type regState struct {
	v *Value // original (pre-regalloc) value held in this register
	c *Value // cached copy of v that actually lives in the register (may equal v)
	// v == nil means the register is free.
}
244
// regAllocState holds all working state of the register allocation pass.
type regAllocState struct {
	f *Func

	sdom        SparseTree // dominator tree, used to position spills/restores
	registers   []Register // machine registers, indexed by register number
	numRegs     register   // len(registers)
	SPReg       register   // stack pointer register
	SBReg       register   // static base register
	GReg        register   // g register, if the architecture has one
	ZeroIntReg  register   // always-zero integer register, or noRegister
	allocatable regMask    // registers the allocator may hand out

	// live[b.ID] is the set of values live at the end of block b, with
	// approximate use distances; computed by computeLive (not in this
	// chunk).
	live [][]liveInfo

	// desired[b.ID] records register preferences at the end of block b.
	desired []desiredState

	// values is indexed by value ID; current state of each value.
	values []valState

	// IDs of the SP and SB values, recorded when their ops are seen.
	sp, sb ID

	// orig maps the ID of any allocator-introduced copy/spill/restore
	// back to the original pre-regalloc value it caches (see setOrig).
	orig []*Value

	// regs is indexed by register number; what each register holds now.
	regs []regState

	// nospill: registers that must not be spilled while processing the
	// current instruction (they already hold placed inputs).
	nospill regMask

	// used: registers currently holding some value.
	used regMask

	// usedSinceBlockStart: registers touched since the start of the
	// current block; used to trim startRegs (see allocReg).
	usedSinceBlockStart regMask

	// tmpused: registers reserved while assembling the current instruction.
	tmpused regMask

	// curBlock is the block currently being processed.
	curBlock *Block

	// freeUseRecords is a free list of use records for recycling
	// (see addUse / advanceUses).
	freeUseRecords *use

	// endRegs[blockid] is the register state recorded at the end of
	// each processed block.
	endRegs [][]endReg

	// startRegs[blockid] is the set of (register, value) pairs a block
	// expects on entry.
	startRegs [][]startReg

	// startRegsMask tracks, while the current block is being processed,
	// which registers are still part of its startRegs set.
	startRegsMask regMask

	// spillLive[blockid] is the set of spill value IDs live at the end
	// of each block.
	spillLive [][]ID

	// copies tracks copy values introduced by the allocator; the bool
	// presumably marks whether the copy turned out to be used — confirm
	// against the pruning code later in the pass.
	copies map[*Value]bool

	loopnest *loopnest

	// visitOrder is the order in which blocks are processed.
	visitOrder []*Block

	// blockOrder[b.ID] is b's index in visitOrder.
	blockOrder []int32

	// doClobber: insert clobbers of dead registers at call sites
	// (debugging aid; see base.Flag.ClobberDeadReg in init).
	doClobber bool

	// nextCall[i] is the index of the next call at or after instruction
	// i within the current block (math.MaxInt32 if none).
	nextCall []int32

	// curIdx is the index of the instruction currently being processed
	// within the current block.
	curIdx int
}
341
// endReg records that, at the end of a block, register r holds cached
// copy c of value v.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register
	c *Value // cached copy of the value (may equal v)
}

// startReg records that a block expects register r to hold cached copy
// c of value v on entry.
type startReg struct {
	r   register
	v   *Value   // pre-regalloc value expected in this register
	c   *Value   // cached copy of the value
	pos src.XPos // source position of the use that wants this register
}
354
355
// freeReg frees up register r. Whatever value it currently caches is
// kicked out of the register (bookkeeping only — the value is not
// spilled here).
func (s *regAllocState) freeReg(r register) {
	// Registers that are neither allocatable nor the g register are not
	// tracked in s.regs, so there is nothing to free.
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}
373
374
375 func (s *regAllocState) freeRegs(m regMask) {
376 for m&s.used != 0 {
377 s.freeReg(pickReg(m & s.used))
378 }
379 }
380
381
382 func (s *regAllocState) clobberRegs(m regMask) {
383 m &= s.allocatable & s.f.Config.gpRegMask
384 for m != 0 {
385 r := pickReg(m)
386 m &^= 1 << r
387 x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
388 s.f.setHome(x, &s.registers[r])
389 }
390 }
391
392
393
// setOrig records that c's original (pre-regalloc) value is the same as
// v's original value. It grows s.orig as needed to cover c.ID.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	if int(c.ID) >= cap(s.orig) {
		// Grow via the function cache so the slice can be recycled
		// (see close).
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	// Propagate v's own original, so chains of copies/spills all map
	// back to the same root value.
	s.orig[c.ID] = s.orig[v.ID]
}
409
410
411
// assignReg records that register r now holds c, a cached copy of
// value v.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	// Per-value bookkeeping and the copy's home location are updated
	// even for registers the allocator does not track.
	s.values[v.ID].regs |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])

	// Only allocatable registers (and g) are tracked in s.regs/s.used.
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}
	s.regs[r] = regState{v, c}
	s.used |= regMask(1) << r
}
430
431
432
433
434 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
435 if v.OnWasmStack {
436 return noRegister
437 }
438
439 mask &= s.allocatable
440 mask &^= s.nospill
441 if mask == 0 {
442 s.f.Fatalf("no register available for %s", v.LongString())
443 }
444
445
446 if mask&^s.used != 0 {
447 r := pickReg(mask &^ s.used)
448 s.usedSinceBlockStart |= regMask(1) << r
449 return r
450 }
451
452
453
454
455
456
457
458
459
460
461
462 var r register
463 maxuse := int32(-1)
464 for t := register(0); t < s.numRegs; t++ {
465 if mask>>t&1 == 0 {
466 continue
467 }
468 v := s.regs[t].v
469 if n := s.values[v.ID].uses.dist; n > maxuse {
470
471
472 r = t
473 maxuse = n
474 }
475 }
476 if maxuse == -1 {
477 s.f.Fatalf("couldn't find register to spill")
478 }
479
480 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
481
482
483
484 s.freeReg(r)
485 return r
486 }
487
488
489
490 v2 := s.regs[r].v
491 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
492 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
493 s.usedSinceBlockStart |= regMask(1) << r
494 r2 := pickReg(m)
495 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
496 s.copies[c] = false
497 if s.f.pass.debug > regDebug {
498 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
499 }
500 s.setOrig(c, v2)
501 s.assignReg(r2, v2, c)
502 }
503
504
505
506
507 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
508 if s.startRegsMask&(regMask(1)<<r) == 1 {
509 if s.f.pass.debug > regDebug {
510 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
511 }
512 s.startRegsMask &^= regMask(1) << r
513 }
514 }
515
516 s.freeReg(r)
517 s.usedSinceBlockStart |= regMask(1) << r
518 return r
519 }
520
521
522
// makeSpill returns a Value which represents the spilled value of v.
// The spill (an OpStoreReg) is created on first request and is left
// blockless — its final placement is decided later. restoreMin/Max
// accumulate the dominator-tree interval covering all blocks that
// requested a restore of v.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Spill already exists; just widen the restore interval to
		// include b.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}

	// Make a spill for v. We don't know where to put it yet, so leave
	// it blockless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// Record the spill's original value for later bookkeeping.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
542
543
544
545
546
547
548
// allocValToReg allocates v to a register from the mask and returns
// the cached copy of v that lives in that register (possibly v
// itself). If nospill is set, the chosen register is protected from
// spilling for the remainder of the current instruction. pos is the
// position to attach to any copy/load values created.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		// On Wasm, rematerialize directly onto the implicit value
		// stack instead of occupying a register.
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if !s.allocatable.contains(r) {
			// Untracked (fixed) register; use v directly.
			return v
		}
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register
	// On Wasm, a nospill value is consumed immediately and can live on
	// the implicit stack instead of a register.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register.
		r = s.allocReg(mask, v)
	}

	// Materialize a copy of v for the new register, from the cheapest
	// available source: an existing register, rematerialization, or
	// the spill slot.
	var c *Value
	if vi.regs != 0 {
		// Copy from a register that v is already in.
		r2 := pickReg(vi.regs)
		var current *Value
		if !s.allocatable.contains(r2) {
			// v is in an untracked (fixed) register.
			current = v
		} else {
			if s.regs[r2].v != v {
				panic("bad register state")
			}
			current = s.regs[r2].c
		}
		s.usedSinceBlockStart |= regMask(1) << r2
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	// Loading into the g register would corrupt the goroutine pointer.
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
630
631
632 func isLeaf(f *Func) bool {
633 for _, b := range f.Blocks {
634 for _, v := range b.Values {
635 if v.Op.IsCall() && !v.Op.IsTailCall() {
636
637 return false
638 }
639 }
640 }
641 return true
642 }
643
644
645 func (v *Value) needRegister() bool {
646 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
647 }
648
// init prepares s to allocate registers for f: locates the special
// registers (SP, SB, g, zero), computes the allocatable set, builds
// the block visit order, sizes per-value and per-register state, and
// computes liveness.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// Register numbers must fit in a register (uint8, < noRegister)
	// and in a regMask (64 bits).
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, g, and always-zero registers by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	s.ZeroIntReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		case "ZERO":
			s.ZeroIntReg = r
		}
	}
	// Make sure we found all required registers. (Fatalf aborts, so
	// only the first missing one is reported.)
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		// g is only required on architectures that use a g register.
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.ZeroIntReg != noRegister {
		s.allocatable &^= 1 << s.ZeroIntReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register,
			// so it is not available for general allocation.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// Some architectures reserve a register when dynamically
		// linking.
		switch s.f.Config.arch {
		case "386":
			// nothing reserved
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing reserved
		case "loong64":
			// nothing reserved
		case "ppc64le":
			// nothing reserved
		case "riscv64":
			// nothing reserved
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Decouple the register-allocation visit order from the generated
	// block order.
	s.visitOrder = layoutRegallocOrder(f)

	// blockOrder lets us distinguish forward from backward edges and
	// how far they jump.
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	// Reuse the cached per-value state slice when it's big enough.
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needRegister is false for tuple-typed values;
			// their Selects are the ones that get registers.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// Wasm: mark values that can live purely on the implicit
	// WebAssembly value stack rather than in registers.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block: reset candidate set.
			canLiveOnStack.clear()
			// Control values used exactly once by a non-generic op may
			// stay on the stack.
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walk backwards, propagating eligibility to single-use,
			// same-block, non-memory, non-generic arguments.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Stack values can't be reordered past this value;
					// reset the candidate set.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// Eligible when the arg is used exactly once, in
					// the same block, is not a memory value, and is a
					// machine (non-generic) op.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// The clobberdead debugging mode clobbers dead registers at call
	// sites; skip huge functions to bound the extra work.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		s.doClobber = true
	}
}
816
// close returns the allocator's cached slices to the function cache.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
820
821
822
823 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
824 r := s.freeUseRecords
825 if r != nil {
826 s.freeUseRecords = r.next
827 } else {
828 r = &use{}
829 }
830 r.dist = dist
831 r.pos = pos
832 r.next = s.values[id].uses
833 s.values[id].uses = r
834 if r.next != nil && dist > r.next.dist {
835 s.f.Fatalf("uses added in wrong order")
836 }
837 }
838
839
840
// advanceUses advances the use lists of v's args from the state before
// v to the state after v, freeing registers of any argument that is
// dead or not used again before the next call.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		// Pop the (nearest) use record consumed by v.
		r := ai.uses
		ai.uses = r.next
		// Free the argument's registers when it has no more uses, or
		// its next use is beyond the next call (fixed-register values
		// like SP/SB are kept — they survive calls).
		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
			// Value is dead (or is not used again until after a call), free all registers
			s.freeRegs(ai.regs)
		}
		// Recycle the use record.
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
858
859
860
// dropIfUnused frees v's registers if v is not used again in this
// block before the next call (at which point it would need to be
// spilled anyway). Fixed-register values are kept.
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
		s.freeRegs(vi.regs)
	}
}
871
872
873
874
875 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
876 u := s.values[v.ID].uses
877 if u == nil {
878 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
879 }
880 d := u.dist
881 for u != nil && u.dist == d {
882 u = u.next
883 }
884 return u != nil && u.dist > d
885 }
886
887
888 func (s *regAllocState) setState(regs []endReg) {
889 s.freeRegs(s.used)
890 for _, x := range regs {
891 s.assignReg(x.r, x.v, x.c)
892 }
893 }
894
895
896 func (s *regAllocState) compatRegs(t *types.Type) regMask {
897 var m regMask
898 if t.IsTuple() || t.IsFlags() {
899 return 0
900 }
901 if t.IsFloat() || t == types.TypeInt128 {
902 if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
903 m = s.f.Config.fp32RegMask
904 } else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
905 m = s.f.Config.fp64RegMask
906 } else {
907 m = s.f.Config.fpRegMask
908 }
909 } else {
910 m = s.f.Config.gpRegMask
911 }
912 return m & s.allocatable
913 }
914
915
// regspec returns the register constraints (inputs/outputs/clobbers)
// for v, applying per-value overrides where the static opcode table is
// insufficient.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert is a generic op with no static register set; use
		// allocatable general-purpose registers for input and output.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		// Result is pinned to the designated integer parameter register.
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		// Result is pinned to the designated float parameter register.
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		// Calls with a register ABI carry their own regInfo in AuxCall.
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		// Results follow this function's own result ABI.
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
943
// isGReg reports whether r is the g register (only possible on
// architectures that dedicate one).
func (s *regAllocState) isGReg(r register) bool {
	return s.f.Config.hasGReg && s.GReg == r
}
947
948
// tmpVal is a dummy value used as the nominal owner when allocating a
// scratch register (see the needIntTemp handling in regalloc).
var tmpVal Value
950
951 func (s *regAllocState) regalloc(f *Func) {
952 regValLiveSet := f.newSparseSet(f.NumValues())
953 defer f.retSparseSet(regValLiveSet)
954 var oldSched []*Value
955 var phis []*Value
956 var phiRegs []register
957 var args []*Value
958
959
960 var desired desiredState
961 desiredSecondReg := map[ID][4]register{}
962
963
964 type dentry struct {
965 out [4]register
966 in [3][4]register
967 }
968 var dinfo []dentry
969
970 if f.Entry != f.Blocks[0] {
971 f.Fatalf("entry block must be first")
972 }
973
974 for _, b := range s.visitOrder {
975 if s.f.pass.debug > regDebug {
976 fmt.Printf("Begin processing block %v\n", b)
977 }
978 s.curBlock = b
979 s.startRegsMask = 0
980 s.usedSinceBlockStart = 0
981 clear(desiredSecondReg)
982
983
984
985 regValLiveSet.clear()
986 for _, e := range s.live[b.ID] {
987 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
988 regValLiveSet.add(e.ID)
989 }
990 for _, v := range b.ControlValues() {
991 if s.values[v.ID].needReg {
992 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
993 regValLiveSet.add(v.ID)
994 }
995 }
996 if len(s.nextCall) < len(b.Values) {
997 s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
998 }
999 var nextCall int32 = math.MaxInt32
1000 for i := len(b.Values) - 1; i >= 0; i-- {
1001 v := b.Values[i]
1002 regValLiveSet.remove(v.ID)
1003 if v.Op == OpPhi {
1004
1005
1006
1007 s.nextCall[i] = nextCall
1008 continue
1009 }
1010 if opcodeTable[v.Op].call {
1011
1012 regValLiveSet.clear()
1013 if s.sp != 0 && s.values[s.sp].uses != nil {
1014 regValLiveSet.add(s.sp)
1015 }
1016 if s.sb != 0 && s.values[s.sb].uses != nil {
1017 regValLiveSet.add(s.sb)
1018 }
1019 nextCall = int32(i)
1020 }
1021 for _, a := range v.Args {
1022 if !s.values[a.ID].needReg {
1023 continue
1024 }
1025 s.addUse(a.ID, int32(i), v.Pos)
1026 regValLiveSet.add(a.ID)
1027 }
1028 s.nextCall[i] = nextCall
1029 }
1030 if s.f.pass.debug > regDebug {
1031 fmt.Printf("use distances for %s\n", b)
1032 for i := range s.values {
1033 vi := &s.values[i]
1034 u := vi.uses
1035 if u == nil {
1036 continue
1037 }
1038 fmt.Printf(" v%d:", i)
1039 for u != nil {
1040 fmt.Printf(" %d", u.dist)
1041 u = u.next
1042 }
1043 fmt.Println()
1044 }
1045 }
1046
1047
1048
1049 nphi := 0
1050 for _, v := range b.Values {
1051 if v.Op != OpPhi {
1052 break
1053 }
1054 nphi++
1055 }
1056 phis = append(phis[:0], b.Values[:nphi]...)
1057 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1058 b.Values = b.Values[:0]
1059
1060
1061 if b == f.Entry {
1062
1063 if nphi > 0 {
1064 f.Fatalf("phis in entry block")
1065 }
1066 } else if len(b.Preds) == 1 {
1067
1068 s.setState(s.endRegs[b.Preds[0].b.ID])
1069 if nphi > 0 {
1070 f.Fatalf("phis in single-predecessor block")
1071 }
1072
1073
1074
1075 for r := register(0); r < s.numRegs; r++ {
1076 v := s.regs[r].v
1077 if v != nil && !regValLiveSet.contains(v.ID) {
1078 s.freeReg(r)
1079 }
1080 }
1081 } else {
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095 idx := -1
1096 for i, p := range b.Preds {
1097
1098
1099 pb := p.b
1100 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1101 continue
1102 }
1103 if idx == -1 {
1104 idx = i
1105 continue
1106 }
1107 pSel := b.Preds[idx].b
1108 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1109 idx = i
1110 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1122 idx = i
1123 }
1124 }
1125 }
1126 if idx < 0 {
1127 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1128 }
1129 p := b.Preds[idx].b
1130 s.setState(s.endRegs[p.ID])
1131
1132 if s.f.pass.debug > regDebug {
1133 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1134 for _, x := range s.endRegs[p.ID] {
1135 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1136 }
1137 }
1138
1139
1140
1141
1142
1143 phiRegs = phiRegs[:0]
1144 var phiUsed regMask
1145
1146 for _, v := range phis {
1147 if !s.values[v.ID].needReg {
1148 phiRegs = append(phiRegs, noRegister)
1149 continue
1150 }
1151 a := v.Args[idx]
1152
1153
1154 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1155 if m != 0 {
1156 r := pickReg(m)
1157 phiUsed |= regMask(1) << r
1158 phiRegs = append(phiRegs, r)
1159 } else {
1160 phiRegs = append(phiRegs, noRegister)
1161 }
1162 }
1163
1164
1165 for i, v := range phis {
1166 if !s.values[v.ID].needReg {
1167 continue
1168 }
1169 a := v.Args[idx]
1170 r := phiRegs[i]
1171 if r == noRegister {
1172 continue
1173 }
1174 if regValLiveSet.contains(a.ID) {
1175
1176
1177
1178
1179
1180
1181
1182
1183 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1184 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1185 r2 := pickReg(m)
1186 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1187 s.copies[c] = false
1188 if s.f.pass.debug > regDebug {
1189 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1190 }
1191 s.setOrig(c, a)
1192 s.assignReg(r2, a, c)
1193 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1194 }
1195 }
1196 s.freeReg(r)
1197 }
1198
1199
1200 b.Values = append(b.Values, phis...)
1201
1202
1203
1204 for i, v := range phis {
1205 if !s.values[v.ID].needReg {
1206 continue
1207 }
1208 if phiRegs[i] != noRegister {
1209 continue
1210 }
1211 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1212
1213
1214 for i, pe := range b.Preds {
1215 if i == idx {
1216 continue
1217 }
1218 ri := noRegister
1219 for _, er := range s.endRegs[pe.b.ID] {
1220 if er.v == s.orig[v.Args[i].ID] {
1221 ri = er.r
1222 break
1223 }
1224 }
1225 if ri != noRegister && m>>ri&1 != 0 {
1226 m = regMask(1) << ri
1227 break
1228 }
1229 }
1230 if m != 0 {
1231 r := pickReg(m)
1232 phiRegs[i] = r
1233 phiUsed |= regMask(1) << r
1234 }
1235 }
1236
1237
1238 for i, v := range phis {
1239 if !s.values[v.ID].needReg {
1240 continue
1241 }
1242 r := phiRegs[i]
1243 if r == noRegister {
1244
1245
1246 s.values[v.ID].spill = v
1247 continue
1248 }
1249
1250 s.assignReg(r, v, v)
1251 }
1252
1253
1254 for r := register(0); r < s.numRegs; r++ {
1255 if phiUsed>>r&1 != 0 {
1256 continue
1257 }
1258 v := s.regs[r].v
1259 if v != nil && !regValLiveSet.contains(v.ID) {
1260 s.freeReg(r)
1261 }
1262 }
1263
1264
1265
1266
1267
1268 regList := make([]startReg, 0, 32)
1269 for r := register(0); r < s.numRegs; r++ {
1270 v := s.regs[r].v
1271 if v == nil {
1272 continue
1273 }
1274 if phiUsed>>r&1 != 0 {
1275
1276
1277 continue
1278 }
1279 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1280 s.startRegsMask |= regMask(1) << r
1281 }
1282 s.startRegs[b.ID] = make([]startReg, len(regList))
1283 copy(s.startRegs[b.ID], regList)
1284
1285 if s.f.pass.debug > regDebug {
1286 fmt.Printf("after phis\n")
1287 for _, x := range s.startRegs[b.ID] {
1288 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1289 }
1290 }
1291 }
1292
1293
1294 for i, v := range phis {
1295 s.curIdx = i
1296 s.dropIfUnused(v)
1297 }
1298
1299
1300 if l := len(oldSched); cap(dinfo) < l {
1301 dinfo = make([]dentry, l)
1302 } else {
1303 dinfo = dinfo[:l]
1304 clear(dinfo)
1305 }
1306
1307
1308 desired.copy(&s.desired[b.ID])
1309
1310
1311
1312
1313
1314
1315 for _, e := range b.Succs {
1316 succ := e.b
1317
1318 for _, x := range s.startRegs[succ.ID] {
1319 desired.add(x.v.ID, x.r)
1320 }
1321
1322 pidx := e.i
1323 for _, v := range succ.Values {
1324 if v.Op != OpPhi {
1325 break
1326 }
1327 if !s.values[v.ID].needReg {
1328 continue
1329 }
1330 rp, ok := s.f.getHome(v.ID).(*Register)
1331 if !ok {
1332
1333
1334
1335
1336 for _, a := range v.Args {
1337 rp, ok = s.f.getHome(a.ID).(*Register)
1338 if ok {
1339 break
1340 }
1341 }
1342 if !ok {
1343 continue
1344 }
1345 }
1346 desired.add(v.Args[pidx].ID, register(rp.num))
1347 }
1348 }
1349
1350
1351 for i := len(oldSched) - 1; i >= 0; i-- {
1352 v := oldSched[i]
1353 prefs := desired.remove(v.ID)
1354 regspec := s.regspec(v)
1355 desired.clobber(regspec.clobbers)
1356 for _, j := range regspec.inputs {
1357 if countRegs(j.regs) != 1 {
1358 continue
1359 }
1360 desired.clobber(j.regs)
1361 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1362 }
1363 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1364 if opcodeTable[v.Op].commutative {
1365 desired.addList(v.Args[1].ID, prefs)
1366 }
1367 desired.addList(v.Args[0].ID, prefs)
1368 }
1369
1370 dinfo[i].out = prefs
1371 for j, a := range v.Args {
1372 if j >= len(dinfo[i].in) {
1373 break
1374 }
1375 dinfo[i].in[j] = desired.get(a.ID)
1376 }
1377 if v.Op == OpSelect1 && prefs[0] != noRegister {
1378
1379
1380 desiredSecondReg[v.Args[0].ID] = prefs
1381 }
1382 }
1383
1384
1385 for idx, v := range oldSched {
1386 s.curIdx = nphi + idx
1387 tmpReg := noRegister
1388 if s.f.pass.debug > regDebug {
1389 fmt.Printf(" processing %s\n", v.LongString())
1390 }
1391 regspec := s.regspec(v)
1392 if v.Op == OpPhi {
1393 f.Fatalf("phi %s not at start of block", v)
1394 }
1395 if opcodeTable[v.Op].fixedReg {
1396 switch v.Op {
1397 case OpSP:
1398 s.assignReg(s.SPReg, v, v)
1399 s.sp = v.ID
1400 case OpSB:
1401 s.assignReg(s.SBReg, v, v)
1402 s.sb = v.ID
1403 case OpARM64ZERO:
1404 s.assignReg(s.ZeroIntReg, v, v)
1405 default:
1406 f.Fatalf("unknown fixed-register op %s", v)
1407 }
1408 b.Values = append(b.Values, v)
1409 s.advanceUses(v)
1410 continue
1411 }
1412 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1413 if s.values[v.ID].needReg {
1414 if v.Op == OpSelectN {
1415 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1416 } else {
1417 var i = 0
1418 if v.Op == OpSelect1 {
1419 i = 1
1420 }
1421 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1422 }
1423 }
1424 b.Values = append(b.Values, v)
1425 s.advanceUses(v)
1426 continue
1427 }
1428 if v.Op == OpGetG && s.f.Config.hasGReg {
1429
1430 if s.regs[s.GReg].v != nil {
1431 s.freeReg(s.GReg)
1432 }
1433 s.assignReg(s.GReg, v, v)
1434 b.Values = append(b.Values, v)
1435 s.advanceUses(v)
1436 continue
1437 }
1438 if v.Op == OpArg {
1439
1440
1441
1442 s.values[v.ID].spill = v
1443 b.Values = append(b.Values, v)
1444 s.advanceUses(v)
1445 continue
1446 }
1447 if v.Op == OpKeepAlive {
1448
1449 s.advanceUses(v)
1450 a := v.Args[0]
1451 vi := &s.values[a.ID]
1452 if vi.regs == 0 && !vi.rematerializeable {
1453
1454
1455
1456 v.SetArg(0, s.makeSpill(a, b))
1457 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1458
1459
1460
1461 v.Op = OpVarLive
1462 v.SetArgs1(v.Args[1])
1463 v.Aux = a.Aux
1464 } else {
1465
1466
1467
1468 v.Op = OpCopy
1469 v.SetArgs1(v.Args[1])
1470 }
1471 b.Values = append(b.Values, v)
1472 continue
1473 }
1474 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1475
1476 if s.doClobber && v.Op.IsCall() {
1477 s.clobberRegs(regspec.clobbers)
1478 }
1479 s.freeRegs(regspec.clobbers)
1480 b.Values = append(b.Values, v)
1481 s.advanceUses(v)
1482 continue
1483 }
1484
1485 if s.values[v.ID].rematerializeable {
1486
1487
1488
1489 for _, a := range v.Args {
1490 a.Uses--
1491 }
1492 s.advanceUses(v)
1493 continue
1494 }
1495
1496 if s.f.pass.debug > regDebug {
1497 fmt.Printf("value %s\n", v.LongString())
1498 fmt.Printf(" out:")
1499 for _, r := range dinfo[idx].out {
1500 if r != noRegister {
1501 fmt.Printf(" %s", &s.registers[r])
1502 }
1503 }
1504 fmt.Println()
1505 for i := 0; i < len(v.Args) && i < 3; i++ {
1506 fmt.Printf(" in%d:", i)
1507 for _, r := range dinfo[idx].in[i] {
1508 if r != noRegister {
1509 fmt.Printf(" %s", &s.registers[r])
1510 }
1511 }
1512 fmt.Println()
1513 }
1514 }
1515
1516
1517
1518
1519 args = append(args[:0], make([]*Value, len(v.Args))...)
1520 for i, a := range v.Args {
1521 if !s.values[a.ID].needReg {
1522 args[i] = a
1523 }
1524 }
1525 for _, i := range regspec.inputs {
1526 mask := i.regs
1527 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1528 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1529 }
1530 }
1531
1532
1533
1534
1535
1536
1537 for {
1538 freed := false
1539 for _, i := range regspec.inputs {
1540 if args[i.idx] != nil {
1541 continue
1542 }
1543 mask := i.regs
1544 if countRegs(mask) == 1 && mask&^s.used != 0 {
1545 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1546
1547
1548
1549 oldregs := s.values[v.Args[i.idx].ID].regs
1550 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1551 s.freeRegs(oldregs &^ mask &^ s.nospill)
1552 freed = true
1553 }
1554 }
1555 }
1556 if !freed {
1557 break
1558 }
1559 }
1560
1561
1562 for _, i := range regspec.inputs {
1563 if args[i.idx] != nil {
1564 continue
1565 }
1566 mask := i.regs
1567 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1568
1569 mask &= s.allocatable
1570 mask &^= s.nospill
1571
1572 if i.idx < 3 {
1573 for _, r := range dinfo[idx].in[i.idx] {
1574 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1575
1576 mask = regMask(1) << r
1577 break
1578 }
1579 }
1580 }
1581
1582 if mask&^desired.avoid != 0 {
1583 mask &^= desired.avoid
1584 }
1585 }
1586 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1587 }
1588
1589
1590
1591
1592 if opcodeTable[v.Op].resultInArg0 {
1593 var m regMask
1594 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1595
1596 goto ok
1597 }
1598 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1599 args[0], args[1] = args[1], args[0]
1600 goto ok
1601 }
1602 if s.values[v.Args[0].ID].rematerializeable {
1603
1604 goto ok
1605 }
1606 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1607 args[0], args[1] = args[1], args[0]
1608 goto ok
1609 }
1610 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1611
1612 goto ok
1613 }
1614 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1615 args[0], args[1] = args[1], args[0]
1616 goto ok
1617 }
1618
1619
1620
1621
1622
1623 m = s.compatRegs(v.Args[0].Type) &^ s.used
1624 if m == 0 {
1625
1626
1627
1628
1629 goto ok
1630 }
1631
1632
1633 for _, r := range dinfo[idx].out {
1634 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1635 m = regMask(1) << r
1636 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1637
1638
1639 goto ok
1640 }
1641 }
1642
1643
1644 for _, r := range dinfo[idx].in[0] {
1645 if r != noRegister && m>>r&1 != 0 {
1646 m = regMask(1) << r
1647 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1648 s.copies[c] = false
1649
1650
1651 goto ok
1652 }
1653 }
1654 if opcodeTable[v.Op].commutative {
1655 for _, r := range dinfo[idx].in[1] {
1656 if r != noRegister && m>>r&1 != 0 {
1657 m = regMask(1) << r
1658 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1659 s.copies[c] = false
1660 args[0], args[1] = args[1], args[0]
1661 goto ok
1662 }
1663 }
1664 }
1665
1666
1667 if m&^desired.avoid != 0 {
1668 m &^= desired.avoid
1669 }
1670
1671 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1672 s.copies[c] = false
1673
1674
1675
1676
1677 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1678 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1679 r := register(rp.num)
1680 for _, r2 := range dinfo[idx].in[0] {
1681 if r == r2 {
1682 args[0] = c
1683 break
1684 }
1685 }
1686 }
1687 }
1688 }
1689
1690 ok:
1691
1692
1693
1694
1695
1696
1697 if opcodeTable[v.Op].needIntTemp {
1698 m := s.allocatable & s.f.Config.gpRegMask
1699 for _, out := range regspec.outputs {
1700 if countRegs(out.regs) == 1 {
1701 m &^= out.regs
1702 }
1703 }
1704 if m&^desired.avoid&^s.nospill != 0 {
1705 m &^= desired.avoid
1706 }
1707 tmpReg = s.allocReg(m, &tmpVal)
1708 s.nospill |= regMask(1) << tmpReg
1709 s.tmpused |= regMask(1) << tmpReg
1710 }
1711
1712
1713
1714
1715
1716 if !opcodeTable[v.Op].resultNotInArgs {
1717 s.tmpused = s.nospill
1718 s.nospill = 0
1719 s.advanceUses(v)
1720 }
1721
1722
1723 if s.doClobber && v.Op.IsCall() {
1724
1725
1726 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1727 }
1728 s.freeRegs(regspec.clobbers)
1729 s.tmpused |= regspec.clobbers
1730
1731
1732 {
1733 outRegs := noRegisters
1734 maxOutIdx := -1
1735 var used regMask
1736 if tmpReg != noRegister {
1737
1738
1739 used |= regMask(1) << tmpReg
1740 }
1741 for _, out := range regspec.outputs {
1742 if out.regs == 0 {
1743 continue
1744 }
1745 mask := out.regs & s.allocatable &^ used
1746 if mask == 0 {
1747 s.f.Fatalf("can't find any output register %s", v.LongString())
1748 }
1749 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1750 if !opcodeTable[v.Op].commutative {
1751
1752 r := register(s.f.getHome(args[0].ID).(*Register).num)
1753 if mask>>r&1 == 0 {
1754 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1755 }
1756 mask = regMask(1) << r
1757 } else {
1758
1759 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1760 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1761
1762 found := false
1763 for _, r := range dinfo[idx].out {
1764 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1765 mask = regMask(1) << r
1766 found = true
1767 if r == r1 {
1768 args[0], args[1] = args[1], args[0]
1769 }
1770 break
1771 }
1772 }
1773 if !found {
1774
1775 mask = regMask(1) << r0
1776 }
1777 }
1778 }
1779 if out.idx == 0 {
1780 for _, r := range dinfo[idx].out {
1781 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1782
1783 mask = regMask(1) << r
1784 break
1785 }
1786 }
1787 }
1788 if out.idx == 1 {
1789 if prefs, ok := desiredSecondReg[v.ID]; ok {
1790 for _, r := range prefs {
1791 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1792
1793 mask = regMask(1) << r
1794 break
1795 }
1796 }
1797 }
1798 }
1799
1800 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1801 mask &^= desired.avoid
1802 }
1803 r := s.allocReg(mask, v)
1804 if out.idx > maxOutIdx {
1805 maxOutIdx = out.idx
1806 }
1807 outRegs[out.idx] = r
1808 used |= regMask(1) << r
1809 s.tmpused |= regMask(1) << r
1810 }
1811
1812 if v.Type.IsTuple() {
1813 var outLocs LocPair
1814 if r := outRegs[0]; r != noRegister {
1815 outLocs[0] = &s.registers[r]
1816 }
1817 if r := outRegs[1]; r != noRegister {
1818 outLocs[1] = &s.registers[r]
1819 }
1820 s.f.setHome(v, outLocs)
1821
1822 } else if v.Type.IsResults() {
1823
1824 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1825 for i := 0; i <= maxOutIdx; i++ {
1826 if r := outRegs[i]; r != noRegister {
1827 outLocs[i] = &s.registers[r]
1828 }
1829 }
1830 s.f.setHome(v, outLocs)
1831 } else {
1832 if r := outRegs[0]; r != noRegister {
1833 s.assignReg(r, v, v)
1834 }
1835 }
1836 if tmpReg != noRegister {
1837
1838 if s.f.tempRegs == nil {
1839 s.f.tempRegs = map[ID]*Register{}
1840 }
1841 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1842 }
1843 }
1844
1845
1846 if opcodeTable[v.Op].resultNotInArgs {
1847 s.nospill = 0
1848 s.advanceUses(v)
1849 }
1850 s.tmpused = 0
1851
1852
1853 for i, a := range args {
1854 v.SetArg(i, a)
1855 }
1856 b.Values = append(b.Values, v)
1857 s.dropIfUnused(v)
1858 }
1859
1860
1861
1862 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1863
1864
1865 for i, v := range b.ControlValues() {
1866 if !s.values[v.ID].needReg {
1867 continue
1868 }
1869 if s.f.pass.debug > regDebug {
1870 fmt.Printf(" processing control %s\n", v.LongString())
1871 }
1872
1873
1874
1875 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1876 }
1877
1878
1879
1880 for _, v := range controls {
1881 vi := &s.values[v.ID]
1882 if !vi.needReg {
1883 continue
1884 }
1885
1886 u := vi.uses
1887 vi.uses = u.next
1888 if u.next == nil {
1889 s.freeRegs(vi.regs)
1890 }
1891 u.next = s.freeUseRecords
1892 s.freeUseRecords = u
1893 }
1894
1895
1896
1897
1898 if len(b.Succs) == 1 {
1899 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1900 s.freeReg(s.GReg)
1901 }
1902 if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
1903
1904 goto badloop
1905 }
1906
1907 top := b.Succs[0].b
1908 loop := s.loopnest.b2l[top.ID]
1909 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1910 goto badloop
1911 }
1912
1913
1914 for _, live := range s.live[b.ID] {
1915 if live.dist >= unlikelyDistance {
1916
1917 continue
1918 }
1919 vid := live.ID
1920 vi := &s.values[vid]
1921 if vi.regs != 0 {
1922 continue
1923 }
1924 if vi.rematerializeable {
1925 continue
1926 }
1927 v := s.orig[vid]
1928 m := s.compatRegs(v.Type) &^ s.used
1929
1930 outerloop:
1931 for _, e := range desired.entries {
1932 if e.ID != v.ID {
1933 continue
1934 }
1935 for _, r := range e.regs {
1936 if r != noRegister && m>>r&1 != 0 {
1937 m = regMask(1) << r
1938 break outerloop
1939 }
1940 }
1941 }
1942 if m&^desired.avoid != 0 {
1943 m &^= desired.avoid
1944 }
1945 if m != 0 {
1946 s.allocValToReg(v, m, false, b.Pos)
1947 }
1948 }
1949 }
1950 badloop:
1951 ;
1952
1953
1954
1955 k := 0
1956 for r := register(0); r < s.numRegs; r++ {
1957 v := s.regs[r].v
1958 if v == nil {
1959 continue
1960 }
1961 k++
1962 }
1963 regList := make([]endReg, 0, k)
1964 for r := register(0); r < s.numRegs; r++ {
1965 v := s.regs[r].v
1966 if v == nil {
1967 continue
1968 }
1969 regList = append(regList, endReg{r, v, s.regs[r].c})
1970 }
1971 s.endRegs[b.ID] = regList
1972
1973 if checkEnabled {
1974 regValLiveSet.clear()
1975 for _, x := range s.live[b.ID] {
1976 regValLiveSet.add(x.ID)
1977 }
1978 for r := register(0); r < s.numRegs; r++ {
1979 v := s.regs[r].v
1980 if v == nil {
1981 continue
1982 }
1983 if !regValLiveSet.contains(v.ID) {
1984 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
1985 }
1986 }
1987 }
1988
1989
1990
1991
1992
1993 for _, e := range s.live[b.ID] {
1994 vi := &s.values[e.ID]
1995 if vi.regs != 0 {
1996
1997 continue
1998 }
1999 if vi.rematerializeable {
2000
2001 continue
2002 }
2003 if s.f.pass.debug > regDebug {
2004 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
2005 }
2006 spill := s.makeSpill(s.orig[e.ID], b)
2007 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
2008 }
2009
2010
2011
2012
2013 for _, e := range s.live[b.ID] {
2014 u := s.values[e.ID].uses
2015 if u == nil {
2016 f.Fatalf("live at end, no uses v%d", e.ID)
2017 }
2018 if u.next != nil {
2019 f.Fatalf("live at end, too many uses v%d", e.ID)
2020 }
2021 s.values[e.ID].uses = nil
2022 u.next = s.freeUseRecords
2023 s.freeUseRecords = u
2024 }
2025
2026
2027
2028
2029
2030
2031
2032 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
2033 regs := make([]startReg, 0, c)
2034 for _, sr := range s.startRegs[b.ID] {
2035 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
2036 continue
2037 }
2038 regs = append(regs, sr)
2039 }
2040 s.startRegs[b.ID] = regs
2041 }
2042 }
2043
2044
2045 s.placeSpills()
2046
2047
2048
2049 stacklive := stackalloc(s.f, s.spillLive)
2050
2051
2052 s.shuffle(stacklive)
2053
2054
2055
2056
2057 for {
2058 progress := false
2059 for c, used := range s.copies {
2060 if !used && c.Uses == 0 {
2061 if s.f.pass.debug > regDebug {
2062 fmt.Printf("delete copied value %s\n", c.LongString())
2063 }
2064 c.resetArgs()
2065 f.freeValue(c)
2066 delete(s.copies, c)
2067 progress = true
2068 }
2069 }
2070 if !progress {
2071 break
2072 }
2073 }
2074
2075 for _, b := range s.visitOrder {
2076 i := 0
2077 for _, v := range b.Values {
2078 if v.Op == OpInvalid {
2079 continue
2080 }
2081 b.Values[i] = v
2082 i++
2083 }
2084 b.Values = b.Values[:i]
2085 }
2086 }
2087
// placeSpills picks the final block for each pending spill instruction and
// splices the spills into the block value lists. Spills created earlier
// (by makeSpill) arrive here with a nil Block; each one is sunk down the
// dominator tree toward its restores so that paths which never restore
// the value don't pay for the store.
func (s *regAllocState) placeSpills() {
	// mustBeFirst reports whether op is pinned to the start of its block;
	// spills must not be inserted before such ops.
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// start[blockID] = spills to place at the start of that block
	// (after any mustBeFirst ops).
	start := map[ID][]*Value{}

	// after[valueID] = spills to place immediately after that value
	// within its block.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up
			// (they were assigned a block earlier); leave them alone.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a better (deeper,
		// but not deeper-in-a-loop) place to put the spill of v.
		// "best" is the best block found so far, "bestArg" the cached
		// value to spill there, "bestDepth" its loop depth.
		// The search is capped at maxSpillSearch steps to bound cost.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores (restoreMin/restoreMax are the
			// dominator-tree entry/exit bounds of the restores).
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c dominates all restores; descend into it.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks that dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is available in a register at the start of b,
			// b is a valid (and better) spill point; remember the
			// register-resident cached copy as the spill argument.
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Record the chosen placement.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place the spill immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place the spill at the start of the chosen block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	// oldSched is reused scratch space across blocks.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2210
2211
2212 func (s *regAllocState) shuffle(stacklive [][]ID) {
2213 var e edgeState
2214 e.s = s
2215 e.cache = map[ID][]*Value{}
2216 e.contents = map[Location]contentRecord{}
2217 if s.f.pass.debug > regDebug {
2218 fmt.Printf("shuffle %s\n", s.f.Name)
2219 fmt.Println(s.f.String())
2220 }
2221
2222 for _, b := range s.visitOrder {
2223 if len(b.Preds) <= 1 {
2224 continue
2225 }
2226 e.b = b
2227 for i, edge := range b.Preds {
2228 p := edge.b
2229 e.p = p
2230 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2231 e.process()
2232 }
2233 }
2234
2235 if s.f.pass.debug > regDebug {
2236 fmt.Printf("post shuffle %s\n", s.f.Name)
2237 fmt.Println(s.f.String())
2238 }
2239 }
2240
// edgeState holds the working state while fixing up one merge edge p->b.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p to b

	// cache maps each pre-regalloc value ID to the list of cached
	// copies (registers and/or stack slots) currently holding it.
	cache      map[ID][]*Value
	cachedVals []ID // keys of cache, kept for deterministic iteration

	// contents records, per location, which value it currently holds.
	contents map[Location]contentRecord

	// destinations are the locations that still need to receive a value;
	// extra accumulates re-queued destinations generated while processing.
	destinations []dstRecord
	extra        []dstRecord

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only cached copy of a value
	finalRegs             regMask // registers holding a satisfied destination
	rematerializeableRegs regMask // registers holding rematerializeable values
}
2261
// contentRecord describes what a location holds during edge shuffling.
type contentRecord struct {
	vid   ID       // pre-regalloc value ID
	c     *Value   // cached copy of that value stored here
	final bool     // this location is a satisfied destination
	pos   src.XPos // source position of the use of this value
}
2268
// dstRecord describes one destination that still needs to be filled.
type dstRecord struct {
	loc    Location // register or stack slot to fill
	vid    ID       // pre-regalloc value ID it should contain
	splice **Value  // if non-nil, argument slot to patch with the generated value
	pos    src.XPos // source position of the use of this location
}
2275
2276
// setup initializes the edge state for shuffling the edge e.p -> e.b.
// idx is the index of this predecessor edge in e.b.Preds; srcReg are the
// registers live at the end of e.p, dstReg the registers expected at the
// start of e.b, and stacklive the spill slots live out of e.p.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state left over from the previous edge.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers at the end of p can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
	}
	// Live spill slots can be sources too — but only if the spill's
	// block dominates this edge.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// The spill doesn't dominate this edge, so the stack copy
			// may not be valid here; skip it. (In that case the value
			// must be reachable some other way — e.g. via a register
			// recorded in srcReg above.)
			// NOTE(review): this relies on an invariant established by
			// the main allocation pass — confirm against makeSpill's
			// call sites.
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
	}

	// Figure out all the destinations we need to fill.
	dsts := e.destinations[:0]
	// Registers expected live at the start of b.
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis in b need their idx-th argument moved into the phi's home.
	// Phis are scheduled first in a block, so stop at the first non-phi.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2343
2344
// process generates the moves that put every value into its destination
// location for the current edge. It repeatedly makes passes over the
// pending destinations, and breaks move cycles by copying one cycle
// member to a scratch register.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Couldn't satisfy this one yet - keep it for the next pass.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again, including any
			// extra destinations generated while processing.
			dsts = dsts[:i]
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// No progress was made: every remaining move is blocked by
		// another, i.e. the moves form cycles such as
		//   A -> B -> C -> D -> A.
		// Break a cycle by copying the contents of one cycle location
		// (dsts[0].loc) to a free register r. That frees the original
		// location to be overwritten, so the next pass can proceed.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		// Register sources copy; stack sources load.
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		// Sanity check: never load into the g register.
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2411
2412
2413
// processDest generates code to put value vid into location loc.
// splice, if non-nil, is an argument slot to patch with the generated
// value. It returns true if the destination was satisfied, false if it
// must be retried later (the occupant of loc cannot be clobbered yet).
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	pos = pos.WithNotStmt()
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the right place; just mark it final.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// If the occupant was a copy made to avoid a spill, record
		// that the copy turned out to be used (so it won't be deleted).
		if _, ok := e.s.copies[occupant.c]; ok {
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location:
	// we can't overwrite the last remaining copy of a value that
	// can't be recomputed.
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
		return false
	}

	// Pick a source copy of v, preferring one in a register.
	v := e.s.orig[vid]
	var c *Value
	var src Location // note: shadows the src package within this function
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc)
		fmt.Printf("sources of v%d:", vid)
	}
	if opcodeTable[v.Op].fixedReg {
		// Fixed-register values are used directly from their home.
		c = v
		src = e.s.f.getHome(v.ID)
	} else {
		for _, w := range e.cache[vid] {
			h := e.s.f.getHome(w.ID)
			if e.s.f.pass.debug > regDebug {
				fmt.Printf(" %s:%s", h, w)
			}
			_, isreg := h.(*Register)
			if src == nil || isreg {
				c = w
				src = h
			}
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src)
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)

	// Erase the destination before generating the move, so its old
	// contents stop being recorded as available (erase also re-queues
	// a destination if a final value lived there).
	e.erase(loc)
	var x *Value
	if c == nil || e.s.values[vid].rematerializeable {
		// No stored copy available: the value must be rematerializeable.
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyInto(e.p)
		} else {
			// Rematerialize into a stack slot: recompute into a free
			// register first, then store to the slot.
			r := e.findRegFor(v.Type)
			e.erase(r)
			x = v.copyIntoWithXPos(e.p, pos)
			e.set(r, vid, x, false, pos)
			// Store with the slot's type, not x's type.
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit a move from src to loc. The four cases are
		// reg->reg (copy), reg->mem (store), mem->reg (load),
		// and mem->mem (load into a temp register, then store).
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem: go through a temporary register.
				r := e.findRegFor(c.Type)
				e.erase(r)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	// Sanity check: never load into the g register.
	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
	}
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}
2538
2539
// set records that location loc now holds cached copy c of value vid,
// updating the cache, contents map, and all register bookkeeping masks.
// final marks loc as a satisfied destination.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First cached copy of vid: remember the key for iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			// Only copy of the value: mark its register unique.
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			// A second copy now exists: the first copy's register
			// (if it is one) is no longer unique.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
2574
2575
// erase removes the current occupant of loc from the edge state,
// updating the cache and register masks. If the occupant was a satisfied
// (final) destination, a new destination record is queued so the value
// gets moved back later.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		// Location is empty; nothing to do.
		return
	}
	vid := cr.vid

	if cr.final {
		// loc held a finished destination; queue a record to restore it.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove the copy that lives at loc from vid's cache list
	// (swap-with-last delete).
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks if loc is a register.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	// If exactly one copy remains and it's in a register,
	// that register becomes unique.
	if len(a) == 1 {
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2617
2618
// findRegFor finds a register that can be used as a temporary for a value
// of type typ, spilling an existing occupant to a fresh stack slot if no
// better choice exists. It never returns nil (it calls Fatalf instead).
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Registers compatible with typ.
	types := &e.s.f.Config.Types // note: shadows the types package within this function
	m := e.s.compatRegs(typ)

	// Pick a register, in decreasing order of preference:
	// 1) completely unused,
	// 2) not the unique copy of a value and not a final destination,
	// 3) not the unique copy of a value,
	// 4) holding a rematerializeable value (cheap to recreate).
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// All candidate registers hold unique, non-rematerializeable values.
	// Spill one of them to a fresh stack slot and hand out its register.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Use an Int64-typed slot as a generic register-sized
					// save area for the spilled value.
					// NOTE(review): presumably safe because the slot is
					// not live across a safepoint — confirm upstream.
					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// The caller will overwrite r; the saved copy (or the
				// rematerializeable value) remains available elsewhere.
				return r
			}
		}
	}

	// No register could be found at all: dump state and abort.
	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
2682
2683
2684
2685 func (v *Value) rematerializeable() bool {
2686 if !opcodeTable[v.Op].rematerializeable {
2687 return false
2688 }
2689 for _, a := range v.Args {
2690
2691
2692 if !opcodeTable[a.Op].fixedReg {
2693 return false
2694 }
2695 }
2696 return true
2697 }
2698
// liveInfo is one entry of a block's live-out set.
type liveInfo struct {
	ID   ID       // ID of the live value
	dist int32    // # of instructions before the next use of the value
	pos  src.XPos // source position of that next use
}
2704
2705
2706
2707
2708
2709
2710
2711
2712
// computeLive computes, for each block, the set of values live at the end
// of the block together with the distance (in instructions) to their next
// use, storing the result in s.live. It simultaneously computes the
// desired-register information at the end of each block into s.desired.
// Both are backward dataflow problems solved by iterating to a fixed point
// over the postorder of the CFG.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	// Scratch desired-register state for the block being scanned.
	var desired desiredState

	// Iterate over the blocks in postorder: liveness flows backward,
	// so starting near the exits helps the fixed point converge quickly.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.calculateDepths()
	for {
		changed := false

		for _, b := range po {
			// Start with the known live-out values of b, shifting
			// distances from end-of-block to start-of-block by adding
			// the block length.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Control values are used at the end of the block.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Walk the (scheduled) values backward, killing each value
			// at its definition and reviving its arguments at their use.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phis for later; their args are live on the
					// corresponding predecessor edges, not here.
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					// Values live across a call are penalized: push
					// their next-use distance out so the allocator
					// prefers spilling them.
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}

			// Propagate desired registers backward through the block.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// Desired registers are not propagated through phis.
					continue
				}
				regspec := s.regspec(v)
				// Cancel any desires for registers this op clobbers.
				desired.clobber(regspec.clobbers)
				// Inputs pinned to a single register become desires
				// for the corresponding arguments.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// For two-operand-style instructions, the output's
				// desired registers are also desired for input 0 (and
				// input 1 if commutative). The extra ops listed here are
				// treated like resultInArg0 for desire propagation.
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor p of b, merge b's start-of-block
			// liveness (now in live) into p's live-out set.
			for i, e := range b.Preds {
				p := e.b
				// Edge distance depends on the branch likelihood:
				// likely edges are cheap, unlikely edges expensive.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Merge desired registers into p's end-of-block state.
				s.desired[p.ID].merge(&desired)

				// Build t = p's previous live-out set.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Fold in the values live at the start of b, keeping
				// the smaller of old and new distances.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}
				// The i-th argument of each phi in b is live out of p.
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// Commit the enlarged live-out set for p.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.pos})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf(" %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d(%d)", x.ID, x.dist)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(&s.registers[r])
						first = false
					}
					fmt.Printf("]")
				}
			}
			if avoid := s.desired[b.ID].avoid; avoid != 0 {
				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
			}
			fmt.Println()
		}
	}
}
2917
2918
// A desiredState represents desired register assignments for a set of values.
type desiredState struct {
	// Desired assignments are few, so a flat list of
	// value-ID + registers entries suffices.
	entries []desiredStateEntry

	// avoid is the set of registers that some value wants to be in.
	// It contains at least the union of the regs fields of entries,
	// but may be an overapproximation of that union (clobber shrinks
	// entries without always shrinking avoid precisely).
	avoid regMask
}
// A desiredStateEntry records the registers one value would like to be in.
type desiredStateEntry struct {
	// ID of the (pre-regalloc) value.
	ID ID
	// Registers it would like to be in, in priority order
	// (regs[0] is most preferred). Unused slots hold noRegister.
	regs [4]register
}
2939
2940 func (d *desiredState) clear() {
2941 d.entries = d.entries[:0]
2942 d.avoid = 0
2943 }
2944
2945
2946 func (d *desiredState) get(vid ID) [4]register {
2947 for _, e := range d.entries {
2948 if e.ID == vid {
2949 return e.regs
2950 }
2951 }
2952 return [4]register{noRegister, noRegister, noRegister, noRegister}
2953 }
2954
2955
// add records that value vid would like to be in register r.
// r becomes vid's highest-priority register (slot 0); existing
// preferences shift down, dropping the lowest if the list is full.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already the most-desired register.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Promote r from slot j to the front, shifting
				// slots 0..j-1 down by one (overlapping copy).
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		// r not present: shift everything down (dropping the last
		// slot) and insert r at the front.
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	// No entry for vid yet; create one with r as the only preference.
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
2981
2982 func (d *desiredState) addList(vid ID, regs [4]register) {
2983
2984 for i := len(regs) - 1; i >= 0; i-- {
2985 r := regs[i]
2986 if r != noRegister {
2987 d.add(vid, r)
2988 }
2989 }
2990 }
2991
2992
// clobber erases any desired registers that are in the set m,
// compacting each entry's preference list and deleting entries
// that end up with no registers at all.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		// Compact the surviving registers to the front of e.regs.
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No desired registers left: swap-delete this entry
			// (note: i is intentionally not incremented).
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		// Clear the now-unused tail slots.
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}
3016
3017
3018 func (d *desiredState) copy(x *desiredState) {
3019 d.entries = append(d.entries[:0], x.entries...)
3020 d.avoid = x.avoid
3021 }
3022
3023
3024 func (d *desiredState) remove(vid ID) [4]register {
3025 for i := range d.entries {
3026 if d.entries[i].ID == vid {
3027 regs := d.entries[i].regs
3028 d.entries[i] = d.entries[len(d.entries)-1]
3029 d.entries = d.entries[:len(d.entries)-1]
3030 return regs
3031 }
3032 }
3033 return [4]register{noRegister, noRegister, noRegister, noRegister}
3034 }
3035
3036
3037 func (d *desiredState) merge(x *desiredState) {
3038 d.avoid |= x.avoid
3039
3040
3041 for _, e := range x.entries {
3042 d.addList(e.ID, e.regs)
3043 }
3044 }
3045