1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "cmp"
123 "fmt"
124 "internal/buildcfg"
125 "math"
126 "math/bits"
127 "slices"
128 "unsafe"
129 )
130
// Debug verbosity thresholds for this pass, compared against
// s.f.pass.debug (e.g. "s.f.pass.debug > regDebug") to gate
// progressively more verbose diagnostics.
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
137
138
139
// Distance weights for CFG edges by branch likelihood
// (smaller = more likely to be executed soon).
// NOTE(review): the consumer is not visible in this chunk —
// presumably the computeLive use-distance computation; confirm.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
145
146
147
148 func regalloc(f *Func) {
149 var s regAllocState
150 s.init(f)
151 s.regalloc(f)
152 s.close()
153 }
154
// register is a register number, an index into the allocator's
// registers slice.
type register uint8

// noRegister is the sentinel meaning "no register assigned".
const noRegister register = 255

// noRegisters is a 32-entry array with every slot set to noRegister.
// NOTE(review): no use is visible in this chunk.
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}

// regMask is a set of machine registers, one bit per register number.
type regMask uint64

// String renders the set as space-separated generic register names
// ("r0 r5 ..."), lowest-numbered first.
func (m regMask) String() string {
	out := ""
	for m != 0 {
		// Peel off the lowest set bit each iteration.
		r := register(bits.TrailingZeros64(uint64(m)))
		m &^= regMask(1) << r
		if out != "" {
			out += " "
		}
		out += fmt.Sprintf("r%d", r)
	}
	return out
}

// contains reports whether register r is a member of the set m.
func (m regMask) contains(r register) bool {
	return m&(regMask(1)<<r) != 0
}
189
190 func (s *regAllocState) RegMaskString(m regMask) string {
191 str := ""
192 for r := register(0); m != 0; r++ {
193 if m>>r&1 == 0 {
194 continue
195 }
196 m &^= regMask(1) << r
197 if str != "" {
198 str += " "
199 }
200 str += s.registers[r].String()
201 }
202 return str
203 }
204
205
// countRegs returns the number of registers in the set r.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
209
210
// pickReg returns the lowest-numbered register in the set r.
// It panics if r is empty.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	// Pick the lowest set bit.
	return register(bits.TrailingZeros64(uint64(r)))
}
218
// A use records a single future use of a value. Uses for a value form a
// linked list ordered by increasing dist (addUse Fatalfs otherwise).
type use struct {
	// dist is how far into the future (counted in instructions from
	// the start of the current block's processing) this use occurs.
	dist int32
	// pos is the source position of this use, for debug/error output.
	pos src.XPos
	// next is the subsequent use of the same value (larger or equal dist).
	next *use
}
229
230
// valState is the per-Value bookkeeping of the allocator.
type valState struct {
	regs              regMask // registers currently holding a cached copy of this value
	uses              *use    // remaining uses, sorted by increasing dist
	spill             *Value  // spilled copy of this value, if any (see makeSpill)
	restoreMin        int32   // sdom entry of the subtree containing all restores
	restoreMax        int32   // sdom exit of that subtree
	needReg           bool    // value needs a register (not memory/void/flags/tuple)
	rematerializeable bool    // value can be recomputed instead of spilled/restored
}
240
// regState is the per-register bookkeeping of the allocator.
type regState struct {
	v *Value // original (pre-regalloc) value held in this register
	c *Value // cached copy of v that actually lives in the register
	// If v is nil, the register is unused.
}
246
// regAllocState holds all the state the register allocator carries
// while walking a function.
type regAllocState struct {
	f *Func

	sdom        SparseTree // dominator tree of f
	registers   []Register // machine register descriptions (from Config)
	numRegs     register   // len(registers)
	SPReg       register   // stack pointer register
	SBReg       register   // static base register
	GReg        register   // g register (meaningful only if Config.hasGReg)
	ZeroIntReg  register   // hardware zero register, or noRegister
	allocatable regMask    // registers the allocator may hand out

	// live[blockID] is the liveness information computed up front.
	// NOTE(review): computeLive is not visible in this chunk.
	live [][]liveInfo

	// desired[blockID] is the precomputed desired-register state per block.
	desired []desiredState

	// values[valueID] is the current state of each pre-regalloc value.
	values []valState

	// IDs of the SP and SB values, recorded when their ops are seen.
	sp, sb ID

	// orig[valueID] maps any value (including copies, spills, and
	// restores created here) back to its original pre-regalloc value.
	orig []*Value

	// regs[r] is the current state of each register.
	regs []regState

	// registers holding values that must not be spilled right now.
	nospill regMask

	// registers currently holding some value.
	used regMask

	// registers that have been written/read since the current block started;
	// used to decide whether a register can be dropped from startRegs.
	usedSinceBlockStart regMask

	// registers reserved as temporaries for the current instruction.
	tmpused regMask

	// block currently being processed.
	curBlock *Block

	// free list of use records, linked through use.next.
	freeUseRecords *use

	// endRegs[blockID] is the register contents at the end of each
	// already-processed block.
	endRegs [][]endReg

	// startRegs[blockID] is the register contents expected on entry to
	// each block.
	startRegs [][]startReg

	// bit set of registers mentioned in startRegs for the block being
	// processed; entries may be dropped again by allocReg.
	startRegsMask regMask

	// spillLive[blockID] is the set of live spill value IDs at block end.
	spillLive [][]ID

	// copies maps OpCopy values inserted by the allocator to whether
	// they ended up being used; unused ones can be deleted later.
	// NOTE(review): the consumer is not visible in this chunk.
	copies map[*Value]bool

	loopnest *loopnest

	// order in which blocks are processed.
	visitOrder []*Block

	// blockOrder[b.ID] is the position of block b within visitOrder.
	blockOrder []int32

	// whether to insert instructions that clobber dead registers
	// at call sites (debugging aid; see base.Flag.ClobberDeadReg).
	doClobber bool

	// nextCall[i] is the index of the next call at or after instruction
	// i within the current block (MaxInt32 if there is none).
	nextCall []int32

	// index of the instruction currently being processed in the block.
	curIdx int
}
343
// endReg records that, at the end of a block, register r holds value v
// via the cached copy c.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register
	c *Value // cached copy of the value (the one actually in r)
}
349
// startReg records that a block expects value v (cached as c) to be in
// register r on entry.
type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached copy of the value
	pos src.XPos // source position of the first use (for diagnostics)
}
356
357
// freeReg frees register r: whatever value was cached in it is no
// longer considered to be in a register.
func (s *regAllocState) freeReg(r register) {
	// No per-register state is kept for unallocatable registers
	// (except g, which is tracked for error detection).
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused and drop it from the value's register set.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}
375
376
// freeRegs frees every currently-used register in the set m.
func (s *regAllocState) freeRegs(m regMask) {
	for m&s.used != 0 {
		s.freeReg(pickReg(m & s.used))
	}
}
382
383
// clobberRegs inserts OpClobberReg instructions for the allocatable
// general-purpose registers in m, so that uses of dead registers show
// up as bugs (enabled via doClobber).
func (s *regAllocState) clobberRegs(m regMask) {
	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers are clobbered
	for m != 0 {
		r := pickReg(m)
		m &^= 1 << r
		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
		s.f.setHome(x, &s.registers[r])
	}
}
393
394
395
// setOrig records that c has the same original (pre-regalloc) value as
// v, growing s.orig as needed to hold c.ID.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	if int(c.ID) >= cap(s.orig) {
		// Grow the backing slice via the function cache's allocator.
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	// v may itself be a copy/spill/restore, so chase through to its
	// original by indexing orig rather than storing v directly.
	s.orig[c.ID] = s.orig[v.ID]
}
411
412
413
// assignReg assigns register r to hold value v, whose register-resident
// cached copy is c. c's home location is set to r.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	// Always record the value->register mapping and c's home.
	s.values[v.ID].regs |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])

	// Per-register state is only tracked for allocatable registers
	// (plus g, so double-assignment to g is caught below).
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}
	s.regs[r] = regState{v, c}
	s.used |= regMask(1) << r
}
432
433
434
435
436 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
437 if v.OnWasmStack {
438 return noRegister
439 }
440
441 mask &= s.allocatable
442 mask &^= s.nospill
443 if mask == 0 {
444 s.f.Fatalf("no register available for %s", v.LongString())
445 }
446
447
448 if mask&^s.used != 0 {
449 r := pickReg(mask &^ s.used)
450 s.usedSinceBlockStart |= regMask(1) << r
451 return r
452 }
453
454
455
456
457
458
459
460
461
462
463
464 var r register
465 maxuse := int32(-1)
466 for t := register(0); t < s.numRegs; t++ {
467 if mask>>t&1 == 0 {
468 continue
469 }
470 v := s.regs[t].v
471 if n := s.values[v.ID].uses.dist; n > maxuse {
472
473
474 r = t
475 maxuse = n
476 }
477 }
478 if maxuse == -1 {
479 s.f.Fatalf("couldn't find register to spill")
480 }
481
482 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
483
484
485
486 s.freeReg(r)
487 return r
488 }
489
490
491
492 v2 := s.regs[r].v
493 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
494 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
495 s.usedSinceBlockStart |= regMask(1) << r
496 r2 := pickReg(m)
497 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
498 s.copies[c] = false
499 if s.f.pass.debug > regDebug {
500 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
501 }
502 s.setOrig(c, v2)
503 s.assignReg(r2, v2, c)
504 }
505
506
507
508
509 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
510 if s.startRegsMask&(regMask(1)<<r) == 1 {
511 if s.f.pass.debug > regDebug {
512 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
513 }
514 s.startRegsMask &^= regMask(1) << r
515 }
516 }
517
518 s.freeReg(r)
519 s.usedSinceBlockStart |= regMask(1) << r
520 return r
521 }
522
523
524
// makeSpill returns the Value representing the spilled copy of v,
// creating one if it doesn't exist yet. b is the block in which the
// spill will be used; the sdom interval [restoreMin,restoreMax] is
// widened to cover every such block so the spill can later be placed.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Already have a spill; just widen the restore subtree.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a new spill. We don't know its final location yet, so it is
	// created blockless (and argless — the arg is filled in later).
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
544
545
546
547
548
549
550
// allocValToReg returns a copy of v that lives in a register drawn from
// mask, allocating one (and evicting/spilling another value) if needed.
// Load/copy/rematerialization code is emitted at the current point in
// curBlock. If nospill is set, the chosen register is protected from
// being spilled until the current instruction is done.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	// On Wasm, rematerializeable values are recomputed directly onto
	// the Wasm stack rather than placed in a register.
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		mask &= vi.regs
		r := pickReg(mask)
		if mask.contains(s.SPReg) {
			// Prefer SP when it is an allowed choice.
			// NOTE(review): rationale not visible here — confirm.
			r = s.SPReg
		}
		if !s.allocatable.contains(r) {
			// v is in a fixed (untracked) register; use v itself.
			return v
		}
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register
	// If nospill is set, the value is used immediately, so it can live
	// on the Wasm stack instead of occupying a register.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register for v.
		r = s.allocReg(mask, v)
	}

	// Produce a copy of v in the chosen register.
	var c *Value
	if vi.regs != 0 {
		// v is in some other register; copy from there.
		var current *Value
		if vi.regs&^s.allocatable != 0 {
			// v is in an unallocatable (fixed) register; copy from v itself.
			current = v
		} else {
			r2 := pickReg(vi.regs)
			if s.regs[r2].v != v {
				panic("bad register state")
			}
			current = s.regs[r2].c
			s.usedSinceBlockStart |= regMask(1) << r2
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
	} else if v.rematerializeable() {
		// Rematerialize v instead of loading it from a spill slot.
		c = v.copyIntoWithXPos(s.curBlock, pos)
		// The rematerialized op may only be able to produce its result
		// into registers outside mask; in that case, materialize into
		// its natural register first, then copy into the target.
		sourceMask := s.regspec(c).outputs[0].regs
		if mask&sourceMask == 0 && !onWasmStack {
			s.setOrig(c, v)
			s.assignReg(s.allocReg(sourceMask, v), v, c)
			// The copy below will be assigned register r (in mask).
			c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
		}
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	// Loading into the g register is never legal.
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
663
664
665 func isLeaf(f *Func) bool {
666 for _, b := range f.Blocks {
667 for _, v := range b.Values {
668 if v.Op.IsCall() && !v.Op.IsTailCall() {
669
670 return false
671 }
672 }
673 }
674 return true
675 }
676
677
678 func (v *Value) needRegister() bool {
679 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
680 }
681
// init prepares the allocator for function f: it identifies the special
// registers (SP/SB/g/zero), computes the allocatable register set,
// decides block visit order, allocates per-value and per-register
// bookkeeping, and runs the liveness precomputation.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// The register count must fit in a register index and a regMask.
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, g, and hardware-zero registers by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	s.ZeroIntReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		case "ZERO":
			s.ZeroIntReg = r
		}
	}
	// Make sure the required special registers were found.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		// g is only required on targets that reserve a g register.
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use: everything the
	// target offers, minus the fixed-purpose ones.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.ZeroIntReg != noRegister {
		s.allocatable &^= 1 << s.ZeroIntReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		// Frame pointer is reserved when enabled.
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions keep the return address in the link
			// register, so it can't be allocated.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// Dynamic linking reserves an extra register on some targets.
		switch s.f.Config.arch {
		case "386":
			// nothing reserved here.
			// NOTE(review): 386's reserved register is presumably
			// handled elsewhere — confirm.
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing reserved here
		case "loong64":
			// nothing reserved here
		case "ppc64le":
			// nothing reserved here
		case "riscv64":
			// nothing reserved here
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Linear-scan allocation is influenced by the order in which blocks
	// are visited; compute a dedicated layout order.
	s.visitOrder = layoutRegallocOrder(f)

	// blockOrder lets us distinguish forward from backward edges and
	// measure how far they reach.
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	// Reuse the cached per-value state slice when large enough.
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needRegister is false for tuple-typed values; the
			// corresponding Select values are the ones that need
			// registers.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// Wasm: mark values whose results can live solely on the Wasm
	// operand stack, so no register is assigned to them.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block: reset the candidate set.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walk backwards through the block.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// v can't live on the stack; since values can't be
					// reordered past it, clear the candidate set.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// An arg can live on the stack if it is used once,
					// in the same block, isn't a memory value, and is a
					// machine (non-generic) op.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// The clobberdeadreg debugging mode inserts code at call sites to
	// clobber dead registers; skip huge functions to bound the cost.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		s.doClobber = true
	}
}
849
// close returns allocator resources to the function cache.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
853
854
855
// addUse records that value id is used at distance dist in the future.
// Calls for a given value must be made with nonincreasing dist (uses
// are prepended, keeping the list sorted by increasing dist).
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
	// Reuse a record from the free list if possible.
	r := s.freeUseRecords
	if r != nil {
		s.freeUseRecords = r.next
	} else {
		r = &use{}
	}
	r.dist = dist
	r.pos = pos
	r.next = s.values[id].uses
	s.values[id].uses = r
	if r.next != nil && dist > r.next.dist {
		s.f.Fatalf("uses added in wrong order")
	}
}
871
872
873
// advanceUses pops the current use of each of v's args, freeing the
// registers of any arg that is now dead or not used again before the
// next call (fixed-register values are exempt). It then applies the
// same treatment to v itself via dropIfUnused.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
			// Value is dead (or won't be used again until after a
			// call); free all registers holding it.
			s.freeRegs(ai.regs)
		}
		// Recycle the use record.
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
891
892
893
// dropIfUnused frees v's registers if v has no remaining uses, or if
// its next use comes only after the next call (fixed-register values
// are exempt).
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	nextCall := s.nextCall[s.curIdx]
	if opcodeTable[v.Op].call {
		// v is itself a call; compare against the *following* call.
		if s.curIdx == len(s.nextCall)-1 {
			nextCall = math.MaxInt32
		} else {
			nextCall = s.nextCall[s.curIdx+1]
		}
	}
	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > nextCall) {
		s.freeRegs(vi.regs)
	}
}
912
913
914
915
916 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
917 u := s.values[v.ID].uses
918 if u == nil {
919 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
920 }
921 d := u.dist
922 for u != nil && u.dist == d {
923 u = u.next
924 }
925 return u != nil && u.dist > d
926 }
927
928
// setState replaces the entire current register state with regs,
// freeing every in-use register first.
func (s *regAllocState) setState(regs []endReg) {
	s.freeRegs(s.used)
	for _, x := range regs {
		s.assignReg(x.r, x.v, x.c)
	}
}
935
936
// compatRegs returns the set of allocatable registers that can hold a
// value of type t (empty for tuple and flags types).
func (s *regAllocState) compatRegs(t *types.Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsSIMD() {
		if t.Size() > 8 {
			// Wide SIMD values live in floating-point/vector registers.
			return s.f.Config.fpRegMask & s.allocatable
		} else {
			// SIMD values of <= 8 bytes use integer registers.
			// NOTE(review): presumably predicate/mask values — confirm.
			return s.f.Config.gpRegMask & s.allocatable
		}
	}
	if t.IsFloat() || t == types.TypeInt128 {
		// Use the narrower fp32/fp64 bank when the target defines one.
		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
			m = s.f.Config.fp32RegMask
		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
			m = s.f.Config.fp64RegMask
		} else {
			m = s.f.Config.fpRegMask
		}
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}
963
964
// regspec returns the register constraints (inputs/outputs/clobbers)
// for value v, handling the ops whose constraints aren't fully
// described by the static opcodeTable entry.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert is a generic op with no static register set; it
		// can use any allocatable general-purpose register.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		// Result is pinned to the AuxInt-selected integer param register.
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		// Result is pinned to the AuxInt-selected float param register.
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		// Register-ABI calls carry their own constraints in the AuxCall.
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		// Result values use this function's own register-ABI spec.
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
992
993 func (s *regAllocState) isGReg(r register) bool {
994 return s.f.Config.hasGReg && s.GReg == r
995 }
996
997
// tmpVal is a package-level scratch Value.
// NOTE(review): its use is not visible in this chunk — confirm intent
// (it appears related to the main regalloc loop below).
var tmpVal Value
999
1000 func (s *regAllocState) regalloc(f *Func) {
1001 regValLiveSet := f.newSparseSet(f.NumValues())
1002 defer f.retSparseSet(regValLiveSet)
1003 var oldSched []*Value
1004 var phis []*Value
1005 var phiRegs []register
1006 var args []*Value
1007
1008
1009 var desired desiredState
1010 desiredSecondReg := map[ID][4]register{}
1011
1012
1013 type dentry struct {
1014 out [4]register
1015 in [3][4]register
1016 }
1017 var dinfo []dentry
1018
1019 if f.Entry != f.Blocks[0] {
1020 f.Fatalf("entry block must be first")
1021 }
1022
1023 for _, b := range s.visitOrder {
1024 if s.f.pass.debug > regDebug {
1025 fmt.Printf("Begin processing block %v\n", b)
1026 }
1027 s.curBlock = b
1028 s.startRegsMask = 0
1029 s.usedSinceBlockStart = 0
1030 clear(desiredSecondReg)
1031
1032
1033
1034 regValLiveSet.clear()
1035 if s.live != nil {
1036 for _, e := range s.live[b.ID] {
1037 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
1038 regValLiveSet.add(e.ID)
1039 }
1040 }
1041 for _, v := range b.ControlValues() {
1042 if s.values[v.ID].needReg {
1043 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
1044 regValLiveSet.add(v.ID)
1045 }
1046 }
1047 if cap(s.nextCall) < len(b.Values) {
1048 c := cap(s.nextCall)
1049 s.nextCall = append(s.nextCall[:c], make([]int32, len(b.Values)-c)...)
1050 } else {
1051 s.nextCall = s.nextCall[:len(b.Values)]
1052 }
1053 var nextCall int32 = math.MaxInt32
1054 for i := len(b.Values) - 1; i >= 0; i-- {
1055 v := b.Values[i]
1056 regValLiveSet.remove(v.ID)
1057 if v.Op == OpPhi {
1058
1059
1060
1061 s.nextCall[i] = nextCall
1062 continue
1063 }
1064 if opcodeTable[v.Op].call {
1065
1066 regValLiveSet.clear()
1067 if s.sp != 0 && s.values[s.sp].uses != nil {
1068 regValLiveSet.add(s.sp)
1069 }
1070 if s.sb != 0 && s.values[s.sb].uses != nil {
1071 regValLiveSet.add(s.sb)
1072 }
1073 nextCall = int32(i)
1074 }
1075 for _, a := range v.Args {
1076 if !s.values[a.ID].needReg {
1077 continue
1078 }
1079 s.addUse(a.ID, int32(i), v.Pos)
1080 regValLiveSet.add(a.ID)
1081 }
1082 s.nextCall[i] = nextCall
1083 }
1084 if s.f.pass.debug > regDebug {
1085 fmt.Printf("use distances for %s\n", b)
1086 for i := range s.values {
1087 vi := &s.values[i]
1088 u := vi.uses
1089 if u == nil {
1090 continue
1091 }
1092 fmt.Printf(" v%d:", i)
1093 for u != nil {
1094 fmt.Printf(" %d", u.dist)
1095 u = u.next
1096 }
1097 fmt.Println()
1098 }
1099 }
1100
1101
1102
1103 nphi := 0
1104 for _, v := range b.Values {
1105 if v.Op != OpPhi {
1106 break
1107 }
1108 nphi++
1109 }
1110 phis = append(phis[:0], b.Values[:nphi]...)
1111 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1112 b.Values = b.Values[:0]
1113
1114
1115 if b == f.Entry {
1116
1117 if nphi > 0 {
1118 f.Fatalf("phis in entry block")
1119 }
1120 } else if len(b.Preds) == 1 {
1121
1122 s.setState(s.endRegs[b.Preds[0].b.ID])
1123 if nphi > 0 {
1124 f.Fatalf("phis in single-predecessor block")
1125 }
1126
1127
1128
1129 for r := register(0); r < s.numRegs; r++ {
1130 v := s.regs[r].v
1131 if v != nil && !regValLiveSet.contains(v.ID) {
1132 s.freeReg(r)
1133 }
1134 }
1135 } else {
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149 idx := -1
1150 for i, p := range b.Preds {
1151
1152
1153 pb := p.b
1154 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1155 continue
1156 }
1157 if idx == -1 {
1158 idx = i
1159 continue
1160 }
1161 pSel := b.Preds[idx].b
1162 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1163 idx = i
1164 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1176 idx = i
1177 }
1178 }
1179 }
1180 if idx < 0 {
1181 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1182 }
1183 p := b.Preds[idx].b
1184 s.setState(s.endRegs[p.ID])
1185
1186 if s.f.pass.debug > regDebug {
1187 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1188 for _, x := range s.endRegs[p.ID] {
1189 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1190 }
1191 }
1192
1193
1194
1195
1196
1197 phiRegs = phiRegs[:0]
1198 var phiUsed regMask
1199
1200 for _, v := range phis {
1201 if !s.values[v.ID].needReg {
1202 phiRegs = append(phiRegs, noRegister)
1203 continue
1204 }
1205 a := v.Args[idx]
1206
1207
1208 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1209 if m != 0 {
1210 r := pickReg(m)
1211 phiUsed |= regMask(1) << r
1212 phiRegs = append(phiRegs, r)
1213 } else {
1214 phiRegs = append(phiRegs, noRegister)
1215 }
1216 }
1217
1218
1219 for i, v := range phis {
1220 if !s.values[v.ID].needReg {
1221 continue
1222 }
1223 a := v.Args[idx]
1224 r := phiRegs[i]
1225 if r == noRegister {
1226 continue
1227 }
1228 if regValLiveSet.contains(a.ID) {
1229
1230
1231
1232
1233
1234
1235
1236
1237 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1238 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1239 r2 := pickReg(m)
1240 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1241 s.copies[c] = false
1242 if s.f.pass.debug > regDebug {
1243 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1244 }
1245 s.setOrig(c, a)
1246 s.assignReg(r2, a, c)
1247 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1248 }
1249 }
1250 s.freeReg(r)
1251 }
1252
1253
1254 b.Values = append(b.Values, phis...)
1255
1256
1257
1258 for i, v := range phis {
1259 if !s.values[v.ID].needReg {
1260 continue
1261 }
1262 if phiRegs[i] != noRegister {
1263 continue
1264 }
1265 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1266
1267
1268 for i, pe := range b.Preds {
1269 if i == idx {
1270 continue
1271 }
1272 ri := noRegister
1273 for _, er := range s.endRegs[pe.b.ID] {
1274 if er.v == s.orig[v.Args[i].ID] {
1275 ri = er.r
1276 break
1277 }
1278 }
1279 if ri != noRegister && m>>ri&1 != 0 {
1280 m = regMask(1) << ri
1281 break
1282 }
1283 }
1284 if m != 0 {
1285 r := pickReg(m)
1286 phiRegs[i] = r
1287 phiUsed |= regMask(1) << r
1288 }
1289 }
1290
1291
1292 for i, v := range phis {
1293 if !s.values[v.ID].needReg {
1294 continue
1295 }
1296 r := phiRegs[i]
1297 if r == noRegister {
1298
1299
1300 s.values[v.ID].spill = v
1301 continue
1302 }
1303
1304 s.assignReg(r, v, v)
1305 }
1306
1307
1308 for r := register(0); r < s.numRegs; r++ {
1309 if phiUsed>>r&1 != 0 {
1310 continue
1311 }
1312 v := s.regs[r].v
1313 if v != nil && !regValLiveSet.contains(v.ID) {
1314 s.freeReg(r)
1315 }
1316 }
1317
1318
1319
1320
1321
1322 regList := make([]startReg, 0, 32)
1323 for r := register(0); r < s.numRegs; r++ {
1324 v := s.regs[r].v
1325 if v == nil {
1326 continue
1327 }
1328 if phiUsed>>r&1 != 0 {
1329
1330
1331 continue
1332 }
1333 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1334 s.startRegsMask |= regMask(1) << r
1335 }
1336 s.startRegs[b.ID] = make([]startReg, len(regList))
1337 copy(s.startRegs[b.ID], regList)
1338
1339 if s.f.pass.debug > regDebug {
1340 fmt.Printf("after phis\n")
1341 for _, x := range s.startRegs[b.ID] {
1342 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1343 }
1344 }
1345 }
1346
1347
1348 for i, v := range phis {
1349 s.curIdx = i
1350 s.dropIfUnused(v)
1351 }
1352
1353
1354 if l := len(oldSched); cap(dinfo) < l {
1355 dinfo = make([]dentry, l)
1356 } else {
1357 dinfo = dinfo[:l]
1358 clear(dinfo)
1359 }
1360
1361
1362 if s.desired != nil {
1363 desired.copy(&s.desired[b.ID])
1364 }
1365
1366
1367
1368
1369
1370
1371 for _, e := range b.Succs {
1372 succ := e.b
1373
1374 for _, x := range s.startRegs[succ.ID] {
1375 desired.add(x.v.ID, x.r)
1376 }
1377
1378 pidx := e.i
1379 for _, v := range succ.Values {
1380 if v.Op != OpPhi {
1381 break
1382 }
1383 if !s.values[v.ID].needReg {
1384 continue
1385 }
1386 rp, ok := s.f.getHome(v.ID).(*Register)
1387 if !ok {
1388
1389
1390
1391
1392 for _, a := range v.Args {
1393 rp, ok = s.f.getHome(a.ID).(*Register)
1394 if ok {
1395 break
1396 }
1397 }
1398 if !ok {
1399 continue
1400 }
1401 }
1402 desired.add(v.Args[pidx].ID, register(rp.num))
1403 }
1404 }
1405
1406
1407 for i := len(oldSched) - 1; i >= 0; i-- {
1408 v := oldSched[i]
1409 prefs := desired.remove(v.ID)
1410 regspec := s.regspec(v)
1411 desired.clobber(regspec.clobbers)
1412 for _, j := range regspec.inputs {
1413 if countRegs(j.regs) != 1 {
1414 continue
1415 }
1416 desired.clobber(j.regs)
1417 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1418 }
1419 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1420 if opcodeTable[v.Op].commutative {
1421 desired.addList(v.Args[1].ID, prefs)
1422 }
1423 desired.addList(v.Args[0].ID, prefs)
1424 }
1425
1426 dinfo[i].out = prefs
1427 for j, a := range v.Args {
1428 if j >= len(dinfo[i].in) {
1429 break
1430 }
1431 dinfo[i].in[j] = desired.get(a.ID)
1432 }
1433 if v.Op == OpSelect1 && prefs[0] != noRegister {
1434
1435
1436 desiredSecondReg[v.Args[0].ID] = prefs
1437 }
1438 }
1439
1440
1441 for idx, v := range oldSched {
1442 s.curIdx = nphi + idx
1443 tmpReg := noRegister
1444 if s.f.pass.debug > regDebug {
1445 fmt.Printf(" processing %s\n", v.LongString())
1446 }
1447 regspec := s.regspec(v)
1448 if v.Op == OpPhi {
1449 f.Fatalf("phi %s not at start of block", v)
1450 }
1451 if opcodeTable[v.Op].fixedReg {
1452 switch v.Op {
1453 case OpSP:
1454 s.assignReg(s.SPReg, v, v)
1455 s.sp = v.ID
1456 case OpSB:
1457 s.assignReg(s.SBReg, v, v)
1458 s.sb = v.ID
1459 case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
1460 s.assignReg(s.ZeroIntReg, v, v)
1461 case OpAMD64Zero128, OpAMD64Zero256, OpAMD64Zero512:
1462 regspec := s.regspec(v)
1463 m := regspec.outputs[0].regs
1464 if countRegs(m) != 1 {
1465 f.Fatalf("bad fixed-register op %s", v)
1466 }
1467 s.assignReg(pickReg(m), v, v)
1468 default:
1469 f.Fatalf("unknown fixed-register op %s", v)
1470 }
1471 b.Values = append(b.Values, v)
1472 s.advanceUses(v)
1473 continue
1474 }
1475 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1476 if s.values[v.ID].needReg {
1477 if v.Op == OpSelectN {
1478 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1479 } else {
1480 var i = 0
1481 if v.Op == OpSelect1 {
1482 i = 1
1483 }
1484 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1485 }
1486 }
1487 b.Values = append(b.Values, v)
1488 s.advanceUses(v)
1489 continue
1490 }
1491 if v.Op == OpGetG && s.f.Config.hasGReg {
1492
1493 if s.regs[s.GReg].v != nil {
1494 s.freeReg(s.GReg)
1495 }
1496 s.assignReg(s.GReg, v, v)
1497 b.Values = append(b.Values, v)
1498 s.advanceUses(v)
1499 continue
1500 }
1501 if v.Op == OpArg {
1502
1503
1504
1505 s.values[v.ID].spill = v
1506 b.Values = append(b.Values, v)
1507 s.advanceUses(v)
1508 continue
1509 }
1510 if v.Op == OpKeepAlive {
1511
1512 s.advanceUses(v)
1513 a := v.Args[0]
1514 vi := &s.values[a.ID]
1515 if vi.regs == 0 && !vi.rematerializeable {
1516
1517
1518
1519 v.SetArg(0, s.makeSpill(a, b))
1520 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1521
1522
1523
1524 v.Op = OpVarLive
1525 v.SetArgs1(v.Args[1])
1526 v.Aux = a.Aux
1527 } else {
1528
1529
1530
1531 v.Op = OpCopy
1532 v.SetArgs1(v.Args[1])
1533 }
1534 b.Values = append(b.Values, v)
1535 continue
1536 }
1537 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1538
1539 if s.doClobber && v.Op.IsCall() {
1540 s.clobberRegs(regspec.clobbers)
1541 }
1542 s.freeRegs(regspec.clobbers)
1543 b.Values = append(b.Values, v)
1544 s.advanceUses(v)
1545 continue
1546 }
1547
1548 if s.values[v.ID].rematerializeable {
1549
1550
1551
1552 for _, a := range v.Args {
1553 a.Uses--
1554 }
1555 s.advanceUses(v)
1556 continue
1557 }
1558
1559 if s.f.pass.debug > regDebug {
1560 fmt.Printf("value %s\n", v.LongString())
1561 fmt.Printf(" out:")
1562 for _, r := range dinfo[idx].out {
1563 if r != noRegister {
1564 fmt.Printf(" %s", &s.registers[r])
1565 }
1566 }
1567 fmt.Println()
1568 for i := 0; i < len(v.Args) && i < 3; i++ {
1569 fmt.Printf(" in%d:", i)
1570 for _, r := range dinfo[idx].in[i] {
1571 if r != noRegister {
1572 fmt.Printf(" %s", &s.registers[r])
1573 }
1574 }
1575 fmt.Println()
1576 }
1577 }
1578
1579
1580
1581
1582 args = append(args[:0], make([]*Value, len(v.Args))...)
1583 for i, a := range v.Args {
1584 if !s.values[a.ID].needReg {
1585 args[i] = a
1586 }
1587 }
1588 for _, i := range regspec.inputs {
1589 mask := i.regs
1590 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1591 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1592 }
1593 }
1594
1595
1596
1597
1598
1599
1600 for {
1601 freed := false
1602 for _, i := range regspec.inputs {
1603 if args[i.idx] != nil {
1604 continue
1605 }
1606 mask := i.regs
1607 if countRegs(mask) == 1 && mask&^s.used != 0 {
1608 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1609
1610
1611
1612 oldregs := s.values[v.Args[i.idx].ID].regs
1613 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1614 s.freeRegs(oldregs &^ mask &^ s.nospill)
1615 freed = true
1616 }
1617 }
1618 }
1619 if !freed {
1620 break
1621 }
1622 }
1623
1624
1625 for _, i := range regspec.inputs {
1626 if args[i.idx] != nil {
1627 continue
1628 }
1629 mask := i.regs
1630 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1631
1632 mask &= s.allocatable
1633 mask &^= s.nospill
1634
1635 if i.idx < 3 {
1636 for _, r := range dinfo[idx].in[i.idx] {
1637 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1638
1639 mask = regMask(1) << r
1640 break
1641 }
1642 }
1643 }
1644
1645 if mask&^desired.avoid != 0 {
1646 mask &^= desired.avoid
1647 }
1648 }
1649 if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
1650
1651
1652
1653 mask = 1 << s.SPReg
1654 }
1655 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1656 }
1657
1658
1659
1660
1661 if opcodeTable[v.Op].resultInArg0 {
1662 var m regMask
1663 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1664
1665 goto ok
1666 }
1667 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1668 args[0], args[1] = args[1], args[0]
1669 goto ok
1670 }
1671 if s.values[v.Args[0].ID].rematerializeable {
1672
1673 goto ok
1674 }
1675 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1676 args[0], args[1] = args[1], args[0]
1677 goto ok
1678 }
1679 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1680
1681 goto ok
1682 }
1683 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1684 args[0], args[1] = args[1], args[0]
1685 goto ok
1686 }
1687
1688
1689
1690
1691
1692 m = s.compatRegs(v.Args[0].Type) &^ s.used
1693 if m == 0 {
1694
1695
1696
1697
1698 goto ok
1699 }
1700
1701
1702 for _, r := range dinfo[idx].out {
1703 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1704 m = regMask(1) << r
1705 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1706
1707
1708 goto ok
1709 }
1710 }
1711
1712
1713 for _, r := range dinfo[idx].in[0] {
1714 if r != noRegister && m>>r&1 != 0 {
1715 m = regMask(1) << r
1716 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1717 s.copies[c] = false
1718
1719
1720 goto ok
1721 }
1722 }
1723 if opcodeTable[v.Op].commutative {
1724 for _, r := range dinfo[idx].in[1] {
1725 if r != noRegister && m>>r&1 != 0 {
1726 m = regMask(1) << r
1727 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1728 s.copies[c] = false
1729 args[0], args[1] = args[1], args[0]
1730 goto ok
1731 }
1732 }
1733 }
1734
1735
1736 if m&^desired.avoid != 0 {
1737 m &^= desired.avoid
1738 }
1739
1740 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1741 s.copies[c] = false
1742
1743
1744
1745
1746 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1747 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1748 r := register(rp.num)
1749 for _, r2 := range dinfo[idx].in[0] {
1750 if r == r2 {
1751 args[0] = c
1752 break
1753 }
1754 }
1755 }
1756 }
1757 }
1758 ok:
1759 for i := 0; i < 2; i++ {
1760 if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
1761 continue
1762 }
1763 if !s.liveAfterCurrentInstruction(v.Args[i]) {
1764
1765 continue
1766 }
1767 if s.values[v.Args[i].ID].rematerializeable {
1768
1769 continue
1770 }
1771 if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
1772
1773 continue
1774 }
1775
1776 m := s.compatRegs(v.Args[i].Type) &^ s.used
1777 if m == 0 {
1778
1779
1780
1781
1782 continue
1783 }
1784
1785 c := s.allocValToReg(v.Args[i], m, true, v.Pos)
1786 s.copies[c] = false
1787 }
1788
1789
1790
1791
1792
1793
1794
1795 if opcodeTable[v.Op].needIntTemp {
1796 m := s.allocatable & s.f.Config.gpRegMask
1797 for _, out := range regspec.outputs {
1798 if countRegs(out.regs) == 1 {
1799 m &^= out.regs
1800 }
1801 }
1802 if m&^desired.avoid&^s.nospill != 0 {
1803 m &^= desired.avoid
1804 }
1805 tmpReg = s.allocReg(m, &tmpVal)
1806 s.nospill |= regMask(1) << tmpReg
1807 s.tmpused |= regMask(1) << tmpReg
1808 }
1809
1810 if regspec.clobbersArg0 {
1811 s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
1812 }
1813 if regspec.clobbersArg1 && !(regspec.clobbersArg0 && s.f.getHome(args[0].ID) == s.f.getHome(args[1].ID)) {
1814 s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
1815 }
1816
1817
1818
1819
1820
1821 if !opcodeTable[v.Op].resultNotInArgs {
1822 s.tmpused = s.nospill
1823 s.nospill = 0
1824 s.advanceUses(v)
1825 }
1826
1827
1828 if s.doClobber && v.Op.IsCall() {
1829
1830
1831 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1832 }
1833 s.freeRegs(regspec.clobbers)
1834 s.tmpused |= regspec.clobbers
1835
1836
1837 {
1838 outRegs := noRegisters
1839 maxOutIdx := -1
1840 var used regMask
1841 if tmpReg != noRegister {
1842
1843
1844 used |= regMask(1) << tmpReg
1845 }
1846 for _, out := range regspec.outputs {
1847 if out.regs == 0 {
1848 continue
1849 }
1850 mask := out.regs & s.allocatable &^ used
1851 if mask == 0 {
1852 s.f.Fatalf("can't find any output register %s", v.LongString())
1853 }
1854 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1855 if !opcodeTable[v.Op].commutative {
1856
1857 r := register(s.f.getHome(args[0].ID).(*Register).num)
1858 if mask>>r&1 == 0 {
1859 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1860 }
1861 mask = regMask(1) << r
1862 } else {
1863
1864 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1865 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1866
1867 found := false
1868 for _, r := range dinfo[idx].out {
1869 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1870 mask = regMask(1) << r
1871 found = true
1872 if r == r1 {
1873 args[0], args[1] = args[1], args[0]
1874 }
1875 break
1876 }
1877 }
1878 if !found {
1879
1880 mask = regMask(1) << r0
1881 }
1882 }
1883 }
1884 if out.idx == 0 {
1885 for _, r := range dinfo[idx].out {
1886 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1887
1888 mask = regMask(1) << r
1889 break
1890 }
1891 }
1892 }
1893 if out.idx == 1 {
1894 if prefs, ok := desiredSecondReg[v.ID]; ok {
1895 for _, r := range prefs {
1896 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1897
1898 mask = regMask(1) << r
1899 break
1900 }
1901 }
1902 }
1903 }
1904
1905 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1906 mask &^= desired.avoid
1907 }
1908 r := s.allocReg(mask, v)
1909 if out.idx > maxOutIdx {
1910 maxOutIdx = out.idx
1911 }
1912 outRegs[out.idx] = r
1913 used |= regMask(1) << r
1914 s.tmpused |= regMask(1) << r
1915 }
1916
1917 if v.Type.IsTuple() {
1918 var outLocs LocPair
1919 if r := outRegs[0]; r != noRegister {
1920 outLocs[0] = &s.registers[r]
1921 }
1922 if r := outRegs[1]; r != noRegister {
1923 outLocs[1] = &s.registers[r]
1924 }
1925 s.f.setHome(v, outLocs)
1926
1927 } else if v.Type.IsResults() {
1928
1929 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1930 for i := 0; i <= maxOutIdx; i++ {
1931 if r := outRegs[i]; r != noRegister {
1932 outLocs[i] = &s.registers[r]
1933 }
1934 }
1935 s.f.setHome(v, outLocs)
1936 } else {
1937 if r := outRegs[0]; r != noRegister {
1938 s.assignReg(r, v, v)
1939 }
1940 }
1941 if tmpReg != noRegister {
1942
1943 if s.f.tempRegs == nil {
1944 s.f.tempRegs = map[ID]*Register{}
1945 }
1946 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1947 }
1948 }
1949
1950
1951 if opcodeTable[v.Op].resultNotInArgs {
1952 s.nospill = 0
1953 s.advanceUses(v)
1954 }
1955 s.tmpused = 0
1956
1957
1958 for i, a := range args {
1959 v.SetArg(i, a)
1960 }
1961 b.Values = append(b.Values, v)
1962 s.dropIfUnused(v)
1963 }
1964
1965
1966
1967 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1968
1969
1970 for i, v := range b.ControlValues() {
1971 if !s.values[v.ID].needReg {
1972 continue
1973 }
1974 if s.f.pass.debug > regDebug {
1975 fmt.Printf(" processing control %s\n", v.LongString())
1976 }
1977
1978
1979
1980 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1981 }
1982
1983
1984
1985 for _, v := range controls {
1986 vi := &s.values[v.ID]
1987 if !vi.needReg {
1988 continue
1989 }
1990
1991 u := vi.uses
1992 vi.uses = u.next
1993 if u.next == nil {
1994 s.freeRegs(vi.regs)
1995 }
1996 u.next = s.freeUseRecords
1997 s.freeUseRecords = u
1998 }
1999
2000
2001
2002
2003 if len(b.Succs) == 1 {
2004 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
2005 s.freeReg(s.GReg)
2006 }
2007 if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
2008
2009 goto badloop
2010 }
2011
2012 top := b.Succs[0].b
2013 loop := s.loopnest.b2l[top.ID]
2014 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
2015 goto badloop
2016 }
2017
2018
2019 phiArgs := regValLiveSet
2020 phiArgs.clear()
2021 for _, v := range b.Succs[0].b.Values {
2022 if v.Op == OpPhi {
2023 phiArgs.add(v.Args[b.Succs[0].i].ID)
2024 }
2025 }
2026
2027
2028
2029
2030 var likelyUsedRegs regMask
2031 for _, live := range s.live[b.ID] {
2032 if live.dist < unlikelyDistance {
2033 likelyUsedRegs |= s.values[live.ID].regs
2034 }
2035 }
2036
2037
2038
2039 for _, live := range s.live[b.ID] {
2040 if live.dist >= unlikelyDistance {
2041
2042 continue
2043 }
2044 vid := live.ID
2045 vi := &s.values[vid]
2046 v := s.orig[vid]
2047 if phiArgs.contains(vid) {
2048
2049
2050
2051
2052 if vi.regs&s.compatRegs(v.Type) != 0 {
2053 continue
2054 }
2055 } else {
2056 if vi.regs != 0 {
2057 continue
2058 }
2059 if vi.rematerializeable {
2060
2061
2062
2063
2064
2065
2066
2067 continue
2068 }
2069 }
2070 if vi.rematerializeable && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
2071 continue
2072 }
2073
2074
2075 m := s.compatRegs(v.Type) &^ likelyUsedRegs
2076 if m == 0 {
2077
2078 continue
2079 }
2080
2081
2082 outerloop:
2083 for _, e := range desired.entries {
2084 if e.ID != v.ID {
2085 continue
2086 }
2087 for _, r := range e.regs {
2088 if r != noRegister && m>>r&1 != 0 {
2089 m = regMask(1) << r
2090 break outerloop
2091 }
2092 }
2093 }
2094 if m&^desired.avoid != 0 {
2095 m &^= desired.avoid
2096 }
2097 s.allocValToReg(v, m, false, b.Pos)
2098 likelyUsedRegs |= s.values[v.ID].regs
2099 }
2100 }
2101 badloop:
2102 ;
2103
2104
2105
2106 k := 0
2107 for r := register(0); r < s.numRegs; r++ {
2108 v := s.regs[r].v
2109 if v == nil {
2110 continue
2111 }
2112 k++
2113 }
2114 regList := make([]endReg, 0, k)
2115 for r := register(0); r < s.numRegs; r++ {
2116 v := s.regs[r].v
2117 if v == nil {
2118 continue
2119 }
2120 regList = append(regList, endReg{r, v, s.regs[r].c})
2121 }
2122 s.endRegs[b.ID] = regList
2123
2124 if checkEnabled {
2125 regValLiveSet.clear()
2126 if s.live != nil {
2127 for _, x := range s.live[b.ID] {
2128 regValLiveSet.add(x.ID)
2129 }
2130 }
2131 for r := register(0); r < s.numRegs; r++ {
2132 v := s.regs[r].v
2133 if v == nil {
2134 continue
2135 }
2136 if !regValLiveSet.contains(v.ID) {
2137 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
2138 }
2139 }
2140 }
2141
2142
2143
2144
2145
2146 if s.live != nil {
2147 for _, e := range s.live[b.ID] {
2148 vi := &s.values[e.ID]
2149 if vi.regs != 0 {
2150
2151 continue
2152 }
2153 if vi.rematerializeable {
2154
2155 continue
2156 }
2157 if s.f.pass.debug > regDebug {
2158 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
2159 }
2160 spill := s.makeSpill(s.orig[e.ID], b)
2161 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
2162 }
2163
2164
2165
2166
2167 for _, e := range s.live[b.ID] {
2168 u := s.values[e.ID].uses
2169 if u == nil {
2170 f.Fatalf("live at end, no uses v%d", e.ID)
2171 }
2172 if u.next != nil {
2173 f.Fatalf("live at end, too many uses v%d", e.ID)
2174 }
2175 s.values[e.ID].uses = nil
2176 u.next = s.freeUseRecords
2177 s.freeUseRecords = u
2178 }
2179 }
2180
2181
2182
2183
2184
2185
2186
2187 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
2188 regs := make([]startReg, 0, c)
2189 for _, sr := range s.startRegs[b.ID] {
2190 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
2191 continue
2192 }
2193 regs = append(regs, sr)
2194 }
2195 s.startRegs[b.ID] = regs
2196 }
2197 }
2198
2199
2200 s.placeSpills()
2201
2202
2203
2204 stacklive := stackalloc(s.f, s.spillLive)
2205
2206
2207 s.shuffle(stacklive)
2208
2209
2210
2211
2212 for {
2213 progress := false
2214 for c, used := range s.copies {
2215 if !used && c.Uses == 0 {
2216 if s.f.pass.debug > regDebug {
2217 fmt.Printf("delete copied value %s\n", c.LongString())
2218 }
2219 c.resetArgs()
2220 f.freeValue(c)
2221 delete(s.copies, c)
2222 progress = true
2223 }
2224 }
2225 if !progress {
2226 break
2227 }
2228 }
2229
2230 for _, b := range s.visitOrder {
2231 i := 0
2232 for _, v := range b.Values {
2233 if v.Op == OpInvalid {
2234 continue
2235 }
2236 b.Values[i] = v
2237 i++
2238 }
2239 b.Values = b.Values[:i]
2240 }
2241 }
2242
// placeSpills picks a block for each pending spill (created by makeSpill with
// a nil Block) and inserts it into that block's value list. A spill is sunk as
// far down the dominator tree as possible, as long as it still dominates all
// of the value's restores and does not move into a deeper loop.
func (s *regAllocState) placeSpills() {
	// mustBeFirst reports whether op must appear in the schedule before any
	// spills we insert (closure pointer, phis, and register args are pinned
	// to the top of a block).
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// start records, per block ID, spills to place at the start of that block
	// (after any mustBeFirst values).
	start := map[ID][]*Value{}

	// after records, per value ID, spills to place immediately after that value.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			// Value was never spilled.
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up, e.g. rematerializations
			// handled elsewhere; leave them alone.
			continue
		}
		v := s.orig[i]

		// Sanity check: a pending spill must correspond to an original value.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		// Walk down the dominator tree looking for a better (deeper) place to
		// put the spill. best/bestArg/bestDepth track the current winner:
		// the block, the value to spill there, and its loop depth.
		best := v.Block
		bestArg := v
		var bestDepth int16
		if s.loopnest != nil && s.loopnest.b2l[best.ID] != nil {
			bestDepth = s.loopnest.b2l[best.ID].depth
		}
		b := best
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores (entry/exit bound the restore range).
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Keep searching deeper.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if s.loopnest != nil && s.loopnest.b2l[b.ID] != nil {
				depth = s.loopnest.b2l[b.ID].depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop — it would then
				// execute on every iteration.
				continue
			}

			// If v is in a register at the start of b, we can
			// place the spill here (using that register's cached copy as
			// the spill argument).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2365
2366
2367 func (s *regAllocState) shuffle(stacklive [][]ID) {
2368 var e edgeState
2369 e.s = s
2370 e.cache = map[ID][]*Value{}
2371 e.contents = map[Location]contentRecord{}
2372 if s.f.pass.debug > regDebug {
2373 fmt.Printf("shuffle %s\n", s.f.Name)
2374 fmt.Println(s.f.String())
2375 }
2376
2377 for _, b := range s.visitOrder {
2378 if len(b.Preds) <= 1 {
2379 continue
2380 }
2381 e.b = b
2382 for i, edge := range b.Preds {
2383 p := edge.b
2384 e.p = p
2385 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2386 e.process()
2387 }
2388 }
2389
2390 if s.f.pass.debug > regDebug {
2391 fmt.Printf("post shuffle %s\n", s.f.Name)
2392 fmt.Println(s.f.String())
2393 }
2394 }
2395
// edgeState tracks the state of values/locations while fixing up one
// CFG edge p->b during shuffle.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p->b

	// cache maps each pre-regalloc value ID to the list of cached copies
	// of that value currently available (registers and/or spill slots).
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of cache, for deterministic iteration

	// contents maps each location to a record of what it currently holds.
	contents map[Location]contentRecord

	// destinations are the location assignments still to be satisfied;
	// extra holds destinations generated while processing (e.g. by erase).
	destinations []dstRecord
	extra        []dstRecord

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only cached copy of some value
	finalRegs             regMask // registers holding a value in its final (destination) place
	rematerializeableRegs regMask // registers holding rematerializeable values
}
2416
// contentRecord describes what a location currently holds during shuffle.
type contentRecord struct {
	vid   ID       // pre-regalloc value ID
	c     *Value   // cached copy of the value residing at the location
	final bool     // this location is a satisfied destination for vid
	pos   src.XPos // source position of the use that put the value here
}
2423
// dstRecord describes a location that must end up holding a particular value.
type dstRecord struct {
	loc    Location // register or stack slot to fill
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // if non-nil, place to store a reference to the generated instruction (phi arg slot)
	pos    src.XPos // source position of the use of this location
}
2430
2431
// setup initializes the edge state for shuffling along the idx'th incoming
// edge of e.b (coming from e.p). srcReg is the register state at the end of
// p, dstReg the required register state at the start of b, and stacklive the
// spill slots live out of p.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state left over from the previous edge.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers at the end of p can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
	}
	// So can live spill slots, but only if the spill dominates this edge's
	// source block; otherwise the stored value isn't valid here.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// The spill placed by placeSpills doesn't dominate p, so the
			// stack copy can't be used as a source on this edge.
			// NOTE(review): presumably the value is then available in some
			// register instead (srcReg above) — confirm against the main
			// allocation pass.
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
	}

	// Figure out all the destinations we need: registers required at the
	// start of b...
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// ...and the locations assigned to b's phis, which must receive the
	// phi argument coming in along this edge.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2498
2499
// process generates code to move all the values into their destinations,
// iterating until every destination is satisfied.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations generated while processing
			// (values evicted from their final spot by erase).
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress, so the remaining unsatisfied moves must form
		// cycles (each destination is occupied by the last copy of another
		// pending value). Break one cycle by copying the occupant of the
		// first pending destination into a scratch location, which frees
		// that destination so the next round can make progress.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		// Copy the occupant into the scratch register: register-to-register
		// copy if it's in a register, otherwise a load from its stack slot.
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		// Loading into the g register is never allowed.
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2566
2567
2568
2569 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
2570 pos = pos.WithNotStmt()
2571 occupant := e.contents[loc]
2572 if occupant.vid == vid {
2573
2574 e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
2575 if splice != nil {
2576 (*splice).Uses--
2577 *splice = occupant.c
2578 occupant.c.Uses++
2579 }
2580
2581
2582
2583 if _, ok := e.s.copies[occupant.c]; ok {
2584
2585 e.s.copies[occupant.c] = true
2586 }
2587 return true
2588 }
2589
2590
2591 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
2592
2593
2594 return false
2595 }
2596
2597
2598 v := e.s.orig[vid]
2599 var c *Value
2600 var src Location
2601 if e.s.f.pass.debug > regDebug {
2602 fmt.Printf("moving v%d to %s\n", vid, loc)
2603 fmt.Printf("sources of v%d:", vid)
2604 }
2605 if opcodeTable[v.Op].fixedReg {
2606 c = v
2607 src = e.s.f.getHome(v.ID)
2608 } else {
2609 for _, w := range e.cache[vid] {
2610 h := e.s.f.getHome(w.ID)
2611 if e.s.f.pass.debug > regDebug {
2612 fmt.Printf(" %s:%s", h, w)
2613 }
2614 _, isreg := h.(*Register)
2615 if src == nil || isreg {
2616 c = w
2617 src = h
2618 }
2619 }
2620 }
2621 if e.s.f.pass.debug > regDebug {
2622 if src != nil {
2623 fmt.Printf(" [use %s]\n", src)
2624 } else {
2625 fmt.Printf(" [no source]\n")
2626 }
2627 }
2628 _, dstReg := loc.(*Register)
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640 e.erase(loc)
2641 var x *Value
2642 if c == nil || e.s.values[vid].rematerializeable {
2643 if !e.s.values[vid].rematerializeable {
2644 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
2645 }
2646 if dstReg {
2647
2648
2649
2650
2651 if e.s.regspec(v).outputs[0].regs®Mask(1<<register(loc.(*Register).num)) == 0 {
2652 _, srcReg := src.(*Register)
2653 if srcReg {
2654
2655
2656 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2657 } else {
2658
2659 x = v.copyInto(e.p)
2660 r := e.findRegFor(x.Type)
2661 e.erase(r)
2662
2663 e.set(r, vid, x, false, pos)
2664
2665 x = e.p.NewValue1(pos, OpCopy, x.Type, x)
2666 }
2667 } else {
2668 x = v.copyInto(e.p)
2669 }
2670 } else {
2671
2672
2673 r := e.findRegFor(v.Type)
2674 e.erase(r)
2675 x = v.copyIntoWithXPos(e.p, pos)
2676 e.set(r, vid, x, false, pos)
2677
2678
2679
2680 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
2681 }
2682 } else {
2683
2684 _, srcReg := src.(*Register)
2685 if srcReg {
2686 if dstReg {
2687 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2688 } else {
2689 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
2690 }
2691 } else {
2692 if dstReg {
2693 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2694 } else {
2695
2696 r := e.findRegFor(c.Type)
2697 e.erase(r)
2698 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2699 e.set(r, vid, t, false, pos)
2700 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
2701 }
2702 }
2703 }
2704 e.set(loc, vid, x, true, pos)
2705 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
2706 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
2707 }
2708 if splice != nil {
2709 (*splice).Uses--
2710 *splice = x
2711 x.Uses++
2712 }
2713 return true
2714 }
2715
2716
2717 func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
2718 e.s.f.setHome(c, loc)
2719 e.contents[loc] = contentRecord{vid, c, final, pos}
2720 a := e.cache[vid]
2721 if len(a) == 0 {
2722 e.cachedVals = append(e.cachedVals, vid)
2723 }
2724 a = append(a, c)
2725 e.cache[vid] = a
2726 if r, ok := loc.(*Register); ok {
2727 if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
2728 e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
2729 }
2730 e.usedRegs |= regMask(1) << uint(r.num)
2731 if final {
2732 e.finalRegs |= regMask(1) << uint(r.num)
2733 }
2734 if len(a) == 1 {
2735 e.uniqueRegs |= regMask(1) << uint(r.num)
2736 }
2737 if len(a) == 2 {
2738 if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
2739 e.uniqueRegs &^= regMask(1) << uint(t.num)
2740 }
2741 }
2742 if e.s.values[vid].rematerializeable {
2743 e.rematerializeableRegs |= regMask(1) << uint(r.num)
2744 }
2745 }
2746 if e.s.f.pass.debug > regDebug {
2747 fmt.Printf("%s\n", c.LongString())
2748 fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
2749 }
2750 }
2751
2752
// erase removes the current occupant of location loc, updating the cache and
// register bookkeeping. If loc was a satisfied destination, the evicted value
// is queued (in e.extra) to be moved back later.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		// Location is already empty.
		return
	}
	vid := cr.vid

	if cr.final {
		// loc was a final destination for vid; add a destination record so
		// the value gets moved back into place on a later pass.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove the copy residing at loc from vid's cached-copy list
	// (unordered swap-remove).
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks if loc is a register.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	// If exactly one copy of vid remains and it lives in a register, that
	// register now holds a unique value. (This applies whether or not loc
	// itself was a register.)
	if len(a) == 1 {
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2794
2795
2796 func (e *edgeState) findRegFor(typ *types.Type) Location {
2797
2798 m := e.s.compatRegs(typ)
2799
2800
2801
2802
2803
2804
2805 x := m &^ e.usedRegs
2806 if x != 0 {
2807 return &e.s.registers[pickReg(x)]
2808 }
2809 x = m &^ e.uniqueRegs &^ e.finalRegs
2810 if x != 0 {
2811 return &e.s.registers[pickReg(x)]
2812 }
2813 x = m &^ e.uniqueRegs
2814 if x != 0 {
2815 return &e.s.registers[pickReg(x)]
2816 }
2817 x = m & e.rematerializeableRegs
2818 if x != 0 {
2819 return &e.s.registers[pickReg(x)]
2820 }
2821
2822
2823
2824 for _, vid := range e.cachedVals {
2825 a := e.cache[vid]
2826 for _, c := range a {
2827 if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
2828 if !c.rematerializeable() {
2829 x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
2830
2831 t := LocalSlot{N: e.s.f.NewLocal(c.Pos, c.Type), Type: c.Type}
2832
2833 e.set(t, vid, x, false, c.Pos)
2834 if e.s.f.pass.debug > regDebug {
2835 fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
2836 }
2837 }
2838
2839
2840
2841 return r
2842 }
2843 }
2844 }
2845
2846 fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
2847 for _, vid := range e.cachedVals {
2848 a := e.cache[vid]
2849 for _, c := range a {
2850 fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
2851 }
2852 }
2853 e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
2854 return nil
2855 }
2856
2857
2858
2859 func (v *Value) rematerializeable() bool {
2860 if !opcodeTable[v.Op].rematerializeable {
2861 return false
2862 }
2863 for _, a := range v.Args {
2864
2865
2866 if !opcodeTable[a.Op].fixedReg {
2867 return false
2868 }
2869 }
2870 return true
2871 }
2872
// liveInfo records that a value is live at the end of some block,
// along with an estimate of how soon it will next be used.
type liveInfo struct {
	ID   ID       // ID of the value
	dist int32    // # of instructions before the next use (weighted by branch likelihood and calls)
	pos  src.XPos // source position of the next use
}
2878
2879
2880
2881
// computeLive computes, for each block, the set of values live at the end of
// that block (s.live), each tagged with a distance-to-next-use estimate used
// by the allocator to decide eviction order. It then computes desired
// register preferences (s.desired) via computeDesired.
func (s *regAllocState) computeLive() {
	f := s.f

	// With a single block there are no cross-block live ranges to compute.
	if len(f.Blocks) == 1 {
		return
	}
	po := f.postorder()
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	s.loopnest = f.loopnest()

	// Scratch space for rematerializeable value IDs dropped across calls.
	rematIDs := make([]ID, 0, 64)

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	s.loopnest.computeUnavoidableCalls()

	// For reducible CFGs with loops, a single backwards walk plus an explicit
	// loop-propagation phase (below) is enough; loopLiveIn collects the
	// live-in set at each loop header and numCalls counts the calls per block
	// for the distance recomputation. For irreducible or loop-free CFGs,
	// the iterate-until-fixed-point path below is used instead.
	var loopLiveIn map[*loop][]liveInfo
	var numCalls []int32
	if len(s.loopnest.loops) > 0 && !s.loopnest.hasIrreducible {
		loopLiveIn = make(map[*loop][]liveInfo)
		numCalls = f.Cache.allocInt32Slice(f.NumBlocks())
		defer f.Cache.freeInt32Slice(numCalls)
	}

	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist, e.pos)
			}
			update := false

			// Phi args in successors are live at the end of this block
			// (at a distance weighted by branch likelihood).
			for _, e := range b.Succs {
				succ := e.b
				delta := branchDistance(b, succ)
				for _, v := range succ.Values {
					if v.Op != OpPhi {
						break
					}
					arg := v.Args[e.i]
					if s.values[arg.ID].needReg && (!live.contains(arg.ID) || delta < live.get(arg.ID)) {
						live.set(arg.ID, delta, v.Pos)
						update = true
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}

			// Walk backwards through the block, shifting distances past the
			// block's values.
			c := live.contents()
			for i := range c {
				c[i].val += int32(len(b.Values))
			}

			// Control values are used at the end of the block.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}
			// Propagate backwards to the start of the block.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Phi inputs are handled on the incoming edges above.
					continue
				}
				if opcodeTable[v.Op].call {
					if numCalls != nil {
						numCalls[b.ID]++
					}
					// Everything live across a call gets pushed out by
					// unlikelyDistance; rematerializeable values are dropped
					// entirely since recomputing them beats spilling.
					rematIDs = rematIDs[:0]
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
						vid := c[i].key
						if s.values[vid].rematerializeable {
							rematIDs = append(rematIDs, vid)
						}
					}
					for _, r := range rematIDs {
						live.remove(r)
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}

			// Record the live-in set of each loop header for the
			// loop-propagation phase below.
			if loopLiveIn != nil {
				loop := s.loopnest.b2l[b.ID]
				if loop != nil && loop.header.ID == b.ID {
					loopLiveIn[loop] = updateLive(live, nil)
				}
			}

			// Propagate this block's live-in set to its predecessors'
			// live-out sets.
			for _, e := range b.Preds {
				p := e.b
				delta := branchDistance(p, b)

				// Start with the predecessor's current live-out set.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add the live-in values, keeping the smaller distance when
				// a value is already present.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}

				if !update {
					continue
				}
				s.live[p.ID] = updateLive(t, s.live[p.ID])
				changed = true
			}
		}

		// Fixed point reached.
		if !changed {
			break
		}

		// Reducible CFGs with loops: one backwards pass suffices; the loop
		// propagation below handles values live around loops.
		if loopLiveIn != nil {
			break
		}

		// Loop-free CFGs converge after a single postorder pass.
		if len(s.loopnest.loops) == 0 {
			break
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after dfs walk", f, s.live, s.desired)
	}

	// Irreducible or loop-free CFGs are fully handled by the iteration
	// above; nothing more to propagate.
	if loopLiveIn == nil {
		s.computeDesired()
		return
	}

	// Propagate loop-header live-in sets from outer loops to inner loops,
	// processing loops outermost-first so each inner loop sees its outer
	// loop's final set.
	loops := slices.Clone(s.loopnest.loops)
	slices.SortFunc(loops, func(a, b *loop) int {
		return cmp.Compare(a.depth, b.depth)
	})

	loopset := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(loopset)
	for _, loop := range loops {
		if loop.outer == nil {
			continue
		}
		livein := loopLiveIn[loop]
		loopset.clear()
		for _, l := range livein {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range loopLiveIn[loop.outer] {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, l.dist, l.pos)
				update = true
			}
		}
		if update {
			loopLiveIn[loop] = updateLive(loopset, livein)
		}
	}

	// Sentinel distance for values whose distance hasn't been computed yet.
	const unknownDistance = -1

	// Any value live into a loop header is live throughout the loop: add it
	// (with unknown distance for now) to every block of that loop.
	for _, b := range po {
		loop := s.loopnest.b2l[b.ID]
		if loop == nil {
			continue
		}
		headerLive := loopLiveIn[loop]
		loopset.clear()
		for _, l := range s.live[b.ID] {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range headerLive {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, unknownDistance, src.NoXPos)
				update = true
			}
		}
		if update {
			s.live[b.ID] = updateLive(loopset, s.live[b.ID])
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after live loop prop", f, s.live, s.desired)
	}

	// Resolve the unknown distances by relaxation: repeatedly pull distances
	// backwards from successors until every live entry has a real distance.
	unfinishedBlocks := f.Cache.allocBlockSlice(len(po))
	defer f.Cache.freeBlockSlice(unfinishedBlocks)
	copy(unfinishedBlocks, po)

	for len(unfinishedBlocks) > 0 {
		n := 0
		for _, b := range unfinishedBlocks {
			live.clear()
			unfinishedValues := 0
			for _, l := range s.live[b.ID] {
				if l.dist == unknownDistance {
					unfinishedValues++
				}
				live.set(l.ID, l.dist, l.pos)
			}
			update := false
			for _, e := range b.Succs {
				succ := e.b
				for _, l := range s.live[succ.ID] {
					if !live.contains(l.ID) || l.dist == unknownDistance {
						continue
					}
					// Distance through succ: succ's block length plus the
					// edge weight plus a call penalty per call in succ.
					dist := int32(len(succ.Values)) + l.dist + branchDistance(b, succ)
					dist += numCalls[succ.ID] * unlikelyDistance
					val := live.get(l.ID)
					switch {
					case val == unknownDistance:
						unfinishedValues--
						fallthrough
					case dist < val:
						update = true
						live.set(l.ID, dist, l.pos)
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}
			if unfinishedValues > 0 {
				unfinishedBlocks[n] = b
				n++
			}
		}
		unfinishedBlocks = unfinishedBlocks[:n]
	}

	// Sort each block's live set by (distance, ID) so downstream consumers
	// see a deterministic, nearest-use-first order.
	for _, b := range f.Blocks {
		slices.SortFunc(s.live[b.ID], func(a, b liveInfo) int {
			if a.dist != b.dist {
				return cmp.Compare(a.dist, b.dist)
			}
			return cmp.Compare(a.ID, b.ID)
		})
	}

	s.computeDesired()

	if f.pass.debug > regDebug {
		s.debugPrintLive("final", f, s.live, s.desired)
	}
}
3195
3196
3197
3198
// computeDesired computes, for each block, the desired register assignments
// for values live at its end (s.desired), by propagating register
// requirements backwards from uses to definitions.
func (s *regAllocState) computeDesired() {
	// desired is a scratch state reused per block; the per-block results are
	// accumulated into s.desired via merge on the predecessors. The whole
	// computation iterates to a fixed point when the CFG has loops.
	var desired desiredState
	f := s.f
	po := f.postorder()
	for {
		changed := false
		for _, b := range po {
			// Start from the preferences already known at the end of b.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				// prefs are the registers desired for v's own result.
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// Phis connect blocks; their register choice is handled
					// by the cross-block machinery, not here.
					continue
				}
				regspec := s.regspec(v)
				// Values clobbered by v cannot carry preferences across it.
				desired.clobber(regspec.clobbers)
				// Single-register inputs: the argument wants that register.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Ops whose result reuses arg0's register (plus a few ops
				// special-cased here) pass v's own preferences down to the
				// argument(s) — both args when the op is commutative.
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}
			// Merge this block's live-in preferences into each predecessor's
			// live-out preferences.
			for _, e := range b.Preds {
				p := e.b
				changed = s.desired[p.ID].merge(&desired) || changed
			}
		}
		// One pass suffices without loops (and irreducible CFGs are excluded
		// from iteration entirely); otherwise iterate to a fixed point.
		if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
			break
		}
	}
}
3255
3256
3257 func updateLive(t *sparseMapPos, live []liveInfo) []liveInfo {
3258 live = live[:0]
3259 if cap(live) < t.size() {
3260 live = make([]liveInfo, 0, t.size())
3261 }
3262 for _, e := range t.contents() {
3263 live = append(live, liveInfo{e.key, e.val, e.pos})
3264 }
3265 return live
3266 }
3267
3268
3269
3270
3271 func branchDistance(b *Block, s *Block) int32 {
3272 if len(b.Succs) == 2 {
3273 if b.Succs[0].b == s && b.Likely == BranchLikely ||
3274 b.Succs[1].b == s && b.Likely == BranchUnlikely {
3275 return likelyDistance
3276 }
3277 if b.Succs[0].b == s && b.Likely == BranchUnlikely ||
3278 b.Succs[1].b == s && b.Likely == BranchLikely {
3279 return unlikelyDistance
3280 }
3281 }
3282
3283
3284 return normalDistance
3285 }
3286
3287 func (s *regAllocState) debugPrintLive(stage string, f *Func, live [][]liveInfo, desired []desiredState) {
3288 fmt.Printf("%s: live values at end of each block: %s\n", stage, f.Name)
3289 for _, b := range f.Blocks {
3290 s.debugPrintLiveBlock(b, live[b.ID], &desired[b.ID])
3291 }
3292 }
3293
3294 func (s *regAllocState) debugPrintLiveBlock(b *Block, live []liveInfo, desired *desiredState) {
3295 fmt.Printf(" %s:", b)
3296 slices.SortFunc(live, func(a, b liveInfo) int {
3297 return cmp.Compare(a.ID, b.ID)
3298 })
3299 for _, x := range live {
3300 fmt.Printf(" v%d(%d)", x.ID, x.dist)
3301 for _, e := range desired.entries {
3302 if e.ID != x.ID {
3303 continue
3304 }
3305 fmt.Printf("[")
3306 first := true
3307 for _, r := range e.regs {
3308 if r == noRegister {
3309 continue
3310 }
3311 if !first {
3312 fmt.Printf(",")
3313 }
3314 fmt.Print(&s.registers[r])
3315 first = false
3316 }
3317 fmt.Printf("]")
3318 }
3319 }
3320 if avoid := desired.avoid; avoid != 0 {
3321 fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
3322 }
3323 fmt.Println()
3324 }
3325
3326
// A desiredState records which registers each (pre-regalloc) value
// would like to end up in.
type desiredState struct {
	// Desired assignments are expected to be few, so a flat list of
	// valueID+registers entries is used instead of a map.
	entries []desiredStateEntry

	// Registers that other values want to be in. This mask contains
	// at least the union of the regs fields of entries, but may also
	// have bits set for values that were once tracked here but have
	// since been removed (it is only shrunk by clobber).
	avoid regMask
}
// A desiredStateEntry associates one value with its preferred registers.
type desiredStateEntry struct {
	// ID of the (pre-regalloc) value.
	ID ID

	// Registers this value would like to be in, in priority order
	// (regs[0] is most preferred). Unused slots hold noRegister.
	// At most 4 preferences are tracked; adding more evicts the
	// lowest-priority one (see add).
	regs [4]register
}
3347
3348
3349 func (d *desiredState) get(vid ID) [4]register {
3350 for _, e := range d.entries {
3351 if e.ID == vid {
3352 return e.regs
3353 }
3354 }
3355 return [4]register{noRegister, noRegister, noRegister, noRegister}
3356 }
3357
3358
// add records that we'd like value vid to be in register r, making r
// vid's highest-priority preference. The entry's regs slice behaves
// like a small MRU list: at most 4 preferences are kept and the
// lowest-priority one is evicted when a new register is added.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and already highest priority.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Move r from lower priority to top priority,
				// shifting the intervening entries down one slot.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		// r is new for this value: shift everything down one slot
		// (dropping the lowest-priority register) and put r on top.
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	// First preference recorded for vid.
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
3384
3385 func (d *desiredState) addList(vid ID, regs [4]register) {
3386
3387 for i := len(regs) - 1; i >= 0; i-- {
3388 r := regs[i]
3389 if r != noRegister {
3390 d.add(vid, r)
3391 }
3392 }
3393 }
3394
3395
// clobber erases any desired registers that are in the register set m,
// and clears those registers from the avoid mask.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		// Compact the surviving (non-clobbered) registers to the
		// front of e.regs, preserving their priority order.
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No desired registers left for this value: delete the
			// entry by swapping in the last one. Note: i is not
			// advanced so the swapped-in entry is processed next.
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		// Pad the now-unused tail slots with noRegister.
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}
3419
3420
3421 func (d *desiredState) copy(x *desiredState) {
3422 d.entries = append(d.entries[:0], x.entries...)
3423 d.avoid = x.avoid
3424 }
3425
3426
3427 func (d *desiredState) remove(vid ID) [4]register {
3428 for i := range d.entries {
3429 if d.entries[i].ID == vid {
3430 regs := d.entries[i].regs
3431 d.entries[i] = d.entries[len(d.entries)-1]
3432 d.entries = d.entries[:len(d.entries)-1]
3433 return regs
3434 }
3435 }
3436 return [4]register{noRegister, noRegister, noRegister, noRegister}
3437 }
3438
3439
3440
3441 func (d *desiredState) merge(x *desiredState) bool {
3442 oldAvoid := d.avoid
3443 d.avoid |= x.avoid
3444
3445
3446 for _, e := range x.entries {
3447 d.addList(e.ID, e.regs)
3448 }
3449 return oldAvoid != d.avoid
3450 }
3451
3452
// computeUnavoidableCalls sets l.containsUnavoidableCall for each loop
// in the loopnest for which every path from the loop header back to
// itself passes through a call.
func (loopnest *loopnest) computeUnavoidableCalls() {
	f := loopnest.f

	// hasCall[ID] reports whether the block with that ID contains a call.
	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
	defer f.Cache.freeBoolSlice(hasCall)
	for _, b := range f.Blocks {
		if b.containsCall() {
			hasCall[b.ID] = true
		}
	}
	found := f.Cache.allocSparseSet(f.NumBlocks())
	defer f.Cache.freeSparseSet(found)
	// For each loop, do a depth-first search from the header looking
	// for a call-free path back around to the header. The search does
	// not continue through blocks containing calls, and it stays inside
	// the loop: blocks outside any loop, or in a different loop that is
	// not nested deeper than this one, are skipped. If the search
	// reaches the header again, a call-free iteration exists and we
	// move on to the next loop via the labeled continue; otherwise
	// every iteration of the loop must execute a call.
loopLoop:
	for _, l := range loopnest.loops {
		found.clear()
		tovisit := make([]*Block, 0, 8) // DFS worklist
		tovisit = append(tovisit, l.header)
		for len(tovisit) > 0 {
			// Pop a block off the worklist.
			cur := tovisit[len(tovisit)-1]
			tovisit = tovisit[:len(tovisit)-1]
			if hasCall[cur.ID] {
				// Don't trace through calls.
				continue
			}
			for _, s := range cur.Succs {
				nb := s.Block()
				if nb == l.header {
					// Found a call-free path around the loop.
					continue loopLoop
				}
				if found.contains(nb.ID) {
					// Already queued/visited via another path.
					continue
				}
				nl := loopnest.b2l[nb.ID]
				if nl == nil || (nl.depth <= l.depth && nl != l) {
					// This successor left the loop.
					continue
				}
				tovisit = append(tovisit, nb)
				found.add(nb.ID)
			}
		}
		// No call-free path back to the header exists.
		l.containsUnavoidableCall = true
	}
}
3508
3509 func (b *Block) containsCall() bool {
3510 if b.Kind == BlockDefer {
3511 return true
3512 }
3513 for _, v := range b.Values {
3514 if opcodeTable[v.Op].call {
3515 return true
3516 }
3517 }
3518 return false
3519 }
3520