1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "cmp"
123 "fmt"
124 "internal/buildcfg"
125 "math"
126 "math/bits"
127 "slices"
128 "unsafe"
129 )
130
// Debug-output thresholds. They are compared against s.f.pass.debug
// (e.g. "if s.f.pass.debug > regDebug"); larger values enable
// progressively more verbose logging.
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
137
138
139
// Distance weights used when estimating how far away the next use of a
// value is: uses reached through likely branches count as near, through
// unlikely branches as far.
const (
	likelyDistance = 1
	normalDistance = 10
	unlikelyDistance = 100
)
145
146
147
148 func regalloc(f *Func) {
149 var s regAllocState
150 s.init(f)
151 s.regalloc(f)
152 s.close()
153 }
154
// register is a zero-based index into the machine's register set.
type register uint8

// noRegister is a sentinel register index meaning "no register".
const noRegister register = 255

// noRegisters is an all-noRegister array, handy for initializing
// fixed-size register lists.
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}

// A regMask encodes a set of machine registers, one bit per register
// index (bit r set means register r is in the set).
type regMask uint64

// String returns the set formatted as space-separated generic register
// names, e.g. "r0 r2 r31"; the empty set yields "".
func (m regMask) String() string {
	var s string
	sep := ""
	for r := register(0); m != 0; r++ {
		if !m.contains(r) {
			continue
		}
		m &^= regMask(1) << r
		s += sep + fmt.Sprintf("r%d", r)
		sep = " "
	}
	return s
}

// contains reports whether register r is a member of the set m.
func (m regMask) contains(r register) bool {
	return m&(regMask(1)<<r) != 0
}
189
190 func (s *regAllocState) RegMaskString(m regMask) string {
191 str := ""
192 for r := register(0); m != 0; r++ {
193 if m>>r&1 == 0 {
194 continue
195 }
196 m &^= regMask(1) << r
197 if str != "" {
198 str += " "
199 }
200 str += s.registers[r].String()
201 }
202 return str
203 }
204
205
// countRegs returns the number of registers in the set r.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
209
210
// pickReg returns the lowest-numbered register in the set r.
// It panics if r is empty.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	// The index of the lowest set bit is the chosen register.
	return register(bits.TrailingZeros64(uint64(r)))
}
218
// A use records one upcoming use of a value. Each value carries a linked
// list of pending uses, kept in nondecreasing dist order (addUse fatals
// on out-of-order insertion).
type use struct {
	// dist is the distance (instructions from the start of the current
	// block) to this use.
	dist int32
	// pos is the source position of the use, for debug output.
	pos src.XPos
	// next is the following use of the same value (dist >= this dist).
	next *use
}
229
230
// valState is the allocator's per-(pre-regalloc) value state, indexed by
// value ID.
type valState struct {
	regs regMask // set of registers currently holding a cached copy of this value
	uses *use // list of upcoming uses, soonest first
	spill *Value // the lazily-created OpStoreReg spill of this value, if any (see makeSpill)
	restoreMin int32 // sdom entry bound of the subtree where restores of this value live
	restoreMax int32 // sdom exit bound of that subtree
	needReg bool // value needs a register (see needRegister)
	rematerializeable bool // value can be recomputed instead of spilled/restored
}
240
// regState records what a single machine register currently holds.
type regState struct {
	v *Value // original (pre-regalloc) value held in this register
	c *Value // cached copy of v that actually lives in the register (may be v itself)

}
246
// regAllocState holds the working state of the register allocator while
// it processes a single function.
type regAllocState struct {
	f *Func

	sdom SparseTree // dominator tree of f, used to bound spill/restore placement
	registers []Register // description of the machine's registers
	numRegs register // len(registers)
	SPReg register // stack pointer register
	SBReg register // static base register
	GReg register // g register (only meaningful if f.Config.hasGReg)
	ZeroIntReg register // hardware zero register, or noRegister if none
	allocatable regMask // registers we are allowed to allocate

	// live[b.ID] lists the values live at the end of block b, each with
	// a distance to its next use and that use's position.
	// NOTE(review): populated by computeLive (not in this view) — confirm.
	live [][]liveInfo

	// desired[b.ID] records desired register assignments at the end of
	// each block. NOTE(review): populated elsewhere — confirm.
	desired []desiredState

	// values[v.ID] is the current allocation state of value v.
	values []valState

	// sp, sb are the IDs of the SP and SB values (0 until seen).
	sp, sb ID

	// orig[c.ID] is the original (pre-regalloc) value that cached copy c
	// represents. See setOrig.
	orig []*Value

	// regs[r] is the current contents of register r.
	regs []regState

	// nospill is the set of registers holding values that must not be
	// kicked out before the current instruction completes.
	nospill regMask

	// used is the set of registers currently holding some value.
	used regMask

	// usedSinceBlockStart is the set of registers touched since the
	// start of the current block (used to shrink startRegs; see allocReg).
	usedSinceBlockStart regMask

	// tmpused is the set of registers reserved by the current instruction.
	tmpused regMask

	// curBlock is the block currently being processed.
	curBlock *Block

	// freeUseRecords is a free list of use records, recycled by
	// addUse/advanceUses.
	freeUseRecords *use

	// endRegs[b.ID] is the register contents at the end of block b,
	// recorded so successors can merge from it.
	endRegs [][]endReg

	// startRegs[b.ID] is the register contents expected at the start of
	// merge block b (values the predecessors must provide in registers).
	startRegs [][]startReg

	// startRegsMask is the set of registers mentioned in
	// startRegs[curBlock.ID]; entries can be dropped while the block is
	// being processed (see allocReg).
	startRegsMask regMask

	// spillLive[b.ID] is the set of spill value IDs live at the end of b.
	spillLive [][]ID

	// copies records OpCopy values generated to shuffle values between
	// registers; the bool tracks whether the copy ended up being used.
	copies map[*Value]bool

	// loopnest is the loop structure of f.
	// NOTE(review): not referenced in the visible portion — confirm usage.
	loopnest *loopnest

	// visitOrder is the order in which blocks are processed.
	visitOrder []*Block

	// blockOrder[b.ID] is b's index within visitOrder.
	blockOrder []int32

	// doClobber, if set, makes calls clobber dead registers
	// (-clobberdeadreg debugging aid).
	doClobber bool

	// nextCall[i] is the index of the next call at or after instruction
	// i in the current block (math.MaxInt32 if none). Used to free
	// call-clobbered values early; see advanceUses/dropIfUnused.
	nextCall []int32

	// curIdx is the index of the instruction currently being processed
	// within the current block.
	curIdx int
}
343
// endReg records one register's contents at the end of a block:
// register r holds cached copy c of original value v.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in r
	c *Value // cached copy of v residing in r
}
349
// startReg records one register that must be populated on entry to a
// merge block: register r must hold value v (via cached copy c).
type startReg struct {
	r register
	v *Value // pre-regalloc value expected in r
	c *Value // cached copy of v
	pos src.XPos // source position of v's first use in the block (for debug/error reporting)
}
356
357
358 func (s *regAllocState) freeReg(r register) {
359 if !s.allocatable.contains(r) && !s.isGReg(r) {
360 return
361 }
362 v := s.regs[r].v
363 if v == nil {
364 s.f.Fatalf("tried to free an already free register %d\n", r)
365 }
366
367
368 if s.f.pass.debug > regDebug {
369 fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
370 }
371 s.regs[r] = regState{}
372 s.values[v.ID].regs &^= regMask(1) << r
373 s.used &^= regMask(1) << r
374 }
375
376
377 func (s *regAllocState) freeRegs(m regMask) {
378 for m&s.used != 0 {
379 s.freeReg(pickReg(m & s.used))
380 }
381 }
382
383
384 func (s *regAllocState) clobberRegs(m regMask) {
385 m &= s.allocatable & s.f.Config.gpRegMask
386 for m != 0 {
387 r := pickReg(m)
388 m &^= 1 << r
389 x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
390 s.f.setHome(x, &s.registers[r])
391 }
392 }
393
394
395
// setOrig records that c is a cached copy (copy/spill/restore) of the
// same original value as v, growing the orig table as needed. A cached
// value's orig entry may be set only once.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	// Grow the cache-backed array if c.ID doesn't fit in its capacity.
	if int(c.ID) >= cap(s.orig) {
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	// Extend the length (with nils) up to c.ID.
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	// Chain through v's own orig entry, so copies of copies still point
	// at the true original value.
	s.orig[c.ID] = s.orig[v.ID]
}
411
412
413
// assignReg records that register r now holds c, a cached copy of value
// v. For registers outside the allocatable set (other than g) only the
// value-side bookkeeping is done. Assigning to a busy register is a
// fatal error.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	// Value-side state: v gains a home in r; c's location is r.
	s.values[v.ID].regs |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])

	// Register-side state is only tracked for allocatable registers
	// (and g).
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}
	s.regs[r] = regState{v, c}
	s.used |= regMask(1) << r
}
432
433
434
435
436 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
437 if v.OnWasmStack {
438 return noRegister
439 }
440
441 mask &= s.allocatable
442 mask &^= s.nospill
443 if mask == 0 {
444 s.f.Fatalf("no register available for %s", v.LongString())
445 }
446
447
448 if mask&^s.used != 0 {
449 r := pickReg(mask &^ s.used)
450 s.usedSinceBlockStart |= regMask(1) << r
451 return r
452 }
453
454
455
456
457
458
459
460
461
462
463
464 var r register
465 maxuse := int32(-1)
466 for t := register(0); t < s.numRegs; t++ {
467 if mask>>t&1 == 0 {
468 continue
469 }
470 v := s.regs[t].v
471 if n := s.values[v.ID].uses.dist; n > maxuse {
472
473
474 r = t
475 maxuse = n
476 }
477 }
478 if maxuse == -1 {
479 s.f.Fatalf("couldn't find register to spill")
480 }
481
482 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
483
484
485
486 s.freeReg(r)
487 return r
488 }
489
490
491
492 v2 := s.regs[r].v
493 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
494 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
495 s.usedSinceBlockStart |= regMask(1) << r
496 r2 := pickReg(m)
497 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
498 s.copies[c] = false
499 if s.f.pass.debug > regDebug {
500 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
501 }
502 s.setOrig(c, v2)
503 s.assignReg(r2, v2, c)
504 }
505
506
507
508
509 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
510 if s.startRegsMask&(regMask(1)<<r) == 1 {
511 if s.f.pass.debug > regDebug {
512 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
513 }
514 s.startRegsMask &^= regMask(1) << r
515 }
516 }
517
518 s.freeReg(r)
519 s.usedSinceBlockStart |= regMask(1) << r
520 return r
521 }
522
523
524
// makeSpill returns the spill (OpStoreReg) for v, creating it lazily on
// first request. The spill has no block or argument yet — final
// placement happens later, constrained to the dominator-tree interval
// [restoreMin, restoreMax], which this call widens to cover block b.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Spill already exists; just widen the subtree in which
		// restores (and hence the spill) must live.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Create the spill without a block or argument; both are decided
	// when spills are placed later in the pass.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
544
545
546
547
548
549
550
// allocValToReg returns a cached copy of v that lives in one of the
// registers in mask, generating whatever copy/rematerialization/load is
// needed to get it there. If nospill is set, the chosen register is
// protected from eviction until the current instruction completes. pos
// is used for any instructions generated.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		// On wasm, rematerialize directly onto the wasm stack instead
		// of consuming a register.
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Fast path: v is already in an acceptable register.
	if mask&vi.regs != 0 {
		mask &= vi.regs
		r := pickReg(mask)
		if mask.contains(s.SPReg) {
			// Prefer SP when it is one of the options.
			r = s.SPReg
		}
		if !s.allocatable.contains(r) {
			// v lives in a fixed (untracked) register; use v directly.
			return v
		}
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register
	// On wasm, nospill inputs live on the wasm stack rather than in a
	// register.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register for v.
		r = s.allocReg(mask, v)
	}

	// Produce a copy of v destined for the new register.
	var c *Value
	if vi.regs != 0 {
		// v is in some register outside mask; copy from there.
		var current *Value
		if vi.regs&^s.allocatable != 0 {
			// v is in an untracked fixed register; copy from v itself.
			current = v
		} else {
			r2 := pickReg(vi.regs)
			if s.regs[r2].v != v {
				panic("bad register state")
			}
			current = s.regs[r2].c
			s.usedSinceBlockStart |= regMask(1) << r2
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
	} else if v.rematerializeable() {
		// Rematerialize v instead of loading it from its spill slot.
		c = v.copyIntoWithXPos(s.curBlock, pos)
		// The rematerialized op may only be able to produce its result
		// into registers outside mask; if so, issue it there first and
		// then copy into the allocated register.
		sourceMask := s.regspec(c).outputs[0].regs
		if mask&sourceMask == 0 && !onWasmStack {
			s.setOrig(c, v)
			s.assignReg(s.allocReg(sourceMask, v), v, c)
			// Now c is the value in a source-compatible register; the
			// copy below moves it to the requested register r.
			c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
		}
	} else {
		// Load v back from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
663
664
665 func isLeaf(f *Func) bool {
666 for _, b := range f.Blocks {
667 for _, v := range b.Values {
668 if v.Op.IsCall() && !v.Op.IsTailCall() {
669
670 return false
671 }
672 }
673 }
674 return true
675 }
676
677
// needRegister reports whether v produces a result that must live in a
// register: memory, void, flags, and tuple values do not get registers.
func (v *Value) needRegister() bool {
	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
}
681
// init prepares the allocator state for f: it identifies the special
// registers (SP, SB, g, ZERO), computes the allocatable register set for
// the target configuration, picks a block visit order, sizes the
// per-value and per-block tables, computes liveness, and applies
// wasm-specific and clobber-dead-registers setup.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// The register count must fit both in a register index and in a
	// regMask bit position.
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate the special registers by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	s.ZeroIntReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		case "ZERO":
			s.ZeroIntReg = r
		}
	}
	// Make sure the required special registers were found.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.ZeroIntReg != noRegister {
		s.allocatable &^= 1 << s.ZeroIntReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		// Reserve the frame pointer register.
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register, so
			// it stays live and can't be allocated.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// When dynamically linking, some architectures reserve a
		// register for access to global data.
		switch s.f.Config.arch {
		case "386":
			// nothing reserved here (the 386 uses a different
			// mechanism; see the original source for details).
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing to do
		case "loong64":
			// nothing to do
		case "ppc64le":
			// nothing to do
		case "riscv64":
			// nothing to do
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Decide on the block visit order and record each block's position
	// in that order.
	s.visitOrder = layoutRegallocOrder(f)
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	// Size the per-register and per-value tables, reusing cached storage
	// where possible.
	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// Wasm: mark values that can live on the wasm operand stack instead
	// of in registers. A value qualifies if it has a single use in the
	// same block by a non-generic op and nothing in between forces the
	// stack to be flushed.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// Walk backwards; seed with single-use control values.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Value must be materialized; anything pending on
					// the stack would be interleaved, so reset.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// An argument can stay on the stack if it is
					// produced in this block, used only here, is not
					// memory, and is a real machine op.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// Only clobber dead registers on reasonably small functions, to
	// bound compile time.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		s.doClobber = true
	}
}
849
// close returns the allocator's cache-backed storage to the function
// cache.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
853
854
855
856 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
857 r := s.freeUseRecords
858 if r != nil {
859 s.freeUseRecords = r.next
860 } else {
861 r = &use{}
862 }
863 r.dist = dist
864 r.pos = pos
865 r.next = s.values[id].uses
866 s.values[id].uses = r
867 if r.next != nil && dist > r.next.dist {
868 s.f.Fatalf("uses added in wrong order")
869 }
870 }
871
872
873
// advanceUses pops the current use off each of v's arguments' use lists
// and frees the registers of arguments that are now dead or that have no
// further use before the next call (values in fixed registers are kept
// while any use remains). Called each time an instruction is processed.
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
			// Value is dead, or won't be used again until after a call
			// (which clobbers registers anyway); free its registers.
			s.freeRegs(ai.regs)
		}
		// Recycle the use record.
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
891
892
893
// dropIfUnused frees v's registers if v has no remaining uses, or none
// before the next call (values in fixed registers are kept while any use
// remains).
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
		s.freeRegs(vi.regs)
	}
}
904
905
906
907
908 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
909 u := s.values[v.ID].uses
910 if u == nil {
911 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
912 }
913 d := u.dist
914 for u != nil && u.dist == d {
915 u = u.next
916 }
917 return u != nil && u.dist > d
918 }
919
920
921 func (s *regAllocState) setState(regs []endReg) {
922 s.freeRegs(s.used)
923 for _, x := range regs {
924 s.assignReg(x.r, x.v, x.c)
925 }
926 }
927
928
929 func (s *regAllocState) compatRegs(t *types.Type) regMask {
930 var m regMask
931 if t.IsTuple() || t.IsFlags() {
932 return 0
933 }
934 if t.IsSIMD() {
935 if t.Size() > 8 {
936 return s.f.Config.fpRegMask & s.allocatable
937 } else {
938
939 return s.f.Config.gpRegMask & s.allocatable
940 }
941 }
942 if t.IsFloat() || t == types.TypeInt128 {
943 if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
944 m = s.f.Config.fp32RegMask
945 } else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
946 m = s.f.Config.fp64RegMask
947 } else {
948 m = s.f.Config.fpRegMask
949 }
950 } else {
951 m = s.f.Config.gpRegMask
952 }
953 return m & s.allocatable
954 }
955
956
957 func (s *regAllocState) regspec(v *Value) regInfo {
958 op := v.Op
959 if op == OpConvert {
960
961
962
963 m := s.allocatable & s.f.Config.gpRegMask
964 return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
965 }
966 if op == OpArgIntReg {
967 reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
968 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
969 }
970 if op == OpArgFloatReg {
971 reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
972 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
973 }
974 if op.IsCall() {
975 if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
976 return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
977 }
978 }
979 if op == OpMakeResult && s.f.OwnAux.reg != nil {
980 return *s.f.OwnAux.ResultReg(s.f.Config)
981 }
982 return opcodeTable[op].reg
983 }
984
// isGReg reports whether r is the g register on configurations that
// have one.
func (s *regAllocState) isGReg(r register) bool {
	return s.f.Config.hasGReg && s.GReg == r
}
988
989
// tmpVal is a package-level scratch Value.
// NOTE(review): it is not referenced in the visible portion of this
// file — presumably used elsewhere in this pass; confirm before removal.
var tmpVal Value
991
992 func (s *regAllocState) regalloc(f *Func) {
993 regValLiveSet := f.newSparseSet(f.NumValues())
994 defer f.retSparseSet(regValLiveSet)
995 var oldSched []*Value
996 var phis []*Value
997 var phiRegs []register
998 var args []*Value
999
1000
1001 var desired desiredState
1002 desiredSecondReg := map[ID][4]register{}
1003
1004
1005 type dentry struct {
1006 out [4]register
1007 in [3][4]register
1008 }
1009 var dinfo []dentry
1010
1011 if f.Entry != f.Blocks[0] {
1012 f.Fatalf("entry block must be first")
1013 }
1014
1015 for _, b := range s.visitOrder {
1016 if s.f.pass.debug > regDebug {
1017 fmt.Printf("Begin processing block %v\n", b)
1018 }
1019 s.curBlock = b
1020 s.startRegsMask = 0
1021 s.usedSinceBlockStart = 0
1022 clear(desiredSecondReg)
1023
1024
1025
1026 regValLiveSet.clear()
1027 if s.live != nil {
1028 for _, e := range s.live[b.ID] {
1029 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
1030 regValLiveSet.add(e.ID)
1031 }
1032 }
1033 for _, v := range b.ControlValues() {
1034 if s.values[v.ID].needReg {
1035 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
1036 regValLiveSet.add(v.ID)
1037 }
1038 }
1039 if len(s.nextCall) < len(b.Values) {
1040 s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
1041 }
1042 var nextCall int32 = math.MaxInt32
1043 for i := len(b.Values) - 1; i >= 0; i-- {
1044 v := b.Values[i]
1045 regValLiveSet.remove(v.ID)
1046 if v.Op == OpPhi {
1047
1048
1049
1050 s.nextCall[i] = nextCall
1051 continue
1052 }
1053 if opcodeTable[v.Op].call {
1054
1055 regValLiveSet.clear()
1056 if s.sp != 0 && s.values[s.sp].uses != nil {
1057 regValLiveSet.add(s.sp)
1058 }
1059 if s.sb != 0 && s.values[s.sb].uses != nil {
1060 regValLiveSet.add(s.sb)
1061 }
1062 nextCall = int32(i)
1063 }
1064 for _, a := range v.Args {
1065 if !s.values[a.ID].needReg {
1066 continue
1067 }
1068 s.addUse(a.ID, int32(i), v.Pos)
1069 regValLiveSet.add(a.ID)
1070 }
1071 s.nextCall[i] = nextCall
1072 }
1073 if s.f.pass.debug > regDebug {
1074 fmt.Printf("use distances for %s\n", b)
1075 for i := range s.values {
1076 vi := &s.values[i]
1077 u := vi.uses
1078 if u == nil {
1079 continue
1080 }
1081 fmt.Printf(" v%d:", i)
1082 for u != nil {
1083 fmt.Printf(" %d", u.dist)
1084 u = u.next
1085 }
1086 fmt.Println()
1087 }
1088 }
1089
1090
1091
1092 nphi := 0
1093 for _, v := range b.Values {
1094 if v.Op != OpPhi {
1095 break
1096 }
1097 nphi++
1098 }
1099 phis = append(phis[:0], b.Values[:nphi]...)
1100 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1101 b.Values = b.Values[:0]
1102
1103
1104 if b == f.Entry {
1105
1106 if nphi > 0 {
1107 f.Fatalf("phis in entry block")
1108 }
1109 } else if len(b.Preds) == 1 {
1110
1111 s.setState(s.endRegs[b.Preds[0].b.ID])
1112 if nphi > 0 {
1113 f.Fatalf("phis in single-predecessor block")
1114 }
1115
1116
1117
1118 for r := register(0); r < s.numRegs; r++ {
1119 v := s.regs[r].v
1120 if v != nil && !regValLiveSet.contains(v.ID) {
1121 s.freeReg(r)
1122 }
1123 }
1124 } else {
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138 idx := -1
1139 for i, p := range b.Preds {
1140
1141
1142 pb := p.b
1143 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1144 continue
1145 }
1146 if idx == -1 {
1147 idx = i
1148 continue
1149 }
1150 pSel := b.Preds[idx].b
1151 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1152 idx = i
1153 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1165 idx = i
1166 }
1167 }
1168 }
1169 if idx < 0 {
1170 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1171 }
1172 p := b.Preds[idx].b
1173 s.setState(s.endRegs[p.ID])
1174
1175 if s.f.pass.debug > regDebug {
1176 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1177 for _, x := range s.endRegs[p.ID] {
1178 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1179 }
1180 }
1181
1182
1183
1184
1185
1186 phiRegs = phiRegs[:0]
1187 var phiUsed regMask
1188
1189 for _, v := range phis {
1190 if !s.values[v.ID].needReg {
1191 phiRegs = append(phiRegs, noRegister)
1192 continue
1193 }
1194 a := v.Args[idx]
1195
1196
1197 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1198 if m != 0 {
1199 r := pickReg(m)
1200 phiUsed |= regMask(1) << r
1201 phiRegs = append(phiRegs, r)
1202 } else {
1203 phiRegs = append(phiRegs, noRegister)
1204 }
1205 }
1206
1207
1208 for i, v := range phis {
1209 if !s.values[v.ID].needReg {
1210 continue
1211 }
1212 a := v.Args[idx]
1213 r := phiRegs[i]
1214 if r == noRegister {
1215 continue
1216 }
1217 if regValLiveSet.contains(a.ID) {
1218
1219
1220
1221
1222
1223
1224
1225
1226 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1227 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1228 r2 := pickReg(m)
1229 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1230 s.copies[c] = false
1231 if s.f.pass.debug > regDebug {
1232 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1233 }
1234 s.setOrig(c, a)
1235 s.assignReg(r2, a, c)
1236 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1237 }
1238 }
1239 s.freeReg(r)
1240 }
1241
1242
1243 b.Values = append(b.Values, phis...)
1244
1245
1246
1247 for i, v := range phis {
1248 if !s.values[v.ID].needReg {
1249 continue
1250 }
1251 if phiRegs[i] != noRegister {
1252 continue
1253 }
1254 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1255
1256
1257 for i, pe := range b.Preds {
1258 if i == idx {
1259 continue
1260 }
1261 ri := noRegister
1262 for _, er := range s.endRegs[pe.b.ID] {
1263 if er.v == s.orig[v.Args[i].ID] {
1264 ri = er.r
1265 break
1266 }
1267 }
1268 if ri != noRegister && m>>ri&1 != 0 {
1269 m = regMask(1) << ri
1270 break
1271 }
1272 }
1273 if m != 0 {
1274 r := pickReg(m)
1275 phiRegs[i] = r
1276 phiUsed |= regMask(1) << r
1277 }
1278 }
1279
1280
1281 for i, v := range phis {
1282 if !s.values[v.ID].needReg {
1283 continue
1284 }
1285 r := phiRegs[i]
1286 if r == noRegister {
1287
1288
1289 s.values[v.ID].spill = v
1290 continue
1291 }
1292
1293 s.assignReg(r, v, v)
1294 }
1295
1296
1297 for r := register(0); r < s.numRegs; r++ {
1298 if phiUsed>>r&1 != 0 {
1299 continue
1300 }
1301 v := s.regs[r].v
1302 if v != nil && !regValLiveSet.contains(v.ID) {
1303 s.freeReg(r)
1304 }
1305 }
1306
1307
1308
1309
1310
1311 regList := make([]startReg, 0, 32)
1312 for r := register(0); r < s.numRegs; r++ {
1313 v := s.regs[r].v
1314 if v == nil {
1315 continue
1316 }
1317 if phiUsed>>r&1 != 0 {
1318
1319
1320 continue
1321 }
1322 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1323 s.startRegsMask |= regMask(1) << r
1324 }
1325 s.startRegs[b.ID] = make([]startReg, len(regList))
1326 copy(s.startRegs[b.ID], regList)
1327
1328 if s.f.pass.debug > regDebug {
1329 fmt.Printf("after phis\n")
1330 for _, x := range s.startRegs[b.ID] {
1331 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1332 }
1333 }
1334 }
1335
1336
1337 for i, v := range phis {
1338 s.curIdx = i
1339 s.dropIfUnused(v)
1340 }
1341
1342
1343 if l := len(oldSched); cap(dinfo) < l {
1344 dinfo = make([]dentry, l)
1345 } else {
1346 dinfo = dinfo[:l]
1347 clear(dinfo)
1348 }
1349
1350
1351 if s.desired != nil {
1352 desired.copy(&s.desired[b.ID])
1353 }
1354
1355
1356
1357
1358
1359
1360 for _, e := range b.Succs {
1361 succ := e.b
1362
1363 for _, x := range s.startRegs[succ.ID] {
1364 desired.add(x.v.ID, x.r)
1365 }
1366
1367 pidx := e.i
1368 for _, v := range succ.Values {
1369 if v.Op != OpPhi {
1370 break
1371 }
1372 if !s.values[v.ID].needReg {
1373 continue
1374 }
1375 rp, ok := s.f.getHome(v.ID).(*Register)
1376 if !ok {
1377
1378
1379
1380
1381 for _, a := range v.Args {
1382 rp, ok = s.f.getHome(a.ID).(*Register)
1383 if ok {
1384 break
1385 }
1386 }
1387 if !ok {
1388 continue
1389 }
1390 }
1391 desired.add(v.Args[pidx].ID, register(rp.num))
1392 }
1393 }
1394
1395
1396 for i := len(oldSched) - 1; i >= 0; i-- {
1397 v := oldSched[i]
1398 prefs := desired.remove(v.ID)
1399 regspec := s.regspec(v)
1400 desired.clobber(regspec.clobbers)
1401 for _, j := range regspec.inputs {
1402 if countRegs(j.regs) != 1 {
1403 continue
1404 }
1405 desired.clobber(j.regs)
1406 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1407 }
1408 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1409 if opcodeTable[v.Op].commutative {
1410 desired.addList(v.Args[1].ID, prefs)
1411 }
1412 desired.addList(v.Args[0].ID, prefs)
1413 }
1414
1415 dinfo[i].out = prefs
1416 for j, a := range v.Args {
1417 if j >= len(dinfo[i].in) {
1418 break
1419 }
1420 dinfo[i].in[j] = desired.get(a.ID)
1421 }
1422 if v.Op == OpSelect1 && prefs[0] != noRegister {
1423
1424
1425 desiredSecondReg[v.Args[0].ID] = prefs
1426 }
1427 }
1428
1429
1430 for idx, v := range oldSched {
1431 s.curIdx = nphi + idx
1432 tmpReg := noRegister
1433 if s.f.pass.debug > regDebug {
1434 fmt.Printf(" processing %s\n", v.LongString())
1435 }
1436 regspec := s.regspec(v)
1437 if v.Op == OpPhi {
1438 f.Fatalf("phi %s not at start of block", v)
1439 }
1440 if opcodeTable[v.Op].fixedReg {
1441 switch v.Op {
1442 case OpSP:
1443 s.assignReg(s.SPReg, v, v)
1444 s.sp = v.ID
1445 case OpSB:
1446 s.assignReg(s.SBReg, v, v)
1447 s.sb = v.ID
1448 case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
1449 s.assignReg(s.ZeroIntReg, v, v)
1450 case OpAMD64Zero128, OpAMD64Zero256, OpAMD64Zero512:
1451 regspec := s.regspec(v)
1452 m := regspec.outputs[0].regs
1453 if countRegs(m) != 1 {
1454 f.Fatalf("bad fixed-register op %s", v)
1455 }
1456 s.assignReg(pickReg(m), v, v)
1457 default:
1458 f.Fatalf("unknown fixed-register op %s", v)
1459 }
1460 b.Values = append(b.Values, v)
1461 s.advanceUses(v)
1462 continue
1463 }
1464 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1465 if s.values[v.ID].needReg {
1466 if v.Op == OpSelectN {
1467 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1468 } else {
1469 var i = 0
1470 if v.Op == OpSelect1 {
1471 i = 1
1472 }
1473 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1474 }
1475 }
1476 b.Values = append(b.Values, v)
1477 s.advanceUses(v)
1478 continue
1479 }
1480 if v.Op == OpGetG && s.f.Config.hasGReg {
1481
1482 if s.regs[s.GReg].v != nil {
1483 s.freeReg(s.GReg)
1484 }
1485 s.assignReg(s.GReg, v, v)
1486 b.Values = append(b.Values, v)
1487 s.advanceUses(v)
1488 continue
1489 }
1490 if v.Op == OpArg {
1491
1492
1493
1494 s.values[v.ID].spill = v
1495 b.Values = append(b.Values, v)
1496 s.advanceUses(v)
1497 continue
1498 }
1499 if v.Op == OpKeepAlive {
1500
1501 s.advanceUses(v)
1502 a := v.Args[0]
1503 vi := &s.values[a.ID]
1504 if vi.regs == 0 && !vi.rematerializeable {
1505
1506
1507
1508 v.SetArg(0, s.makeSpill(a, b))
1509 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1510
1511
1512
1513 v.Op = OpVarLive
1514 v.SetArgs1(v.Args[1])
1515 v.Aux = a.Aux
1516 } else {
1517
1518
1519
1520 v.Op = OpCopy
1521 v.SetArgs1(v.Args[1])
1522 }
1523 b.Values = append(b.Values, v)
1524 continue
1525 }
1526 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1527
1528 if s.doClobber && v.Op.IsCall() {
1529 s.clobberRegs(regspec.clobbers)
1530 }
1531 s.freeRegs(regspec.clobbers)
1532 b.Values = append(b.Values, v)
1533 s.advanceUses(v)
1534 continue
1535 }
1536
1537 if s.values[v.ID].rematerializeable {
1538
1539
1540
1541 for _, a := range v.Args {
1542 a.Uses--
1543 }
1544 s.advanceUses(v)
1545 continue
1546 }
1547
1548 if s.f.pass.debug > regDebug {
1549 fmt.Printf("value %s\n", v.LongString())
1550 fmt.Printf(" out:")
1551 for _, r := range dinfo[idx].out {
1552 if r != noRegister {
1553 fmt.Printf(" %s", &s.registers[r])
1554 }
1555 }
1556 fmt.Println()
1557 for i := 0; i < len(v.Args) && i < 3; i++ {
1558 fmt.Printf(" in%d:", i)
1559 for _, r := range dinfo[idx].in[i] {
1560 if r != noRegister {
1561 fmt.Printf(" %s", &s.registers[r])
1562 }
1563 }
1564 fmt.Println()
1565 }
1566 }
1567
1568
1569
1570
1571 args = append(args[:0], make([]*Value, len(v.Args))...)
1572 for i, a := range v.Args {
1573 if !s.values[a.ID].needReg {
1574 args[i] = a
1575 }
1576 }
1577 for _, i := range regspec.inputs {
1578 mask := i.regs
1579 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1580 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1581 }
1582 }
1583
1584
1585
1586
1587
1588
1589 for {
1590 freed := false
1591 for _, i := range regspec.inputs {
1592 if args[i.idx] != nil {
1593 continue
1594 }
1595 mask := i.regs
1596 if countRegs(mask) == 1 && mask&^s.used != 0 {
1597 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1598
1599
1600
1601 oldregs := s.values[v.Args[i.idx].ID].regs
1602 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1603 s.freeRegs(oldregs &^ mask &^ s.nospill)
1604 freed = true
1605 }
1606 }
1607 }
1608 if !freed {
1609 break
1610 }
1611 }
1612
1613
1614 for _, i := range regspec.inputs {
1615 if args[i.idx] != nil {
1616 continue
1617 }
1618 mask := i.regs
1619 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1620
1621 mask &= s.allocatable
1622 mask &^= s.nospill
1623
1624 if i.idx < 3 {
1625 for _, r := range dinfo[idx].in[i.idx] {
1626 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1627
1628 mask = regMask(1) << r
1629 break
1630 }
1631 }
1632 }
1633
1634 if mask&^desired.avoid != 0 {
1635 mask &^= desired.avoid
1636 }
1637 }
1638 if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
1639
1640
1641
1642 mask = 1 << s.SPReg
1643 }
1644 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1645 }
1646
1647
1648
1649
1650 if opcodeTable[v.Op].resultInArg0 {
1651 var m regMask
1652 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1653
1654 goto ok
1655 }
1656 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1657 args[0], args[1] = args[1], args[0]
1658 goto ok
1659 }
1660 if s.values[v.Args[0].ID].rematerializeable {
1661
1662 goto ok
1663 }
1664 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1665 args[0], args[1] = args[1], args[0]
1666 goto ok
1667 }
1668 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1669
1670 goto ok
1671 }
1672 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1673 args[0], args[1] = args[1], args[0]
1674 goto ok
1675 }
1676
1677
1678
1679
1680
1681 m = s.compatRegs(v.Args[0].Type) &^ s.used
1682 if m == 0 {
1683
1684
1685
1686
1687 goto ok
1688 }
1689
1690
1691 for _, r := range dinfo[idx].out {
1692 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1693 m = regMask(1) << r
1694 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1695
1696
1697 goto ok
1698 }
1699 }
1700
1701
1702 for _, r := range dinfo[idx].in[0] {
1703 if r != noRegister && m>>r&1 != 0 {
1704 m = regMask(1) << r
1705 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1706 s.copies[c] = false
1707
1708
1709 goto ok
1710 }
1711 }
1712 if opcodeTable[v.Op].commutative {
1713 for _, r := range dinfo[idx].in[1] {
1714 if r != noRegister && m>>r&1 != 0 {
1715 m = regMask(1) << r
1716 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1717 s.copies[c] = false
1718 args[0], args[1] = args[1], args[0]
1719 goto ok
1720 }
1721 }
1722 }
1723
1724
1725 if m&^desired.avoid != 0 {
1726 m &^= desired.avoid
1727 }
1728
1729 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1730 s.copies[c] = false
1731
1732
1733
1734
1735 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1736 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1737 r := register(rp.num)
1738 for _, r2 := range dinfo[idx].in[0] {
1739 if r == r2 {
1740 args[0] = c
1741 break
1742 }
1743 }
1744 }
1745 }
1746 }
1747 ok:
1748 for i := 0; i < 2; i++ {
1749 if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
1750 continue
1751 }
1752 if !s.liveAfterCurrentInstruction(v.Args[i]) {
1753
1754 continue
1755 }
1756 if s.values[v.Args[i].ID].rematerializeable {
1757
1758 continue
1759 }
1760 if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
1761
1762 continue
1763 }
1764
1765 m := s.compatRegs(v.Args[i].Type) &^ s.used
1766 if m == 0 {
1767
1768
1769
1770
1771 continue
1772 }
1773
1774 c := s.allocValToReg(v.Args[i], m, true, v.Pos)
1775 s.copies[c] = false
1776 }
1777
1778
1779
1780
1781
1782
1783
1784 if opcodeTable[v.Op].needIntTemp {
1785 m := s.allocatable & s.f.Config.gpRegMask
1786 for _, out := range regspec.outputs {
1787 if countRegs(out.regs) == 1 {
1788 m &^= out.regs
1789 }
1790 }
1791 if m&^desired.avoid&^s.nospill != 0 {
1792 m &^= desired.avoid
1793 }
1794 tmpReg = s.allocReg(m, &tmpVal)
1795 s.nospill |= regMask(1) << tmpReg
1796 s.tmpused |= regMask(1) << tmpReg
1797 }
1798
1799 if regspec.clobbersArg0 {
1800 s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
1801 }
1802 if regspec.clobbersArg1 {
1803 s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
1804 }
1805
1806
1807
1808
1809
1810 if !opcodeTable[v.Op].resultNotInArgs {
1811 s.tmpused = s.nospill
1812 s.nospill = 0
1813 s.advanceUses(v)
1814 }
1815
1816
1817 if s.doClobber && v.Op.IsCall() {
1818
1819
1820 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1821 }
1822 s.freeRegs(regspec.clobbers)
1823 s.tmpused |= regspec.clobbers
1824
1825
1826 {
1827 outRegs := noRegisters
1828 maxOutIdx := -1
1829 var used regMask
1830 if tmpReg != noRegister {
1831
1832
1833 used |= regMask(1) << tmpReg
1834 }
1835 for _, out := range regspec.outputs {
1836 if out.regs == 0 {
1837 continue
1838 }
1839 mask := out.regs & s.allocatable &^ used
1840 if mask == 0 {
1841 s.f.Fatalf("can't find any output register %s", v.LongString())
1842 }
1843 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1844 if !opcodeTable[v.Op].commutative {
1845
1846 r := register(s.f.getHome(args[0].ID).(*Register).num)
1847 if mask>>r&1 == 0 {
1848 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1849 }
1850 mask = regMask(1) << r
1851 } else {
1852
1853 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1854 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1855
1856 found := false
1857 for _, r := range dinfo[idx].out {
1858 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1859 mask = regMask(1) << r
1860 found = true
1861 if r == r1 {
1862 args[0], args[1] = args[1], args[0]
1863 }
1864 break
1865 }
1866 }
1867 if !found {
1868
1869 mask = regMask(1) << r0
1870 }
1871 }
1872 }
1873 if out.idx == 0 {
1874 for _, r := range dinfo[idx].out {
1875 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1876
1877 mask = regMask(1) << r
1878 break
1879 }
1880 }
1881 }
1882 if out.idx == 1 {
1883 if prefs, ok := desiredSecondReg[v.ID]; ok {
1884 for _, r := range prefs {
1885 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1886
1887 mask = regMask(1) << r
1888 break
1889 }
1890 }
1891 }
1892 }
1893
1894 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1895 mask &^= desired.avoid
1896 }
1897 r := s.allocReg(mask, v)
1898 if out.idx > maxOutIdx {
1899 maxOutIdx = out.idx
1900 }
1901 outRegs[out.idx] = r
1902 used |= regMask(1) << r
1903 s.tmpused |= regMask(1) << r
1904 }
1905
1906 if v.Type.IsTuple() {
1907 var outLocs LocPair
1908 if r := outRegs[0]; r != noRegister {
1909 outLocs[0] = &s.registers[r]
1910 }
1911 if r := outRegs[1]; r != noRegister {
1912 outLocs[1] = &s.registers[r]
1913 }
1914 s.f.setHome(v, outLocs)
1915
1916 } else if v.Type.IsResults() {
1917
1918 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1919 for i := 0; i <= maxOutIdx; i++ {
1920 if r := outRegs[i]; r != noRegister {
1921 outLocs[i] = &s.registers[r]
1922 }
1923 }
1924 s.f.setHome(v, outLocs)
1925 } else {
1926 if r := outRegs[0]; r != noRegister {
1927 s.assignReg(r, v, v)
1928 }
1929 }
1930 if tmpReg != noRegister {
1931
1932 if s.f.tempRegs == nil {
1933 s.f.tempRegs = map[ID]*Register{}
1934 }
1935 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1936 }
1937 }
1938
1939
1940 if opcodeTable[v.Op].resultNotInArgs {
1941 s.nospill = 0
1942 s.advanceUses(v)
1943 }
1944 s.tmpused = 0
1945
1946
1947 for i, a := range args {
1948 v.SetArg(i, a)
1949 }
1950 b.Values = append(b.Values, v)
1951 s.dropIfUnused(v)
1952 }
1953
1954
1955
1956 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1957
1958
1959 for i, v := range b.ControlValues() {
1960 if !s.values[v.ID].needReg {
1961 continue
1962 }
1963 if s.f.pass.debug > regDebug {
1964 fmt.Printf(" processing control %s\n", v.LongString())
1965 }
1966
1967
1968
1969 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1970 }
1971
1972
1973
1974 for _, v := range controls {
1975 vi := &s.values[v.ID]
1976 if !vi.needReg {
1977 continue
1978 }
1979
1980 u := vi.uses
1981 vi.uses = u.next
1982 if u.next == nil {
1983 s.freeRegs(vi.regs)
1984 }
1985 u.next = s.freeUseRecords
1986 s.freeUseRecords = u
1987 }
1988
1989
1990
1991
1992 if len(b.Succs) == 1 {
1993 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1994 s.freeReg(s.GReg)
1995 }
1996 if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
1997
1998 goto badloop
1999 }
2000
2001 top := b.Succs[0].b
2002 loop := s.loopnest.b2l[top.ID]
2003 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
2004 goto badloop
2005 }
2006
2007
2008 for _, live := range s.live[b.ID] {
2009 if live.dist >= unlikelyDistance {
2010
2011 continue
2012 }
2013 vid := live.ID
2014 vi := &s.values[vid]
2015 if vi.regs != 0 {
2016 continue
2017 }
2018 if vi.rematerializeable {
2019 continue
2020 }
2021 v := s.orig[vid]
2022 m := s.compatRegs(v.Type) &^ s.used
2023
2024 outerloop:
2025 for _, e := range desired.entries {
2026 if e.ID != v.ID {
2027 continue
2028 }
2029 for _, r := range e.regs {
2030 if r != noRegister && m>>r&1 != 0 {
2031 m = regMask(1) << r
2032 break outerloop
2033 }
2034 }
2035 }
2036 if m&^desired.avoid != 0 {
2037 m &^= desired.avoid
2038 }
2039 if m != 0 {
2040 s.allocValToReg(v, m, false, b.Pos)
2041 }
2042 }
2043 }
2044 badloop:
2045 ;
2046
2047
2048
2049 k := 0
2050 for r := register(0); r < s.numRegs; r++ {
2051 v := s.regs[r].v
2052 if v == nil {
2053 continue
2054 }
2055 k++
2056 }
2057 regList := make([]endReg, 0, k)
2058 for r := register(0); r < s.numRegs; r++ {
2059 v := s.regs[r].v
2060 if v == nil {
2061 continue
2062 }
2063 regList = append(regList, endReg{r, v, s.regs[r].c})
2064 }
2065 s.endRegs[b.ID] = regList
2066
2067 if checkEnabled {
2068 regValLiveSet.clear()
2069 if s.live != nil {
2070 for _, x := range s.live[b.ID] {
2071 regValLiveSet.add(x.ID)
2072 }
2073 }
2074 for r := register(0); r < s.numRegs; r++ {
2075 v := s.regs[r].v
2076 if v == nil {
2077 continue
2078 }
2079 if !regValLiveSet.contains(v.ID) {
2080 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
2081 }
2082 }
2083 }
2084
2085
2086
2087
2088
2089 if s.live != nil {
2090 for _, e := range s.live[b.ID] {
2091 vi := &s.values[e.ID]
2092 if vi.regs != 0 {
2093
2094 continue
2095 }
2096 if vi.rematerializeable {
2097
2098 continue
2099 }
2100 if s.f.pass.debug > regDebug {
2101 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
2102 }
2103 spill := s.makeSpill(s.orig[e.ID], b)
2104 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
2105 }
2106
2107
2108
2109
2110 for _, e := range s.live[b.ID] {
2111 u := s.values[e.ID].uses
2112 if u == nil {
2113 f.Fatalf("live at end, no uses v%d", e.ID)
2114 }
2115 if u.next != nil {
2116 f.Fatalf("live at end, too many uses v%d", e.ID)
2117 }
2118 s.values[e.ID].uses = nil
2119 u.next = s.freeUseRecords
2120 s.freeUseRecords = u
2121 }
2122 }
2123
2124
2125
2126
2127
2128
2129
2130 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
2131 regs := make([]startReg, 0, c)
2132 for _, sr := range s.startRegs[b.ID] {
2133 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
2134 continue
2135 }
2136 regs = append(regs, sr)
2137 }
2138 s.startRegs[b.ID] = regs
2139 }
2140 }
2141
2142
2143 s.placeSpills()
2144
2145
2146
2147 stacklive := stackalloc(s.f, s.spillLive)
2148
2149
2150 s.shuffle(stacklive)
2151
2152
2153
2154
2155 for {
2156 progress := false
2157 for c, used := range s.copies {
2158 if !used && c.Uses == 0 {
2159 if s.f.pass.debug > regDebug {
2160 fmt.Printf("delete copied value %s\n", c.LongString())
2161 }
2162 c.resetArgs()
2163 f.freeValue(c)
2164 delete(s.copies, c)
2165 progress = true
2166 }
2167 }
2168 if !progress {
2169 break
2170 }
2171 }
2172
2173 for _, b := range s.visitOrder {
2174 i := 0
2175 for _, v := range b.Values {
2176 if v.Op == OpInvalid {
2177 continue
2178 }
2179 b.Values[i] = v
2180 i++
2181 }
2182 b.Values = b.Values[:i]
2183 }
2184 }
2185
// placeSpills picks a block for each spill (StoreReg) value that does not
// yet have one, and then splices all spills into the Values lists of their
// chosen blocks. Spills are pushed down the dominator tree as far as
// possible (but never into a more deeply nested loop) while still
// dominating all restores, so fewer spills execute dynamically.
func (s *regAllocState) placeSpills() {
	// Ops that must appear at the very start of their block; a spill can
	// never be scheduled before them.
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// start[blockID] = spills to place at the start of that block,
	// after any mustBeFirst ops.
	start := map[ID][]*Value{}

	// after[valueID] = spills to place immediately after that value
	// within its own block.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			// Value was never spilled.
			continue
		}
		if spill.Block != nil {
			// Spill was already placed earlier (it has a block assigned).
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree from v's block looking for a
		// better place to put the spill: a block that still dominates
		// all restores (restoreMin/restoreMax bound their dominator-tree
		// entry/exit numbers) but is executed less often.

		// s.orig[i] must exist if a spill was created for value i.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block // best block found so far for the spill
		bestArg := v    // value to spill when placed in best
		var bestDepth int16
		if s.loopnest != nil && s.loopnest.b2l[best.ID] != nil {
			bestDepth = s.loopnest.b2l[best.ID].depth
		}
		b := best
		const maxSpillSearch = 100 // bound the walk so placement stays cheap
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree that also
			// dominates all restores of this value.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c dominates all restores; descend into it.
					b = c
					break
				}
			}
			if b == nil {
				// No child dominates all restores; stop descending.
				break
			}

			var depth int16
			if s.loopnest != nil && s.loopnest.b2l[b.ID] != nil {
				depth = s.loopnest.b2l[b.ID].depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop; keep looking
				// further down for a shallower candidate.
				continue
			}

			// If v is available in a register on entry to b, we can spill
			// the in-register copy there instead, making b a better spot.
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Assign the spill to the chosen block and argument.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place the spill immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place the spill at the start of the chosen block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert the spill instructions into the block schedules.
	var oldSched []*Value // scratch buffer reused across blocks
	for _, b := range s.visitOrder {
		// Keep the mustBeFirst prefix in place, then splice in the
		// start-of-block spills, then the rest of the original schedule
		// with per-value spills following their values.
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2308
2309
2310 func (s *regAllocState) shuffle(stacklive [][]ID) {
2311 var e edgeState
2312 e.s = s
2313 e.cache = map[ID][]*Value{}
2314 e.contents = map[Location]contentRecord{}
2315 if s.f.pass.debug > regDebug {
2316 fmt.Printf("shuffle %s\n", s.f.Name)
2317 fmt.Println(s.f.String())
2318 }
2319
2320 for _, b := range s.visitOrder {
2321 if len(b.Preds) <= 1 {
2322 continue
2323 }
2324 e.b = b
2325 for i, edge := range b.Preds {
2326 p := edge.b
2327 e.p = p
2328 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2329 e.process()
2330 }
2331 }
2332
2333 if s.f.pass.debug > regDebug {
2334 fmt.Printf("post shuffle %s\n", s.f.Name)
2335 fmt.Println(s.f.String())
2336 }
2337 }
2338
// edgeState holds the working state used while shuffling values along a
// single merge edge p->b.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge under consideration goes from p to b

	// cache maps a pre-regalloc value ID to the list of cached copies
	// (registers/spill slots) currently holding that value.
	cache      map[ID][]*Value
	cachedVals []ID // keys of cache, kept for deterministic iteration

	// contents records, for each location, which value it currently holds.
	contents map[Location]contentRecord

	// destinations are the remaining moves to satisfy; extra collects
	// follow-up moves generated while processing (see erase).
	destinations []dstRecord
	extra        []dstRecord

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only cached copy of some value
	finalRegs             regMask // registers holding a value in its final (destination) place
	rematerializeableRegs regMask // registers holding rematerializeable values
}
2359
// contentRecord records what a location currently holds during shuffling.
type contentRecord struct {
	vid   ID       // pre-regalloc value ID stored here
	c     *Value   // the cached copy of that value
	final bool     // this location is a satisfied final destination
	pos   src.XPos // source position of use of the value
}
2366
// dstRecord describes one desired final location for a value on a merge edge.
type dstRecord struct {
	loc    Location // register or stack slot the value must end up in
	vid    ID       // pre-regalloc value that should end up in loc
	splice **Value  // if non-nil, argument slot to rewrite with the generated value
	pos    src.XPos // source position of use of this location
}
2373
2374
// setup initializes the edge state for the idx'th predecessor edge of e.b.
// srcReg describes register contents at the end of the predecessor,
// dstReg the register contents e.b expects on entry, and stacklive the
// spill slots live at the end of the predecessor.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state left over from the previous edge.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers at the end of the predecessor are sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
	}
	// So are the live spill slots.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// The spill instruction has been placed in a block that does
			// not dominate the predecessor, so the spilled copy is not
			// actually available on this edge; skip it.
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
	}

	// Build the list of destinations to satisfy.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis in e.b need the idx'th argument delivered to the phi's home.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2441
2442
// process generates the moves that put every value into its destination
// location for the current edge, breaking move cycles with a temporary
// register when no direct progress is possible.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Couldn't be done this round - keep for the next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations generated while processing
			// (values displaced from their final locations by erase).
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// No progress was made, so every remaining destination is part of
		// a cycle (e.g. A wants B's location, B wants A's). Break the
		// cycle by copying the occupant of the first destination to a
		// scratch register; later iterations can then proceed.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		// Copy from a register or load from a stack slot into r.
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		// Sanity check: never load into the g register.
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2509
2510
2511
2512 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
2513 pos = pos.WithNotStmt()
2514 occupant := e.contents[loc]
2515 if occupant.vid == vid {
2516
2517 e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
2518 if splice != nil {
2519 (*splice).Uses--
2520 *splice = occupant.c
2521 occupant.c.Uses++
2522 }
2523
2524
2525
2526 if _, ok := e.s.copies[occupant.c]; ok {
2527
2528 e.s.copies[occupant.c] = true
2529 }
2530 return true
2531 }
2532
2533
2534 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
2535
2536
2537 return false
2538 }
2539
2540
2541 v := e.s.orig[vid]
2542 var c *Value
2543 var src Location
2544 if e.s.f.pass.debug > regDebug {
2545 fmt.Printf("moving v%d to %s\n", vid, loc)
2546 fmt.Printf("sources of v%d:", vid)
2547 }
2548 if opcodeTable[v.Op].fixedReg {
2549 c = v
2550 src = e.s.f.getHome(v.ID)
2551 } else {
2552 for _, w := range e.cache[vid] {
2553 h := e.s.f.getHome(w.ID)
2554 if e.s.f.pass.debug > regDebug {
2555 fmt.Printf(" %s:%s", h, w)
2556 }
2557 _, isreg := h.(*Register)
2558 if src == nil || isreg {
2559 c = w
2560 src = h
2561 }
2562 }
2563 }
2564 if e.s.f.pass.debug > regDebug {
2565 if src != nil {
2566 fmt.Printf(" [use %s]\n", src)
2567 } else {
2568 fmt.Printf(" [no source]\n")
2569 }
2570 }
2571 _, dstReg := loc.(*Register)
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583 e.erase(loc)
2584 var x *Value
2585 if c == nil || e.s.values[vid].rematerializeable {
2586 if !e.s.values[vid].rematerializeable {
2587 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
2588 }
2589 if dstReg {
2590
2591
2592
2593
2594 if e.s.regspec(v).outputs[0].regs®Mask(1<<register(loc.(*Register).num)) == 0 {
2595 _, srcReg := src.(*Register)
2596 if srcReg {
2597
2598
2599 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2600 } else {
2601
2602 x = v.copyInto(e.p)
2603 r := e.findRegFor(x.Type)
2604 e.erase(r)
2605
2606 e.set(r, vid, x, false, pos)
2607
2608 x = e.p.NewValue1(pos, OpCopy, x.Type, x)
2609 }
2610 } else {
2611 x = v.copyInto(e.p)
2612 }
2613 } else {
2614
2615
2616 r := e.findRegFor(v.Type)
2617 e.erase(r)
2618 x = v.copyIntoWithXPos(e.p, pos)
2619 e.set(r, vid, x, false, pos)
2620
2621
2622
2623 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
2624 }
2625 } else {
2626
2627 _, srcReg := src.(*Register)
2628 if srcReg {
2629 if dstReg {
2630 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2631 } else {
2632 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
2633 }
2634 } else {
2635 if dstReg {
2636 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2637 } else {
2638
2639 r := e.findRegFor(c.Type)
2640 e.erase(r)
2641 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2642 e.set(r, vid, t, false, pos)
2643 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
2644 }
2645 }
2646 }
2647 e.set(loc, vid, x, true, pos)
2648 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
2649 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
2650 }
2651 if splice != nil {
2652 (*splice).Uses--
2653 *splice = x
2654 x.Uses++
2655 }
2656 return true
2657 }
2658
2659
// set records that location loc now holds cached copy c of value vid,
// updating the cache, the contents map, and the register bookkeeping masks.
// final marks loc as a satisfied final destination.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First cached copy of this value; remember the key for iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			// This register holds the only copy of vid.
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			// A second copy now exists; the first register is no longer unique.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
2694
2695
// erase frees location loc of its current occupant, updating the cache and
// register bookkeeping. If loc was a satisfied final destination, a
// follow-up move to restore it is queued on e.extra.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		// Location is already empty.
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Appending to e.extra (drained by process) defers it until
		// other moves have made progress.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove the copy living at loc from the value's cache list.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			// Swap-remove; order of cached copies doesn't matter.
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register bookkeeping masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		// Exactly one copy remains; if it lives in a register, that
		// register now holds the unique copy.
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2737
2738
// findRegFor finds a register that can be used as a temporary for a value
// of type typ, spilling a register's current contents to the stack if
// every compatible register is occupied.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Registers compatible with typ.
	m := e.s.compatRegs(typ)

	// Pick a register, in priority order:
	// 1) an unused register
	// 2) a register holding neither a unique copy nor a final value
	// 3) a register not holding a unique copy
	// 4) a register holding a rematerializeable value (cheap to recreate)
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// All compatible registers hold unique, final, non-rematerializeable
	// values. Spill one of them to a fresh stack slot to free a register.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a fresh local slot to hold the spilled value.
					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, c.Type), Type: c.Type}
					// Record the value as now available in the slot.
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r may now be overwritten by the caller; the saved copy
				// can be restored later if needed.
				return r
			}
		}
	}

	// Should be unreachable: dump state and abort.
	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
2799
2800
2801
2802 func (v *Value) rematerializeable() bool {
2803 if !opcodeTable[v.Op].rematerializeable {
2804 return false
2805 }
2806 for _, a := range v.Args {
2807
2808
2809 if !opcodeTable[a.Op].fixedReg {
2810 return false
2811 }
2812 }
2813 return true
2814 }
2815
// liveInfo records a live value together with the distance (a weighted
// instruction count) to its next use and the source position of that use.
type liveInfo struct {
	ID   ID       // ID of the live value
	dist int32    // # of instructions before the next use
	pos  src.XPos // source position of the next use
}
2821
2822
2823
2824
// computeLive computes, for each block, the set of values live at the end
// of that block together with an estimated distance to each value's next
// use; the result is stored in s.live. It finishes by calling
// s.computeDesired to fill in s.desired.
//
// For reducible CFGs with loops it uses a two-phase scheme: a backward
// dataflow pass that also records loop-header live-in sets, followed by
// loop-based propagation and a distance-refinement fixed point. Irreducible
// CFGs (or loop-free ones) iterate the plain dataflow pass to convergence.
func (s *regAllocState) computeLive() {
	f := s.f
	// A single-block function has no values live at a block end.
	if len(f.Blocks) == 1 {
		return
	}
	po := f.postorder()
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	s.loopnest = f.loopnest()

	// Scratch buffer: rematerializeable values to drop across calls.
	rematIDs := make([]ID, 0, 64)

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	s.loopnest.computeUnavoidableCalls()

	// loopLiveIn collects the live-in set at each loop header; non-nil only
	// for reducible CFGs that actually have loops, where it enables the
	// faster loop-propagation scheme below (one dataflow pass instead of
	// iterating to a fixed point). numCalls counts calls per block for the
	// distance refinement phase.
	var loopLiveIn map[*loop][]liveInfo
	var numCalls []int32
	if len(s.loopnest.loops) > 0 && !s.loopnest.hasIrreducible {
		loopLiveIn = make(map[*loop][]liveInfo)
		numCalls = f.Cache.allocInt32Slice(f.NumBlocks())
		defer f.Cache.freeInt32Slice(numCalls)
	}

	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist, e.pos)
			}
			update := false
			// Phi args in successors are live out of this block along the
			// corresponding edge.
			for _, e := range b.Succs {
				succ := e.b
				delta := branchDistance(b, succ)
				for _, v := range succ.Values {
					if v.Op != OpPhi {
						break
					}
					arg := v.Args[e.i]
					if s.values[arg.ID].needReg && (!live.contains(arg.ID) || delta < live.get(arg.ID)) {
						live.set(arg.ID, delta, v.Pos)
						update = true
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}

			// Walking backwards through the block: everything live at the
			// end is now len(b.Values) instructions further away.
			c := live.contents()
			for i := range c {
				c[i].val += int32(len(b.Values))
			}

			// Control values are used at the end of the block.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Propagate backwards through each value in the block.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Phi inputs are handled on the incoming edges above.
					continue
				}
				if opcodeTable[v.Op].call {
					if numCalls != nil {
						numCalls[b.ID]++
					}
					// Values live across a call are penalized (they must be
					// spilled anyway); rematerializeable values are dropped
					// entirely since they are recomputed rather than kept.
					rematIDs = rematIDs[:0]
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
						vid := c[i].key
						if s.values[vid].rematerializeable {
							rematIDs = append(rematIDs, vid)
						}
					}
					// Remove outside the loop above to avoid mutating the
					// map while iterating its contents.
					for _, r := range rematIDs {
						live.remove(r)
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}

			// Record live-in sets at loop headers for the loop-propagation
			// phase below.
			if loopLiveIn != nil {
				loop := s.loopnest.b2l[b.ID]
				if loop != nil && loop.header.ID == b.ID {
					loopLiveIn[loop] = updateLive(live, nil)
				}
			}

			// Propagate this block's live-in set to each predecessor's
			// live-out set, adding the branch distance.
			for _, e := range b.Preds {
				p := e.b
				delta := branchDistance(p, b)

				// Start from the predecessor's current live-out set.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Merge, keeping the smaller distance for each value.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}

				if !update {
					continue
				}
				s.live[p.ID] = updateLive(t, s.live[p.ID])
				changed = true
			}
		}

		// Done when a full pass changes nothing.
		if !changed {
			break
		}

		// In the loop-propagation scheme one dataflow pass suffices;
		// loops are handled explicitly below.
		if loopLiveIn != nil {
			break
		}

		// Loop-free CFGs converge in a single backward pass.
		if len(s.loopnest.loops) == 0 {
			break
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after dfs walk", f, s.live, s.desired)
	}

	// Without the loop scheme, the fixed point above is the final answer.
	if loopLiveIn == nil {
		s.computeDesired()
		return
	}

	// Propagate loop-header live-in sets from outer loops to inner loops:
	// anything live into an outer loop is live throughout its inner loops.
	// Process loops outermost-first so additions cascade inward.
	loops := slices.Clone(s.loopnest.loops)
	slices.SortFunc(loops, func(a, b *loop) int {
		return cmp.Compare(a.depth, b.depth)
	})

	loopset := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(loopset)
	for _, loop := range loops {
		if loop.outer == nil {
			continue
		}
		livein := loopLiveIn[loop]
		loopset.clear()
		for _, l := range livein {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range loopLiveIn[loop.outer] {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, l.dist, l.pos)
				update = true
			}
		}
		if update {
			loopLiveIn[loop] = updateLive(loopset, livein)
		}
	}

	// Values added from loop headers have no distance yet.
	const unknownDistance = -1

	// Merge each loop's header live-in set into every block of the loop,
	// marking newly added values with an unknown distance to be refined
	// in the fixed point below.
	for _, b := range po {
		loop := s.loopnest.b2l[b.ID]
		if loop == nil {
			continue
		}
		headerLive := loopLiveIn[loop]
		loopset.clear()
		for _, l := range s.live[b.ID] {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range headerLive {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, unknownDistance, src.NoXPos)
				update = true
			}
		}
		if update {
			s.live[b.ID] = updateLive(loopset, s.live[b.ID])
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after live loop prop", f, s.live, s.desired)
	}

	// Iterate until every unknownDistance entry has been given a real
	// distance, pulling distances backward from successors.
	unfinishedBlocks := f.Cache.allocBlockSlice(len(po))
	defer f.Cache.freeBlockSlice(unfinishedBlocks)
	copy(unfinishedBlocks, po)

	for len(unfinishedBlocks) > 0 {
		n := 0
		for _, b := range unfinishedBlocks {
			live.clear()
			unfinishedValues := 0
			for _, l := range s.live[b.ID] {
				if l.dist == unknownDistance {
					unfinishedValues++
				}
				live.set(l.ID, l.dist, l.pos)
			}
			update := false
			for _, e := range b.Succs {
				succ := e.b
				for _, l := range s.live[succ.ID] {
					// Only values already live here with a known successor
					// distance can improve this block's estimate.
					if !live.contains(l.ID) || l.dist == unknownDistance {
						continue
					}
					// Distance through succ: succ's length plus the edge
					// weight, with the call penalty for succ's calls.
					dist := int32(len(succ.Values)) + l.dist + branchDistance(b, succ)
					dist += numCalls[succ.ID] * unlikelyDistance
					val := live.get(l.ID)
					switch {
					case val == unknownDistance:
						unfinishedValues--
						fallthrough
					case dist < val:
						update = true
						live.set(l.ID, dist, l.pos)
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}
			if unfinishedValues > 0 {
				// Still has unknown distances; revisit next round.
				unfinishedBlocks[n] = b
				n++
			}
		}
		unfinishedBlocks = unfinishedBlocks[:n]
	}

	s.computeDesired()

	if f.pass.debug > regDebug {
		s.debugPrintLive("final", f, s.live, s.desired)
	}
}
3127
3128
3129
3130
// computeDesired computes, for each block, the registers that values would
// like to end up in at the end of the block (s.desired). It walks blocks
// in postorder, scanning each block backwards and merging the resulting
// state into predecessors, iterating only when the CFG has loops.
func (s *regAllocState) computeDesired() {
	// Scratch state reused for every block.
	var desired desiredState
	f := s.f
	po := f.postorder()
	for {
		changed := false
		for _, b := range po {
			// Start from the desired state at the end of b and walk
			// backwards through the block's values.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// Phis live in a different world: their inputs are on
					// incoming edges, so their preferences don't propagate
					// through the block body.
					continue
				}
				regspec := s.regspec(v)
				// Registers clobbered by v are undesirable before v.
				desired.clobber(regspec.clobbers)
				// Inputs pinned to a single register generate preferences.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Propagate v's own preferences to its first (and, if
				// commutative, second) argument for ops whose result must
				// (or effectively does) share a register with an input.
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}
			// Merge the computed block-entry state into each predecessor's
			// block-exit state.
			for _, e := range b.Preds {
				p := e.b
				changed = s.desired[p.ID].merge(&desired) || changed
			}
		}
		// Loop-free reducible CFGs need only one pass; otherwise iterate
		// until a fixed point.
		if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
			break
		}
	}
}
3187
3188
3189 func updateLive(t *sparseMapPos, live []liveInfo) []liveInfo {
3190 live = live[:0]
3191 if cap(live) < t.size() {
3192 live = make([]liveInfo, 0, t.size())
3193 }
3194 for _, e := range t.contents() {
3195 live = append(live, liveInfo{e.key, e.val, e.pos})
3196 }
3197 return live
3198 }
3199
3200
3201
3202
3203 func branchDistance(b *Block, s *Block) int32 {
3204 if len(b.Succs) == 2 {
3205 if b.Succs[0].b == s && b.Likely == BranchLikely ||
3206 b.Succs[1].b == s && b.Likely == BranchUnlikely {
3207 return likelyDistance
3208 }
3209 if b.Succs[0].b == s && b.Likely == BranchUnlikely ||
3210 b.Succs[1].b == s && b.Likely == BranchLikely {
3211 return unlikelyDistance
3212 }
3213 }
3214
3215
3216 return normalDistance
3217 }
3218
3219 func (s *regAllocState) debugPrintLive(stage string, f *Func, live [][]liveInfo, desired []desiredState) {
3220 fmt.Printf("%s: live values at end of each block: %s\n", stage, f.Name)
3221 for _, b := range f.Blocks {
3222 s.debugPrintLiveBlock(b, live[b.ID], &desired[b.ID])
3223 }
3224 }
3225
3226 func (s *regAllocState) debugPrintLiveBlock(b *Block, live []liveInfo, desired *desiredState) {
3227 fmt.Printf(" %s:", b)
3228 slices.SortFunc(live, func(a, b liveInfo) int {
3229 return cmp.Compare(a.ID, b.ID)
3230 })
3231 for _, x := range live {
3232 fmt.Printf(" v%d(%d)", x.ID, x.dist)
3233 for _, e := range desired.entries {
3234 if e.ID != x.ID {
3235 continue
3236 }
3237 fmt.Printf("[")
3238 first := true
3239 for _, r := range e.regs {
3240 if r == noRegister {
3241 continue
3242 }
3243 if !first {
3244 fmt.Printf(",")
3245 }
3246 fmt.Print(&s.registers[r])
3247 first = false
3248 }
3249 fmt.Printf("]")
3250 }
3251 }
3252 if avoid := desired.avoid; avoid != 0 {
3253 fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
3254 }
3255 fmt.Println()
3256 }
3257
3258
// A desiredState represents desired register assignments for a set of
// values.
type desiredState struct {
	// Desired assignments will be small, so we just use a list
	// of valueID+registers entries.
	entries []desiredStateEntry

	// Registers that other values want to be in. This value will
	// contain at least the union of the regs fields of entries, but
	// may contain additional bits for values that were once in this
	// data structure but have since been removed (remove does not
	// clear avoid bits).
	avoid regMask
}

// A desiredStateEntry records the registers that one value would like
// to be placed in.
type desiredStateEntry struct {
	// ID of the value.
	ID ID
	// Registers it would like to be in, in priority order
	// (most preferred first). Unused slots hold noRegister.
	regs [4]register
}
3279
3280
3281 func (d *desiredState) get(vid ID) [4]register {
3282 for _, e := range d.entries {
3283 if e.ID == vid {
3284 return e.regs
3285 }
3286 }
3287 return [4]register{noRegister, noRegister, noRegister, noRegister}
3288 }
3289
3290
// add records that value vid would like to be in register r.
// r becomes vid's most preferred register; at most four registers are
// remembered per value, dropping the least preferred on overflow.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and already most preferred.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// r is already in the list; move it to the front.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		// r is not in the list; shift everything down one slot
		// (dropping the last register) and put r at the front.
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	// No entry for vid yet; create one with r as the only register.
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
3316
3317 func (d *desiredState) addList(vid ID, regs [4]register) {
3318
3319 for i := len(regs) - 1; i >= 0; i-- {
3320 r := regs[i]
3321 if r != noRegister {
3322 d.add(vid, r)
3323 }
3324 }
3325 }
3326
3327
// clobber erases any desired registers in the set m.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		// Compact e.regs in place, keeping only registers that are
		// not in m (and not noRegister).
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No more desired registers for this value; delete the
			// entry by swapping in the last one (order-destroying
			// delete, so i is not advanced).
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		// Pad the tail of the list back out with noRegister.
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}
3351
3352
3353 func (d *desiredState) copy(x *desiredState) {
3354 d.entries = append(d.entries[:0], x.entries...)
3355 d.avoid = x.avoid
3356 }
3357
3358
3359 func (d *desiredState) remove(vid ID) [4]register {
3360 for i := range d.entries {
3361 if d.entries[i].ID == vid {
3362 regs := d.entries[i].regs
3363 d.entries[i] = d.entries[len(d.entries)-1]
3364 d.entries = d.entries[:len(d.entries)-1]
3365 return regs
3366 }
3367 }
3368 return [4]register{noRegister, noRegister, noRegister, noRegister}
3369 }
3370
3371
3372
// merge merges the desired state x into d, reporting whether d
// changed.
//
// NOTE(review): change detection watches only the avoid mask, so a
// merge that adds or reorders entries using registers already in
// d.avoid reports "no change". Presumably this approximation is
// deliberate — it bounds the fixed-point iteration in computeDesired,
// since avoid can only grow — but confirm before relying on the
// return value for full precision.
func (d *desiredState) merge(x *desiredState) bool {
	oldAvoid := d.avoid
	d.avoid |= x.avoid
	for _, e := range x.entries {
		d.addList(e.ID, e.regs)
	}
	return oldAvoid != d.avoid
}
3383
3384
// computeUnavoidableCalls sets containsUnavoidableCall on every loop
// for which there is no call-free path from the loop header back to
// itself, i.e. every trip around the loop must execute a call.
func (loopnest *loopnest) computeUnavoidableCalls() {
	f := loopnest.f

	// hasCall[b.ID] reports whether block b contains a call.
	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
	defer f.Cache.freeBoolSlice(hasCall)
	for _, b := range f.Blocks {
		if b.containsCall() {
			hasCall[b.ID] = true
		}
	}
	// found tracks blocks already queued for the current loop's search.
	found := f.Cache.allocSparseSet(f.NumBlocks())
	defer f.Cache.freeSparseSet(found)

	// For each loop, do a depth-first search starting at the header,
	// pruning at any block that contains a call and at any block
	// outside the loop (and its inner loops). If the search reaches
	// the header again, a call-free path around the loop exists.
loopLoop:
	for _, l := range loopnest.loops {
		found.clear()
		tovisit := make([]*Block, 0, 8)
		tovisit = append(tovisit, l.header)
		for len(tovisit) > 0 {
			// Pop the next block off the worklist.
			cur := tovisit[len(tovisit)-1]
			tovisit = tovisit[:len(tovisit)-1]
			if hasCall[cur.ID] {
				// Don't search past blocks that contain calls.
				continue
			}
			for _, s := range cur.Succs {
				nb := s.Block()
				if nb == l.header {
					// Made it back around to the header on a
					// call-free path: this loop's calls are
					// avoidable, so move on to the next loop.
					continue loopLoop
				}
				if found.contains(nb.ID) {
					// Already queued or visited.
					continue
				}
				nl := loopnest.b2l[nb.ID]
				if nl == nil || (nl.depth <= l.depth && nl != l) {
					// nb is outside loop l (not l itself and not a
					// deeper, i.e. inner, loop); don't follow.
					continue
				}
				tovisit = append(tovisit, nb)
				found.add(nb.ID)
			}
		}
		// The search was exhausted without returning to the header:
		// every path around the loop hits a call.
		l.containsUnavoidableCall = true
	}
}
3440
3441 func (b *Block) containsCall() bool {
3442 if b.Kind == BlockDefer {
3443 return true
3444 }
3445 for _, v := range b.Values {
3446 if opcodeTable[v.Op].call {
3447 return true
3448 }
3449 }
3450 return false
3451 }
3452
View as plain text