1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "fmt"
123 "internal/buildcfg"
124 "math"
125 "math/bits"
126 "unsafe"
127 )
128
// Debug output thresholds, compared against f.pass.debug: output of a
// given category is emitted when the pass's debug level exceeds the
// corresponding constant (e.g. s.f.pass.debug > regDebug).
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
135
136
137
// Edge-distance weights used when estimating how far in the future a
// value's next use is: edges thought likely to be taken contribute a
// small distance, unlikely edges a large one.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
143
144
145
// regalloc performs register allocation on f, recording the result in
// f.RegAlloc (initialized in s.init). The three phases must run in
// this order: init builds state, regalloc assigns, close releases
// cached resources.
func regalloc(f *Func) {
	var s regAllocState
	s.init(f)
	s.regalloc(f)
	s.close()
}
152
// register is the index of one machine register, numbered densely from 0.
type register uint8

// noRegister is the sentinel "no register assigned".
const noRegister register = 255

// noRegisters is a ready-made array of 32 noRegister entries, useful
// for initializing register-preference lists.
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}

// A regMask encodes a set of machine registers, one bit per register number.
type regMask uint64

// String returns the members of m as a space-separated list of "rN"
// names, in increasing register order. The empty set yields "".
func (m regMask) String() string {
	s := ""
	for m != 0 {
		// Peel off the lowest set bit.
		r := bits.TrailingZeros64(uint64(m))
		m &^= regMask(1) << r
		if s != "" {
			s += " "
		}
		s += fmt.Sprintf("r%d", r)
	}
	return s
}

// contains reports whether register r is a member of the set m.
func (m regMask) contains(r register) bool {
	return m&(regMask(1)<<r) != 0
}
187
188 func (s *regAllocState) RegMaskString(m regMask) string {
189 str := ""
190 for r := register(0); m != 0; r++ {
191 if m>>r&1 == 0 {
192 continue
193 }
194 m &^= regMask(1) << r
195 if str != "" {
196 str += " "
197 }
198 str += s.registers[r].String()
199 }
200 return str
201 }
202
203
// countRegs returns the number of registers in the register set r.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
207
208
// pickReg picks the lowest-numbered register in the set r.
// It panics if r is empty.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}

	// Lowest set bit == lowest-numbered register.
	return register(bits.TrailingZeros64(uint64(r)))
}
216
// A use records a single use of a Value.
type use struct {
	// dist is the distance from the start of the block to this use:
	//   dist == 0               used by the first instruction in the block
	//   dist == len(b.Values)   used by the block's control value
	//   dist  > len(b.Values)   used by a subsequent block
	// (see the addUse calls in regalloc's per-block setup)
	dist int32
	pos  src.XPos // source position of the use
	next *use     // linked list of uses of this value, in nondecreasing dist order
}
227
228
// valState records the allocator's state for one (pre-regalloc) Value.
type valState struct {
	regs              regMask // set of registers currently holding this value (usually at most one)
	uses              *use    // list of remaining uses, in nondecreasing dist order
	spill             *Value  // spilled copy of the value, if one has been made (see makeSpill)
	restoreMin        int32   // min sdom entry number over blocks where restores occur
	restoreMax        int32   // max sdom exit number over blocks where restores occur
	needReg           bool    // value needs a register (cached needRegister())
	rematerializeable bool    // cached v.rematerializeable()
}
238
// regState records what a single register currently holds.
type regState struct {
	v *Value // original (pre-regalloc) value
	c *Value // copy of v actually resident in this register (may be v itself)
	// If the register is unused, v == c == nil.
}
244
// regAllocState is the complete working state of the register
// allocator for one function.
type regAllocState struct {
	f *Func

	sdom        SparseTree
	registers   []Register
	numRegs     register
	SPReg       register // stack pointer register (named "SP")
	SBReg       register // static base register (named "SB")
	GReg        register // g register, if the architecture has one
	ZeroIntReg  register // hardware zero register (named "ZERO"), if any
	allocatable regMask  // registers we are allowed to allocate from

	// live values at the end of each block. live[b.ID] is a list of
	// value IDs live at the end of b, each with a distance (in
	// instructions) forward to its next use.
	live [][]liveInfo

	// desired register assignments at the end of each block.
	// Static hints computed up front; dynamic desires discovered
	// during allocation take precedence.
	desired []desiredState

	// current state of each (pre-regalloc) Value, indexed by ID
	values []valState

	// IDs of the SP and SB values, once encountered
	sp, sb ID

	// orig maps from a value ID back to the pre-regalloc Value it was
	// copied/loaded from (see setOrig).
	orig []*Value

	// current state of each register, indexed by register number
	regs []regState

	// registers holding values that may not be kicked out right now
	nospill regMask

	// registers currently in use
	used regMask

	// registers used at any point since the start of the current block
	usedSinceBlockStart regMask

	// registers used as temporaries by the current instruction
	tmpused regMask

	// block currently being processed
	curBlock *Block

	// free list of use records, recycled to reduce allocation
	freeUseRecords *use

	// endRegs[blockid] is the register state at the end of each block,
	// encoded as a set of endReg records.
	endRegs [][]endReg

	// startRegs[blockid] is the register state required at the start of
	// merge blocks; it does not include the block's phi values.
	startRegs [][]startReg

	// startRegsMask is the mask of registers in
	// startRegs[curBlock.ID]; bits may be dropped during allocation
	// when an entry register turns out to be unneeded (see allocReg).
	startRegsMask regMask

	// spillLive[blockid] is the set of live spills at the end of each block
	spillLive [][]ID

	// copies generated to move values between registers, mapped to
	// whether each copy ended up being used; unused ones are deleted.
	copies map[*Value]bool

	loopnest *loopnest

	// blocks in the order they are visited for allocation
	visitOrder []*Block

	// blockOrder[b.ID] is b's index in visitOrder
	blockOrder []int32

	// whether to insert instructions clobbering dead registers at
	// call sites (ClobberDeadReg debug flag)
	doClobber bool

	// nextCall[i], for instruction index i within the current block,
	// is the index of the next call at or after i.
	// A call instruction's own entry points to itself; if there is no
	// later call the entry is math.MaxInt32.
	nextCall []int32

	// index of the instruction currently being processed, relative to
	// the current block (not the whole function)
	curIdx int
}
341
// endReg records, for a block's exit state, that register r holds
// pre-regalloc value v via the cached copy c.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register
	c *Value // cached version of the value actually in the register
}

// startReg records that a merge block expects register r to hold
// pre-regalloc value v (copy c) on entry; pos is the source position
// of the use that motivated the requirement.
type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached version of the value
	pos src.XPos // source position of the use of this register
}
354
355
// freeReg frees register r, clearing its regState and removing it
// from its value's register set. Fatal if r is already free.
func (s *regAllocState) freeReg(r register) {
	// Untracked registers (non-allocatable, other than g) carry no state.
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	v := s.regs[r].v
	if v == nil {
		s.f.Fatalf("tried to free an already free register %d\n", r)
	}

	// Mark r as unused.
	if s.f.pass.debug > regDebug {
		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
	}
	s.regs[r] = regState{}
	s.values[v.ID].regs &^= regMask(1) << r
	s.used &^= regMask(1) << r
}
373
374
375 func (s *regAllocState) freeRegs(m regMask) {
376 for m&s.used != 0 {
377 s.freeReg(pickReg(m & s.used))
378 }
379 }
380
381
// clobberRegs emits OpClobberReg values for the allocatable
// general-purpose registers in m (used under the ClobberDeadReg debug
// flag to detect stale reads of dead registers).
func (s *regAllocState) clobberRegs(m regMask) {
	// Restrict to allocatable GP registers; FP/special registers are
	// not clobbered here.
	m &= s.allocatable & s.f.Config.gpRegMask
	for m != 0 {
		r := pickReg(m)
		m &^= 1 << r
		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
		s.f.setHome(x, &s.registers[r])
	}
}
391
392
393
// setOrig records that c has the same original (pre-regalloc) value
// as v. Grows s.orig as needed; fatal if c's original is set twice.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	if int(c.ID) >= cap(s.orig) {
		// Grow the backing slice via the function cache to reduce allocation.
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	// Propagate orig[v.ID] rather than v itself: v may already be a
	// copy whose true original is recorded in s.orig.
	s.orig[c.ID] = s.orig[v.ID]
}
409
410
411
// assignReg assigns register r to hold c, a copy of pre-regalloc
// value v. For tracked registers, r must currently be free.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	// Record the register in v's set and c's home location even for
	// untracked (fixed) registers.
	s.values[v.ID].regs |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])

	// Only tracked registers (allocatable, or g) maintain regState.
	if !s.allocatable.contains(r) && !s.isGReg(r) {
		return
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}
	s.regs[r] = regState{v, c}
	s.used |= regMask(1) << r
}
430
431
432
433
434 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
435 if v.OnWasmStack {
436 return noRegister
437 }
438
439 mask &= s.allocatable
440 mask &^= s.nospill
441 if mask == 0 {
442 s.f.Fatalf("no register available for %s", v.LongString())
443 }
444
445
446 if mask&^s.used != 0 {
447 r := pickReg(mask &^ s.used)
448 s.usedSinceBlockStart |= regMask(1) << r
449 return r
450 }
451
452
453
454
455
456
457
458
459
460
461
462 var r register
463 maxuse := int32(-1)
464 for t := register(0); t < s.numRegs; t++ {
465 if mask>>t&1 == 0 {
466 continue
467 }
468 v := s.regs[t].v
469 if n := s.values[v.ID].uses.dist; n > maxuse {
470
471
472 r = t
473 maxuse = n
474 }
475 }
476 if maxuse == -1 {
477 s.f.Fatalf("couldn't find register to spill")
478 }
479
480 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
481
482
483
484 s.freeReg(r)
485 return r
486 }
487
488
489
490 v2 := s.regs[r].v
491 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
492 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
493 s.usedSinceBlockStart |= regMask(1) << r
494 r2 := pickReg(m)
495 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
496 s.copies[c] = false
497 if s.f.pass.debug > regDebug {
498 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
499 }
500 s.setOrig(c, v2)
501 s.assignReg(r2, v2, c)
502 }
503
504
505
506
507 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
508 if s.startRegsMask&(regMask(1)<<r) == 1 {
509 if s.f.pass.debug > regDebug {
510 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
511 }
512 s.startRegsMask &^= regMask(1) << r
513 }
514 }
515
516 s.freeReg(r)
517 s.usedSinceBlockStart |= regMask(1) << r
518 return r
519 }
520
521
522
// makeSpill returns a Value representing the spilled copy of v.
// b is the block in which the spill will be used.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// A spill already exists; widen the dominator-tree interval
		// that must contain all restores.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a new spill. Its final block and argument are not known
	// yet, so it is created blockless and argless for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	// Record the spill's original value and initialize the restore
	// interval to b's dominator-tree span.
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
542
543
544
545
546
547
548
// allocValToReg allocates v to a register selected from mask and
// returns the in-register copy of v. Any previous user of the chosen
// register is kicked out (and spilled if necessary); load/copy code
// is added to the current block. If nospill is set, the register is
// marked so the assignment cannot be undone until the caller clears
// s.nospill.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		// On wasm, rematerializeable values are recomputed directly
		// onto the wasm stack; no register is needed.
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Fast path: v is already in one of the requested registers.
	if mask&vi.regs != 0 {
		mask &= vi.regs
		r := pickReg(mask)
		if mask.contains(s.SPReg) {
			// If SP is an allowed choice, always prefer it over any
			// other register holding the value.
			r = s.SPReg
		}
		if !s.allocatable.contains(r) {
			// Fixed (untracked) register: no cached copy exists; use v itself.
			return v
		}
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register
	// If nospill is set the value is used immediately, so on wasm it
	// can live on the wasm stack instead of a register.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register (possibly evicting another value).
		r = s.allocReg(mask, v)
	}

	// Produce a copy of v in the new register.
	var c *Value
	if vi.regs != 0 {
		// v is already in some other register: copy from there.
		r2 := pickReg(vi.regs)
		var current *Value
		if !s.allocatable.contains(r2) {
			current = v // fixed register: no cached copy
		} else {
			if s.regs[r2].v != v {
				panic("bad register state")
			}
			current = s.regs[r2].c
		}
		s.usedSinceBlockStart |= regMask(1) << r2
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
		// If the rematerializing op cannot target any register in
		// mask, first rematerialize into a register it CAN produce,
		// then copy into a register from mask.
		sourceMask := s.regspec(c).outputs[0].regs
		if mask&sourceMask == 0 && !onWasmStack {
			s.setOrig(c, v)
			s.assignReg(s.allocReg(sourceMask, v), v, c)
			// The copy (assigned to r below) moves the value into the
			// requested register set.
			c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
		}
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
660
661
662 func isLeaf(f *Func) bool {
663 for _, b := range f.Blocks {
664 for _, v := range b.Values {
665 if v.Op.IsCall() && !v.Op.IsTailCall() {
666
667 return false
668 }
669 }
670 }
671 return true
672 }
673
674
675 func (v *Value) needRegister() bool {
676 return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
677 }
678
// init prepares the allocator state for f: it identifies the special
// registers (SP, SB, g, zero), computes the allocatable register set,
// fixes a block visit order, sets up per-value state, and computes
// liveness. Must be called before s.regalloc.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// The register count must fit both in a register index and in a regMask.
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, g, and the hardware zero register by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	s.ZeroIntReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		case "ZERO":
			s.ZeroIntReg = r
		}
	}
	// Make sure the required special registers were found.
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if s.ZeroIntReg != noRegister {
		s.allocatable &^= 1 << s.ZeroIntReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		// Reserve the frame pointer register.
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register;
			// keep it out of the allocatable set.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// Some architectures reserve a register when dynamically linking.
		switch s.f.Config.arch {
		case "386":
			// nothing reserved
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing reserved
		case "loong64":
			// nothing reserved
		case "ppc64le":
			// nothing reserved
		case "riscv64":
			// nothing reserved
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Decouple the register allocation order from the generated block
	// order; linear scan results depend on the order in which blocks
	// are visited.
	s.visitOrder = layoutRegallocOrder(f)

	// blockOrder lets us distinguish forward from backward edges and
	// measure how far they go.
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	// Reuse the cached valState slice when it is big enough.
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needReg stays false for tuple-typed values; their
			// Select ops are marked needReg instead.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// On wasm, mark values that can live purely on the wasm operand
	// stack instead of in registers.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// New block: reset the candidate set.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walk backwards, adding candidates and dropping the set
			// when an unsuitable value would break stack ordering.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Value cannot live on the stack; values may not
					// be reordered, so clear the candidate set.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// An arg can live on the stack if it is used only
					// once, in the same block, is not a mem value,
					// and is a non-generic (wasm) op.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// Enable dead-register clobbering at call sites when requested,
	// skipping huge functions to bound the extra work.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		s.doClobber = true
	}
}
846
// close releases cache-backed resources held by the allocator state.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
850
851
852
853 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
854 r := s.freeUseRecords
855 if r != nil {
856 s.freeUseRecords = r.next
857 } else {
858 r = &use{}
859 }
860 r.dist = dist
861 r.pos = pos
862 r.next = s.values[id].uses
863 s.values[id].uses = r
864 if r.next != nil && dist > r.next.dist {
865 s.f.Fatalf("uses added in wrong order")
866 }
867 }
868
869
870
// advanceUses pops the current use record of each of v's args and
// frees the registers of any arg that is dead (or, unless it lives in
// a fixed register, not used again before the next call).
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		// Free the registers if there are no more uses, or if the next
		// use is after the next call (fixed-register values are kept).
		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
			s.freeRegs(ai.regs)
		}
		// Recycle the use record.
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
888
889
890
// dropIfUnused frees v's registers if v is never used again, or
// (unless v lives in a fixed register) not used again before the
// next call.
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
		s.freeRegs(vi.regs)
	}
}
901
902
903
904
905 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
906 u := s.values[v.ID].uses
907 if u == nil {
908 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
909 }
910 d := u.dist
911 for u != nil && u.dist == d {
912 u = u.next
913 }
914 return u != nil && u.dist > d
915 }
916
917
918 func (s *regAllocState) setState(regs []endReg) {
919 s.freeRegs(s.used)
920 for _, x := range regs {
921 s.assignReg(x.r, x.v, x.c)
922 }
923 }
924
925
926 func (s *regAllocState) compatRegs(t *types.Type) regMask {
927 var m regMask
928 if t.IsTuple() || t.IsFlags() {
929 return 0
930 }
931 if t.IsFloat() || t == types.TypeInt128 {
932 if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
933 m = s.f.Config.fp32RegMask
934 } else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
935 m = s.f.Config.fp64RegMask
936 } else {
937 m = s.f.Config.fpRegMask
938 }
939 } else {
940 m = s.f.Config.gpRegMask
941 }
942 return m & s.allocatable
943 }
944
945
946 func (s *regAllocState) regspec(v *Value) regInfo {
947 op := v.Op
948 if op == OpConvert {
949
950
951
952 m := s.allocatable & s.f.Config.gpRegMask
953 return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
954 }
955 if op == OpArgIntReg {
956 reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
957 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
958 }
959 if op == OpArgFloatReg {
960 reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
961 return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
962 }
963 if op.IsCall() {
964 if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
965 return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
966 }
967 }
968 if op == OpMakeResult && s.f.OwnAux.reg != nil {
969 return *s.f.OwnAux.ResultReg(s.f.Config)
970 }
971 return opcodeTable[op].reg
972 }
973
// isGReg reports whether r is the g register (meaningful only on
// architectures that dedicate a register to g).
func (s *regAllocState) isGReg(r register) bool {
	return s.f.Config.hasGReg && s.GReg == r
}
977
978
// tmpVal is a package-level scratch Value. It is not referenced in
// this portion of the file — presumably used as reusable temporary
// storage elsewhere in the allocator; TODO(review): confirm.
var tmpVal Value
980
981 func (s *regAllocState) regalloc(f *Func) {
982 regValLiveSet := f.newSparseSet(f.NumValues())
983 defer f.retSparseSet(regValLiveSet)
984 var oldSched []*Value
985 var phis []*Value
986 var phiRegs []register
987 var args []*Value
988
989
990 var desired desiredState
991 desiredSecondReg := map[ID][4]register{}
992
993
994 type dentry struct {
995 out [4]register
996 in [3][4]register
997 }
998 var dinfo []dentry
999
1000 if f.Entry != f.Blocks[0] {
1001 f.Fatalf("entry block must be first")
1002 }
1003
1004 for _, b := range s.visitOrder {
1005 if s.f.pass.debug > regDebug {
1006 fmt.Printf("Begin processing block %v\n", b)
1007 }
1008 s.curBlock = b
1009 s.startRegsMask = 0
1010 s.usedSinceBlockStart = 0
1011 clear(desiredSecondReg)
1012
1013
1014
1015 regValLiveSet.clear()
1016 for _, e := range s.live[b.ID] {
1017 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
1018 regValLiveSet.add(e.ID)
1019 }
1020 for _, v := range b.ControlValues() {
1021 if s.values[v.ID].needReg {
1022 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
1023 regValLiveSet.add(v.ID)
1024 }
1025 }
1026 if len(s.nextCall) < len(b.Values) {
1027 s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
1028 }
1029 var nextCall int32 = math.MaxInt32
1030 for i := len(b.Values) - 1; i >= 0; i-- {
1031 v := b.Values[i]
1032 regValLiveSet.remove(v.ID)
1033 if v.Op == OpPhi {
1034
1035
1036
1037 s.nextCall[i] = nextCall
1038 continue
1039 }
1040 if opcodeTable[v.Op].call {
1041
1042 regValLiveSet.clear()
1043 if s.sp != 0 && s.values[s.sp].uses != nil {
1044 regValLiveSet.add(s.sp)
1045 }
1046 if s.sb != 0 && s.values[s.sb].uses != nil {
1047 regValLiveSet.add(s.sb)
1048 }
1049 nextCall = int32(i)
1050 }
1051 for _, a := range v.Args {
1052 if !s.values[a.ID].needReg {
1053 continue
1054 }
1055 s.addUse(a.ID, int32(i), v.Pos)
1056 regValLiveSet.add(a.ID)
1057 }
1058 s.nextCall[i] = nextCall
1059 }
1060 if s.f.pass.debug > regDebug {
1061 fmt.Printf("use distances for %s\n", b)
1062 for i := range s.values {
1063 vi := &s.values[i]
1064 u := vi.uses
1065 if u == nil {
1066 continue
1067 }
1068 fmt.Printf(" v%d:", i)
1069 for u != nil {
1070 fmt.Printf(" %d", u.dist)
1071 u = u.next
1072 }
1073 fmt.Println()
1074 }
1075 }
1076
1077
1078
1079 nphi := 0
1080 for _, v := range b.Values {
1081 if v.Op != OpPhi {
1082 break
1083 }
1084 nphi++
1085 }
1086 phis = append(phis[:0], b.Values[:nphi]...)
1087 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1088 b.Values = b.Values[:0]
1089
1090
1091 if b == f.Entry {
1092
1093 if nphi > 0 {
1094 f.Fatalf("phis in entry block")
1095 }
1096 } else if len(b.Preds) == 1 {
1097
1098 s.setState(s.endRegs[b.Preds[0].b.ID])
1099 if nphi > 0 {
1100 f.Fatalf("phis in single-predecessor block")
1101 }
1102
1103
1104
1105 for r := register(0); r < s.numRegs; r++ {
1106 v := s.regs[r].v
1107 if v != nil && !regValLiveSet.contains(v.ID) {
1108 s.freeReg(r)
1109 }
1110 }
1111 } else {
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125 idx := -1
1126 for i, p := range b.Preds {
1127
1128
1129 pb := p.b
1130 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1131 continue
1132 }
1133 if idx == -1 {
1134 idx = i
1135 continue
1136 }
1137 pSel := b.Preds[idx].b
1138 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1139 idx = i
1140 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1152 idx = i
1153 }
1154 }
1155 }
1156 if idx < 0 {
1157 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1158 }
1159 p := b.Preds[idx].b
1160 s.setState(s.endRegs[p.ID])
1161
1162 if s.f.pass.debug > regDebug {
1163 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1164 for _, x := range s.endRegs[p.ID] {
1165 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1166 }
1167 }
1168
1169
1170
1171
1172
1173 phiRegs = phiRegs[:0]
1174 var phiUsed regMask
1175
1176 for _, v := range phis {
1177 if !s.values[v.ID].needReg {
1178 phiRegs = append(phiRegs, noRegister)
1179 continue
1180 }
1181 a := v.Args[idx]
1182
1183
1184 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1185 if m != 0 {
1186 r := pickReg(m)
1187 phiUsed |= regMask(1) << r
1188 phiRegs = append(phiRegs, r)
1189 } else {
1190 phiRegs = append(phiRegs, noRegister)
1191 }
1192 }
1193
1194
1195 for i, v := range phis {
1196 if !s.values[v.ID].needReg {
1197 continue
1198 }
1199 a := v.Args[idx]
1200 r := phiRegs[i]
1201 if r == noRegister {
1202 continue
1203 }
1204 if regValLiveSet.contains(a.ID) {
1205
1206
1207
1208
1209
1210
1211
1212
1213 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1214 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1215 r2 := pickReg(m)
1216 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1217 s.copies[c] = false
1218 if s.f.pass.debug > regDebug {
1219 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1220 }
1221 s.setOrig(c, a)
1222 s.assignReg(r2, a, c)
1223 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1224 }
1225 }
1226 s.freeReg(r)
1227 }
1228
1229
1230 b.Values = append(b.Values, phis...)
1231
1232
1233
1234 for i, v := range phis {
1235 if !s.values[v.ID].needReg {
1236 continue
1237 }
1238 if phiRegs[i] != noRegister {
1239 continue
1240 }
1241 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1242
1243
1244 for i, pe := range b.Preds {
1245 if i == idx {
1246 continue
1247 }
1248 ri := noRegister
1249 for _, er := range s.endRegs[pe.b.ID] {
1250 if er.v == s.orig[v.Args[i].ID] {
1251 ri = er.r
1252 break
1253 }
1254 }
1255 if ri != noRegister && m>>ri&1 != 0 {
1256 m = regMask(1) << ri
1257 break
1258 }
1259 }
1260 if m != 0 {
1261 r := pickReg(m)
1262 phiRegs[i] = r
1263 phiUsed |= regMask(1) << r
1264 }
1265 }
1266
1267
1268 for i, v := range phis {
1269 if !s.values[v.ID].needReg {
1270 continue
1271 }
1272 r := phiRegs[i]
1273 if r == noRegister {
1274
1275
1276 s.values[v.ID].spill = v
1277 continue
1278 }
1279
1280 s.assignReg(r, v, v)
1281 }
1282
1283
1284 for r := register(0); r < s.numRegs; r++ {
1285 if phiUsed>>r&1 != 0 {
1286 continue
1287 }
1288 v := s.regs[r].v
1289 if v != nil && !regValLiveSet.contains(v.ID) {
1290 s.freeReg(r)
1291 }
1292 }
1293
1294
1295
1296
1297
1298 regList := make([]startReg, 0, 32)
1299 for r := register(0); r < s.numRegs; r++ {
1300 v := s.regs[r].v
1301 if v == nil {
1302 continue
1303 }
1304 if phiUsed>>r&1 != 0 {
1305
1306
1307 continue
1308 }
1309 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1310 s.startRegsMask |= regMask(1) << r
1311 }
1312 s.startRegs[b.ID] = make([]startReg, len(regList))
1313 copy(s.startRegs[b.ID], regList)
1314
1315 if s.f.pass.debug > regDebug {
1316 fmt.Printf("after phis\n")
1317 for _, x := range s.startRegs[b.ID] {
1318 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1319 }
1320 }
1321 }
1322
1323
1324 for i, v := range phis {
1325 s.curIdx = i
1326 s.dropIfUnused(v)
1327 }
1328
1329
1330 if l := len(oldSched); cap(dinfo) < l {
1331 dinfo = make([]dentry, l)
1332 } else {
1333 dinfo = dinfo[:l]
1334 clear(dinfo)
1335 }
1336
1337
1338 desired.copy(&s.desired[b.ID])
1339
1340
1341
1342
1343
1344
1345 for _, e := range b.Succs {
1346 succ := e.b
1347
1348 for _, x := range s.startRegs[succ.ID] {
1349 desired.add(x.v.ID, x.r)
1350 }
1351
1352 pidx := e.i
1353 for _, v := range succ.Values {
1354 if v.Op != OpPhi {
1355 break
1356 }
1357 if !s.values[v.ID].needReg {
1358 continue
1359 }
1360 rp, ok := s.f.getHome(v.ID).(*Register)
1361 if !ok {
1362
1363
1364
1365
1366 for _, a := range v.Args {
1367 rp, ok = s.f.getHome(a.ID).(*Register)
1368 if ok {
1369 break
1370 }
1371 }
1372 if !ok {
1373 continue
1374 }
1375 }
1376 desired.add(v.Args[pidx].ID, register(rp.num))
1377 }
1378 }
1379
1380
1381 for i := len(oldSched) - 1; i >= 0; i-- {
1382 v := oldSched[i]
1383 prefs := desired.remove(v.ID)
1384 regspec := s.regspec(v)
1385 desired.clobber(regspec.clobbers)
1386 for _, j := range regspec.inputs {
1387 if countRegs(j.regs) != 1 {
1388 continue
1389 }
1390 desired.clobber(j.regs)
1391 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1392 }
1393 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1394 if opcodeTable[v.Op].commutative {
1395 desired.addList(v.Args[1].ID, prefs)
1396 }
1397 desired.addList(v.Args[0].ID, prefs)
1398 }
1399
1400 dinfo[i].out = prefs
1401 for j, a := range v.Args {
1402 if j >= len(dinfo[i].in) {
1403 break
1404 }
1405 dinfo[i].in[j] = desired.get(a.ID)
1406 }
1407 if v.Op == OpSelect1 && prefs[0] != noRegister {
1408
1409
1410 desiredSecondReg[v.Args[0].ID] = prefs
1411 }
1412 }
1413
1414
1415 for idx, v := range oldSched {
1416 s.curIdx = nphi + idx
1417 tmpReg := noRegister
1418 if s.f.pass.debug > regDebug {
1419 fmt.Printf(" processing %s\n", v.LongString())
1420 }
1421 regspec := s.regspec(v)
1422 if v.Op == OpPhi {
1423 f.Fatalf("phi %s not at start of block", v)
1424 }
1425 if opcodeTable[v.Op].fixedReg {
1426 switch v.Op {
1427 case OpSP:
1428 s.assignReg(s.SPReg, v, v)
1429 s.sp = v.ID
1430 case OpSB:
1431 s.assignReg(s.SBReg, v, v)
1432 s.sb = v.ID
1433 case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
1434 s.assignReg(s.ZeroIntReg, v, v)
1435 default:
1436 f.Fatalf("unknown fixed-register op %s", v)
1437 }
1438 b.Values = append(b.Values, v)
1439 s.advanceUses(v)
1440 continue
1441 }
1442 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1443 if s.values[v.ID].needReg {
1444 if v.Op == OpSelectN {
1445 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1446 } else {
1447 var i = 0
1448 if v.Op == OpSelect1 {
1449 i = 1
1450 }
1451 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1452 }
1453 }
1454 b.Values = append(b.Values, v)
1455 s.advanceUses(v)
1456 continue
1457 }
1458 if v.Op == OpGetG && s.f.Config.hasGReg {
1459
1460 if s.regs[s.GReg].v != nil {
1461 s.freeReg(s.GReg)
1462 }
1463 s.assignReg(s.GReg, v, v)
1464 b.Values = append(b.Values, v)
1465 s.advanceUses(v)
1466 continue
1467 }
1468 if v.Op == OpArg {
1469
1470
1471
1472 s.values[v.ID].spill = v
1473 b.Values = append(b.Values, v)
1474 s.advanceUses(v)
1475 continue
1476 }
1477 if v.Op == OpKeepAlive {
1478
1479 s.advanceUses(v)
1480 a := v.Args[0]
1481 vi := &s.values[a.ID]
1482 if vi.regs == 0 && !vi.rematerializeable {
1483
1484
1485
1486 v.SetArg(0, s.makeSpill(a, b))
1487 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1488
1489
1490
1491 v.Op = OpVarLive
1492 v.SetArgs1(v.Args[1])
1493 v.Aux = a.Aux
1494 } else {
1495
1496
1497
1498 v.Op = OpCopy
1499 v.SetArgs1(v.Args[1])
1500 }
1501 b.Values = append(b.Values, v)
1502 continue
1503 }
1504 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1505
1506 if s.doClobber && v.Op.IsCall() {
1507 s.clobberRegs(regspec.clobbers)
1508 }
1509 s.freeRegs(regspec.clobbers)
1510 b.Values = append(b.Values, v)
1511 s.advanceUses(v)
1512 continue
1513 }
1514
1515 if s.values[v.ID].rematerializeable {
1516
1517
1518
1519 for _, a := range v.Args {
1520 a.Uses--
1521 }
1522 s.advanceUses(v)
1523 continue
1524 }
1525
1526 if s.f.pass.debug > regDebug {
1527 fmt.Printf("value %s\n", v.LongString())
1528 fmt.Printf(" out:")
1529 for _, r := range dinfo[idx].out {
1530 if r != noRegister {
1531 fmt.Printf(" %s", &s.registers[r])
1532 }
1533 }
1534 fmt.Println()
1535 for i := 0; i < len(v.Args) && i < 3; i++ {
1536 fmt.Printf(" in%d:", i)
1537 for _, r := range dinfo[idx].in[i] {
1538 if r != noRegister {
1539 fmt.Printf(" %s", &s.registers[r])
1540 }
1541 }
1542 fmt.Println()
1543 }
1544 }
1545
1546
1547
1548
1549 args = append(args[:0], make([]*Value, len(v.Args))...)
1550 for i, a := range v.Args {
1551 if !s.values[a.ID].needReg {
1552 args[i] = a
1553 }
1554 }
1555 for _, i := range regspec.inputs {
1556 mask := i.regs
1557 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1558 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1559 }
1560 }
1561
1562
1563
1564
1565
1566
1567 for {
1568 freed := false
1569 for _, i := range regspec.inputs {
1570 if args[i.idx] != nil {
1571 continue
1572 }
1573 mask := i.regs
1574 if countRegs(mask) == 1 && mask&^s.used != 0 {
1575 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1576
1577
1578
1579 oldregs := s.values[v.Args[i.idx].ID].regs
1580 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1581 s.freeRegs(oldregs &^ mask &^ s.nospill)
1582 freed = true
1583 }
1584 }
1585 }
1586 if !freed {
1587 break
1588 }
1589 }
1590
1591
1592 for _, i := range regspec.inputs {
1593 if args[i.idx] != nil {
1594 continue
1595 }
1596 mask := i.regs
1597 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1598
1599 mask &= s.allocatable
1600 mask &^= s.nospill
1601
1602 if i.idx < 3 {
1603 for _, r := range dinfo[idx].in[i.idx] {
1604 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1605
1606 mask = regMask(1) << r
1607 break
1608 }
1609 }
1610 }
1611
1612 if mask&^desired.avoid != 0 {
1613 mask &^= desired.avoid
1614 }
1615 }
1616 if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
1617
1618
1619
1620 mask = 1 << s.SPReg
1621 }
1622 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1623 }
1624
1625
1626
1627
1628 if opcodeTable[v.Op].resultInArg0 {
1629 var m regMask
1630 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1631
1632 goto ok
1633 }
1634 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1635 args[0], args[1] = args[1], args[0]
1636 goto ok
1637 }
1638 if s.values[v.Args[0].ID].rematerializeable {
1639
1640 goto ok
1641 }
1642 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1643 args[0], args[1] = args[1], args[0]
1644 goto ok
1645 }
1646 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1647
1648 goto ok
1649 }
1650 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1651 args[0], args[1] = args[1], args[0]
1652 goto ok
1653 }
1654
1655
1656
1657
1658
1659 m = s.compatRegs(v.Args[0].Type) &^ s.used
1660 if m == 0 {
1661
1662
1663
1664
1665 goto ok
1666 }
1667
1668
1669 for _, r := range dinfo[idx].out {
1670 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1671 m = regMask(1) << r
1672 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1673
1674
1675 goto ok
1676 }
1677 }
1678
1679
1680 for _, r := range dinfo[idx].in[0] {
1681 if r != noRegister && m>>r&1 != 0 {
1682 m = regMask(1) << r
1683 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1684 s.copies[c] = false
1685
1686
1687 goto ok
1688 }
1689 }
1690 if opcodeTable[v.Op].commutative {
1691 for _, r := range dinfo[idx].in[1] {
1692 if r != noRegister && m>>r&1 != 0 {
1693 m = regMask(1) << r
1694 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1695 s.copies[c] = false
1696 args[0], args[1] = args[1], args[0]
1697 goto ok
1698 }
1699 }
1700 }
1701
1702
1703 if m&^desired.avoid != 0 {
1704 m &^= desired.avoid
1705 }
1706
1707 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1708 s.copies[c] = false
1709
1710
1711
1712
1713 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1714 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1715 r := register(rp.num)
1716 for _, r2 := range dinfo[idx].in[0] {
1717 if r == r2 {
1718 args[0] = c
1719 break
1720 }
1721 }
1722 }
1723 }
1724 }
1725 ok:
1726 for i := 0; i < 2; i++ {
1727 if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
1728 continue
1729 }
1730 if !s.liveAfterCurrentInstruction(v.Args[i]) {
1731
1732 continue
1733 }
1734 if s.values[v.Args[i].ID].rematerializeable {
1735
1736 continue
1737 }
1738 if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
1739
1740 continue
1741 }
1742
1743 m := s.compatRegs(v.Args[i].Type) &^ s.used
1744 if m == 0 {
1745
1746
1747
1748
1749 continue
1750 }
1751
1752 c := s.allocValToReg(v.Args[i], m, true, v.Pos)
1753 s.copies[c] = false
1754 }
1755
1756
1757
1758
1759
1760
1761
1762 if opcodeTable[v.Op].needIntTemp {
1763 m := s.allocatable & s.f.Config.gpRegMask
1764 for _, out := range regspec.outputs {
1765 if countRegs(out.regs) == 1 {
1766 m &^= out.regs
1767 }
1768 }
1769 if m&^desired.avoid&^s.nospill != 0 {
1770 m &^= desired.avoid
1771 }
1772 tmpReg = s.allocReg(m, &tmpVal)
1773 s.nospill |= regMask(1) << tmpReg
1774 s.tmpused |= regMask(1) << tmpReg
1775 }
1776
1777 if regspec.clobbersArg0 {
1778 s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
1779 }
1780 if regspec.clobbersArg1 {
1781 s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
1782 }
1783
1784
1785
1786
1787
1788 if !opcodeTable[v.Op].resultNotInArgs {
1789 s.tmpused = s.nospill
1790 s.nospill = 0
1791 s.advanceUses(v)
1792 }
1793
1794
1795 if s.doClobber && v.Op.IsCall() {
1796
1797
1798 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1799 }
1800 s.freeRegs(regspec.clobbers)
1801 s.tmpused |= regspec.clobbers
1802
1803
1804 {
1805 outRegs := noRegisters
1806 maxOutIdx := -1
1807 var used regMask
1808 if tmpReg != noRegister {
1809
1810
1811 used |= regMask(1) << tmpReg
1812 }
1813 for _, out := range regspec.outputs {
1814 if out.regs == 0 {
1815 continue
1816 }
1817 mask := out.regs & s.allocatable &^ used
1818 if mask == 0 {
1819 s.f.Fatalf("can't find any output register %s", v.LongString())
1820 }
1821 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1822 if !opcodeTable[v.Op].commutative {
1823
1824 r := register(s.f.getHome(args[0].ID).(*Register).num)
1825 if mask>>r&1 == 0 {
1826 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1827 }
1828 mask = regMask(1) << r
1829 } else {
1830
1831 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1832 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1833
1834 found := false
1835 for _, r := range dinfo[idx].out {
1836 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1837 mask = regMask(1) << r
1838 found = true
1839 if r == r1 {
1840 args[0], args[1] = args[1], args[0]
1841 }
1842 break
1843 }
1844 }
1845 if !found {
1846
1847 mask = regMask(1) << r0
1848 }
1849 }
1850 }
1851 if out.idx == 0 {
1852 for _, r := range dinfo[idx].out {
1853 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1854
1855 mask = regMask(1) << r
1856 break
1857 }
1858 }
1859 }
1860 if out.idx == 1 {
1861 if prefs, ok := desiredSecondReg[v.ID]; ok {
1862 for _, r := range prefs {
1863 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1864
1865 mask = regMask(1) << r
1866 break
1867 }
1868 }
1869 }
1870 }
1871
1872 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1873 mask &^= desired.avoid
1874 }
1875 r := s.allocReg(mask, v)
1876 if out.idx > maxOutIdx {
1877 maxOutIdx = out.idx
1878 }
1879 outRegs[out.idx] = r
1880 used |= regMask(1) << r
1881 s.tmpused |= regMask(1) << r
1882 }
1883
1884 if v.Type.IsTuple() {
1885 var outLocs LocPair
1886 if r := outRegs[0]; r != noRegister {
1887 outLocs[0] = &s.registers[r]
1888 }
1889 if r := outRegs[1]; r != noRegister {
1890 outLocs[1] = &s.registers[r]
1891 }
1892 s.f.setHome(v, outLocs)
1893
1894 } else if v.Type.IsResults() {
1895
1896 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1897 for i := 0; i <= maxOutIdx; i++ {
1898 if r := outRegs[i]; r != noRegister {
1899 outLocs[i] = &s.registers[r]
1900 }
1901 }
1902 s.f.setHome(v, outLocs)
1903 } else {
1904 if r := outRegs[0]; r != noRegister {
1905 s.assignReg(r, v, v)
1906 }
1907 }
1908 if tmpReg != noRegister {
1909
1910 if s.f.tempRegs == nil {
1911 s.f.tempRegs = map[ID]*Register{}
1912 }
1913 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1914 }
1915 }
1916
1917
1918 if opcodeTable[v.Op].resultNotInArgs {
1919 s.nospill = 0
1920 s.advanceUses(v)
1921 }
1922 s.tmpused = 0
1923
1924
1925 for i, a := range args {
1926 v.SetArg(i, a)
1927 }
1928 b.Values = append(b.Values, v)
1929 s.dropIfUnused(v)
1930 }
1931
1932
1933
1934 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1935
1936
1937 for i, v := range b.ControlValues() {
1938 if !s.values[v.ID].needReg {
1939 continue
1940 }
1941 if s.f.pass.debug > regDebug {
1942 fmt.Printf(" processing control %s\n", v.LongString())
1943 }
1944
1945
1946
1947 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1948 }
1949
1950
1951
1952 for _, v := range controls {
1953 vi := &s.values[v.ID]
1954 if !vi.needReg {
1955 continue
1956 }
1957
1958 u := vi.uses
1959 vi.uses = u.next
1960 if u.next == nil {
1961 s.freeRegs(vi.regs)
1962 }
1963 u.next = s.freeUseRecords
1964 s.freeUseRecords = u
1965 }
1966
1967
1968
1969
1970 if len(b.Succs) == 1 {
1971 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1972 s.freeReg(s.GReg)
1973 }
1974 if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
1975
1976 goto badloop
1977 }
1978
1979 top := b.Succs[0].b
1980 loop := s.loopnest.b2l[top.ID]
1981 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1982 goto badloop
1983 }
1984
1985
1986 for _, live := range s.live[b.ID] {
1987 if live.dist >= unlikelyDistance {
1988
1989 continue
1990 }
1991 vid := live.ID
1992 vi := &s.values[vid]
1993 if vi.regs != 0 {
1994 continue
1995 }
1996 if vi.rematerializeable {
1997 continue
1998 }
1999 v := s.orig[vid]
2000 m := s.compatRegs(v.Type) &^ s.used
2001
2002 outerloop:
2003 for _, e := range desired.entries {
2004 if e.ID != v.ID {
2005 continue
2006 }
2007 for _, r := range e.regs {
2008 if r != noRegister && m>>r&1 != 0 {
2009 m = regMask(1) << r
2010 break outerloop
2011 }
2012 }
2013 }
2014 if m&^desired.avoid != 0 {
2015 m &^= desired.avoid
2016 }
2017 if m != 0 {
2018 s.allocValToReg(v, m, false, b.Pos)
2019 }
2020 }
2021 }
2022 badloop:
2023 ;
2024
2025
2026
2027 k := 0
2028 for r := register(0); r < s.numRegs; r++ {
2029 v := s.regs[r].v
2030 if v == nil {
2031 continue
2032 }
2033 k++
2034 }
2035 regList := make([]endReg, 0, k)
2036 for r := register(0); r < s.numRegs; r++ {
2037 v := s.regs[r].v
2038 if v == nil {
2039 continue
2040 }
2041 regList = append(regList, endReg{r, v, s.regs[r].c})
2042 }
2043 s.endRegs[b.ID] = regList
2044
2045 if checkEnabled {
2046 regValLiveSet.clear()
2047 for _, x := range s.live[b.ID] {
2048 regValLiveSet.add(x.ID)
2049 }
2050 for r := register(0); r < s.numRegs; r++ {
2051 v := s.regs[r].v
2052 if v == nil {
2053 continue
2054 }
2055 if !regValLiveSet.contains(v.ID) {
2056 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
2057 }
2058 }
2059 }
2060
2061
2062
2063
2064
2065 for _, e := range s.live[b.ID] {
2066 vi := &s.values[e.ID]
2067 if vi.regs != 0 {
2068
2069 continue
2070 }
2071 if vi.rematerializeable {
2072
2073 continue
2074 }
2075 if s.f.pass.debug > regDebug {
2076 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
2077 }
2078 spill := s.makeSpill(s.orig[e.ID], b)
2079 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
2080 }
2081
2082
2083
2084
2085 for _, e := range s.live[b.ID] {
2086 u := s.values[e.ID].uses
2087 if u == nil {
2088 f.Fatalf("live at end, no uses v%d", e.ID)
2089 }
2090 if u.next != nil {
2091 f.Fatalf("live at end, too many uses v%d", e.ID)
2092 }
2093 s.values[e.ID].uses = nil
2094 u.next = s.freeUseRecords
2095 s.freeUseRecords = u
2096 }
2097
2098
2099
2100
2101
2102
2103
2104 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
2105 regs := make([]startReg, 0, c)
2106 for _, sr := range s.startRegs[b.ID] {
2107 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
2108 continue
2109 }
2110 regs = append(regs, sr)
2111 }
2112 s.startRegs[b.ID] = regs
2113 }
2114 }
2115
2116
2117 s.placeSpills()
2118
2119
2120
2121 stacklive := stackalloc(s.f, s.spillLive)
2122
2123
2124 s.shuffle(stacklive)
2125
2126
2127
2128
2129 for {
2130 progress := false
2131 for c, used := range s.copies {
2132 if !used && c.Uses == 0 {
2133 if s.f.pass.debug > regDebug {
2134 fmt.Printf("delete copied value %s\n", c.LongString())
2135 }
2136 c.resetArgs()
2137 f.freeValue(c)
2138 delete(s.copies, c)
2139 progress = true
2140 }
2141 }
2142 if !progress {
2143 break
2144 }
2145 }
2146
2147 for _, b := range s.visitOrder {
2148 i := 0
2149 for _, v := range b.Values {
2150 if v.Op == OpInvalid {
2151 continue
2152 }
2153 b.Values[i] = v
2154 i++
2155 }
2156 b.Values = b.Values[:i]
2157 }
2158 }
2159
// placeSpills adds spill instructions to the program.
// Spills are placed as close to their restores as possible, while
// staying out of loops.
func (s *regAllocState) placeSpills() {
	// Values that must stay at the very top of their block
	// (closure pointer loads, phis, register args).
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// start maps block IDs to the list of spills
	// that go at the start of that block (but after any phis).
	start := map[ID][]*Value{}
	// after maps value IDs to the list of spills
	// that go immediately after that value ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up
			// (they were placed during the main allocation pass).
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v. At each step, "best" is the best place
		// found so far: the shallowest (loop-depth-wise) block that
		// dominates all restores of v.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		// Cap the search so this stays roughly linear overall.
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores (using the sdom entry/exit
			// interval numbering against restoreMin/restoreMax).
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after any phis); record the
			// cached register copy as the spill's argument.
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		// Keep the must-be-first prefix in place.
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2282
2283
2284 func (s *regAllocState) shuffle(stacklive [][]ID) {
2285 var e edgeState
2286 e.s = s
2287 e.cache = map[ID][]*Value{}
2288 e.contents = map[Location]contentRecord{}
2289 if s.f.pass.debug > regDebug {
2290 fmt.Printf("shuffle %s\n", s.f.Name)
2291 fmt.Println(s.f.String())
2292 }
2293
2294 for _, b := range s.visitOrder {
2295 if len(b.Preds) <= 1 {
2296 continue
2297 }
2298 e.b = b
2299 for i, edge := range b.Preds {
2300 p := edge.b
2301 e.p = p
2302 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2303 e.process()
2304 }
2305 }
2306
2307 if s.f.pass.debug > regDebug {
2308 fmt.Printf("post shuffle %s\n", s.f.Name)
2309 fmt.Println(s.f.String())
2310 }
2311 }
2312
// edgeState holds the working state used while fixing up a single
// predecessor->successor edge during the shuffle phase.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p to b

	// For each pre-regalloc value, a list of equivalent cached values.
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration

	// Map from location (register or stack slot) to the value it currently contains.
	contents map[Location]contentRecord

	// Desired destination locations still to be satisfied.
	destinations []dstRecord
	extra        []dstRecord // destinations discovered while processing (e.g. via erase)

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only cached copy of some value
	finalRegs             regMask // registers holding a satisfied (end-of-edge) value
	rematerializeableRegs regMask // registers holding rematerializeable values
}
2333
// contentRecord records what a location currently holds during shuffle.
type contentRecord struct {
	vid   ID       // pre-regalloc value ID
	c     *Value   // cached value holding it
	final bool     // this location is a satisfied destination
	pos   src.XPos // source position of use of the value
}
2340
// dstRecord describes a location that must end up holding a particular
// pre-regalloc value at the end of the edge being processed.
type dstRecord struct {
	loc    Location // register or stack slot to fill
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // if non-nil, where to splice in a reference to the generating instruction
	pos    src.XPos // source position of use of this location
}
2347
2348
// setup initializes the edge state for processing predecessor index idx
// of e.b: srcReg describes registers live at the end of e.p, dstReg the
// registers expected at the start of e.b, and stacklive the spill slots
// live out of e.p.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state from the previous edge.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers at the end of p can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos)
	}
	// So can the live spill slots, but only those whose spill
	// actually dominates this edge.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// The spill placed by placeSpills does not dominate this
			// edge, so its slot may not contain a valid copy of v here.
			// NOTE(review): this relies on v being available in a
			// register on this edge instead — confirm against the
			// spill-placement invariants in the allocation pass.
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
	}

	// Figure out all the destinations we need:
	// registers expected at the start of b...
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// ...plus the locations of b's phi arguments for this edge.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break // phis are scheduled first; stop at the first non-phi
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2415
2416
// process generates code to move all destination values into place.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Not satisfiable yet - keep for the next round.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations generated while processing
			// (e.g. values displaced from their final location).
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// No progress was made, which means the remaining unsatisfied
		// moves form one or more cycles, e.g.:
		//
		//   A ----> B
		//   ^       |
		//   |       v
		//   D <---- C
		//
		// Break a cycle by copying the value in the first blocked
		// destination to a free temporary location. That destination
		// can then be overwritten on the next round, and the displaced
		// value remains available (from the temp) for its own move.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		// Copy or load into the temp register depending on whether
		// the cycle location is itself a register or a stack slot.
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2483
2484
2485
2486 func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
2487 pos = pos.WithNotStmt()
2488 occupant := e.contents[loc]
2489 if occupant.vid == vid {
2490
2491 e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
2492 if splice != nil {
2493 (*splice).Uses--
2494 *splice = occupant.c
2495 occupant.c.Uses++
2496 }
2497
2498
2499
2500 if _, ok := e.s.copies[occupant.c]; ok {
2501
2502 e.s.copies[occupant.c] = true
2503 }
2504 return true
2505 }
2506
2507
2508 if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
2509
2510
2511 return false
2512 }
2513
2514
2515 v := e.s.orig[vid]
2516 var c *Value
2517 var src Location
2518 if e.s.f.pass.debug > regDebug {
2519 fmt.Printf("moving v%d to %s\n", vid, loc)
2520 fmt.Printf("sources of v%d:", vid)
2521 }
2522 if opcodeTable[v.Op].fixedReg {
2523 c = v
2524 src = e.s.f.getHome(v.ID)
2525 } else {
2526 for _, w := range e.cache[vid] {
2527 h := e.s.f.getHome(w.ID)
2528 if e.s.f.pass.debug > regDebug {
2529 fmt.Printf(" %s:%s", h, w)
2530 }
2531 _, isreg := h.(*Register)
2532 if src == nil || isreg {
2533 c = w
2534 src = h
2535 }
2536 }
2537 }
2538 if e.s.f.pass.debug > regDebug {
2539 if src != nil {
2540 fmt.Printf(" [use %s]\n", src)
2541 } else {
2542 fmt.Printf(" [no source]\n")
2543 }
2544 }
2545 _, dstReg := loc.(*Register)
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557 e.erase(loc)
2558 var x *Value
2559 if c == nil || e.s.values[vid].rematerializeable {
2560 if !e.s.values[vid].rematerializeable {
2561 e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
2562 }
2563 if dstReg {
2564
2565
2566
2567
2568 if e.s.regspec(v).outputs[0].regs®Mask(1<<register(loc.(*Register).num)) == 0 {
2569 _, srcReg := src.(*Register)
2570 if srcReg {
2571
2572
2573 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2574 } else {
2575
2576 x = v.copyInto(e.p)
2577 r := e.findRegFor(x.Type)
2578 e.erase(r)
2579
2580 e.set(r, vid, x, false, pos)
2581
2582 x = e.p.NewValue1(pos, OpCopy, x.Type, x)
2583 }
2584 } else {
2585 x = v.copyInto(e.p)
2586 }
2587 } else {
2588
2589
2590 r := e.findRegFor(v.Type)
2591 e.erase(r)
2592 x = v.copyIntoWithXPos(e.p, pos)
2593 e.set(r, vid, x, false, pos)
2594
2595
2596
2597 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
2598 }
2599 } else {
2600
2601 _, srcReg := src.(*Register)
2602 if srcReg {
2603 if dstReg {
2604 x = e.p.NewValue1(pos, OpCopy, c.Type, c)
2605 } else {
2606 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
2607 }
2608 } else {
2609 if dstReg {
2610 x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2611 } else {
2612
2613 r := e.findRegFor(c.Type)
2614 e.erase(r)
2615 t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
2616 e.set(r, vid, t, false, pos)
2617 x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
2618 }
2619 }
2620 }
2621 e.set(loc, vid, x, true, pos)
2622 if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
2623 e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
2624 }
2625 if splice != nil {
2626 (*splice).Uses--
2627 *splice = x
2628 x.Uses++
2629 }
2630 return true
2631 }
2632
2633
// set records that location loc now holds pre-regalloc value vid via
// cached value c, updating the cache and all register bookkeeping masks.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First cached copy of vid: remember the key for iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			// Only copy of vid, so its register is "unique".
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			// vid now has a second copy; the first copy's register
			// (if any) is no longer unique.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
2668
2669
// erase removes the value currently cached at location loc, updating the
// cache and register masks. If the evicted value was a satisfied (final)
// destination, a new destination record is queued to restore it later.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return // loc was empty
	}
	vid := cr.vid

	if cr.final {
		// Queue a destination to move this value back into place; it
		// goes on the extra list so other moves are tried first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove the copy living at loc from vid's cache list
	// (swap-with-last delete).
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register bookkeeping masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		// vid is down to a single copy; if that copy is in a register,
		// that register is now unique.
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2711
2712
// findRegFor finds a register that can be used as a temporary for a value
// of type typ, spilling an occupant to a fresh stack slot if necessary.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Registers compatible with typ.
	m := e.s.compatRegs(typ)

	// Pick a register, in priority order:
	//  1) an unused register
	//  2) a non-unique register not holding a final value
	//  3) a non-unique register
	//  4) a register holding a rematerializeable value
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is free: spill some occupant of a compatible register
	// to a new stack slot, then hand its register to the caller.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					// Save c to a fresh local slot so vid stays available.
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, c.Type), Type: c.Type}
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r will be overwritten by the caller. If it held a
				// final value, the saved copy will be restored later
				// via the destination queued by erase.
				return r
			}
		}
	}

	// Should be unreachable: dump state and abort.
	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
2773
2774
2775
2776 func (v *Value) rematerializeable() bool {
2777 if !opcodeTable[v.Op].rematerializeable {
2778 return false
2779 }
2780 for _, a := range v.Args {
2781
2782
2783 if !opcodeTable[a.Op].fixedReg {
2784 return false
2785 }
2786 }
2787 return true
2788 }
2789
// liveInfo records a value live at the end of a block, together with the
// distance (in instructions) to its next use.
type liveInfo struct {
	ID   ID       // ID of the live value
	dist int32    // # of instructions before the next use
	pos  src.XPos // source position of the next use
}
2795
2796
2797
2798
2799
2800
2801
2802
2803
// computeLive computes, for each block, the list of values live at the end
// of that block together with the distance to their next use; the result is
// stored in s.live. It also computes the desired register information at
// the end of each block, stored in s.desired. Both are computed by a
// backward dataflow iteration to a fixed point.
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value
	rematIDs := make([]ID, 0, 64)

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	// Scratch desired-register state for the block being scanned.
	var desired desiredState

	// Iterate over the blocks in postorder: liveness flows backward,
	// so starting near the exits helps the iteration stabilize quickly.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.computeUnavoidableCalls()
	for {
		changed := false

		for _, b := range po {
			// Start with the known live values at the end of the block.
			// Add len(b.Values) to convert end-of-block distances to
			// beginning-of-block distances.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Mark control values as live at the end of the block.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Propagate liveness backward to the start of the block.
			// Assumes Values have already been scheduled.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phi ops for later; their args are live on
					// specific incoming edges, not in this block.
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					rematIDs = rematIDs[:0]
					// Penalize values live across a call, and collect
					// rematerializeable ones.
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
						vid := c[i].key
						if s.values[vid].rematerializeable {
							rematIDs = append(rematIDs, vid)
						}
					}
					// Remove rematerializeable values: they don't need
					// to be kept live across the call since they can be
					// recomputed afterward.
					for _, r := range rematIDs {
						live.remove(r)
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}
			// Propagate desired registers backward through the block.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// TODO: propagate desired registers through phis.
					// For now preferences are dropped at phi nodes.
					continue
				}
				regspec := s.regspec(v)
				// Cancel desired registers that this op clobbers.
				desired.clobber(regspec.clobbers)
				// Single-register inputs create strong preferences.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// For 2-operand instructions (and ops treated like them),
				// the output's preferences flow to the input(s).
				// ADDQconst/ADDLconst are included so they can be emitted
				// as ADD rather than LEA; Select0 forwards preferences to
				// the tuple-generating instruction.
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its live-at-end set using
			// the values live at the start of b (excluding phi inputs,
			// which are handled per-edge below).
			for i, e := range b.Preds {
				p := e.b
				// Pick the edge distance based on branch likelihood.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Merge this block's desired registers into the predecessor's.
				s.desired[p.ID].merge(&desired)

				// Start t with the previously known live values at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values found by scanning this block,
				// keeping the minimum distance on conflicts.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}
				// Add the phi argument for this specific edge; phi inputs
				// are considered used at distance delta.
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// The live set changed: write it back and keep iterating.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.pos})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf(" %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d(%d)", x.ID, x.dist)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(&s.registers[r])
						first = false
					}
					fmt.Printf("]")
				}
			}
			if avoid := s.desired[b.ID].avoid; avoid != 0 {
				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
			}
			fmt.Println()
		}
	}
}
3020
3021
// A desiredState represents desired register assignments for a set of values.
type desiredState struct {
	// Desired assignments are expected to be few, so a flat list of
	// (value ID, registers) entries is used instead of a map.
	entries []desiredStateEntry

	// Registers that other values want to be in. This mask contains at
	// least the union of the regs fields of entries, but may contain
	// extra bits for values that were once tracked here but have since
	// been removed.
	avoid regMask
}
// A desiredStateEntry associates a value with its preferred registers.
type desiredStateEntry struct {
	// (pre-regalloc) value ID
	ID ID

	// Registers this value would like to be in, in priority order
	// (most desired first). Unused slots are filled with noRegister.
	// At most four preferences are tracked per value.
	regs [4]register
}
3042
3043
3044 func (d *desiredState) get(vid ID) [4]register {
3045 for _, e := range d.entries {
3046 if e.ID == vid {
3047 return e.regs
3048 }
3049 }
3050 return [4]register{noRegister, noRegister, noRegister, noRegister}
3051 }
3052
3053
// add records that value vid would like to be in register r.
// Preferences are kept in most-recently-added-first order, so r is moved
// (or inserted) to the front of vid's list; if the list is full, the
// least desired register falls off the end.
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already the top preference; nothing to do.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Promote r from position j to the front, shifting
				// the higher-priority entries down one slot.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		// r not present: shift everything down (dropping the last,
		// least desired register) and insert r at the front.
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	// No entry for vid yet: create one with r as the sole preference.
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
3079
3080 func (d *desiredState) addList(vid ID, regs [4]register) {
3081
3082 for i := len(regs) - 1; i >= 0; i-- {
3083 r := regs[i]
3084 if r != noRegister {
3085 d.add(vid, r)
3086 }
3087 }
3088 }
3089
3090
// clobber erases all desired registers in the set m from every entry.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		// Compact surviving (non-clobbered) registers to the front.
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No desired registers remain: drop the entry by moving
			// the last entry into its slot (swap-delete, so don't
			// advance i).
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		// Pad the vacated tail slots with noRegister.
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}
3114
3115
3116 func (d *desiredState) copy(x *desiredState) {
3117 d.entries = append(d.entries[:0], x.entries...)
3118 d.avoid = x.avoid
3119 }
3120
3121
3122 func (d *desiredState) remove(vid ID) [4]register {
3123 for i := range d.entries {
3124 if d.entries[i].ID == vid {
3125 regs := d.entries[i].regs
3126 d.entries[i] = d.entries[len(d.entries)-1]
3127 d.entries = d.entries[:len(d.entries)-1]
3128 return regs
3129 }
3130 }
3131 return [4]register{noRegister, noRegister, noRegister, noRegister}
3132 }
3133
3134
3135 func (d *desiredState) merge(x *desiredState) {
3136 d.avoid |= x.avoid
3137
3138
3139 for _, e := range x.entries {
3140 d.addList(e.ID, e.regs)
3141 }
3142 }
3143
3144
// computeUnavoidableCalls sets the containsUnavoidableCall field on each
// loop in the loop nest: a loop has an unavoidable call if there is no
// path from its header back to itself that avoids every call-containing
// block.
func (loopnest *loopnest) computeUnavoidableCalls() {
	f := loopnest.f

	// Note which blocks contain a call.
	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
	defer f.Cache.freeBoolSlice(hasCall)
	for _, b := range f.Blocks {
		if b.containsCall() {
			hasCall[b.ID] = true
		}
	}
	found := f.Cache.allocSparseSet(f.NumBlocks())
	defer f.Cache.freeSparseSet(found)

	// For each loop, run a DFS from the header through call-free blocks
	// that stay within the loop (or a nested loop). If the DFS reaches
	// the header again, a call-free trip around the loop exists and the
	// loop is skipped; otherwise the loop contains an unavoidable call.
loopLoop:
	for _, l := range loopnest.loops {
		found.clear()
		tovisit := make([]*Block, 0, 8)
		tovisit = append(tovisit, l.header)
		for len(tovisit) > 0 {
			cur := tovisit[len(tovisit)-1]
			tovisit = tovisit[:len(tovisit)-1]
			if hasCall[cur.ID] {
				// This block has a call: paths through it don't count.
				continue
			}
			for _, s := range cur.Succs {
				nb := s.Block()
				if nb == l.header {
					// Made it back to the header without hitting a
					// call: this loop's calls are avoidable.
					continue loopLoop
				}
				if found.contains(nb.ID) {
					// Already queued/visited.
					continue
				}
				nl := loopnest.b2l[nb.ID]
				if nl == nil || (nl.depth <= l.depth && nl != l) {
					// nb is outside loop l; don't follow it.
					continue
				}
				tovisit = append(tovisit, nb)
				found.add(nb.ID)
			}
		}
		// The search exhausted all call-free paths without returning to
		// the header: every trip around the loop must execute a call.
		l.containsUnavoidableCall = true
	}
}
3200
3201 func (b *Block) containsCall() bool {
3202 if b.Kind == BlockDefer {
3203 return true
3204 }
3205 for _, v := range b.Values {
3206 if opcodeTable[v.Op].call {
3207 return true
3208 }
3209 }
3210 return false
3211 }
3212
// (scraping residue from the source-view page: "View as plain text")