1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114 package ssa
115
116 import (
117 "cmd/compile/internal/base"
118 "cmd/compile/internal/ir"
119 "cmd/compile/internal/types"
120 "cmd/internal/src"
121 "cmd/internal/sys"
122 "fmt"
123 "internal/buildcfg"
124 "math"
125 "math/bits"
126 "unsafe"
127 )
128
// Debug verbosity thresholds for this pass, compared against
// s.f.pass.debug (e.g. code guarded by "debug > regDebug" traces
// individual register assignments; "debug > logSpills" reports spill
// loads). moveSpills and stackDebug are presumably used by related
// code outside this chunk — confirm against the rest of the file.
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
135
136
137
// Distance weights for estimating how soon a value will be used when
// liveness crosses a branch edge: uses behind a likely branch count as
// near, behind an unlikely branch as far. (Consumed by the liveness
// computation; computeLive is outside this chunk — confirm there.)
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
143
144
145
146 func regalloc(f *Func) {
147 var s regAllocState
148 s.init(f)
149 s.regalloc(f)
150 s.close()
151 }
152
// register is the index of a machine register within s.registers.
type register uint8

// noRegister is a sentinel register index meaning "no register assigned".
const noRegister register = 255

// noRegisters is an array full of noRegister sentinels, handy for
// resetting a fixed-size register list in a single assignment.
var noRegisters [32]register = [32]register{
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
}
164
165
166
// regMask is a set of registers, one bit per register index.
type regMask uint64
168
169 func (m regMask) String() string {
170 s := ""
171 for r := register(0); m != 0; r++ {
172 if m>>r&1 == 0 {
173 continue
174 }
175 m &^= regMask(1) << r
176 if s != "" {
177 s += " "
178 }
179 s += fmt.Sprintf("r%d", r)
180 }
181 return s
182 }
183
184 func (s *regAllocState) RegMaskString(m regMask) string {
185 str := ""
186 for r := register(0); m != 0; r++ {
187 if m>>r&1 == 0 {
188 continue
189 }
190 m &^= regMask(1) << r
191 if str != "" {
192 str += " "
193 }
194 str += s.registers[r].String()
195 }
196 return str
197 }
198
199
// countRegs returns the number of registers present in the mask r.
func countRegs(r regMask) int {
	return bits.OnesCount64(uint64(r))
}
203
204
// pickReg picks an arbitrary register from the register mask r,
// namely the lowest-numbered one. It panics if r is empty.
func pickReg(r regMask) register {
	if r == 0 {
		panic("can't pick a register from an empty set")
	}
	// pick the lowest one
	return register(bits.TrailingZeros64(uint64(r)))
}
212
// A use records a single upcoming use of a value.
// Use records for one value form a linked list ordered by
// nondecreasing dist (the soonest use is at the head); addUse
// enforces this ordering.
type use struct {
	// dist is the distance, in instructions counted within the current
	// block's numbering, to this use; smaller means sooner.
	dist int32
	// pos is the source position of the use (used for debug/start-reg info).
	pos src.XPos
	// next is the following (later or equally distant) use of the same value.
	next *use
}
223
224
// valState records the register-allocation state of a single value.
type valState struct {
	regs              regMask // registers currently holding a cached copy of this value
	uses              *use    // list of upcoming uses, soonest first
	spill             *Value  // spilled copy of this value, if any
	restoreMin        int32   // sdom-tree entry bound of the region in which restores occur
	restoreMax        int32   // sdom-tree exit bound of the same region
	needReg           bool    // value needs a register (not memory/void/flags/tuple; see needRegister)
	rematerializeable bool    // value can be cheaply recomputed instead of spilled and reloaded
}
234
// regState records the current contents of a single register.
type regState struct {
	v *Value // original (pre-regalloc) value held in this register
	c *Value // cached copy of v that actually resides in this register
	// v == nil means the register is free.
}
240
// regAllocState holds the working state of the register allocator for
// one function.
type regAllocState struct {
	f *Func

	sdom        SparseTree // dominator tree (f.Sdom()), used to bound restore regions
	registers   []Register // machine register descriptions (f.Config.registers)
	numRegs     register   // len(registers)
	SPReg       register   // index of the stack pointer register ("SP")
	SBReg       register   // index of the static base register ("SB")
	GReg        register   // index of the g register, if f.Config.hasGReg
	allocatable regMask    // registers the allocator is allowed to hand out

	// live[blockID] lists the values live at the end of that block,
	// with distance-to-use information (filled in by computeLive).
	live [][]liveInfo

	// desired[blockID] records desired register assignments at the end
	// of that block.
	desired []desiredState

	// values[id] is the allocation state of value id.
	values []valState

	// IDs of the SP and SB values, recorded when their ops are seen.
	sp, sb ID

	// orig[id] maps any value id — including copies, spills, and
	// reloads created by this pass — back to the original pre-regalloc
	// value (see setOrig).
	orig []*Value

	// regs[r] is the current contents of register r.
	regs []regState

	// nospill is the set of registers that must not be spilled while
	// setting up the current instruction (its already-placed inputs).
	nospill regMask

	// used is the set of registers currently holding some value.
	used regMask

	// usedSinceBlockStart is the set of registers that have held any
	// value since the start of the current block.
	usedSinceBlockStart regMask

	// tmpused is the set of registers busy within the current
	// instruction (inputs plus clobbers).
	tmpused regMask

	// curBlock is the block currently being processed.
	curBlock *Block

	// freeUseRecords is a free list of use records available for reuse.
	freeUseRecords *use

	// endRegs[blockID] describes the register contents at the end of
	// that block: register, original value, cached copy.
	endRegs [][]endReg

	// startRegs[blockID] describes the register contents expected at
	// the start of that block.
	startRegs [][]startReg

	// startRegsMask mirrors startRegs for the current block as a mask;
	// registers can be dropped from it if they turn out not to be
	// needed on entry.
	startRegsMask regMask

	// spillLive[blockID] is the IDs of spill values live at the end of
	// that block.
	spillLive [][]ID

	// copies records OpCopy values created by this pass; the bool
	// appears to mark whether the copy is known used (set to false on
	// creation) — confirm semantics where the map is consumed.
	copies map[*Value]bool

	loopnest *loopnest

	// visitOrder is the order in which blocks are processed
	// (layoutRegallocOrder).
	visitOrder []*Block

	// blockOrder[blockID] is that block's position within visitOrder.
	blockOrder []int32

	// doClobber requests clobbering of dead registers at call sites
	// (driven by base.Flag.ClobberDeadReg).
	doClobber bool

	// nextCall[i] is the index of the next call at or after
	// instruction i in the current block (math.MaxInt32 if none).
	nextCall []int32

	// curIdx is the index of the instruction currently being processed
	// in the current block.
	curIdx int
}
335
// endReg records that, at the end of a block, register r holds the
// original value v via the cached copy c.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register (never nil)
	c *Value // cached version of the value in this register (never nil)
}
341
// startReg records that, at the start of a block, register r must hold
// the original value v via the cached copy c.
type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached version of the value in this register
	pos src.XPos // source position associated with v's next use
}
348
349
350 func (s *regAllocState) freeReg(r register) {
351 v := s.regs[r].v
352 if v == nil {
353 s.f.Fatalf("tried to free an already free register %d\n", r)
354 }
355
356
357 if s.f.pass.debug > regDebug {
358 fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
359 }
360 s.regs[r] = regState{}
361 s.values[v.ID].regs &^= regMask(1) << r
362 s.used &^= regMask(1) << r
363 }
364
365
366 func (s *regAllocState) freeRegs(m regMask) {
367 for m&s.used != 0 {
368 s.freeReg(pickReg(m & s.used))
369 }
370 }
371
372
// clobberRegs inserts instructions that clobber (overwrite with junk)
// the registers in m, to help detect values incorrectly kept live
// across calls. Only allocatable general-purpose registers are
// clobbered.
func (s *regAllocState) clobberRegs(m regMask) {
	m &= s.allocatable & s.f.Config.gpRegMask // only integer register can contain pointers, only clobber them
	for m != 0 {
		r := pickReg(m)
		m &^= 1 << r
		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
		s.f.setHome(x, &s.registers[r])
	}
}
382
383
384
// setOrig records that c is a copy/spill/reload derived from v, so that
// s.orig maps c's ID back to the original pre-regalloc value. Note it
// stores s.orig[v.ID] rather than v itself, so chains of copies all
// resolve to the true original.
func (s *regAllocState) setOrig(c *Value, v *Value) {
	if int(c.ID) >= cap(s.orig) {
		// Grow the orig slice via the function cache's pooled allocator.
		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
		copy(x, s.orig)
		s.f.Cache.freeValueSlice(s.orig)
		s.orig = x
	}
	for int(c.ID) >= len(s.orig) {
		s.orig = append(s.orig, nil)
	}
	if s.orig[c.ID] != nil {
		s.f.Fatalf("orig value set twice %s %s", c, v)
	}
	s.orig[c.ID] = s.orig[v.ID]
}
400
401
402
// assignReg assigns register r to hold the original value v via the
// cached copy c. r must be free; both per-register and per-value state
// are updated, and c's home location is set to the register.
func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
	if s.f.pass.debug > regDebug {
		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
	}
	if s.regs[r].v != nil {
		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
	}

	// Update both directions of the register<->value association.
	s.regs[r] = regState{v, c}
	s.values[v.ID].regs |= regMask(1) << r
	s.used |= regMask(1) << r
	s.f.setHome(c, &s.registers[r])
}
417
418
419
420
421 func (s *regAllocState) allocReg(mask regMask, v *Value) register {
422 if v.OnWasmStack {
423 return noRegister
424 }
425
426 mask &= s.allocatable
427 mask &^= s.nospill
428 if mask == 0 {
429 s.f.Fatalf("no register available for %s", v.LongString())
430 }
431
432
433 if mask&^s.used != 0 {
434 r := pickReg(mask &^ s.used)
435 s.usedSinceBlockStart |= regMask(1) << r
436 return r
437 }
438
439
440
441
442
443
444
445
446
447
448
449 var r register
450 maxuse := int32(-1)
451 for t := register(0); t < s.numRegs; t++ {
452 if mask>>t&1 == 0 {
453 continue
454 }
455 v := s.regs[t].v
456 if n := s.values[v.ID].uses.dist; n > maxuse {
457
458
459 r = t
460 maxuse = n
461 }
462 }
463 if maxuse == -1 {
464 s.f.Fatalf("couldn't find register to spill")
465 }
466
467 if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
468
469
470
471 s.freeReg(r)
472 return r
473 }
474
475
476
477 v2 := s.regs[r].v
478 m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
479 if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
480 s.usedSinceBlockStart |= regMask(1) << r
481 r2 := pickReg(m)
482 c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
483 s.copies[c] = false
484 if s.f.pass.debug > regDebug {
485 fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
486 }
487 s.setOrig(c, v2)
488 s.assignReg(r2, v2, c)
489 }
490
491
492
493
494 if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
495 if s.startRegsMask&(regMask(1)<<r) == 1 {
496 if s.f.pass.debug > regDebug {
497 fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
498 }
499 s.startRegsMask &^= regMask(1) << r
500 }
501 }
502
503 s.freeReg(r)
504 s.usedSinceBlockStart |= regMask(1) << r
505 return r
506 }
507
508
509
// makeSpill returns a Value which represents the spilled value of v,
// creating the OpStoreReg value on first request. The spill is not yet
// assigned to a block; restoreMin/restoreMax track (via the sdom tree)
// the region in which restores of this value occur so the spill can
// later be placed to dominate them.
func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
	vi := &s.values[v.ID]
	if vi.spill != nil {
		// Final block not known - keep track of subtree where restores reside.
		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
		return vi.spill
	}
	// Make a spill for v. We don't know where we want to put it yet,
	// so leave it unassigned to any block for now.
	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
	s.setOrig(spill, v)
	vi.spill = spill
	vi.restoreMin = s.sdom[b.ID].entry
	vi.restoreMax = s.sdom[b.ID].exit
	return spill
}
529
530
531
532
533
534
535
// allocValToReg allocates v to a register from the set mask and returns
// the cached copy of v residing there. If nospill is set, the chosen
// register is marked unspillable while the current instruction is being
// set up. pos is the source position for any copy/load this creates.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		// On wasm, rematerialize the value directly onto the wasm stack
		// instead of occupying a register.
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a requested register.
	if mask&vi.regs != 0 {
		r := pickReg(mask & vi.regs)
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill |= regMask(1) << r
		}
		s.usedSinceBlockStart |= regMask(1) << r
		return s.regs[r].c
	}

	var r register
	// If nospill is set, the value is used immediately, so on wasm it
	// can live on the wasm stack rather than in a register.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register, possibly evicting another value.
		r = s.allocReg(mask, v)
	}

	// Allocate v to the new register, sourcing it in preference order:
	// existing register copy, rematerialization, then spill reload.
	var c *Value
	if vi.regs != 0 {
		// Copy from an existing register holding v.
		r2 := pickReg(vi.regs)
		if s.regs[r2].v != v {
			panic("bad register state")
		}
		s.usedSinceBlockStart |= regMask(1) << r2
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, s.regs[r2].c)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	// Loading directly into the g register is never valid.
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill |= regMask(1) << r
	}
	return c
}
608
609
610 func isLeaf(f *Func) bool {
611 for _, b := range f.Blocks {
612 for _, v := range b.Values {
613 if v.Op.IsCall() && !v.Op.IsTailCall() {
614
615 return false
616 }
617 }
618 }
619 return true
620 }
621
622
// needRegister reports whether v needs a register: memory, void,
// flags, and tuple-typed values do not get one.
func (v *Value) needRegister() bool {
	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
}
626
// init sets up the register-allocator state for f: it identifies the
// special SP/SB/g registers, computes the allocatable register set,
// chooses the block visit order, precomputes per-value state and
// liveness, and (on wasm) marks values that can live on the wasm stack.
func (s *regAllocState) init(f *Func) {
	s.f = f
	s.f.RegAlloc = s.f.Cache.locs[:0]
	s.registers = f.Config.registers
	// A regMask holds one bit per register, so the register count must
	// fit in both the mask width and the noRegister sentinel.
	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
		s.f.Fatalf("bad number of registers: %d", nr)
	} else {
		s.numRegs = register(nr)
	}
	// Locate SP, SB, and g registers by name.
	s.SPReg = noRegister
	s.SBReg = noRegister
	s.GReg = noRegister
	for r := register(0); r < s.numRegs; r++ {
		switch s.registers[r].String() {
		case "SP":
			s.SPReg = r
		case "SB":
			s.SBReg = r
		case "g":
			s.GReg = r
		}
	}
	// Make sure we found all required registers (g only when the
	// configuration says one exists).
	switch noRegister {
	case s.SPReg:
		s.f.Fatalf("no SP register found")
	case s.SBReg:
		s.f.Fatalf("no SB register found")
	case s.GReg:
		if f.Config.hasGReg {
			s.f.Fatalf("no g register found")
		}
	}

	// Figure out which registers we're allowed to use.
	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
	s.allocatable &^= 1 << s.SPReg
	s.allocatable &^= 1 << s.SBReg
	if s.f.Config.hasGReg {
		s.allocatable &^= 1 << s.GReg
	}
	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
	}
	if s.f.Config.LinkReg != -1 {
		if isLeaf(f) {
			// Leaf functions don't save/restore the link register,
			// so it is in use throughout the function.
			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
		}
	}
	if s.f.Config.ctxt.Flag_dynlink {
		// Reserve per-arch registers used by the dynamic-linking ABI.
		switch s.f.Config.arch {
		case "386":
			// nothing to reserve
		case "amd64":
			s.allocatable &^= 1 << 15 // R15
		case "arm":
			s.allocatable &^= 1 << 9 // R9
		case "arm64":
			// nothing to reserve
		case "loong64":
			// nothing to reserve
		case "ppc64le":
			// nothing to reserve
		case "riscv64":
			// nothing to reserve
		case "s390x":
			s.allocatable &^= 1 << 11 // R11
		default:
			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
		}
	}

	// Decide on the block processing order and record each block's
	// position in it for quick lookup.
	s.visitOrder = layoutRegallocOrder(f)
	s.blockOrder = make([]int32, f.NumBlocks())
	for i, b := range s.visitOrder {
		s.blockOrder[b.ID] = int32(i)
	}

	s.regs = make([]regState, s.numRegs)
	nv := f.NumValues()
	// Reuse the cached valState slice when large enough.
	if cap(s.f.Cache.regallocValues) >= nv {
		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
	} else {
		s.f.Cache.regallocValues = make([]valState, nv)
	}
	s.values = s.f.Cache.regallocValues
	s.orig = s.f.Cache.allocValueSlice(nv)
	s.copies = make(map[*Value]bool)
	for _, b := range s.visitOrder {
		for _, v := range b.Values {
			if v.needRegister() {
				s.values[v.ID].needReg = true
				s.values[v.ID].rematerializeable = v.rematerializeable()
				s.orig[v.ID] = v
			}
			// Note: needReg is false for values returning Tuple types.
		}
	}
	s.computeLive()

	s.endRegs = make([][]endReg, f.NumBlocks())
	s.startRegs = make([][]startReg, f.NumBlocks())
	s.spillLive = make([][]ID, f.NumBlocks())
	s.sdom = f.Sdom()

	// wasm: Mark instructions that can be optimized to have their
	// value live on the wasm stack instead of in a register.
	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		canLiveOnStack := f.newSparseSet(f.NumValues())
		defer f.retSparseSet(canLiveOnStack)
		for _, b := range f.Blocks {
			// Seed with single-use, machine-specific control values.
			canLiveOnStack.clear()
			for _, c := range b.ControlValues() {
				if c.Uses == 1 && !opcodeTable[c.Op].generic {
					canLiveOnStack.add(c.ID)
				}
			}
			// Walk backwards: a value can stay on the stack only if it
			// is consumed immediately by the next candidate instruction.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				if canLiveOnStack.contains(v.ID) {
					v.OnWasmStack = true
				} else {
					// Value can not live on stack. Values are not allowed
					// to be reordered, so clear the candidate set.
					canLiveOnStack.clear()
				}
				for _, arg := range v.Args {
					// Args of this value can live on the stack if they are
					// single-use, in the same block, non-memory, and
					// machine-specific.
					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
						canLiveOnStack.add(arg.ID)
					}
				}
			}
		}
	}

	// The clobberdeadreg experiment inserts code to clobber dead
	// registers at call sites. Skip it for very large functions.
	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
		s.doClobber = true
	}
}
788
// close returns pooled resources (the orig slice) to the function cache.
func (s *regAllocState) close() {
	s.f.Cache.freeValueSlice(s.orig)
}
792
793
794
795 func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
796 r := s.freeUseRecords
797 if r != nil {
798 s.freeUseRecords = r.next
799 } else {
800 r = &use{}
801 }
802 r.dist = dist
803 r.pos = pos
804 r.next = s.values[id].uses
805 s.values[id].uses = r
806 if r.next != nil && dist > r.next.dist {
807 s.f.Fatalf("uses added in wrong order")
808 }
809 }
810
811
812
// advanceUses pops the just-consumed use record for each of v's
// arguments and recycles the records. If an argument has no further
// uses, or its next use is after the next call in this block, its
// registers are freed — except for SP and SB, which stay cached
// (presumably because calls do not clobber them; confirm against the
// target ABI).
func (s *regAllocState) advanceUses(v *Value) {
	for _, a := range v.Args {
		if !s.values[a.ID].needReg {
			continue
		}
		ai := &s.values[a.ID]
		r := ai.uses
		ai.uses = r.next
		if r.next == nil || (a.Op != OpSP && a.Op != OpSB && r.next.dist > s.nextCall[s.curIdx]) {
			// Value is dead (or is not used again until after a call), free all registers that hold it.
			s.freeRegs(ai.regs)
		}
		// Recycle the use record.
		r.next = s.freeUseRecords
		s.freeUseRecords = r
	}
	s.dropIfUnused(v)
}
830
831
832
// dropIfUnused frees the registers holding v if v has no remaining
// uses, or none before the next call in this block (SP and SB excepted,
// mirroring the policy in advanceUses).
func (s *regAllocState) dropIfUnused(v *Value) {
	if !s.values[v.ID].needReg {
		return
	}
	vi := &s.values[v.ID]
	r := vi.uses
	if r == nil || (v.Op != OpSP && v.Op != OpSB && r.dist > s.nextCall[s.curIdx]) {
		s.freeRegs(vi.regs)
	}
}
843
844
845
846
847 func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
848 u := s.values[v.ID].uses
849 if u == nil {
850 panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
851 }
852 d := u.dist
853 for u != nil && u.dist == d {
854 u = u.next
855 }
856 return u != nil && u.dist > d
857 }
858
859
860 func (s *regAllocState) setState(regs []endReg) {
861 s.freeRegs(s.used)
862 for _, x := range regs {
863 s.assignReg(x.r, x.v, x.c)
864 }
865 }
866
867
// compatRegs returns the set of allocatable registers that can hold a
// value of type t. Tuple and flags types get no register; floats (and
// 128-bit ints) use the FP register set, with narrower FP32/FP64
// subsets when the config defines them; everything else uses the GP
// set.
func (s *regAllocState) compatRegs(t *types.Type) regMask {
	var m regMask
	if t.IsTuple() || t.IsFlags() {
		return 0
	}
	if t.IsFloat() || t == types.TypeInt128 {
		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
			m = s.f.Config.fp32RegMask
		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
			m = s.f.Config.fp64RegMask
		} else {
			m = s.f.Config.fpRegMask
		}
	} else {
		m = s.f.Config.gpRegMask
	}
	return m & s.allocatable
}
886
887
// regspec returns the register constraint specification for v. Most
// ops use the static opcodeTable entry, but a few are special-cased:
// OpConvert may use any allocatable GP register; OpArgIntReg and
// OpArgFloatReg are pinned to the ABI parameter register named by
// their AuxInt; calls and OpMakeResult use register info attached to
// their AuxCall / function results.
func (s *regAllocState) regspec(v *Value) regInfo {
	op := v.Op
	if op == OpConvert {
		// OpConvert has no static register constraints; it may use any
		// allocatable general-purpose register for input and output.
		m := s.allocatable & s.f.Config.gpRegMask
		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
	}
	if op == OpArgIntReg {
		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op == OpArgFloatReg {
		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
	}
	if op.IsCall() {
		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
		}
	}
	if op == OpMakeResult && s.f.OwnAux.reg != nil {
		return *s.f.OwnAux.ResultReg(s.f.Config)
	}
	return opcodeTable[op].reg
}
915
// isGReg reports whether r is the g register (only meaningful on
// configurations that have one).
func (s *regAllocState) isGReg(r register) bool {
	return s.f.Config.hasGReg && s.GReg == r
}
919
920
// tmpVal is a dummy Value passed to allocReg when reserving a scratch
// register (see the needIntTemp handling in regalloc).
var tmpVal Value
922
923 func (s *regAllocState) regalloc(f *Func) {
924 regValLiveSet := f.newSparseSet(f.NumValues())
925 defer f.retSparseSet(regValLiveSet)
926 var oldSched []*Value
927 var phis []*Value
928 var phiRegs []register
929 var args []*Value
930
931
932 var desired desiredState
933
934
935 type dentry struct {
936 out [4]register
937 in [3][4]register
938 }
939 var dinfo []dentry
940
941 if f.Entry != f.Blocks[0] {
942 f.Fatalf("entry block must be first")
943 }
944
945 for _, b := range s.visitOrder {
946 if s.f.pass.debug > regDebug {
947 fmt.Printf("Begin processing block %v\n", b)
948 }
949 s.curBlock = b
950 s.startRegsMask = 0
951 s.usedSinceBlockStart = 0
952
953
954
955 regValLiveSet.clear()
956 for _, e := range s.live[b.ID] {
957 s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos)
958 regValLiveSet.add(e.ID)
959 }
960 for _, v := range b.ControlValues() {
961 if s.values[v.ID].needReg {
962 s.addUse(v.ID, int32(len(b.Values)), b.Pos)
963 regValLiveSet.add(v.ID)
964 }
965 }
966 if len(s.nextCall) < len(b.Values) {
967 s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
968 }
969 var nextCall int32 = math.MaxInt32
970 for i := len(b.Values) - 1; i >= 0; i-- {
971 v := b.Values[i]
972 regValLiveSet.remove(v.ID)
973 if v.Op == OpPhi {
974
975
976
977 s.nextCall[i] = nextCall
978 continue
979 }
980 if opcodeTable[v.Op].call {
981
982 regValLiveSet.clear()
983 if s.sp != 0 && s.values[s.sp].uses != nil {
984 regValLiveSet.add(s.sp)
985 }
986 if s.sb != 0 && s.values[s.sb].uses != nil {
987 regValLiveSet.add(s.sb)
988 }
989 nextCall = int32(i)
990 }
991 for _, a := range v.Args {
992 if !s.values[a.ID].needReg {
993 continue
994 }
995 s.addUse(a.ID, int32(i), v.Pos)
996 regValLiveSet.add(a.ID)
997 }
998 s.nextCall[i] = nextCall
999 }
1000 if s.f.pass.debug > regDebug {
1001 fmt.Printf("use distances for %s\n", b)
1002 for i := range s.values {
1003 vi := &s.values[i]
1004 u := vi.uses
1005 if u == nil {
1006 continue
1007 }
1008 fmt.Printf(" v%d:", i)
1009 for u != nil {
1010 fmt.Printf(" %d", u.dist)
1011 u = u.next
1012 }
1013 fmt.Println()
1014 }
1015 }
1016
1017
1018
1019 nphi := 0
1020 for _, v := range b.Values {
1021 if v.Op != OpPhi {
1022 break
1023 }
1024 nphi++
1025 }
1026 phis = append(phis[:0], b.Values[:nphi]...)
1027 oldSched = append(oldSched[:0], b.Values[nphi:]...)
1028 b.Values = b.Values[:0]
1029
1030
1031 if b == f.Entry {
1032
1033 if nphi > 0 {
1034 f.Fatalf("phis in entry block")
1035 }
1036 } else if len(b.Preds) == 1 {
1037
1038 s.setState(s.endRegs[b.Preds[0].b.ID])
1039 if nphi > 0 {
1040 f.Fatalf("phis in single-predecessor block")
1041 }
1042
1043
1044
1045 for r := register(0); r < s.numRegs; r++ {
1046 v := s.regs[r].v
1047 if v != nil && !regValLiveSet.contains(v.ID) {
1048 s.freeReg(r)
1049 }
1050 }
1051 } else {
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065 idx := -1
1066 for i, p := range b.Preds {
1067
1068
1069 pb := p.b
1070 if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
1071 continue
1072 }
1073 if idx == -1 {
1074 idx = i
1075 continue
1076 }
1077 pSel := b.Preds[idx].b
1078 if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
1079 idx = i
1080 } else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091 if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
1092 idx = i
1093 }
1094 }
1095 }
1096 if idx < 0 {
1097 f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
1098 }
1099 p := b.Preds[idx].b
1100 s.setState(s.endRegs[p.ID])
1101
1102 if s.f.pass.debug > regDebug {
1103 fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
1104 for _, x := range s.endRegs[p.ID] {
1105 fmt.Printf(" %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
1106 }
1107 }
1108
1109
1110
1111
1112
1113 phiRegs = phiRegs[:0]
1114 var phiUsed regMask
1115
1116 for _, v := range phis {
1117 if !s.values[v.ID].needReg {
1118 phiRegs = append(phiRegs, noRegister)
1119 continue
1120 }
1121 a := v.Args[idx]
1122
1123
1124 m := s.values[a.ID].regs &^ phiUsed & s.allocatable
1125 if m != 0 {
1126 r := pickReg(m)
1127 phiUsed |= regMask(1) << r
1128 phiRegs = append(phiRegs, r)
1129 } else {
1130 phiRegs = append(phiRegs, noRegister)
1131 }
1132 }
1133
1134
1135 for i, v := range phis {
1136 if !s.values[v.ID].needReg {
1137 continue
1138 }
1139 a := v.Args[idx]
1140 r := phiRegs[i]
1141 if r == noRegister {
1142 continue
1143 }
1144 if regValLiveSet.contains(a.ID) {
1145
1146
1147
1148
1149
1150
1151
1152
1153 m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
1154 if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
1155 r2 := pickReg(m)
1156 c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
1157 s.copies[c] = false
1158 if s.f.pass.debug > regDebug {
1159 fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
1160 }
1161 s.setOrig(c, a)
1162 s.assignReg(r2, a, c)
1163 s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
1164 }
1165 }
1166 s.freeReg(r)
1167 }
1168
1169
1170 b.Values = append(b.Values, phis...)
1171
1172
1173
1174 for i, v := range phis {
1175 if !s.values[v.ID].needReg {
1176 continue
1177 }
1178 if phiRegs[i] != noRegister {
1179 continue
1180 }
1181 m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
1182
1183
1184 for i, pe := range b.Preds {
1185 if i == idx {
1186 continue
1187 }
1188 ri := noRegister
1189 for _, er := range s.endRegs[pe.b.ID] {
1190 if er.v == s.orig[v.Args[i].ID] {
1191 ri = er.r
1192 break
1193 }
1194 }
1195 if ri != noRegister && m>>ri&1 != 0 {
1196 m = regMask(1) << ri
1197 break
1198 }
1199 }
1200 if m != 0 {
1201 r := pickReg(m)
1202 phiRegs[i] = r
1203 phiUsed |= regMask(1) << r
1204 }
1205 }
1206
1207
1208 for i, v := range phis {
1209 if !s.values[v.ID].needReg {
1210 continue
1211 }
1212 r := phiRegs[i]
1213 if r == noRegister {
1214
1215
1216 s.values[v.ID].spill = v
1217 continue
1218 }
1219
1220 s.assignReg(r, v, v)
1221 }
1222
1223
1224 for r := register(0); r < s.numRegs; r++ {
1225 if phiUsed>>r&1 != 0 {
1226 continue
1227 }
1228 v := s.regs[r].v
1229 if v != nil && !regValLiveSet.contains(v.ID) {
1230 s.freeReg(r)
1231 }
1232 }
1233
1234
1235
1236
1237
1238 regList := make([]startReg, 0, 32)
1239 for r := register(0); r < s.numRegs; r++ {
1240 v := s.regs[r].v
1241 if v == nil {
1242 continue
1243 }
1244 if phiUsed>>r&1 != 0 {
1245
1246
1247 continue
1248 }
1249 regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
1250 s.startRegsMask |= regMask(1) << r
1251 }
1252 s.startRegs[b.ID] = make([]startReg, len(regList))
1253 copy(s.startRegs[b.ID], regList)
1254
1255 if s.f.pass.debug > regDebug {
1256 fmt.Printf("after phis\n")
1257 for _, x := range s.startRegs[b.ID] {
1258 fmt.Printf(" %s: v%d\n", &s.registers[x.r], x.v.ID)
1259 }
1260 }
1261 }
1262
1263
1264 for i, v := range phis {
1265 s.curIdx = i
1266 s.dropIfUnused(v)
1267 }
1268
1269
1270 if l := len(oldSched); cap(dinfo) < l {
1271 dinfo = make([]dentry, l)
1272 } else {
1273 dinfo = dinfo[:l]
1274 for i := range dinfo {
1275 dinfo[i] = dentry{}
1276 }
1277 }
1278
1279
1280 desired.copy(&s.desired[b.ID])
1281
1282
1283
1284
1285
1286
1287 for _, e := range b.Succs {
1288 succ := e.b
1289
1290 for _, x := range s.startRegs[succ.ID] {
1291 desired.add(x.v.ID, x.r)
1292 }
1293
1294 pidx := e.i
1295 for _, v := range succ.Values {
1296 if v.Op != OpPhi {
1297 break
1298 }
1299 if !s.values[v.ID].needReg {
1300 continue
1301 }
1302 rp, ok := s.f.getHome(v.ID).(*Register)
1303 if !ok {
1304
1305
1306
1307
1308 for _, a := range v.Args {
1309 rp, ok = s.f.getHome(a.ID).(*Register)
1310 if ok {
1311 break
1312 }
1313 }
1314 if !ok {
1315 continue
1316 }
1317 }
1318 desired.add(v.Args[pidx].ID, register(rp.num))
1319 }
1320 }
1321
1322
1323 for i := len(oldSched) - 1; i >= 0; i-- {
1324 v := oldSched[i]
1325 prefs := desired.remove(v.ID)
1326 regspec := s.regspec(v)
1327 desired.clobber(regspec.clobbers)
1328 for _, j := range regspec.inputs {
1329 if countRegs(j.regs) != 1 {
1330 continue
1331 }
1332 desired.clobber(j.regs)
1333 desired.add(v.Args[j.idx].ID, pickReg(j.regs))
1334 }
1335 if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
1336 if opcodeTable[v.Op].commutative {
1337 desired.addList(v.Args[1].ID, prefs)
1338 }
1339 desired.addList(v.Args[0].ID, prefs)
1340 }
1341
1342 dinfo[i].out = prefs
1343 for j, a := range v.Args {
1344 if j >= len(dinfo[i].in) {
1345 break
1346 }
1347 dinfo[i].in[j] = desired.get(a.ID)
1348 }
1349 }
1350
1351
1352 for idx, v := range oldSched {
1353 s.curIdx = nphi + idx
1354 tmpReg := noRegister
1355 if s.f.pass.debug > regDebug {
1356 fmt.Printf(" processing %s\n", v.LongString())
1357 }
1358 regspec := s.regspec(v)
1359 if v.Op == OpPhi {
1360 f.Fatalf("phi %s not at start of block", v)
1361 }
1362 if v.Op == OpSP {
1363 s.assignReg(s.SPReg, v, v)
1364 b.Values = append(b.Values, v)
1365 s.advanceUses(v)
1366 s.sp = v.ID
1367 continue
1368 }
1369 if v.Op == OpSB {
1370 s.assignReg(s.SBReg, v, v)
1371 b.Values = append(b.Values, v)
1372 s.advanceUses(v)
1373 s.sb = v.ID
1374 continue
1375 }
1376 if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
1377 if s.values[v.ID].needReg {
1378 if v.Op == OpSelectN {
1379 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
1380 } else {
1381 var i = 0
1382 if v.Op == OpSelect1 {
1383 i = 1
1384 }
1385 s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
1386 }
1387 }
1388 b.Values = append(b.Values, v)
1389 s.advanceUses(v)
1390 continue
1391 }
1392 if v.Op == OpGetG && s.f.Config.hasGReg {
1393
1394 if s.regs[s.GReg].v != nil {
1395 s.freeReg(s.GReg)
1396 }
1397 s.assignReg(s.GReg, v, v)
1398 b.Values = append(b.Values, v)
1399 s.advanceUses(v)
1400 continue
1401 }
1402 if v.Op == OpArg {
1403
1404
1405
1406 s.values[v.ID].spill = v
1407 b.Values = append(b.Values, v)
1408 s.advanceUses(v)
1409 continue
1410 }
1411 if v.Op == OpKeepAlive {
1412
1413 s.advanceUses(v)
1414 a := v.Args[0]
1415 vi := &s.values[a.ID]
1416 if vi.regs == 0 && !vi.rematerializeable {
1417
1418
1419
1420 v.SetArg(0, s.makeSpill(a, b))
1421 } else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
1422
1423
1424
1425 v.Op = OpVarLive
1426 v.SetArgs1(v.Args[1])
1427 v.Aux = a.Aux
1428 } else {
1429
1430
1431
1432 v.Op = OpCopy
1433 v.SetArgs1(v.Args[1])
1434 }
1435 b.Values = append(b.Values, v)
1436 continue
1437 }
1438 if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
1439
1440 if s.doClobber && v.Op.IsCall() {
1441 s.clobberRegs(regspec.clobbers)
1442 }
1443 s.freeRegs(regspec.clobbers)
1444 b.Values = append(b.Values, v)
1445 s.advanceUses(v)
1446 continue
1447 }
1448
1449 if s.values[v.ID].rematerializeable {
1450
1451
1452
1453 for _, a := range v.Args {
1454 a.Uses--
1455 }
1456 s.advanceUses(v)
1457 continue
1458 }
1459
1460 if s.f.pass.debug > regDebug {
1461 fmt.Printf("value %s\n", v.LongString())
1462 fmt.Printf(" out:")
1463 for _, r := range dinfo[idx].out {
1464 if r != noRegister {
1465 fmt.Printf(" %s", &s.registers[r])
1466 }
1467 }
1468 fmt.Println()
1469 for i := 0; i < len(v.Args) && i < 3; i++ {
1470 fmt.Printf(" in%d:", i)
1471 for _, r := range dinfo[idx].in[i] {
1472 if r != noRegister {
1473 fmt.Printf(" %s", &s.registers[r])
1474 }
1475 }
1476 fmt.Println()
1477 }
1478 }
1479
1480
1481
1482
1483 args = append(args[:0], make([]*Value, len(v.Args))...)
1484 for i, a := range v.Args {
1485 if !s.values[a.ID].needReg {
1486 args[i] = a
1487 }
1488 }
1489 for _, i := range regspec.inputs {
1490 mask := i.regs
1491 if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
1492 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1493 }
1494 }
1495
1496
1497
1498
1499
1500
1501 for {
1502 freed := false
1503 for _, i := range regspec.inputs {
1504 if args[i.idx] != nil {
1505 continue
1506 }
1507 mask := i.regs
1508 if countRegs(mask) == 1 && mask&^s.used != 0 {
1509 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1510
1511
1512
1513 oldregs := s.values[v.Args[i.idx].ID].regs
1514 if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
1515 s.freeRegs(oldregs &^ mask &^ s.nospill)
1516 freed = true
1517 }
1518 }
1519 }
1520 if !freed {
1521 break
1522 }
1523 }
1524
1525
1526 for _, i := range regspec.inputs {
1527 if args[i.idx] != nil {
1528 continue
1529 }
1530 mask := i.regs
1531 if mask&s.values[v.Args[i.idx].ID].regs == 0 {
1532
1533 mask &= s.allocatable
1534 mask &^= s.nospill
1535
1536 if i.idx < 3 {
1537 for _, r := range dinfo[idx].in[i.idx] {
1538 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1539
1540 mask = regMask(1) << r
1541 break
1542 }
1543 }
1544 }
1545
1546 if mask&^desired.avoid != 0 {
1547 mask &^= desired.avoid
1548 }
1549 }
1550 args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
1551 }
1552
1553
1554
1555
1556 if opcodeTable[v.Op].resultInArg0 {
1557 var m regMask
1558 if !s.liveAfterCurrentInstruction(v.Args[0]) {
1559
1560 goto ok
1561 }
1562 if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
1563 args[0], args[1] = args[1], args[0]
1564 goto ok
1565 }
1566 if s.values[v.Args[0].ID].rematerializeable {
1567
1568 goto ok
1569 }
1570 if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
1571 args[0], args[1] = args[1], args[0]
1572 goto ok
1573 }
1574 if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
1575
1576 goto ok
1577 }
1578 if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
1579 args[0], args[1] = args[1], args[0]
1580 goto ok
1581 }
1582
1583
1584
1585
1586
1587 m = s.compatRegs(v.Args[0].Type) &^ s.used
1588 if m == 0 {
1589
1590
1591
1592
1593 goto ok
1594 }
1595
1596
1597 for _, r := range dinfo[idx].out {
1598 if r != noRegister && (m®spec.outputs[0].regs)>>r&1 != 0 {
1599 m = regMask(1) << r
1600 args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
1601
1602
1603 goto ok
1604 }
1605 }
1606
1607
1608 for _, r := range dinfo[idx].in[0] {
1609 if r != noRegister && m>>r&1 != 0 {
1610 m = regMask(1) << r
1611 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1612 s.copies[c] = false
1613
1614
1615 goto ok
1616 }
1617 }
1618 if opcodeTable[v.Op].commutative {
1619 for _, r := range dinfo[idx].in[1] {
1620 if r != noRegister && m>>r&1 != 0 {
1621 m = regMask(1) << r
1622 c := s.allocValToReg(v.Args[1], m, true, v.Pos)
1623 s.copies[c] = false
1624 args[0], args[1] = args[1], args[0]
1625 goto ok
1626 }
1627 }
1628 }
1629
1630
1631 if m&^desired.avoid != 0 {
1632 m &^= desired.avoid
1633 }
1634
1635 c := s.allocValToReg(v.Args[0], m, true, v.Pos)
1636 s.copies[c] = false
1637
1638
1639
1640
1641 if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
1642 if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
1643 r := register(rp.num)
1644 for _, r2 := range dinfo[idx].in[0] {
1645 if r == r2 {
1646 args[0] = c
1647 break
1648 }
1649 }
1650 }
1651 }
1652 }
1653
1654 ok:
1655
1656
1657
1658
1659
1660
1661 if opcodeTable[v.Op].needIntTemp {
1662 m := s.allocatable & s.f.Config.gpRegMask
1663 for _, out := range regspec.outputs {
1664 if countRegs(out.regs) == 1 {
1665 m &^= out.regs
1666 }
1667 }
1668 if m&^desired.avoid&^s.nospill != 0 {
1669 m &^= desired.avoid
1670 }
1671 tmpReg = s.allocReg(m, &tmpVal)
1672 s.nospill |= regMask(1) << tmpReg
1673 }
1674
1675
1676
1677
1678
1679 if !opcodeTable[v.Op].resultNotInArgs {
1680 s.tmpused = s.nospill
1681 s.nospill = 0
1682 s.advanceUses(v)
1683 }
1684
1685
1686 if s.doClobber && v.Op.IsCall() {
1687
1688
1689 s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
1690 }
1691 s.freeRegs(regspec.clobbers)
1692 s.tmpused |= regspec.clobbers
1693
1694
1695 {
1696 outRegs := noRegisters
1697 maxOutIdx := -1
1698 var used regMask
1699 if tmpReg != noRegister {
1700
1701
1702 used |= regMask(1) << tmpReg
1703 }
1704 for _, out := range regspec.outputs {
1705 if out.regs == 0 {
1706 continue
1707 }
1708 mask := out.regs & s.allocatable &^ used
1709 if mask == 0 {
1710 s.f.Fatalf("can't find any output register %s", v.LongString())
1711 }
1712 if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
1713 if !opcodeTable[v.Op].commutative {
1714
1715 r := register(s.f.getHome(args[0].ID).(*Register).num)
1716 if mask>>r&1 == 0 {
1717 s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
1718 }
1719 mask = regMask(1) << r
1720 } else {
1721
1722 r0 := register(s.f.getHome(args[0].ID).(*Register).num)
1723 r1 := register(s.f.getHome(args[1].ID).(*Register).num)
1724
1725 found := false
1726 for _, r := range dinfo[idx].out {
1727 if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
1728 mask = regMask(1) << r
1729 found = true
1730 if r == r1 {
1731 args[0], args[1] = args[1], args[0]
1732 }
1733 break
1734 }
1735 }
1736 if !found {
1737
1738 mask = regMask(1) << r0
1739 }
1740 }
1741 }
1742 if out.idx == 0 {
1743 for _, r := range dinfo[idx].out {
1744 if r != noRegister && (mask&^s.used)>>r&1 != 0 {
1745
1746 mask = regMask(1) << r
1747 break
1748 }
1749 }
1750 }
1751
1752 if mask&^desired.avoid&^s.nospill&^s.used != 0 {
1753 mask &^= desired.avoid
1754 }
1755 r := s.allocReg(mask, v)
1756 if out.idx > maxOutIdx {
1757 maxOutIdx = out.idx
1758 }
1759 outRegs[out.idx] = r
1760 used |= regMask(1) << r
1761 s.tmpused |= regMask(1) << r
1762 }
1763
1764 if v.Type.IsTuple() {
1765 var outLocs LocPair
1766 if r := outRegs[0]; r != noRegister {
1767 outLocs[0] = &s.registers[r]
1768 }
1769 if r := outRegs[1]; r != noRegister {
1770 outLocs[1] = &s.registers[r]
1771 }
1772 s.f.setHome(v, outLocs)
1773
1774 } else if v.Type.IsResults() {
1775
1776 outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
1777 for i := 0; i <= maxOutIdx; i++ {
1778 if r := outRegs[i]; r != noRegister {
1779 outLocs[i] = &s.registers[r]
1780 }
1781 }
1782 s.f.setHome(v, outLocs)
1783 } else {
1784 if r := outRegs[0]; r != noRegister {
1785 s.assignReg(r, v, v)
1786 }
1787 }
1788 if tmpReg != noRegister {
1789
1790 if s.f.tempRegs == nil {
1791 s.f.tempRegs = map[ID]*Register{}
1792 }
1793 s.f.tempRegs[v.ID] = &s.registers[tmpReg]
1794 }
1795 }
1796
1797
1798 if opcodeTable[v.Op].resultNotInArgs {
1799 s.nospill = 0
1800 s.advanceUses(v)
1801 }
1802 s.tmpused = 0
1803
1804
1805 for i, a := range args {
1806 v.SetArg(i, a)
1807 }
1808 b.Values = append(b.Values, v)
1809 s.dropIfUnused(v)
1810 }
1811
1812
1813
1814 controls := append(make([]*Value, 0, 2), b.ControlValues()...)
1815
1816
1817 for i, v := range b.ControlValues() {
1818 if !s.values[v.ID].needReg {
1819 continue
1820 }
1821 if s.f.pass.debug > regDebug {
1822 fmt.Printf(" processing control %s\n", v.LongString())
1823 }
1824
1825
1826
1827 b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
1828 }
1829
1830
1831
1832 for _, v := range controls {
1833 vi := &s.values[v.ID]
1834 if !vi.needReg {
1835 continue
1836 }
1837
1838 u := vi.uses
1839 vi.uses = u.next
1840 if u.next == nil {
1841 s.freeRegs(vi.regs)
1842 }
1843 u.next = s.freeUseRecords
1844 s.freeUseRecords = u
1845 }
1846
1847
1848
1849
1850 if len(b.Succs) == 1 {
1851 if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
1852 s.freeReg(s.GReg)
1853 }
1854
1855 top := b.Succs[0].b
1856 loop := s.loopnest.b2l[top.ID]
1857 if loop == nil || loop.header != top || loop.containsUnavoidableCall {
1858 goto badloop
1859 }
1860
1861
1862 for _, live := range s.live[b.ID] {
1863 if live.dist >= unlikelyDistance {
1864
1865 continue
1866 }
1867 vid := live.ID
1868 vi := &s.values[vid]
1869 if vi.regs != 0 {
1870 continue
1871 }
1872 if vi.rematerializeable {
1873 continue
1874 }
1875 v := s.orig[vid]
1876 m := s.compatRegs(v.Type) &^ s.used
1877
1878 outerloop:
1879 for _, e := range desired.entries {
1880 if e.ID != v.ID {
1881 continue
1882 }
1883 for _, r := range e.regs {
1884 if r != noRegister && m>>r&1 != 0 {
1885 m = regMask(1) << r
1886 break outerloop
1887 }
1888 }
1889 }
1890 if m&^desired.avoid != 0 {
1891 m &^= desired.avoid
1892 }
1893 if m != 0 {
1894 s.allocValToReg(v, m, false, b.Pos)
1895 }
1896 }
1897 }
1898 badloop:
1899 ;
1900
1901
1902
1903 k := 0
1904 for r := register(0); r < s.numRegs; r++ {
1905 v := s.regs[r].v
1906 if v == nil {
1907 continue
1908 }
1909 k++
1910 }
1911 regList := make([]endReg, 0, k)
1912 for r := register(0); r < s.numRegs; r++ {
1913 v := s.regs[r].v
1914 if v == nil {
1915 continue
1916 }
1917 regList = append(regList, endReg{r, v, s.regs[r].c})
1918 }
1919 s.endRegs[b.ID] = regList
1920
1921 if checkEnabled {
1922 regValLiveSet.clear()
1923 for _, x := range s.live[b.ID] {
1924 regValLiveSet.add(x.ID)
1925 }
1926 for r := register(0); r < s.numRegs; r++ {
1927 v := s.regs[r].v
1928 if v == nil {
1929 continue
1930 }
1931 if !regValLiveSet.contains(v.ID) {
1932 s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
1933 }
1934 }
1935 }
1936
1937
1938
1939
1940
1941 for _, e := range s.live[b.ID] {
1942 vi := &s.values[e.ID]
1943 if vi.regs != 0 {
1944
1945 continue
1946 }
1947 if vi.rematerializeable {
1948
1949 continue
1950 }
1951 if s.f.pass.debug > regDebug {
1952 fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
1953 }
1954 spill := s.makeSpill(s.orig[e.ID], b)
1955 s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
1956 }
1957
1958
1959
1960
1961 for _, e := range s.live[b.ID] {
1962 u := s.values[e.ID].uses
1963 if u == nil {
1964 f.Fatalf("live at end, no uses v%d", e.ID)
1965 }
1966 if u.next != nil {
1967 f.Fatalf("live at end, too many uses v%d", e.ID)
1968 }
1969 s.values[e.ID].uses = nil
1970 u.next = s.freeUseRecords
1971 s.freeUseRecords = u
1972 }
1973
1974
1975
1976
1977
1978
1979
1980 if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
1981 regs := make([]startReg, 0, c)
1982 for _, sr := range s.startRegs[b.ID] {
1983 if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
1984 continue
1985 }
1986 regs = append(regs, sr)
1987 }
1988 s.startRegs[b.ID] = regs
1989 }
1990 }
1991
1992
1993 s.placeSpills()
1994
1995
1996
1997 stacklive := stackalloc(s.f, s.spillLive)
1998
1999
2000 s.shuffle(stacklive)
2001
2002
2003
2004
2005 for {
2006 progress := false
2007 for c, used := range s.copies {
2008 if !used && c.Uses == 0 {
2009 if s.f.pass.debug > regDebug {
2010 fmt.Printf("delete copied value %s\n", c.LongString())
2011 }
2012 c.resetArgs()
2013 f.freeValue(c)
2014 delete(s.copies, c)
2015 progress = true
2016 }
2017 }
2018 if !progress {
2019 break
2020 }
2021 }
2022
2023 for _, b := range s.visitOrder {
2024 i := 0
2025 for _, v := range b.Values {
2026 if v.Op == OpInvalid {
2027 continue
2028 }
2029 b.Values[i] = v
2030 i++
2031 }
2032 b.Values = b.Values[:i]
2033 }
2034 }
2035
// placeSpills picks a final home block for every pending spill (a spill whose
// Block is still nil) and splices the spill instructions into the block
// schedules. Spills are pushed down the dominator tree toward their restores,
// but never into a deeper loop than where the value was defined.
func (s *regAllocState) placeSpills() {
	// Ops which must stay at the start of their block (before any spills).
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// start maps block IDs to the list of spills
	// that go at the start of the block (but after any phis).
	start := map[ID][]*Value{}
	// after maps value IDs to the list of spills
	// that go immediately after that value ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up,
			// like OpArgs and stack-based phis.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v. At the start, "best" is the best place
		// we have found so far.
		// TODO: find a way to make this O(1) without arbitrary cutoffs.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block
		bestArg := v
		var bestDepth int16
		if l := s.loopnest.b2l[best.ID]; l != nil {
			bestDepth = l.depth
		}
		b := best
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores.
			p := b
			b = nil
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores. Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if l := s.loopnest.b2l[b.ID]; l != nil {
				depth = l.depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after the phis).
			if len(b.Preds) == 1 {
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		// Count the leading ops that must stay first.
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
2158
2159
2160 func (s *regAllocState) shuffle(stacklive [][]ID) {
2161 var e edgeState
2162 e.s = s
2163 e.cache = map[ID][]*Value{}
2164 e.contents = map[Location]contentRecord{}
2165 if s.f.pass.debug > regDebug {
2166 fmt.Printf("shuffle %s\n", s.f.Name)
2167 fmt.Println(s.f.String())
2168 }
2169
2170 for _, b := range s.visitOrder {
2171 if len(b.Preds) <= 1 {
2172 continue
2173 }
2174 e.b = b
2175 for i, edge := range b.Preds {
2176 p := edge.b
2177 e.p = p
2178 e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
2179 e.process()
2180 }
2181 }
2182
2183 if s.f.pass.debug > regDebug {
2184 fmt.Printf("post shuffle %s\n", s.f.Name)
2185 fmt.Println(s.f.String())
2186 }
2187 }
2188
// edgeState holds the working state while fixing up a single edge p->b
// in shuffle: what each location currently holds, and where values still
// need to go.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p->b

	// for each pre-regalloc value, a list of equivalent cached values
	cache      map[ID][]*Value
	cachedVals []ID // keys of the above map, kept for deterministic iteration order

	// map from location to the value it currently contains
	contents map[Location]contentRecord

	// desired destination locations still to be satisfied
	destinations []dstRecord
	extra        []dstRecord // destinations generated while processing (see erase)

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only cached copy of some value
	finalRegs             regMask // registers holding a satisfied final destination
	rematerializeableRegs regMask // registers holding rematerializeable values
}
2209
// contentRecord records what a Location currently holds during edge fixup.
type contentRecord struct {
	vid   ID       // pre-regalloc value ID
	c     *Value   // cached value holding it
	final bool     // this location is a satisfied destination
	pos   src.XPos // source position of use of this value
}
2216
// dstRecord records one destination that edge fixup must satisfy.
type dstRecord struct {
	loc    Location // register or stack slot to fill
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // place to store a reference to the generating instruction (phi arg slot), or nil
	pos    src.XPos // source position of use of this location
}
2223
2224
// setup initializes the edge state for shuffling one edge: srcReg is the
// register state at the end of the predecessor e.p, dstReg the required
// register state at the start of e.b, stacklive the spill slots live out of
// e.p, and idx the index of this edge among e.b's predecessors (used to pick
// the right phi argument).
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state left over from the previous edge.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = 0
	e.uniqueRegs = 0
	e.finalRegs = 0
	e.rematerializeableRegs = 0

	// Live registers can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // source position of the source doesn't matter
	}
	// So can all of the spill locations.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// Spills were placed that only dominate the uses found
			// during the first regalloc pass. The edge fixup code
			// can't use a spill location if the spill doesn't dominate
			// the edge.
			// We are guaranteed that if the spill doesn't dominate this edge,
			// then the value is available in a register (because we called
			// makeSpill for every value not in a register at the start
			// of an edge).
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos)
	}

	// Figure out all the destinations we need.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis in e.b need the arg coming in along this edge (index idx)
	// to end up in the phi's home location.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
2291
2292
// process generates code to move all values to their destinations,
// breaking move cycles with a temporary register when necessary.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations we generated
			// (from erasing final locations).
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress. That means that any
		// remaining unsatisfied moves are in simple cycles.
		// For example, A -> B -> C -> D -> A.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C
		//
		// To break the cycle, we pick an unused register, say R,
		// and put a copy of B there.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C <---- R=copyofB
		// When we resume the outer loop, the A->B move can proceed
		// (B's old value survives in R), and eventually the whole
		// cycle completes.

		// Copy any cycle location to a temp register. This duplicates
		// one of the cycle entries, allowing the just-duplicated value
		// to be overwritten and the cycle to proceed.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
2359
2360
2361
// processDest generates code to put value vid into location loc.
// Returns true if progress was made; false means the destination could not
// be satisfied yet (its current occupant must survive somewhere else first).
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	pos = pos.WithNotStmt()
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the correct place; just mark it final.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// Note: if splice==nil then c will appear dead. This is
		// non-SSA formed code, so be careful after this pass not to run
		// deadcode elimination.
		if _, ok := e.s.copies[occupant.c]; ok {
			// The copy at occupant.c was used to avoid a spill; mark it kept.
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location.
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
		// We can't overwrite the last copy
		// of a value that needs to survive.
		return false
	}

	// Copy from a source of v, register preferred.
	v := e.s.orig[vid]
	var c *Value
	var src Location
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc)
		fmt.Printf("sources of v%d:", vid)
	}
	for _, w := range e.cache[vid] {
		h := e.s.f.getHome(w.ID)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf(" %s:%s", h, w)
		}
		_, isreg := h.(*Register)
		if src == nil || isreg {
			c = w
			src = h
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src)
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)

	// Pre-clobber the destination. This avoids the following situation:
	//   - v is currently held in R0 and stackslot0.
	//   - We want to copy stackslot1 to stackslot0.
	//   - We choose R0 as the temporary register.
	// During the copy, both R0 and stackslot0 are clobbered,
	// losing both copies of v. Oops!
	// Erasing the destination first makes sure we never depend on a
	// copy that is about to be overwritten.
	e.erase(loc)
	var x *Value
	if c == nil || e.s.values[vid].rematerializeable {
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			x = v.copyInto(e.p)
		} else {
			// Rematerialize into a stack slot. Need a free
			// register to accomplish this.
			r := e.findRegFor(v.Type)
			e.erase(r)
			x = v.copyIntoWithXPos(e.p, pos)
			e.set(r, vid, x, false, pos)
			// Make sure we spill with the size of the slot, not the
			// size of x (which might differ).
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit a move from src to dst.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem. Use a temp register.
				r := e.findRegFor(c.Type)
				e.erase(r)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
	}
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}
2481
2482
// set changes the contents of location loc to hold value vid, cached in c,
// and updates all the register bookkeeping masks accordingly.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First cached copy of this value; track the key for iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs |= regMask(1) << uint(r.num)
		if final {
			e.finalRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 1 {
			// This register holds the only copy of the value.
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
		if len(a) == 2 {
			// Value now has a second copy, so its first home (if a
			// register) is no longer unique.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs &^= regMask(1) << uint(t.num)
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs |= regMask(1) << uint(r.num)
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
2517
2518
// erase removes any user of loc, updating the cache and register masks.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Make sure it gets added to the tail of the destination queue
		// so we make progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove c from the list of cached values.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			// Swap-delete: order of cached copies doesn't matter.
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs &^= regMask(1) << uint(r.num)
		if cr.final {
			e.finalRegs &^= regMask(1) << uint(r.num)
		}
		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
	}
	if len(a) == 1 {
		// Exactly one copy remains; if it lives in a register,
		// that register now holds the only copy.
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs |= regMask(1) << uint(r.num)
		}
	}
}
2560
2561
// findRegFor finds a register we can use to make a temp copy of type typ,
// spilling an existing occupant to a stack slot if no register is free.
func (e *edgeState) findRegFor(typ *types.Type) Location {
	// Which registers are possibilities.
	types := &e.s.f.Config.Types
	m := e.s.compatRegs(typ)

	// Pick a register. In priority order:
	// 1) an unused register
	// 2) a non-unique register not holding a final value
	// 3) a non-unique register
	// 4) a register holding a rematerializeable value
	x := m &^ e.usedRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs &^ e.finalRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m &^ e.uniqueRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}
	x = m & e.rematerializeableRegs
	if x != 0 {
		return &e.s.registers[pickReg(x)]
	}

	// No register is available.
	// Pick a register to spill to a temporary stack slot.
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
				if !c.rematerializeable() {
					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
					// Allocate a temp location to spill a register to.
					// The slot's type doesn't need to match c exactly;
					// Int64 is big enough to hold any register.
					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
					// TODO: reuse these slots. They'd need to be erased first.
					e.set(t, vid, x, false, c.Pos)
					if e.s.f.pass.debug > regDebug {
						fmt.Printf(" SPILL %s->%s %s\n", r, t, x.LongString())
					}
				}
				// r will now be overwritten by the caller. At some point
				// later, the newly saved value will be moved back to its
				// old home.
				return r
			}
		}
	}

	// Can't happen if the masks are consistent; dump state for debugging.
	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
	for _, vid := range e.cachedVals {
		a := e.cache[vid]
		for _, c := range a {
			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
		}
	}
	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
	return nil
}
2625
2626
2627
2628 func (v *Value) rematerializeable() bool {
2629 if !opcodeTable[v.Op].rematerializeable {
2630 return false
2631 }
2632 for _, a := range v.Args {
2633
2634 if a.Op != OpSP && a.Op != OpSB {
2635 return false
2636 }
2637 }
2638 return true
2639 }
2640
// liveInfo records one value live at the end of a block.
type liveInfo struct {
	ID   ID       // ID of the value
	dist int32    // # of instructions before its next use
	pos  src.XPos // source position of its next use
}
2646
2647
2648
2649
2650
2651
2652
2653
2654
// computeLive computes, for each block, the list of value IDs live at the end
// of that block, together with a count of how many instructions until the
// next use of each value. The result is stored in s.live. It also computes
// the desired-register information at the end of each block, stored in
// s.desired. Both are computed as a backward dataflow fixed point over the
// CFG (iterating until no live set changes).
func (s *regAllocState) computeLive() {
	f := s.f
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	var phis []*Value

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	// Keep track of which value we want in each register.
	var desired desiredState

	// Instead of iterating over f.Blocks, iterate over their postordering.
	// Liveness information flows backward, so starting at the end
	// increases the probability that we will stabilize quickly.
	po := f.postorder()
	s.loopnest = f.loopnest()
	s.loopnest.calculateDepths()
	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			// Add len(b.Values) to adjust from end-of-block distance
			// to beginning-of-block distance.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
			}

			// Mark control values as live.
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Propagate backwards to the start of the block.
			// Assumes Values have been scheduled.
			phis = phis[:0]
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					// Save phi ops for later.
					phis = append(phis, v)
					continue
				}
				if opcodeTable[v.Op].call {
					// Penalize anything live across a call: it will
					// have to be spilled and restored.
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}
			// Propagate desired registers backwards.
			desired.copy(&s.desired[b.ID])
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if v.Op == OpPhi {
					// TODO: if v is a phi, save desired register for phi inputs.
					// For now, we just drop it and don't propagate
					// desired registers back through phi nodes.
					continue
				}
				regspec := s.regspec(v)
				// Cancel desired registers if they get clobbered.
				desired.clobber(regspec.clobbers)
				// Update desired registers if there are any fixed register inputs.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Set desired register of input 0 if this is a 2-operand instruction.
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					// ADDQconst is treated as resultInArg0 here for the
					// purposes of desired registers, even though it is not
					// an absolute requirement (we'd rather implement it as
					// ADDQ instead of LEAQ). Same for ADDLconst.
					// Select0 propagates the desired register back to the
					// tuple-generating instruction.
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}

			// For each predecessor of b, expand its list of live-at-end values.
			// invariant: live contains the values live at the start of b
			// (excluding phi inputs).
			for i, e := range b.Preds {
				p := e.b
				// Compute additional distance for the edge.
				// Note: delta must be at least 1 to distinguish the control
				// value use from the first value use in a block.
				delta := int32(normalDistance)
				if len(p.Succs) == 2 {
					if p.Succs[0].b == b && p.Likely == BranchLikely ||
						p.Succs[1].b == b && p.Likely == BranchUnlikely {
						delta = likelyDistance
					}
					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
						p.Succs[1].b == b && p.Likely == BranchLikely {
						delta = unlikelyDistance
					}
				}

				// Update any desired registers at the end of p.
				s.desired[p.ID].merge(&desired)

				// Start t off with the previously known live values at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values from scanning this block,
				// keeping the minimum distance for each.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}
				// Also add the correct phi arg from the saved phi values.
				// All phis are considered to happen simultaneously at the
				// start of the block, at distance delta.
				for _, v := range phis {
					id := v.Args[i].ID
					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
						update = true
						t.set(id, delta, v.Pos)
					}
				}

				if !update {
					continue
				}
				// The live set of p has changed; record it and iterate again.
				l := s.live[p.ID][:0]
				if cap(l) < t.size() {
					l = make([]liveInfo, 0, t.size())
				}
				for _, e := range t.contents() {
					l = append(l, liveInfo{e.key, e.val, e.pos})
				}
				s.live[p.ID] = l
				changed = true
			}
		}

		if !changed {
			break
		}
	}
	if f.pass.debug > regDebug {
		fmt.Println("live values at end of each block")
		for _, b := range f.Blocks {
			fmt.Printf(" %s:", b)
			for _, x := range s.live[b.ID] {
				fmt.Printf(" v%d(%d)", x.ID, x.dist)
				for _, e := range s.desired[b.ID].entries {
					if e.ID != x.ID {
						continue
					}
					fmt.Printf("[")
					first := true
					for _, r := range e.regs {
						if r == noRegister {
							continue
						}
						if !first {
							fmt.Printf(",")
						}
						fmt.Print(&s.registers[r])
						first = false
					}
					fmt.Printf("]")
				}
			}
			if avoid := s.desired[b.ID].avoid; avoid != 0 {
				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
			}
			fmt.Println()
		}
	}
}
2859
2860
// A desiredState represents desired register assignments.
type desiredState struct {
	// Desired assignments will be small, so we just use a list
	// of valueID+registers entries.
	entries []desiredStateEntry
	// Registers that other values want to be in. This mask contains
	// at least the union of the regs fields of entries, but may
	// contain additional bits for values that were once in this data
	// structure but are no longer.
	avoid regMask
}

// desiredStateEntry records the registers a single value would like to be in.
type desiredStateEntry struct {
	// (pre-regalloc) value ID
	ID ID
	// Registers it would like to be in, in priority order.
	// Unused slots are filled with noRegister.
	regs [4]register
}
2880
// clear removes all desired-register information, resetting d to empty
// (the entries slice keeps its capacity for reuse).
func (d *desiredState) clear() {
	d.entries = d.entries[:0]
	d.avoid = 0
}
2885
2886
2887 func (d *desiredState) get(vid ID) [4]register {
2888 for _, e := range d.entries {
2889 if e.ID == vid {
2890 return e.regs
2891 }
2892 }
2893 return [4]register{noRegister, noRegister, noRegister, noRegister}
2894 }
2895
2896
// add records that we'd like value vid to be in register r.
// The per-value register list is kept in priority order, with the most
// recently added register first (shifting the others down and dropping
// the last if the list is full).
func (d *desiredState) add(vid ID, r register) {
	d.avoid |= regMask(1) << r
	for i := range d.entries {
		e := &d.entries[i]
		if e.ID != vid {
			continue
		}
		if e.regs[0] == r {
			// Already known and highest priority.
			return
		}
		for j := 1; j < len(e.regs); j++ {
			if e.regs[j] == r {
				// Move from lower priority to top priority.
				copy(e.regs[1:], e.regs[:j])
				e.regs[0] = r
				return
			}
		}
		// Not present: shift everything down and insert at the front.
		copy(e.regs[1:], e.regs[:])
		e.regs[0] = r
		return
	}
	// No entry for vid yet; create one.
	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
}
2922
2923 func (d *desiredState) addList(vid ID, regs [4]register) {
2924
2925 for i := len(regs) - 1; i >= 0; i-- {
2926 r := regs[i]
2927 if r != noRegister {
2928 d.add(vid, r)
2929 }
2930 }
2931 }
2932
2933
// clobber erases any desired registers in the set m, compacting each
// entry's register list in place and dropping entries that become empty.
func (d *desiredState) clobber(m regMask) {
	for i := 0; i < len(d.entries); {
		e := &d.entries[i]
		// Keep only the registers not in m, preserving priority order.
		j := 0
		for _, r := range e.regs {
			if r != noRegister && m>>r&1 == 0 {
				e.regs[j] = r
				j++
			}
		}
		if j == 0 {
			// No more desired registers for this value:
			// swap-delete the entry (don't advance i).
			d.entries[i] = d.entries[len(d.entries)-1]
			d.entries = d.entries[:len(d.entries)-1]
			continue
		}
		// Pad the tail with noRegister.
		for ; j < len(e.regs); j++ {
			e.regs[j] = noRegister
		}
		i++
	}
	d.avoid &^= m
}
2957
2958
// copy makes d a copy of the desired state x, reusing d's entry storage.
func (d *desiredState) copy(x *desiredState) {
	d.entries = append(d.entries[:0], x.entries...)
	d.avoid = x.avoid
}
2963
2964
2965 func (d *desiredState) remove(vid ID) [4]register {
2966 for i := range d.entries {
2967 if d.entries[i].ID == vid {
2968 regs := d.entries[i].regs
2969 d.entries[i] = d.entries[len(d.entries)-1]
2970 d.entries = d.entries[:len(d.entries)-1]
2971 return regs
2972 }
2973 }
2974 return [4]register{noRegister, noRegister, noRegister, noRegister}
2975 }
2976
2977
2978 func (d *desiredState) merge(x *desiredState) {
2979 d.avoid |= x.avoid
2980
2981
2982 for _, e := range x.entries {
2983 d.addList(e.ID, e.regs)
2984 }
2985 }
2986