Source file src/cmd/compile/internal/ssa/regalloc.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Register allocation.
     6  //
     7  // We use a version of a linear scan register allocator. We treat the
     8  // whole function as a single long basic block and run through
     9  // it using a greedy register allocator. Then all merge edges
    10  // (those targeting a block with len(Preds)>1) are processed to
    11  // shuffle data into the place that the target of the edge expects.
    12  //
    13  // The greedy allocator moves values into registers just before they
    14  // are used, spills registers only when necessary, and spills the
    15  // value whose next use is farthest in the future.
    16  //
    17  // The register allocator requires that a block is not scheduled until
     18  // at least one of its predecessors has been scheduled. The most recent
    19  // such predecessor provides the starting register state for a block.
    20  //
    21  // It also requires that there are no critical edges (critical =
    22  // comes from a block with >1 successor and goes to a block with >1
    23  // predecessor).  This makes it easy to add fixup code on merge edges -
    24  // the source of a merge edge has only one successor, so we can add
    25  // fixup code to the end of that block.
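         //
         // For example (illustrative, with arbitrary register names): suppose merge
         // block b3 expects value v in AX, the edge from b1 already has v in AX, but
         // the edge from b2 has it in BX. Because b2 has only one successor, the
         // shuffle pass can simply append a fixup copy to the end of b2:
         //
         //     b1: v = ... : AX         b2: v = ... : BX
         //         goto b3                  v2 = Copy v : AX
         //                                  goto b3
         //     b3: ... use v, expecting it in AX ...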
    26  
    27  // Spilling
    28  //
    29  // During the normal course of the allocator, we might throw a still-live
    30  // value out of all registers. When that value is subsequently used, we must
    31  // load it from a slot on the stack. We must also issue an instruction to
    32  // initialize that stack location with a copy of v.
    33  //
    34  // pre-regalloc:
    35  //   (1) v = Op ...
    36  //   (2) x = Op ...
    37  //   (3) ... = Op v ...
    38  //
    39  // post-regalloc:
    40  //   (1) v = Op ...    : AX // computes v, store result in AX
    41  //       s = StoreReg v     // spill v to a stack slot
    42  //   (2) x = Op ...    : AX // some other op uses AX
    43  //       c = LoadReg s : CX // restore v from stack slot
    44  //   (3) ... = Op c ...     // use the restored value
    45  //
    46  // Allocation occurs normally until we reach (3) and we realize we have
    47  // a use of v and it isn't in any register. At that point, we allocate
    48  // a spill (a StoreReg) for v. We can't determine the correct place for
    49  // the spill at this point, so we allocate the spill as blockless initially.
    50  // The restore is then generated to load v back into a register so it can
    51  // be used. Subsequent uses of v will use the restored value c instead.
    52  //
    53  // What remains is the question of where to schedule the spill.
    54  // During allocation, we keep track of the dominator of all restores of v.
    55  // The spill of v must dominate that block. The spill must also be issued at
    56  // a point where v is still in a register.
    57  //
    58  // To find the right place, start at b, the block which dominates all restores.
    59  //  - If b is v.Block, then issue the spill right after v.
    60  //    It is known to be in a register at that point, and dominates any restores.
    61  //  - Otherwise, if v is in a register at the start of b,
    62  //    put the spill of v at the start of b.
    63  //  - Otherwise, set b = immediate dominator of b, and repeat.
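         //
         // A rough sketch of that walk in Go-like pseudocode (the helper names here
         // are illustrative; the actual spill placement code later in this file
         // handles additional details):
         //
         //     b := dominatorOfAllRestores(v)
         //     for {
         //         if b == v.Block {
         //             // place the spill immediately after v
         //             break
         //         }
         //         if vInRegisterAtStartOf(b) {
         //             // place the spill at the start of b
         //             break
         //         }
         //         b = immediateDominator(b)
         //     }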
    64  //
    65  // Phi values are special, as always. We define two kinds of phis, those
    66  // where the merge happens in a register (a "register" phi) and those where
    67  // the merge happens in a stack location (a "stack" phi).
    68  //
    69  // A register phi must have the phi and all of its inputs allocated to the
    70  // same register. Register phis are spilled similarly to regular ops.
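         //
         // For example (illustrative):
         //     b1: y = ... : AX         b2: z = ... : AX
         //         goto b3                  goto b3
         //     b3: x = phi(y, z) : AX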
    71  //
    72  // A stack phi must have the phi and all of its inputs allocated to the same
    73  // stack location. Stack phis start out life already spilled - each phi
    74  // input must be a store (using StoreReg) at the end of the corresponding
    75  // predecessor block.
    76  //     b1: y = ... : AX        b2: z = ... : BX
    77  //         y2 = StoreReg y         z2 = StoreReg z
    78  //         goto b3                 goto b3
    79  //     b3: x = phi(y2, z2)
    80  // The stack allocator knows that StoreReg args of stack-allocated phis
    81  // must be allocated to the same stack slot as the phi that uses them.
    82  // x is now a spilled value and a restore must appear before its first use.
    83  
    84  // TODO
    85  
    86  // Use an affinity graph to mark two values which should use the
    87  // same register. This affinity graph will be used to prefer certain
    88  // registers for allocation. This affinity helps eliminate moves that
    89  // are required for phi implementations and helps generate allocations
    90  // for 2-register architectures.
    91  
    92  // Note: regalloc generates a not-quite-SSA output. If we have:
    93  //
    94  //             b1: x = ... : AX
    95  //                 x2 = StoreReg x
    96  //                 ... AX gets reused for something else ...
    97  //                 if ... goto b3 else b4
    98  //
    99  //   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
   100  //       ... use x3 ...                 ... use x4 ...
   101  //
   102  //             b2: ... use x3 ...
   103  //
   104  // If b3 is the primary predecessor of b2, then we use x3 in b2 and
    105  // add an x4:CX->BX copy at the end of b4.
   106  // But the definition of x3 doesn't dominate b2.  We should really
   107  // insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
   108  // SSA form. For now, we ignore this problem as remaining in strict
   109  // SSA form isn't needed after regalloc. We'll just leave the use
   110  // of x3 not dominated by the definition of x3, and the CX->BX copy
   111  // will have no use (so don't run deadcode after regalloc!).
   112  // TODO: maybe we should introduce these extra phis?
   113  
   114  package ssa
   115  
   116  import (
   117  	"cmd/compile/internal/base"
   118  	"cmd/compile/internal/ir"
   119  	"cmd/compile/internal/types"
   120  	"cmd/internal/src"
   121  	"cmd/internal/sys"
   122  	"fmt"
   123  	"internal/buildcfg"
   124  	"math"
   125  	"math/bits"
   126  	"unsafe"
   127  )
   128  
   129  const (
   130  	moveSpills = iota
   131  	logSpills
   132  	regDebug
   133  	stackDebug
   134  )
   135  
   136  // distance is a measure of how far into the future values are used.
   137  // distance is measured in units of instructions.
   138  const (
   139  	likelyDistance   = 1
   140  	normalDistance   = 10
   141  	unlikelyDistance = 100
   142  )
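
         // For example (illustrative): a use reached only through an unlikely branch
         // is charged roughly 100 instructions of extra distance for crossing that
         // edge, so the value holding it looks like a much better spill candidate
         // than one whose next use lies a few instructions ahead on a likely path.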
   143  
   144  // regalloc performs register allocation on f. It sets f.RegAlloc
   145  // to the resulting allocation.
   146  func regalloc(f *Func) {
   147  	var s regAllocState
   148  	s.init(f)
   149  	s.regalloc(f)
   150  	s.close()
   151  }
   152  
   153  type register uint8
   154  
   155  const noRegister register = 255
   156  
   157  // For bulk initializing
   158  var noRegisters [32]register = [32]register{
   159  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   160  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   161  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   162  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   163  }
   164  
   165  // A regMask encodes a set of machine registers.
   166  // TODO: regMask -> regSet?
   167  type regMask uint64
   168  
   169  func (m regMask) String() string {
   170  	s := ""
   171  	for r := register(0); m != 0; r++ {
   172  		if m>>r&1 == 0 {
   173  			continue
   174  		}
   175  		m &^= regMask(1) << r
   176  		if s != "" {
   177  			s += " "
   178  		}
   179  		s += fmt.Sprintf("r%d", r)
   180  	}
   181  	return s
   182  }
   183  
   184  func (m regMask) contains(r register) bool {
   185  	return m>>r&1 != 0
   186  }
   187  
   188  func (s *regAllocState) RegMaskString(m regMask) string {
   189  	str := ""
   190  	for r := register(0); m != 0; r++ {
   191  		if m>>r&1 == 0 {
   192  			continue
   193  		}
   194  		m &^= regMask(1) << r
   195  		if str != "" {
   196  			str += " "
   197  		}
   198  		str += s.registers[r].String()
   199  	}
   200  	return str
   201  }
   202  
   203  // countRegs returns the number of set bits in the register mask.
   204  func countRegs(r regMask) int {
   205  	return bits.OnesCount64(uint64(r))
   206  }
   207  
   208  // pickReg picks an arbitrary register from the register mask.
   209  func pickReg(r regMask) register {
   210  	if r == 0 {
   211  		panic("can't pick a register from an empty set")
   212  	}
   213  	// pick the lowest one
   214  	return register(bits.TrailingZeros64(uint64(r)))
   215  }
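
         // exampleDrainRegMask is purely illustrative and is not called by the
         // allocator. It shows the usual pattern for iterating a regMask in
         // ascending register order with pickReg (a loop shape similar to freeRegs
         // and RegMaskString above): for m = 0b1010 it returns [1 3].
         func exampleDrainRegMask(m regMask) []register {
         	var rs []register
         	for m != 0 {
         		r := pickReg(m) // lowest set register in m
         		rs = append(rs, r)
         		m &^= regMask(1) << r // clear that bit and continue
         	}
         	return rs
         }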
   216  
   217  type use struct {
   218  	// distance from start of the block to a use of a value
   219  	//   dist == 0                 used by first instruction in block
   220  	//   dist == len(b.Values)-1   used by last instruction in block
   221  	//   dist == len(b.Values)     used by block's control value
   222  	//   dist  > len(b.Values)     used by a subsequent block
   223  	dist int32
   224  	pos  src.XPos // source position of the use
   225  	next *use     // linked list of uses of a value in nondecreasing dist order
   226  }
   227  
   228  // A valState records the register allocation state for a (pre-regalloc) value.
   229  type valState struct {
   230  	regs              regMask // the set of registers holding a Value (usually just one)
   231  	uses              *use    // list of uses in this block
   232  	spill             *Value  // spilled copy of the Value (if any)
   233  	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
   234  	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
    235  	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
   236  	rematerializeable bool    // cached value of v.rematerializeable()
   237  }
   238  
   239  type regState struct {
   240  	v *Value // Original (preregalloc) Value stored in this register.
   241  	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
   242  	// If a register is unused, v==c==nil
   243  }
   244  
   245  type regAllocState struct {
   246  	f *Func
   247  
   248  	sdom        SparseTree
   249  	registers   []Register
   250  	numRegs     register
   251  	SPReg       register
   252  	SBReg       register
   253  	GReg        register
   254  	ZeroIntReg  register
   255  	allocatable regMask
   256  
   257  	// live values at the end of each block.  live[b.ID] is a list of value IDs
   258  	// which are live at the end of b, together with a count of how many instructions
   259  	// forward to the next use.
   260  	live [][]liveInfo
   261  	// desired register assignments at the end of each block.
   262  	// Note that this is a static map computed before allocation occurs. Dynamic
   263  	// register desires (from partially completed allocations) will trump
   264  	// this information.
   265  	desired []desiredState
   266  
   267  	// current state of each (preregalloc) Value
   268  	values []valState
   269  
   270  	// ID of SP, SB values
   271  	sp, sb ID
   272  
   273  	// For each Value, map from its value ID back to the
   274  	// preregalloc Value it was derived from.
   275  	orig []*Value
   276  
   277  	// current state of each register.
   278  	// Includes only registers in allocatable.
   279  	regs []regState
   280  
   281  	// registers that contain values which can't be kicked out
   282  	nospill regMask
   283  
   284  	// mask of registers currently in use
   285  	used regMask
   286  
   287  	// mask of registers used since the start of the current block
   288  	usedSinceBlockStart regMask
   289  
   290  	// mask of registers used in the current instruction
   291  	tmpused regMask
   292  
   293  	// current block we're working on
   294  	curBlock *Block
   295  
   296  	// cache of use records
   297  	freeUseRecords *use
   298  
   299  	// endRegs[blockid] is the register state at the end of each block.
   300  	// encoded as a set of endReg records.
   301  	endRegs [][]endReg
   302  
   303  	// startRegs[blockid] is the register state at the start of merge blocks.
   304  	// saved state does not include the state of phi ops in the block.
   305  	startRegs [][]startReg
   306  
   307  	// startRegsMask is a mask of the registers in startRegs[curBlock.ID].
    308  	// Registers dropped from startRegsMask are later synchronized back to
   309  	// startRegs by dropping from there as well.
   310  	startRegsMask regMask
   311  
   312  	// spillLive[blockid] is the set of live spills at the end of each block
   313  	spillLive [][]ID
   314  
   315  	// a set of copies we generated to move things around, and
   316  	// whether it is used in shuffle. Unused copies will be deleted.
   317  	copies map[*Value]bool
   318  
   319  	loopnest *loopnest
   320  
   321  	// choose a good order in which to visit blocks for allocation purposes.
   322  	visitOrder []*Block
   323  
   324  	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
   325  	blockOrder []int32
   326  
   327  	// whether to insert instructions that clobber dead registers at call sites
   328  	doClobber bool
   329  
   330  	// For each instruction index in a basic block, the index of the next call
   331  	// at or after that instruction index.
    332  	// If there is no next call, the entry is math.MaxInt32.
   333  	// nextCall for a call instruction points to itself.
   334  	// (Indexes and results are pre-regalloc.)
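         	// For example, if a block's values are [v1, call1, v2, call2, v3]
         	// (indices 0 through 4), nextCall for that block is [1, 1, 3, 3, math.MaxInt32].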
   335  	nextCall []int32
   336  
   337  	// Index of the instruction we're currently working on.
   338  	// Index is expressed in terms of the pre-regalloc b.Values list.
   339  	curIdx int
   340  }
   341  
   342  type endReg struct {
   343  	r register
   344  	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
   345  	c *Value // cached version of the value
   346  }
   347  
   348  type startReg struct {
   349  	r   register
   350  	v   *Value   // pre-regalloc value needed in this register
   351  	c   *Value   // cached version of the value
   352  	pos src.XPos // source position of use of this register
   353  }
   354  
   355  // freeReg frees up register r. Any current user of r is kicked out.
   356  func (s *regAllocState) freeReg(r register) {
   357  	if !s.allocatable.contains(r) && !s.isGReg(r) {
   358  		return
   359  	}
   360  	v := s.regs[r].v
   361  	if v == nil {
   362  		s.f.Fatalf("tried to free an already free register %d\n", r)
   363  	}
   364  
   365  	// Mark r as unused.
   366  	if s.f.pass.debug > regDebug {
   367  		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
   368  	}
   369  	s.regs[r] = regState{}
   370  	s.values[v.ID].regs &^= regMask(1) << r
   371  	s.used &^= regMask(1) << r
   372  }
   373  
   374  // freeRegs frees up all registers listed in m.
   375  func (s *regAllocState) freeRegs(m regMask) {
   376  	for m&s.used != 0 {
   377  		s.freeReg(pickReg(m & s.used))
   378  	}
   379  }
   380  
   381  // clobberRegs inserts instructions that clobber registers listed in m.
   382  func (s *regAllocState) clobberRegs(m regMask) {
    383  	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers can contain pointers, so only clobber those
   384  	for m != 0 {
   385  		r := pickReg(m)
   386  		m &^= 1 << r
   387  		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
   388  		s.f.setHome(x, &s.registers[r])
   389  	}
   390  }
   391  
   392  // setOrig records that c's original value is the same as
   393  // v's original value.
   394  func (s *regAllocState) setOrig(c *Value, v *Value) {
   395  	if int(c.ID) >= cap(s.orig) {
   396  		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
   397  		copy(x, s.orig)
   398  		s.f.Cache.freeValueSlice(s.orig)
   399  		s.orig = x
   400  	}
   401  	for int(c.ID) >= len(s.orig) {
   402  		s.orig = append(s.orig, nil)
   403  	}
   404  	if s.orig[c.ID] != nil {
   405  		s.f.Fatalf("orig value set twice %s %s", c, v)
   406  	}
   407  	s.orig[c.ID] = s.orig[v.ID]
   408  }
   409  
   410  // assignReg assigns register r to hold c, a copy of v.
   411  // r must be unused.
   412  func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
   413  	if s.f.pass.debug > regDebug {
   414  		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
   415  	}
   416  	// Allocate v to r.
   417  	s.values[v.ID].regs |= regMask(1) << r
   418  	s.f.setHome(c, &s.registers[r])
   419  
   420  	// Allocate r to v.
   421  	if !s.allocatable.contains(r) && !s.isGReg(r) {
   422  		return
   423  	}
   424  	if s.regs[r].v != nil {
   425  		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
   426  	}
   427  	s.regs[r] = regState{v, c}
   428  	s.used |= regMask(1) << r
   429  }
   430  
   431  // allocReg chooses a register from the set of registers in mask.
   432  // If there is no unused register, a Value will be kicked out of
   433  // a register to make room.
   434  func (s *regAllocState) allocReg(mask regMask, v *Value) register {
   435  	if v.OnWasmStack {
   436  		return noRegister
   437  	}
   438  
   439  	mask &= s.allocatable
   440  	mask &^= s.nospill
   441  	if mask == 0 {
   442  		s.f.Fatalf("no register available for %s", v.LongString())
   443  	}
   444  
   445  	// Pick an unused register if one is available.
   446  	if mask&^s.used != 0 {
   447  		r := pickReg(mask &^ s.used)
   448  		s.usedSinceBlockStart |= regMask(1) << r
   449  		return r
   450  	}
   451  
   452  	// Pick a value to spill. Spill the value with the
   453  	// farthest-in-the-future use.
   454  	// TODO: Prefer registers with already spilled Values?
   455  	// TODO: Modify preference using affinity graph.
   456  	// TODO: if a single value is in multiple registers, spill one of them
   457  	// before spilling a value in just a single register.
   458  
   459  	// Find a register to spill. We spill the register containing the value
   460  	// whose next use is as far in the future as possible.
   461  	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
   462  	var r register
   463  	maxuse := int32(-1)
   464  	for t := register(0); t < s.numRegs; t++ {
   465  		if mask>>t&1 == 0 {
   466  			continue
   467  		}
   468  		v := s.regs[t].v
   469  		if n := s.values[v.ID].uses.dist; n > maxuse {
   470  			// v's next use is farther in the future than any value
   471  			// we've seen so far. A new best spill candidate.
   472  			r = t
   473  			maxuse = n
   474  		}
   475  	}
   476  	if maxuse == -1 {
   477  		s.f.Fatalf("couldn't find register to spill")
   478  	}
   479  
   480  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   481  		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
   482  		// So if there is still a free register, the allocation should have picked that one in the first place instead of
   483  		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
   484  		s.freeReg(r)
   485  		return r
   486  	}
   487  
   488  	// Try to move it around before kicking out, if there is a free register.
   489  	// We generate a Copy and record it. It will be deleted if never used.
   490  	v2 := s.regs[r].v
   491  	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
   492  	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
   493  		s.usedSinceBlockStart |= regMask(1) << r
   494  		r2 := pickReg(m)
   495  		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
   496  		s.copies[c] = false
   497  		if s.f.pass.debug > regDebug {
   498  			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
   499  		}
   500  		s.setOrig(c, v2)
   501  		s.assignReg(r2, v2, c)
   502  	}
   503  
   504  	// If the evicted register isn't used between the start of the block
   505  	// and now then there is no reason to even request it on entry. We can
   506  	// drop from startRegs in that case.
   507  	if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
    508  		if s.startRegsMask&(regMask(1)<<r) != 0 {
   509  			if s.f.pass.debug > regDebug {
   510  				fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
   511  			}
   512  			s.startRegsMask &^= regMask(1) << r
   513  		}
   514  	}
   515  
   516  	s.freeReg(r)
   517  	s.usedSinceBlockStart |= regMask(1) << r
   518  	return r
   519  }
   520  
   521  // makeSpill returns a Value which represents the spilled value of v.
   522  // b is the block in which the spill is used.
   523  func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
   524  	vi := &s.values[v.ID]
   525  	if vi.spill != nil {
   526  		// Final block not known - keep track of subtree where restores reside.
   527  		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
   528  		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
   529  		return vi.spill
   530  	}
   531  	// Make a spill for v. We don't know where we want
   532  	// to put it yet, so we leave it blockless for now.
   533  	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
   534  	// We also don't know what the spill's arg will be.
   535  	// Leave it argless for now.
   536  	s.setOrig(spill, v)
   537  	vi.spill = spill
   538  	vi.restoreMin = s.sdom[b.ID].entry
   539  	vi.restoreMax = s.sdom[b.ID].exit
   540  	return spill
   541  }
   542  
   543  // allocValToReg allocates v to a register selected from regMask and
   544  // returns the register copy of v. Any previous user is kicked out and spilled
   545  // (if necessary). Load code is added at the current pc. If nospill is set the
   546  // allocated register is marked nospill so the assignment cannot be
   547  // undone until the caller allows it by clearing nospill. Returns a
   548  // *Value which is either v or a copy of v allocated to the chosen register.
   549  func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
   550  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
   551  		c := v.copyIntoWithXPos(s.curBlock, pos)
   552  		c.OnWasmStack = true
   553  		s.setOrig(c, v)
   554  		return c
   555  	}
   556  	if v.OnWasmStack {
   557  		return v
   558  	}
   559  
   560  	vi := &s.values[v.ID]
   561  	pos = pos.WithNotStmt()
   562  	// Check if v is already in a requested register.
   563  	if mask&vi.regs != 0 {
   564  		mask &= vi.regs
   565  		r := pickReg(mask)
   566  		if mask.contains(s.SPReg) {
   567  			// Prefer the stack pointer if it is allowed.
   568  			// (Needed because the op might have an Aux symbol
   569  			// that needs SP as its base.)
   570  			r = s.SPReg
   571  		}
   572  		if !s.allocatable.contains(r) {
   573  			return v // v is in a fixed register
   574  		}
   575  		if s.regs[r].v != v || s.regs[r].c == nil {
   576  			panic("bad register state")
   577  		}
   578  		if nospill {
   579  			s.nospill |= regMask(1) << r
   580  		}
   581  		s.usedSinceBlockStart |= regMask(1) << r
   582  		return s.regs[r].c
   583  	}
   584  
   585  	var r register
   586  	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
   587  	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
   588  	if !onWasmStack {
   589  		// Allocate a register.
   590  		r = s.allocReg(mask, v)
   591  	}
   592  
   593  	// Allocate v to the new register.
   594  	var c *Value
   595  	if vi.regs != 0 {
   596  		// Copy from a register that v is already in.
   597  		r2 := pickReg(vi.regs)
   598  		var current *Value
   599  		if !s.allocatable.contains(r2) {
   600  			current = v // v is in a fixed register
   601  		} else {
   602  			if s.regs[r2].v != v {
   603  				panic("bad register state")
   604  			}
   605  			current = s.regs[r2].c
   606  		}
   607  		s.usedSinceBlockStart |= regMask(1) << r2
   608  		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
   609  	} else if v.rematerializeable() {
   610  		// Rematerialize instead of loading from the spill location.
   611  		c = v.copyIntoWithXPos(s.curBlock, pos)
   612  	} else {
   613  		// Load v from its spill location.
   614  		spill := s.makeSpill(v, s.curBlock)
   615  		if s.f.pass.debug > logSpills {
   616  			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
   617  		}
   618  		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
   619  	}
   620  
   621  	s.setOrig(c, v)
   622  
   623  	if onWasmStack {
   624  		c.OnWasmStack = true
   625  		return c
   626  	}
   627  
   628  	s.assignReg(r, v, c)
   629  	if c.Op == OpLoadReg && s.isGReg(r) {
   630  		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
   631  	}
   632  	if nospill {
   633  		s.nospill |= regMask(1) << r
   634  	}
   635  	return c
   636  }
   637  
    638  // isLeaf reports whether f is a leaf function: it performs no calls other than tail calls.
   639  func isLeaf(f *Func) bool {
   640  	for _, b := range f.Blocks {
   641  		for _, v := range b.Values {
   642  			if v.Op.IsCall() && !v.Op.IsTailCall() {
   643  				// tail call is not counted as it does not save the return PC or need a frame
   644  				return false
   645  			}
   646  		}
   647  	}
   648  	return true
   649  }
   650  
   651  // needRegister reports whether v needs a register.
   652  func (v *Value) needRegister() bool {
   653  	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
   654  }
   655  
   656  func (s *regAllocState) init(f *Func) {
   657  	s.f = f
   658  	s.f.RegAlloc = s.f.Cache.locs[:0]
   659  	s.registers = f.Config.registers
   660  	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
   661  		s.f.Fatalf("bad number of registers: %d", nr)
   662  	} else {
   663  		s.numRegs = register(nr)
   664  	}
   665  	// Locate SP, SB, and g registers.
   666  	s.SPReg = noRegister
   667  	s.SBReg = noRegister
   668  	s.GReg = noRegister
   669  	s.ZeroIntReg = noRegister
   670  	for r := register(0); r < s.numRegs; r++ {
   671  		switch s.registers[r].String() {
   672  		case "SP":
   673  			s.SPReg = r
   674  		case "SB":
   675  			s.SBReg = r
   676  		case "g":
   677  			s.GReg = r
   678  		case "ZERO": // TODO: arch-specific?
   679  			s.ZeroIntReg = r
   680  		}
   681  	}
   682  	// Make sure we found all required registers.
   683  	switch noRegister {
   684  	case s.SPReg:
   685  		s.f.Fatalf("no SP register found")
   686  	case s.SBReg:
   687  		s.f.Fatalf("no SB register found")
   688  	case s.GReg:
   689  		if f.Config.hasGReg {
   690  			s.f.Fatalf("no g register found")
   691  		}
   692  	}
   693  
   694  	// Figure out which registers we're allowed to use.
   695  	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
   696  	s.allocatable &^= 1 << s.SPReg
   697  	s.allocatable &^= 1 << s.SBReg
   698  	if s.f.Config.hasGReg {
   699  		s.allocatable &^= 1 << s.GReg
   700  	}
   701  	if s.ZeroIntReg != noRegister {
   702  		s.allocatable &^= 1 << s.ZeroIntReg
   703  	}
   704  	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
   705  		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
   706  	}
   707  	if s.f.Config.LinkReg != -1 {
   708  		if isLeaf(f) {
   709  			// Leaf functions don't save/restore the link register.
   710  			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
   711  		}
   712  	}
   713  	if s.f.Config.ctxt.Flag_dynlink {
   714  		switch s.f.Config.arch {
   715  		case "386":
   716  			// nothing to do.
   717  			// Note that for Flag_shared (position independent code)
   718  			// we do need to be careful, but that carefulness is hidden
   719  			// in the rewrite rules so we always have a free register
   720  			// available for global load/stores. See _gen/386.rules (search for Flag_shared).
   721  		case "amd64":
   722  			s.allocatable &^= 1 << 15 // R15
   723  		case "arm":
   724  			s.allocatable &^= 1 << 9 // R9
   725  		case "arm64":
   726  			// nothing to do
   727  		case "loong64": // R2 (aka TP) already reserved.
   728  			// nothing to do
   729  		case "ppc64le": // R2 already reserved.
   730  			// nothing to do
   731  		case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved.
   732  			// nothing to do
   733  		case "s390x":
   734  			s.allocatable &^= 1 << 11 // R11
   735  		default:
   736  			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
   737  		}
   738  	}
   739  
   740  	// Linear scan register allocation can be influenced by the order in which blocks appear.
   741  	// Decouple the register allocation order from the generated block order.
   742  	// This also creates an opportunity for experiments to find a better order.
   743  	s.visitOrder = layoutRegallocOrder(f)
   744  
   745  	// Compute block order. This array allows us to distinguish forward edges
   746  	// from backward edges and compute how far they go.
   747  	s.blockOrder = make([]int32, f.NumBlocks())
   748  	for i, b := range s.visitOrder {
   749  		s.blockOrder[b.ID] = int32(i)
   750  	}
   751  
   752  	s.regs = make([]regState, s.numRegs)
   753  	nv := f.NumValues()
   754  	if cap(s.f.Cache.regallocValues) >= nv {
   755  		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
   756  	} else {
   757  		s.f.Cache.regallocValues = make([]valState, nv)
   758  	}
   759  	s.values = s.f.Cache.regallocValues
   760  	s.orig = s.f.Cache.allocValueSlice(nv)
   761  	s.copies = make(map[*Value]bool)
   762  	for _, b := range s.visitOrder {
   763  		for _, v := range b.Values {
   764  			if v.needRegister() {
   765  				s.values[v.ID].needReg = true
   766  				s.values[v.ID].rematerializeable = v.rematerializeable()
   767  				s.orig[v.ID] = v
   768  			}
   769  			// Note: needReg is false for values returning Tuple types.
   770  			// Instead, we mark the corresponding Selects as needReg.
   771  		}
   772  	}
   773  	s.computeLive()
   774  
   775  	s.endRegs = make([][]endReg, f.NumBlocks())
   776  	s.startRegs = make([][]startReg, f.NumBlocks())
   777  	s.spillLive = make([][]ID, f.NumBlocks())
   778  	s.sdom = f.Sdom()
   779  
   780  	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
   781  	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   782  		canLiveOnStack := f.newSparseSet(f.NumValues())
   783  		defer f.retSparseSet(canLiveOnStack)
   784  		for _, b := range f.Blocks {
   785  			// New block. Clear candidate set.
   786  			canLiveOnStack.clear()
   787  			for _, c := range b.ControlValues() {
   788  				if c.Uses == 1 && !opcodeTable[c.Op].generic {
   789  					canLiveOnStack.add(c.ID)
   790  				}
   791  			}
   792  			// Walking backwards.
   793  			for i := len(b.Values) - 1; i >= 0; i-- {
   794  				v := b.Values[i]
   795  				if canLiveOnStack.contains(v.ID) {
   796  					v.OnWasmStack = true
   797  				} else {
    798  					// Value cannot live on the stack. Values are not allowed to be reordered, so clear the candidate set.
   799  					canLiveOnStack.clear()
   800  				}
   801  				for _, arg := range v.Args {
   802  					// Value can live on the stack if:
   803  					// - it is only used once
   804  					// - it is used in the same basic block
   805  					// - it is not a "mem" value
   806  					// - it is a WebAssembly op
   807  					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
   808  						canLiveOnStack.add(arg.ID)
   809  					}
   810  				}
   811  			}
   812  		}
   813  	}
   814  
   815  	// The clobberdeadreg experiment inserts code to clobber dead registers
   816  	// at call sites.
   817  	// Ignore huge functions to avoid doing too much work.
   818  	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
   819  		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
   820  		s.doClobber = true
   821  	}
   822  }
   823  
   824  func (s *regAllocState) close() {
   825  	s.f.Cache.freeValueSlice(s.orig)
   826  }
   827  
   828  // Adds a use record for id at distance dist from the start of the block.
   829  // All calls to addUse must happen with nonincreasing dist.
   830  func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
   831  	r := s.freeUseRecords
   832  	if r != nil {
   833  		s.freeUseRecords = r.next
   834  	} else {
   835  		r = &use{}
   836  	}
   837  	r.dist = dist
   838  	r.pos = pos
   839  	r.next = s.values[id].uses
   840  	s.values[id].uses = r
   841  	if r.next != nil && dist > r.next.dist {
   842  		s.f.Fatalf("uses added in wrong order")
   843  	}
   844  }
   845  
   846  // advanceUses advances the uses of v's args from the state before v to the state after v.
   847  // Any values which have no more uses are deallocated from registers.
   848  func (s *regAllocState) advanceUses(v *Value) {
   849  	for _, a := range v.Args {
   850  		if !s.values[a.ID].needReg {
   851  			continue
   852  		}
   853  		ai := &s.values[a.ID]
   854  		r := ai.uses
   855  		ai.uses = r.next
   856  		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
   857  			// Value is dead (or is not used again until after a call), free all registers that hold it.
   858  			s.freeRegs(ai.regs)
   859  		}
   860  		r.next = s.freeUseRecords
   861  		s.freeUseRecords = r
   862  	}
   863  	s.dropIfUnused(v)
   864  }
   865  
   866  // Drop v from registers if it isn't used again, or its only uses are after
   867  // a call instruction.
   868  func (s *regAllocState) dropIfUnused(v *Value) {
   869  	if !s.values[v.ID].needReg {
   870  		return
   871  	}
   872  	vi := &s.values[v.ID]
   873  	r := vi.uses
   874  	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
   875  		s.freeRegs(vi.regs)
   876  	}
   877  }
   878  
   879  // liveAfterCurrentInstruction reports whether v is live after
   880  // the current instruction is completed.  v must be used by the
   881  // current instruction.
   882  func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
   883  	u := s.values[v.ID].uses
   884  	if u == nil {
   885  		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
   886  	}
   887  	d := u.dist
   888  	for u != nil && u.dist == d {
   889  		u = u.next
   890  	}
   891  	return u != nil && u.dist > d
   892  }
   893  
   894  // Sets the state of the registers to that encoded in regs.
   895  func (s *regAllocState) setState(regs []endReg) {
   896  	s.freeRegs(s.used)
   897  	for _, x := range regs {
   898  		s.assignReg(x.r, x.v, x.c)
   899  	}
   900  }
   901  
   902  // compatRegs returns the set of registers which can store a type t.
   903  func (s *regAllocState) compatRegs(t *types.Type) regMask {
   904  	var m regMask
   905  	if t.IsTuple() || t.IsFlags() {
   906  		return 0
   907  	}
   908  	if t.IsFloat() || t == types.TypeInt128 {
   909  		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
   910  			m = s.f.Config.fp32RegMask
   911  		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
   912  			m = s.f.Config.fp64RegMask
   913  		} else {
   914  			m = s.f.Config.fpRegMask
   915  		}
   916  	} else {
   917  		m = s.f.Config.gpRegMask
   918  	}
   919  	return m & s.allocatable
   920  }
   921  
    922  // regspec returns the regInfo for v's operation.
   923  func (s *regAllocState) regspec(v *Value) regInfo {
   924  	op := v.Op
   925  	if op == OpConvert {
   926  		// OpConvert is a generic op, so it doesn't have a
   927  		// register set in the static table. It can use any
   928  		// allocatable integer register.
   929  		m := s.allocatable & s.f.Config.gpRegMask
   930  		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
   931  	}
   932  	if op == OpArgIntReg {
   933  		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
   934  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   935  	}
   936  	if op == OpArgFloatReg {
   937  		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
   938  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   939  	}
   940  	if op.IsCall() {
   941  		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
   942  			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
   943  		}
   944  	}
   945  	if op == OpMakeResult && s.f.OwnAux.reg != nil {
   946  		return *s.f.OwnAux.ResultReg(s.f.Config)
   947  	}
   948  	return opcodeTable[op].reg
   949  }
   950  
   951  func (s *regAllocState) isGReg(r register) bool {
   952  	return s.f.Config.hasGReg && s.GReg == r
   953  }
   954  
   955  // Dummy value used to represent the value being held in a temporary register.
   956  var tmpVal Value
   957  
   958  func (s *regAllocState) regalloc(f *Func) {
   959  	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
   960  	defer f.retSparseSet(regValLiveSet)
   961  	var oldSched []*Value
   962  	var phis []*Value
   963  	var phiRegs []register
   964  	var args []*Value
   965  
   966  	// Data structure used for computing desired registers.
   967  	var desired desiredState
   968  	desiredSecondReg := map[ID][4]register{} // desired register allocation for 2nd part of a tuple
   969  
   970  	// Desired registers for inputs & outputs for each instruction in the block.
   971  	type dentry struct {
   972  		out [4]register    // desired output registers
   973  		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
   974  	}
   975  	var dinfo []dentry
   976  
   977  	if f.Entry != f.Blocks[0] {
   978  		f.Fatalf("entry block must be first")
   979  	}
   980  
   981  	for _, b := range s.visitOrder {
   982  		if s.f.pass.debug > regDebug {
   983  			fmt.Printf("Begin processing block %v\n", b)
   984  		}
   985  		s.curBlock = b
   986  		s.startRegsMask = 0
   987  		s.usedSinceBlockStart = 0
   988  		clear(desiredSecondReg)
   989  
   990  		// Initialize regValLiveSet and uses fields for this block.
   991  		// Walk backwards through the block doing liveness analysis.
   992  		regValLiveSet.clear()
   993  		for _, e := range s.live[b.ID] {
   994  			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
   995  			regValLiveSet.add(e.ID)
   996  		}
   997  		for _, v := range b.ControlValues() {
   998  			if s.values[v.ID].needReg {
   999  				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
  1000  				regValLiveSet.add(v.ID)
  1001  			}
  1002  		}
  1003  		if len(s.nextCall) < len(b.Values) {
  1004  			s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
  1005  		}
  1006  		var nextCall int32 = math.MaxInt32
  1007  		for i := len(b.Values) - 1; i >= 0; i-- {
  1008  			v := b.Values[i]
  1009  			regValLiveSet.remove(v.ID)
  1010  			if v.Op == OpPhi {
  1011  				// Remove v from the live set, but don't add
  1012  				// any inputs. This is the state the len(b.Preds)>1
  1013  				// case below desires; it wants to process phis specially.
  1014  				s.nextCall[i] = nextCall
  1015  				continue
  1016  			}
  1017  			if opcodeTable[v.Op].call {
  1018  				// Function call clobbers all the registers but SP and SB.
  1019  				regValLiveSet.clear()
  1020  				if s.sp != 0 && s.values[s.sp].uses != nil {
  1021  					regValLiveSet.add(s.sp)
  1022  				}
  1023  				if s.sb != 0 && s.values[s.sb].uses != nil {
  1024  					regValLiveSet.add(s.sb)
  1025  				}
  1026  				nextCall = int32(i)
  1027  			}
  1028  			for _, a := range v.Args {
  1029  				if !s.values[a.ID].needReg {
  1030  					continue
  1031  				}
  1032  				s.addUse(a.ID, int32(i), v.Pos)
  1033  				regValLiveSet.add(a.ID)
  1034  			}
  1035  			s.nextCall[i] = nextCall
  1036  		}
  1037  		if s.f.pass.debug > regDebug {
  1038  			fmt.Printf("use distances for %s\n", b)
  1039  			for i := range s.values {
  1040  				vi := &s.values[i]
  1041  				u := vi.uses
  1042  				if u == nil {
  1043  					continue
  1044  				}
  1045  				fmt.Printf("  v%d:", i)
  1046  				for u != nil {
  1047  					fmt.Printf(" %d", u.dist)
  1048  					u = u.next
  1049  				}
  1050  				fmt.Println()
  1051  			}
  1052  		}
  1053  
  1054  		// Make a copy of the block schedule so we can generate a new one in place.
  1055  		// We make a separate copy for phis and regular values.
  1056  		nphi := 0
  1057  		for _, v := range b.Values {
  1058  			if v.Op != OpPhi {
  1059  				break
  1060  			}
  1061  			nphi++
  1062  		}
  1063  		phis = append(phis[:0], b.Values[:nphi]...)
  1064  		oldSched = append(oldSched[:0], b.Values[nphi:]...)
  1065  		b.Values = b.Values[:0]
  1066  
  1067  		// Initialize start state of block.
  1068  		if b == f.Entry {
  1069  			// Regalloc state is empty to start.
  1070  			if nphi > 0 {
  1071  				f.Fatalf("phis in entry block")
  1072  			}
  1073  		} else if len(b.Preds) == 1 {
  1074  			// Start regalloc state with the end state of the previous block.
  1075  			s.setState(s.endRegs[b.Preds[0].b.ID])
  1076  			if nphi > 0 {
  1077  				f.Fatalf("phis in single-predecessor block")
  1078  			}
  1079  			// Drop any values which are no longer live.
  1080  			// This may happen because at the end of p, a value may be
  1081  			// live but only used by some other successor of p.
  1082  			for r := register(0); r < s.numRegs; r++ {
  1083  				v := s.regs[r].v
  1084  				if v != nil && !regValLiveSet.contains(v.ID) {
  1085  					s.freeReg(r)
  1086  				}
  1087  			}
  1088  		} else {
  1089  			// This is the complicated case. We have more than one predecessor,
  1090  			// which means we may have Phi ops.
  1091  
   1092  			// Start with the final register state of the predecessor with the fewest live spill values.
   1093  			// This is based on the following points:
   1094  			// 1. Fewer live spills indicate lower register pressure on that path, so the
   1095  			//    values of this block are more likely to be allocated to registers.
   1096  			// 2. Avoid predecessors that contain function calls, because such
   1097  			//    predecessors usually generate many spills and lose the previous
   1098  			//    allocation state.
  1099  			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
  1100  			// an impact on the code size and compiler speed. But it is not easy to find a simple
  1101  			// and efficient method that combines multiple factors.
  1102  			idx := -1
  1103  			for i, p := range b.Preds {
  1104  				// If the predecessor has not been visited yet, skip it because its end state
   1105  				// (endRegs and spillLive) has not been computed yet.
  1106  				pb := p.b
  1107  				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
  1108  					continue
  1109  				}
  1110  				if idx == -1 {
  1111  					idx = i
  1112  					continue
  1113  				}
  1114  				pSel := b.Preds[idx].b
  1115  				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
  1116  					idx = i
  1117  				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
   1118  					// Use a bit of likely information. After the critical-edge-removal pass, pb and pSel must
  1119  					// be plain blocks, so check edge pb->pb.Preds instead of edge pb->b.
  1120  					// TODO: improve the prediction of the likely predecessor. The following
  1121  					// method is only suitable for the simplest cases. For complex cases,
  1122  					// the prediction may be inaccurate, but this does not affect the
  1123  					// correctness of the program.
  1124  					// According to the layout algorithm, the predecessor with the
  1125  					// smaller blockOrder is the true branch, and the test results show
  1126  					// that it is better to choose the predecessor with a smaller
  1127  					// blockOrder than no choice.
  1128  					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
  1129  						idx = i
  1130  					}
  1131  				}
  1132  			}
  1133  			if idx < 0 {
  1134  				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
  1135  			}
  1136  			p := b.Preds[idx].b
  1137  			s.setState(s.endRegs[p.ID])
  1138  
  1139  			if s.f.pass.debug > regDebug {
  1140  				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
  1141  				for _, x := range s.endRegs[p.ID] {
  1142  					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
  1143  				}
  1144  			}
  1145  
  1146  			// Decide on registers for phi ops. Use the registers determined
  1147  			// by the primary predecessor if we can.
  1148  			// TODO: pick best of (already processed) predecessors?
  1149  			// Majority vote? Deepest nesting level?
  1150  			phiRegs = phiRegs[:0]
  1151  			var phiUsed regMask
  1152  
  1153  			for _, v := range phis {
  1154  				if !s.values[v.ID].needReg {
  1155  					phiRegs = append(phiRegs, noRegister)
  1156  					continue
  1157  				}
  1158  				a := v.Args[idx]
  1159  				// Some instructions target not-allocatable registers.
  1160  				// They're not suitable for further (phi-function) allocation.
  1161  				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
  1162  				if m != 0 {
  1163  					r := pickReg(m)
  1164  					phiUsed |= regMask(1) << r
  1165  					phiRegs = append(phiRegs, r)
  1166  				} else {
  1167  					phiRegs = append(phiRegs, noRegister)
  1168  				}
  1169  			}
  1170  
  1171  			// Second pass - deallocate all in-register phi inputs.
  1172  			for i, v := range phis {
  1173  				if !s.values[v.ID].needReg {
  1174  					continue
  1175  				}
  1176  				a := v.Args[idx]
  1177  				r := phiRegs[i]
  1178  				if r == noRegister {
  1179  					continue
  1180  				}
  1181  				if regValLiveSet.contains(a.ID) {
  1182  					// Input value is still live (it is used by something other than Phi).
  1183  					// Try to move it around before kicking out, if there is a free register.
  1184  					// We generate a Copy in the predecessor block and record it. It will be
  1185  					// deleted later if never used.
  1186  					//
  1187  					// Pick a free register. At this point some registers used in the predecessor
  1188  					// block may have been deallocated. Those are the ones used for Phis. Exclude
  1189  					// them (and they are not going to be helpful anyway).
  1190  					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
  1191  					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
  1192  						r2 := pickReg(m)
  1193  						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
  1194  						s.copies[c] = false
  1195  						if s.f.pass.debug > regDebug {
  1196  							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
  1197  						}
  1198  						s.setOrig(c, a)
  1199  						s.assignReg(r2, a, c)
  1200  						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
  1201  					}
  1202  				}
  1203  				s.freeReg(r)
  1204  			}
  1205  
  1206  			// Copy phi ops into new schedule.
  1207  			b.Values = append(b.Values, phis...)
  1208  
  1209  			// Third pass - pick registers for phis whose input
  1210  			// was not in a register in the primary predecessor.
  1211  			for i, v := range phis {
  1212  				if !s.values[v.ID].needReg {
  1213  					continue
  1214  				}
  1215  				if phiRegs[i] != noRegister {
  1216  					continue
  1217  				}
  1218  				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
  1219  				// If one of the other inputs of v is in a register, and the register is available,
  1220  				// select this register, which can save some unnecessary copies.
  1221  				for i, pe := range b.Preds {
  1222  					if i == idx {
  1223  						continue
  1224  					}
  1225  					ri := noRegister
  1226  					for _, er := range s.endRegs[pe.b.ID] {
  1227  						if er.v == s.orig[v.Args[i].ID] {
  1228  							ri = er.r
  1229  							break
  1230  						}
  1231  					}
  1232  					if ri != noRegister && m>>ri&1 != 0 {
  1233  						m = regMask(1) << ri
  1234  						break
  1235  					}
  1236  				}
  1237  				if m != 0 {
  1238  					r := pickReg(m)
  1239  					phiRegs[i] = r
  1240  					phiUsed |= regMask(1) << r
  1241  				}
  1242  			}
  1243  
  1244  			// Set registers for phis. Add phi spill code.
  1245  			for i, v := range phis {
  1246  				if !s.values[v.ID].needReg {
  1247  					continue
  1248  				}
  1249  				r := phiRegs[i]
  1250  				if r == noRegister {
  1251  					// stack-based phi
  1252  					// Spills will be inserted in all the predecessors below.
  1253  					s.values[v.ID].spill = v // v starts life spilled
  1254  					continue
  1255  				}
  1256  				// register-based phi
  1257  				s.assignReg(r, v, v)
  1258  			}
  1259  
  1260  			// Deallocate any values which are no longer live. Phis are excluded.
  1261  			for r := register(0); r < s.numRegs; r++ {
  1262  				if phiUsed>>r&1 != 0 {
  1263  					continue
  1264  				}
  1265  				v := s.regs[r].v
  1266  				if v != nil && !regValLiveSet.contains(v.ID) {
  1267  					s.freeReg(r)
  1268  				}
  1269  			}
  1270  
  1271  			// Save the starting state for use by merge edges.
  1272  			// We append to a stack allocated variable that we'll
  1273  			// later copy into s.startRegs in one fell swoop, to save
  1274  			// on allocations.
  1275  			regList := make([]startReg, 0, 32)
  1276  			for r := register(0); r < s.numRegs; r++ {
  1277  				v := s.regs[r].v
  1278  				if v == nil {
  1279  					continue
  1280  				}
  1281  				if phiUsed>>r&1 != 0 {
  1282  					// Skip registers that phis used, we'll handle those
  1283  					// specially during merge edge processing.
  1284  					continue
  1285  				}
  1286  				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
  1287  				s.startRegsMask |= regMask(1) << r
  1288  			}
  1289  			s.startRegs[b.ID] = make([]startReg, len(regList))
  1290  			copy(s.startRegs[b.ID], regList)
  1291  
  1292  			if s.f.pass.debug > regDebug {
  1293  				fmt.Printf("after phis\n")
  1294  				for _, x := range s.startRegs[b.ID] {
  1295  					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
  1296  				}
  1297  			}
  1298  		}
  1299  
  1300  		// Drop phis from registers if they immediately go dead.
  1301  		for i, v := range phis {
  1302  			s.curIdx = i
  1303  			s.dropIfUnused(v)
  1304  		}
  1305  
  1306  		// Allocate space to record the desired registers for each value.
  1307  		if l := len(oldSched); cap(dinfo) < l {
  1308  			dinfo = make([]dentry, l)
  1309  		} else {
  1310  			dinfo = dinfo[:l]
  1311  			clear(dinfo)
  1312  		}
  1313  
  1314  		// Load static desired register info at the end of the block.
  1315  		desired.copy(&s.desired[b.ID])
  1316  
  1317  		// Check actual assigned registers at the start of the next block(s).
  1318  		// Dynamically assigned registers will trump the static
  1319  		// desired registers computed during liveness analysis.
  1320  		// Note that we do this phase after startRegs is set above, so that
  1321  		// we get the right behavior for a block which branches to itself.
  1322  		for _, e := range b.Succs {
  1323  			succ := e.b
  1324  			// TODO: prioritize likely successor?
  1325  			for _, x := range s.startRegs[succ.ID] {
  1326  				desired.add(x.v.ID, x.r)
  1327  			}
  1328  			// Process phi ops in succ.
  1329  			pidx := e.i
  1330  			for _, v := range succ.Values {
  1331  				if v.Op != OpPhi {
  1332  					break
  1333  				}
  1334  				if !s.values[v.ID].needReg {
  1335  					continue
  1336  				}
  1337  				rp, ok := s.f.getHome(v.ID).(*Register)
  1338  				if !ok {
  1339  					// If v is not assigned a register, pick a register assigned to one of v's inputs.
  1340  					// Hopefully v will get assigned that register later.
  1341  					// If the inputs have allocated register information, add it to desired,
  1342  					// which may reduce spill or copy operations when the register is available.
  1343  					for _, a := range v.Args {
  1344  						rp, ok = s.f.getHome(a.ID).(*Register)
  1345  						if ok {
  1346  							break
  1347  						}
  1348  					}
  1349  					if !ok {
  1350  						continue
  1351  					}
  1352  				}
  1353  				desired.add(v.Args[pidx].ID, register(rp.num))
  1354  			}
  1355  		}
  1356  		// Walk values backwards computing desired register info.
  1357  		// See computeLive for more comments.
  1358  		for i := len(oldSched) - 1; i >= 0; i-- {
  1359  			v := oldSched[i]
  1360  			prefs := desired.remove(v.ID)
  1361  			regspec := s.regspec(v)
  1362  			desired.clobber(regspec.clobbers)
  1363  			for _, j := range regspec.inputs {
  1364  				if countRegs(j.regs) != 1 {
  1365  					continue
  1366  				}
  1367  				desired.clobber(j.regs)
  1368  				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  1369  			}
  1370  			if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  1371  				if opcodeTable[v.Op].commutative {
  1372  					desired.addList(v.Args[1].ID, prefs)
  1373  				}
  1374  				desired.addList(v.Args[0].ID, prefs)
  1375  			}
  1376  			// Save desired registers for this value.
  1377  			dinfo[i].out = prefs
  1378  			for j, a := range v.Args {
  1379  				if j >= len(dinfo[i].in) {
  1380  					break
  1381  				}
  1382  				dinfo[i].in[j] = desired.get(a.ID)
  1383  			}
  1384  			if v.Op == OpSelect1 && prefs[0] != noRegister {
  1385  				// Save desired registers of select1 for
  1386  				// use by the tuple generating instruction.
  1387  				desiredSecondReg[v.Args[0].ID] = prefs
  1388  			}
  1389  		}
  1390  
  1391  		// Process all the non-phi values.
  1392  		for idx, v := range oldSched {
  1393  			s.curIdx = nphi + idx
  1394  			tmpReg := noRegister
  1395  			if s.f.pass.debug > regDebug {
  1396  				fmt.Printf("  processing %s\n", v.LongString())
  1397  			}
  1398  			regspec := s.regspec(v)
  1399  			if v.Op == OpPhi {
  1400  				f.Fatalf("phi %s not at start of block", v)
  1401  			}
  1402  			if opcodeTable[v.Op].fixedReg {
  1403  				switch v.Op {
  1404  				case OpSP:
  1405  					s.assignReg(s.SPReg, v, v)
  1406  					s.sp = v.ID
  1407  				case OpSB:
  1408  					s.assignReg(s.SBReg, v, v)
  1409  					s.sb = v.ID
  1410  				case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
  1411  					s.assignReg(s.ZeroIntReg, v, v)
  1412  				default:
  1413  					f.Fatalf("unknown fixed-register op %s", v)
  1414  				}
  1415  				b.Values = append(b.Values, v)
  1416  				s.advanceUses(v)
  1417  				continue
  1418  			}
  1419  			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
  1420  				if s.values[v.ID].needReg {
  1421  					if v.Op == OpSelectN {
  1422  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
  1423  					} else {
  1424  						var i = 0
  1425  						if v.Op == OpSelect1 {
  1426  							i = 1
  1427  						}
  1428  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
  1429  					}
  1430  				}
  1431  				b.Values = append(b.Values, v)
  1432  				s.advanceUses(v)
  1433  				continue
  1434  			}
  1435  			if v.Op == OpGetG && s.f.Config.hasGReg {
  1436  				// use hardware g register
  1437  				if s.regs[s.GReg].v != nil {
  1438  					s.freeReg(s.GReg) // kick out the old value
  1439  				}
  1440  				s.assignReg(s.GReg, v, v)
  1441  				b.Values = append(b.Values, v)
  1442  				s.advanceUses(v)
  1443  				continue
  1444  			}
  1445  			if v.Op == OpArg {
  1446  				// Args are "pre-spilled" values. We don't allocate any
  1447  				// register here; we just set the spill pointer to point at
  1448  				// the value itself, and any later use restores from it.
  1449  				s.values[v.ID].spill = v
  1450  				b.Values = append(b.Values, v)
  1451  				s.advanceUses(v)
  1452  				continue
  1453  			}
  1454  			if v.Op == OpKeepAlive {
  1455  				// Make sure the argument to v is still live here.
  1456  				s.advanceUses(v)
  1457  				a := v.Args[0]
  1458  				vi := &s.values[a.ID]
  1459  				if vi.regs == 0 && !vi.rematerializeable {
  1460  					// Use the spill location.
  1461  					// This forces later liveness analysis to make the
  1462  					// value live at this point.
  1463  					v.SetArg(0, s.makeSpill(a, b))
  1464  				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
  1465  					// Rematerializeable value with a *ir.Name. This is the address of
  1466  					// a stack object (e.g. an LEAQ). Keep the object live.
  1467  					// Change it to VarLive, which is what plive expects for locals.
  1468  					v.Op = OpVarLive
  1469  					v.SetArgs1(v.Args[1])
  1470  					v.Aux = a.Aux
  1471  				} else {
  1472  					// In-register and rematerializeable values are already live.
  1473  					// These are typically rematerializeable constants like nil,
  1474  					// or values of a variable that were modified since the last call.
  1475  					v.Op = OpCopy
  1476  					v.SetArgs1(v.Args[1])
  1477  				}
  1478  				b.Values = append(b.Values, v)
  1479  				continue
  1480  			}
  1481  			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
  1482  				// No register allocation required (or none specified yet)
  1483  				if s.doClobber && v.Op.IsCall() {
  1484  					s.clobberRegs(regspec.clobbers)
  1485  				}
  1486  				s.freeRegs(regspec.clobbers)
  1487  				b.Values = append(b.Values, v)
  1488  				s.advanceUses(v)
  1489  				continue
  1490  			}
  1491  
  1492  			if s.values[v.ID].rematerializeable {
  1493  				// Value is rematerializeable, don't issue it here.
  1494  				// It will get issued just before each use (see
  1495  				// allocValueToReg).
  1496  				for _, a := range v.Args {
  1497  					a.Uses--
  1498  				}
  1499  				s.advanceUses(v)
  1500  				continue
  1501  			}
  1502  
  1503  			if s.f.pass.debug > regDebug {
  1504  				fmt.Printf("value %s\n", v.LongString())
  1505  				fmt.Printf("  out:")
  1506  				for _, r := range dinfo[idx].out {
  1507  					if r != noRegister {
  1508  						fmt.Printf(" %s", &s.registers[r])
  1509  					}
  1510  				}
  1511  				fmt.Println()
  1512  				for i := 0; i < len(v.Args) && i < 3; i++ {
  1513  					fmt.Printf("  in%d:", i)
  1514  					for _, r := range dinfo[idx].in[i] {
  1515  						if r != noRegister {
  1516  							fmt.Printf(" %s", &s.registers[r])
  1517  						}
  1518  					}
  1519  					fmt.Println()
  1520  				}
  1521  			}
  1522  
  1523  			// Move arguments to registers.
  1524  			// First, if an arg must be in a specific register and it is already
  1525  			// in place, keep it.
  1526  			args = append(args[:0], make([]*Value, len(v.Args))...)
  1527  			for i, a := range v.Args {
  1528  				if !s.values[a.ID].needReg {
  1529  					args[i] = a
  1530  				}
  1531  			}
  1532  			for _, i := range regspec.inputs {
  1533  				mask := i.regs
  1534  				if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
  1535  					args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1536  				}
  1537  			}
  1538  			// Then, if an arg must be in a specific register and that
  1539  			// register is free, allocate that one. Otherwise, when processing
  1540  			// another input we may kick a value into the free register, only
  1541  			// for it to be kicked out again.
  1542  			// This is a common case for arguments that are passed in registers
  1543  			// to function calls.
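        			// For example (illustrative only): suppose one input must be in AX,
        			// another must be in CX, AX currently holds an unrelated live value,
        			// and CX is free. Handling the AX input first could move the AX
        			// occupant into the free CX, only to evict it again for the CX
        			// input; taking the free fixed register first avoids that shuffle.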
  1544  			for {
  1545  				freed := false
  1546  				for _, i := range regspec.inputs {
  1547  					if args[i.idx] != nil {
  1548  						continue // already allocated
  1549  					}
  1550  					mask := i.regs
  1551  					if countRegs(mask) == 1 && mask&^s.used != 0 {
  1552  						args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1553  						// If the input is in other registers that will be clobbered by v,
  1554  						// or the input is dead, free the registers. This may make room
  1555  						// for other inputs.
  1556  						oldregs := s.values[v.Args[i.idx].ID].regs
  1557  						if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
  1558  							s.freeRegs(oldregs &^ mask &^ s.nospill)
  1559  							freed = true
  1560  						}
  1561  					}
  1562  				}
  1563  				if !freed {
  1564  					break
  1565  				}
  1566  			}
  1567  			// Last, allocate remaining ones, in an ordering defined
  1568  			// by the register specification (most constrained first).
  1569  			for _, i := range regspec.inputs {
  1570  				if args[i.idx] != nil {
  1571  					continue // already allocated
  1572  				}
  1573  				mask := i.regs
  1574  				if mask&s.values[v.Args[i.idx].ID].regs == 0 {
  1575  					// Need a new register for the input.
  1576  					mask &= s.allocatable
  1577  					mask &^= s.nospill
  1578  					// Use the desired register if available.
  1579  					if i.idx < 3 {
  1580  						for _, r := range dinfo[idx].in[i.idx] {
  1581  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1582  								// Desired register is allowed and unused.
  1583  								mask = regMask(1) << r
  1584  								break
  1585  							}
  1586  						}
  1587  					}
  1588  					// Avoid registers we're saving for other values.
  1589  					if mask&^desired.avoid != 0 {
  1590  						mask &^= desired.avoid
  1591  					}
  1592  				}
  1593  				if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
  1594  					// Prefer SP register. This ensures that local variables
  1595  					// use SP as their base register (instead of a copy of the
  1596  					// stack pointer living in another register). See issue 74836.
  1597  					mask = 1 << s.SPReg
  1598  				}
  1599  				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1600  			}
  1601  
  1602  			// If the output clobbers the input register, make sure we have
  1603  			// at least two copies of the input register so we don't
  1604  			// have to reload the value from the spill location.
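        			// For example (illustrative), for a two-operand op like v = ADDQ x y
        			// on AMD64 the result overwrites x's register. If x is still live
        			// after v, keeping a second copy of x in some register means a later
        			// use of x does not have to be restored from its spill slot.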
  1605  			if opcodeTable[v.Op].resultInArg0 {
  1606  				var m regMask
  1607  				if !s.liveAfterCurrentInstruction(v.Args[0]) {
  1608  					// arg0 is dead.  We can clobber its register.
  1609  					goto ok
  1610  				}
  1611  				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
  1612  					args[0], args[1] = args[1], args[0]
  1613  					goto ok
  1614  				}
  1615  				if s.values[v.Args[0].ID].rematerializeable {
  1616  					// We can rematerialize the input, don't worry about clobbering it.
  1617  					goto ok
  1618  				}
  1619  				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
  1620  					args[0], args[1] = args[1], args[0]
  1621  					goto ok
  1622  				}
  1623  				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
  1624  					// we have at least 2 copies of arg0.  We can afford to clobber one.
  1625  					goto ok
  1626  				}
  1627  				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
  1628  					args[0], args[1] = args[1], args[0]
  1629  					goto ok
  1630  				}
  1631  
  1632  				// We can't overwrite arg0 (or arg1, if commutative).  So we
  1633  				// need to make a copy of an input so we have a register we can modify.
  1634  
  1635  				// Possible new registers to copy into.
  1636  				m = s.compatRegs(v.Args[0].Type) &^ s.used
  1637  				if m == 0 {
  1638  					// No free registers.  In this case we'll just clobber
  1639  					// an input and future uses of that input must use a restore.
  1640  					// TODO(khr): We should really do this like allocReg does it,
  1641  					// spilling the value with the most distant next use.
  1642  					goto ok
  1643  				}
  1644  
  1645  				// Try to move an input to the desired output, if allowed.
  1646  				for _, r := range dinfo[idx].out {
  1647  					if r != noRegister && (m&regspec.outputs[0].regs)>>r&1 != 0 {
  1648  						m = regMask(1) << r
  1649  						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
  1650  						// Note: we update args[0] so the instruction will
  1651  						// use the register copy we just made.
  1652  						goto ok
  1653  					}
  1654  				}
  1655  				// Try to copy input to its desired location & use its old
  1656  				// location as the result register.
  1657  				for _, r := range dinfo[idx].in[0] {
  1658  					if r != noRegister && m>>r&1 != 0 {
  1659  						m = regMask(1) << r
  1660  						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1661  						s.copies[c] = false
  1662  						// Note: no update to args[0] so the instruction will
  1663  						// use the original copy.
  1664  						goto ok
  1665  					}
  1666  				}
  1667  				if opcodeTable[v.Op].commutative {
  1668  					for _, r := range dinfo[idx].in[1] {
  1669  						if r != noRegister && m>>r&1 != 0 {
  1670  							m = regMask(1) << r
  1671  							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
  1672  							s.copies[c] = false
  1673  							args[0], args[1] = args[1], args[0]
  1674  							goto ok
  1675  						}
  1676  					}
  1677  				}
  1678  
  1679  				// Avoid future fixed uses if we can.
  1680  				if m&^desired.avoid != 0 {
  1681  					m &^= desired.avoid
  1682  				}
  1683  				// Save input 0 to a new register so we can clobber it.
  1684  				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1685  				s.copies[c] = false
  1686  
  1687  				// Normally we use the register of the old copy of input 0 as the target.
  1688  				// However, if input 0 is already in its desired register then we use
  1689  				// the register of the new copy instead.
  1690  				if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
  1691  					if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
  1692  						r := register(rp.num)
  1693  						for _, r2 := range dinfo[idx].in[0] {
  1694  							if r == r2 {
  1695  								args[0] = c
  1696  								break
  1697  							}
  1698  						}
  1699  					}
  1700  				}
  1701  			}
  1702  		ok:
  1703  			for i := 0; i < 2; i++ {
  1704  				if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
  1705  					continue
  1706  				}
  1707  				if !s.liveAfterCurrentInstruction(v.Args[i]) {
  1708  					// arg is dead.  We can clobber its register.
  1709  					continue
  1710  				}
  1711  				if s.values[v.Args[i].ID].rematerializeable {
  1712  					// We can rematerialize the input, don't worry about clobbering it.
  1713  					continue
  1714  				}
  1715  				if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
  1716  					// We have at least 2 copies of arg.  We can afford to clobber one.
  1717  					continue
  1718  				}
  1719  				// Possible new registers to copy into.
  1720  				m := s.compatRegs(v.Args[i].Type) &^ s.used
  1721  				if m == 0 {
  1722  					// No free registers.  In this case we'll just clobber the
  1723  					// input and future uses of that input must use a restore.
  1724  					// TODO(khr): We should really do this like allocReg does it,
  1725  					// spilling the value with the most distant next use.
  1726  					continue
  1727  				}
  1728  				// Copy input to a different register that won't be clobbered.
  1729  				c := s.allocValToReg(v.Args[i], m, true, v.Pos)
  1730  				s.copies[c] = false
  1731  			}
  1732  
  1733  			// Pick a temporary register if needed.
  1734  			// It should be distinct from all the input registers, so we
  1735  			// allocate it after all the input registers, but before
  1736  			// the input registers are freed via advanceUses below.
  1737  			// (Not all instructions need the temporary to be distinct, but it is conservative.)
  1738  			// We also ensure it is not any of the single-choice output registers.
  1739  			if opcodeTable[v.Op].needIntTemp {
  1740  				m := s.allocatable & s.f.Config.gpRegMask
  1741  				for _, out := range regspec.outputs {
  1742  					if countRegs(out.regs) == 1 {
  1743  						m &^= out.regs
  1744  					}
  1745  				}
  1746  				if m&^desired.avoid&^s.nospill != 0 {
  1747  					m &^= desired.avoid
  1748  				}
  1749  				tmpReg = s.allocReg(m, &tmpVal)
  1750  				s.nospill |= regMask(1) << tmpReg
  1751  				s.tmpused |= regMask(1) << tmpReg
  1752  			}
  1753  
  1754  			if regspec.clobbersArg0 {
  1755  				s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
  1756  			}
  1757  			if regspec.clobbersArg1 {
  1758  				s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
  1759  			}
  1760  
  1761  			// Now that all args are in regs, we're ready to issue the value itself.
  1762  			// Before we pick a register for the output value, allow input registers
  1763  			// to be deallocated. We do this here so that the output can use the
  1764  			// same register as a dying input.
  1765  			if !opcodeTable[v.Op].resultNotInArgs {
  1766  				s.tmpused = s.nospill
  1767  				s.nospill = 0
  1768  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1769  			}
  1770  
  1771  			// Dump any registers which will be clobbered
  1772  			if s.doClobber && v.Op.IsCall() {
  1773  				// clobber registers that are marked as clobber in regmask, but
  1774  				// don't clobber inputs.
  1775  				s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
  1776  			}
  1777  			s.freeRegs(regspec.clobbers)
  1778  			s.tmpused |= regspec.clobbers
  1779  
  1780  			// Pick registers for outputs.
  1781  			{
  1782  				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
  1783  				maxOutIdx := -1
  1784  				var used regMask
  1785  				if tmpReg != noRegister {
  1786  					// Ensure output registers are distinct from the temporary register.
  1787  					// (Not all instructions need this distinctness, but it is conservative.)
  1788  					used |= regMask(1) << tmpReg
  1789  				}
  1790  				for _, out := range regspec.outputs {
  1791  					if out.regs == 0 {
  1792  						continue
  1793  					}
  1794  					mask := out.regs & s.allocatable &^ used
  1795  					if mask == 0 {
  1796  						s.f.Fatalf("can't find any output register %s", v.LongString())
  1797  					}
  1798  					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
  1799  						if !opcodeTable[v.Op].commutative {
  1800  							// Output must use the same register as input 0.
  1801  							r := register(s.f.getHome(args[0].ID).(*Register).num)
  1802  							if mask>>r&1 == 0 {
  1803  								s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
  1804  							}
  1805  							mask = regMask(1) << r
  1806  						} else {
  1807  							// Output must use the same register as input 0 or 1.
  1808  							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
  1809  							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
  1810  							// Check r0 and r1 for desired output register.
  1811  							found := false
  1812  							for _, r := range dinfo[idx].out {
  1813  								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
  1814  									mask = regMask(1) << r
  1815  									found = true
  1816  									if r == r1 {
  1817  										args[0], args[1] = args[1], args[0]
  1818  									}
  1819  									break
  1820  								}
  1821  							}
  1822  							if !found {
  1823  								// Neither are desired, pick r0.
  1824  								mask = regMask(1) << r0
  1825  							}
  1826  						}
  1827  					}
  1828  					if out.idx == 0 { // desired registers only apply to the first element of a tuple result
  1829  						for _, r := range dinfo[idx].out {
  1830  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1831  								// Desired register is allowed and unused.
  1832  								mask = regMask(1) << r
  1833  								break
  1834  							}
  1835  						}
  1836  					}
  1837  					if out.idx == 1 {
  1838  						if prefs, ok := desiredSecondReg[v.ID]; ok {
  1839  							for _, r := range prefs {
  1840  								if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1841  									// Desired register is allowed and unused.
  1842  									mask = regMask(1) << r
  1843  									break
  1844  								}
  1845  							}
  1846  						}
  1847  					}
  1848  					// Avoid registers we're saving for other values.
  1849  					if mask&^desired.avoid&^s.nospill&^s.used != 0 {
  1850  						mask &^= desired.avoid
  1851  					}
  1852  					r := s.allocReg(mask, v)
  1853  					if out.idx > maxOutIdx {
  1854  						maxOutIdx = out.idx
  1855  					}
  1856  					outRegs[out.idx] = r
  1857  					used |= regMask(1) << r
  1858  					s.tmpused |= regMask(1) << r
  1859  				}
  1860  				// Record register choices
  1861  				if v.Type.IsTuple() {
  1862  					var outLocs LocPair
  1863  					if r := outRegs[0]; r != noRegister {
  1864  						outLocs[0] = &s.registers[r]
  1865  					}
  1866  					if r := outRegs[1]; r != noRegister {
  1867  						outLocs[1] = &s.registers[r]
  1868  					}
  1869  					s.f.setHome(v, outLocs)
  1870  					// Note that subsequent SelectX instructions will do the assignReg calls.
  1871  				} else if v.Type.IsResults() {
  1872  					// preallocate outLocs to the right size, which is maxOutIdx+1
  1873  					outLocs := make(LocResults, maxOutIdx+1)
  1874  					for i := 0; i <= maxOutIdx; i++ {
  1875  						if r := outRegs[i]; r != noRegister {
  1876  							outLocs[i] = &s.registers[r]
  1877  						}
  1878  					}
  1879  					s.f.setHome(v, outLocs)
  1880  				} else {
  1881  					if r := outRegs[0]; r != noRegister {
  1882  						s.assignReg(r, v, v)
  1883  					}
  1884  				}
  1885  				if tmpReg != noRegister {
  1886  					// Remember the temp register allocation, if any.
  1887  					if s.f.tempRegs == nil {
  1888  						s.f.tempRegs = map[ID]*Register{}
  1889  					}
  1890  					s.f.tempRegs[v.ID] = &s.registers[tmpReg]
  1891  				}
  1892  			}
  1893  
  1894  			// deallocate dead args, if we have not done so
  1895  			if opcodeTable[v.Op].resultNotInArgs {
  1896  				s.nospill = 0
  1897  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1898  			}
  1899  			s.tmpused = 0
  1900  
  1901  			// Issue the Value itself.
  1902  			for i, a := range args {
  1903  				v.SetArg(i, a) // use register version of arguments
  1904  			}
  1905  			b.Values = append(b.Values, v)
  1906  			s.dropIfUnused(v)
  1907  		}
  1908  
  1909  		// Copy the control values - we need this so we can reduce the
  1910  		// uses property of these values later.
  1911  		controls := append(make([]*Value, 0, 2), b.ControlValues()...)
  1912  
  1913  		// Load control values into registers.
  1914  		for i, v := range b.ControlValues() {
  1915  			if !s.values[v.ID].needReg {
  1916  				continue
  1917  			}
  1918  			if s.f.pass.debug > regDebug {
  1919  				fmt.Printf("  processing control %s\n", v.LongString())
  1920  			}
  1921  			// We assume that a control input can be passed in any
  1922  			// type-compatible register. If this turns out not to be true,
  1923  			// we'll need to introduce a regspec for a block's control value.
  1924  			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
  1925  		}
  1926  
  1927  		// Reduce the uses of the control values once registers have been loaded.
  1928  		// This loop is equivalent to the advanceUses method.
  1929  		for _, v := range controls {
  1930  			vi := &s.values[v.ID]
  1931  			if !vi.needReg {
  1932  				continue
  1933  			}
  1934  			// Remove this use from the uses list.
  1935  			u := vi.uses
  1936  			vi.uses = u.next
  1937  			if u.next == nil {
  1938  				s.freeRegs(vi.regs) // value is dead
  1939  			}
  1940  			u.next = s.freeUseRecords
  1941  			s.freeUseRecords = u
  1942  		}
  1943  
  1944  		// If we are approaching a merge point and we are the primary
  1945  		// predecessor of it, find live values that we use soon after
  1946  		// the merge point and promote them to registers now.
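        		// For example (illustrative): if b is the sole entry edge into a
        		// call-free loop and a value used early in the loop currently lives
        		// only in its spill slot, loading it into a register here moves the
        		// restore out of the loop body.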
  1947  		if len(b.Succs) == 1 {
  1948  			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
  1949  				s.freeReg(s.GReg) // Spill value in G register before any merge.
  1950  			}
  1951  			if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
  1952  				// No point if we've already regalloc'd the destination.
  1953  				goto badloop
  1954  			}
  1955  			// For this to be worthwhile, the loop must have no calls in it.
  1956  			top := b.Succs[0].b
  1957  			loop := s.loopnest.b2l[top.ID]
  1958  			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
  1959  				goto badloop
  1960  			}
  1961  
  1962  			// TODO: sort by distance, pick the closest ones?
  1963  			for _, live := range s.live[b.ID] {
  1964  				if live.dist >= unlikelyDistance {
  1965  					// Don't preload anything live after the loop.
  1966  					continue
  1967  				}
  1968  				vid := live.ID
  1969  				vi := &s.values[vid]
  1970  				if vi.regs != 0 {
  1971  					continue
  1972  				}
  1973  				if vi.rematerializeable {
  1974  					continue
  1975  				}
  1976  				v := s.orig[vid]
  1977  				m := s.compatRegs(v.Type) &^ s.used
  1978  				// Use the desired register if available.
  1979  			outerloop:
  1980  				for _, e := range desired.entries {
  1981  					if e.ID != v.ID {
  1982  						continue
  1983  					}
  1984  					for _, r := range e.regs {
  1985  						if r != noRegister && m>>r&1 != 0 {
  1986  							m = regMask(1) << r
  1987  							break outerloop
  1988  						}
  1989  					}
  1990  				}
  1991  				if m&^desired.avoid != 0 {
  1992  					m &^= desired.avoid
  1993  				}
  1994  				if m != 0 {
  1995  					s.allocValToReg(v, m, false, b.Pos)
  1996  				}
  1997  			}
  1998  		}
  1999  	badloop:
  2000  		;
  2001  
  2002  		// Save end-of-block register state.
  2003  		// First count how many, this cuts allocations in half.
  2004  		k := 0
  2005  		for r := register(0); r < s.numRegs; r++ {
  2006  			v := s.regs[r].v
  2007  			if v == nil {
  2008  				continue
  2009  			}
  2010  			k++
  2011  		}
  2012  		regList := make([]endReg, 0, k)
  2013  		for r := register(0); r < s.numRegs; r++ {
  2014  			v := s.regs[r].v
  2015  			if v == nil {
  2016  				continue
  2017  			}
  2018  			regList = append(regList, endReg{r, v, s.regs[r].c})
  2019  		}
  2020  		s.endRegs[b.ID] = regList
  2021  
  2022  		if checkEnabled {
  2023  			regValLiveSet.clear()
  2024  			for _, x := range s.live[b.ID] {
  2025  				regValLiveSet.add(x.ID)
  2026  			}
  2027  			for r := register(0); r < s.numRegs; r++ {
  2028  				v := s.regs[r].v
  2029  				if v == nil {
  2030  					continue
  2031  				}
  2032  				if !regValLiveSet.contains(v.ID) {
  2033  					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
  2034  				}
  2035  			}
  2036  		}
  2037  
  2038  		// If a value is live at the end of the block and
  2039  		// isn't in a register, generate a use for the spill location.
  2040  		// We need to remember this information so that
  2041  		// the liveness analysis in stackalloc is correct.
  2042  		for _, e := range s.live[b.ID] {
  2043  			vi := &s.values[e.ID]
  2044  			if vi.regs != 0 {
  2045  				// in a register, we'll use that source for the merge.
  2046  				continue
  2047  			}
  2048  			if vi.rematerializeable {
  2049  				// we'll rematerialize during the merge.
  2050  				continue
  2051  			}
  2052  			if s.f.pass.debug > regDebug {
  2053  				fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
  2054  			}
  2055  			spill := s.makeSpill(s.orig[e.ID], b)
  2056  			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
  2057  		}
  2058  
  2059  		// Clear any final uses.
  2060  		// All that is left should be the pseudo-uses added for values which
  2061  		// are live at the end of b.
  2062  		for _, e := range s.live[b.ID] {
  2063  			u := s.values[e.ID].uses
  2064  			if u == nil {
  2065  				f.Fatalf("live at end, no uses v%d", e.ID)
  2066  			}
  2067  			if u.next != nil {
  2068  				f.Fatalf("live at end, too many uses v%d", e.ID)
  2069  			}
  2070  			s.values[e.ID].uses = nil
  2071  			u.next = s.freeUseRecords
  2072  			s.freeUseRecords = u
  2073  		}
  2074  
  2075  		// allocReg may have dropped registers from startRegsMask that
  2076  		// aren't actually needed in startRegs. Synchronize back to
  2077  		// startRegs.
  2078  		//
  2079  		// This must be done before placing spills, which will look at
  2080  		// startRegs to decide if a block is a valid block for a spill.
  2081  		if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
  2082  			regs := make([]startReg, 0, c)
  2083  			for _, sr := range s.startRegs[b.ID] {
  2084  				if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
  2085  					continue
  2086  				}
  2087  				regs = append(regs, sr)
  2088  			}
  2089  			s.startRegs[b.ID] = regs
  2090  		}
  2091  	}
  2092  
  2093  	// Decide where the spills we generated will go.
  2094  	s.placeSpills()
  2095  
  2096  	// Anything that didn't get a register gets a stack location here.
  2097  	// (StoreReg, stack-based phis, inputs, ...)
  2098  	stacklive := stackalloc(s.f, s.spillLive)
  2099  
  2100  	// Fix up all merge edges.
  2101  	s.shuffle(stacklive)
  2102  
  2103  	// Erase any copies we never used.
  2104  	// Also, an unused copy might be the only use of another copy,
  2105  	// so continue erasing until we reach a fixed point.
  2106  	for {
  2107  		progress := false
  2108  		for c, used := range s.copies {
  2109  			if !used && c.Uses == 0 {
  2110  				if s.f.pass.debug > regDebug {
  2111  					fmt.Printf("delete copied value %s\n", c.LongString())
  2112  				}
  2113  				c.resetArgs()
  2114  				f.freeValue(c)
  2115  				delete(s.copies, c)
  2116  				progress = true
  2117  			}
  2118  		}
  2119  		if !progress {
  2120  			break
  2121  		}
  2122  	}
  2123  
  2124  	for _, b := range s.visitOrder {
  2125  		i := 0
  2126  		for _, v := range b.Values {
  2127  			if v.Op == OpInvalid {
  2128  				continue
  2129  			}
  2130  			b.Values[i] = v
  2131  			i++
  2132  		}
  2133  		b.Values = b.Values[:i]
  2134  	}
  2135  }
  2136  
  2137  func (s *regAllocState) placeSpills() {
  2138  	mustBeFirst := func(op Op) bool {
  2139  		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
  2140  	}
  2141  
  2142  	// Start maps block IDs to the list of spills
  2143  	// that go at the start of the block (but after any phis).
  2144  	start := map[ID][]*Value{}
  2145  	// After maps value IDs to the list of spills
  2146  	// that go immediately after that value ID.
  2147  	after := map[ID][]*Value{}
  2148  
  2149  	for i := range s.values {
  2150  		vi := s.values[i]
  2151  		spill := vi.spill
  2152  		if spill == nil {
  2153  			continue
  2154  		}
  2155  		if spill.Block != nil {
  2156  			// Some spills are already fully set up,
  2157  			// like OpArgs and stack-based phis.
  2158  			continue
  2159  		}
  2160  		v := s.orig[i]
  2161  
  2162  		// Walk down the dominator tree looking for a good place to
  2163  		// put the spill of v.  At the start "best" is the best place
  2164  		// we have found so far.
  2165  		// TODO: find a way to make this O(1) without arbitrary cutoffs.
  2166  		if v == nil {
  2167  			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
  2168  		}
  2169  		best := v.Block
  2170  		bestArg := v
  2171  		var bestDepth int16
  2172  		if l := s.loopnest.b2l[best.ID]; l != nil {
  2173  			bestDepth = l.depth
  2174  		}
  2175  		b := best
  2176  		const maxSpillSearch = 100
  2177  		for i := 0; i < maxSpillSearch; i++ {
  2178  			// Find the child of b in the dominator tree which
  2179  			// dominates all restores.
  2180  			p := b
  2181  			b = nil
  2182  			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
  2183  				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
  2184  					// c also dominates all restores.  Walk down into c.
  2185  					b = c
  2186  					break
  2187  				}
  2188  			}
  2189  			if b == nil {
  2190  				// Ran out of blocks which dominate all restores.
  2191  				break
  2192  			}
  2193  
  2194  			var depth int16
  2195  			if l := s.loopnest.b2l[b.ID]; l != nil {
  2196  				depth = l.depth
  2197  			}
  2198  			if depth > bestDepth {
  2199  				// Don't push the spill into a deeper loop.
  2200  				continue
  2201  			}
  2202  
  2203  			// If v is in a register at the start of b, we can
  2204  			// place the spill here (after the phis).
  2205  			if len(b.Preds) == 1 {
  2206  				for _, e := range s.endRegs[b.Preds[0].b.ID] {
  2207  					if e.v == v {
  2208  						// Found a better spot for the spill.
  2209  						best = b
  2210  						bestArg = e.c
  2211  						bestDepth = depth
  2212  						break
  2213  					}
  2214  				}
  2215  			} else {
  2216  				for _, e := range s.startRegs[b.ID] {
  2217  					if e.v == v {
  2218  						// Found a better spot for the spill.
  2219  						best = b
  2220  						bestArg = e.c
  2221  						bestDepth = depth
  2222  						break
  2223  					}
  2224  				}
  2225  			}
  2226  		}
  2227  
  2228  		// Put the spill in the best block we found.
  2229  		spill.Block = best
  2230  		spill.AddArg(bestArg)
  2231  		if best == v.Block && !mustBeFirst(v.Op) {
  2232  			// Place immediately after v.
  2233  			after[v.ID] = append(after[v.ID], spill)
  2234  		} else {
  2235  			// Place at the start of best block.
  2236  			start[best.ID] = append(start[best.ID], spill)
  2237  		}
  2238  	}
  2239  
  2240  	// Insert spill instructions into the block schedules.
  2241  	var oldSched []*Value
  2242  	for _, b := range s.visitOrder {
  2243  		nfirst := 0
  2244  		for _, v := range b.Values {
  2245  			if !mustBeFirst(v.Op) {
  2246  				break
  2247  			}
  2248  			nfirst++
  2249  		}
  2250  		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
  2251  		b.Values = b.Values[:nfirst]
  2252  		b.Values = append(b.Values, start[b.ID]...)
  2253  		for _, v := range oldSched {
  2254  			b.Values = append(b.Values, v)
  2255  			b.Values = append(b.Values, after[v.ID]...)
  2256  		}
  2257  	}
  2258  }
  2259  
  2260  // shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
  2261  func (s *regAllocState) shuffle(stacklive [][]ID) {
  2262  	var e edgeState
  2263  	e.s = s
  2264  	e.cache = map[ID][]*Value{}
  2265  	e.contents = map[Location]contentRecord{}
  2266  	if s.f.pass.debug > regDebug {
  2267  		fmt.Printf("shuffle %s\n", s.f.Name)
  2268  		fmt.Println(s.f.String())
  2269  	}
  2270  
  2271  	for _, b := range s.visitOrder {
  2272  		if len(b.Preds) <= 1 {
  2273  			continue
  2274  		}
  2275  		e.b = b
  2276  		for i, edge := range b.Preds {
  2277  			p := edge.b
  2278  			e.p = p
  2279  			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
  2280  			e.process()
  2281  		}
  2282  	}
  2283  
  2284  	if s.f.pass.debug > regDebug {
  2285  		fmt.Printf("post shuffle %s\n", s.f.Name)
  2286  		fmt.Println(s.f.String())
  2287  	}
  2288  }
  2289  
  2290  type edgeState struct {
  2291  	s    *regAllocState
  2292  	p, b *Block // edge goes from p->b.
  2293  
  2294  	// for each pre-regalloc value, a list of equivalent cached values
  2295  	cache      map[ID][]*Value
  2296  	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration
  2297  
  2298  	// map from location to the value it contains
  2299  	contents map[Location]contentRecord
  2300  
  2301  	// desired destination locations
  2302  	destinations []dstRecord
  2303  	extra        []dstRecord
  2304  
  2305  	usedRegs              regMask // registers currently holding something
  2306  	uniqueRegs            regMask // registers holding the only copy of a value
  2307  	finalRegs             regMask // registers holding final target
  2308  	rematerializeableRegs regMask // registers that hold rematerializeable values
  2309  }
  2310  
  2311  type contentRecord struct {
  2312  	vid   ID       // pre-regalloc value
  2313  	c     *Value   // cached value
  2314  	final bool     // this is a satisfied destination
  2315  	pos   src.XPos // source position of use of the value
  2316  }
  2317  
  2318  type dstRecord struct {
  2319  	loc    Location // register or stack slot
  2320  	vid    ID       // pre-regalloc value it should contain
  2321  	splice **Value  // place to store reference to the generating instruction
  2322  	pos    src.XPos // source position of use of this location
  2323  }
  2324  
  2325  // setup initializes the edge state for shuffling.
  2326  func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
  2327  	if e.s.f.pass.debug > regDebug {
  2328  		fmt.Printf("edge %s->%s\n", e.p, e.b)
  2329  	}
  2330  
  2331  	// Clear state.
  2332  	clear(e.cache)
  2333  	e.cachedVals = e.cachedVals[:0]
  2334  	clear(e.contents)
  2335  	e.usedRegs = 0
  2336  	e.uniqueRegs = 0
  2337  	e.finalRegs = 0
  2338  	e.rematerializeableRegs = 0
  2339  
  2340  	// Live registers can be sources.
  2341  	for _, x := range srcReg {
  2342  		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // we don't care about the position of the source
  2343  	}
  2344  	// So can all of the spill locations.
  2345  	for _, spillID := range stacklive {
  2346  		v := e.s.orig[spillID]
  2347  		spill := e.s.values[v.ID].spill
  2348  		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
  2349  			// Spills were placed that only dominate the uses found
  2350  			// during the first regalloc pass. The edge fixup code
  2351  			// can't use a spill location if the spill doesn't dominate
  2352  			// the edge.
  2353  			// We are guaranteed that if the spill doesn't dominate this edge,
  2354  			// then the value is available in a register (because we called
  2355  			// makeSpill for every value not in a register at the start
  2356  			// of an edge).
  2357  			continue
  2358  		}
  2359  		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // we don't care about the position of the source
  2360  	}
  2361  
  2362  	// Figure out all the destinations we need.
  2363  	dsts := e.destinations[:0]
  2364  	for _, x := range dstReg {
  2365  		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
  2366  	}
  2367  	// Phis need their args to end up in a specific location.
  2368  	for _, v := range e.b.Values {
  2369  		if v.Op != OpPhi {
  2370  			break
  2371  		}
  2372  		loc := e.s.f.getHome(v.ID)
  2373  		if loc == nil {
  2374  			continue
  2375  		}
  2376  		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
  2377  	}
  2378  	e.destinations = dsts
  2379  
  2380  	if e.s.f.pass.debug > regDebug {
  2381  		for _, vid := range e.cachedVals {
  2382  			a := e.cache[vid]
  2383  			for _, c := range a {
  2384  				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
  2385  			}
  2386  		}
  2387  		for _, d := range e.destinations {
  2388  			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
  2389  		}
  2390  	}
  2391  }
  2392  
  2393  // process generates code to move all the values to the right destination locations.
  2394  func (e *edgeState) process() {
  2395  	dsts := e.destinations
  2396  
  2397  	// Process the destinations until they are all satisfied.
  2398  	for len(dsts) > 0 {
  2399  		i := 0
  2400  		for _, d := range dsts {
  2401  			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
  2402  				// Failed - save for next iteration.
  2403  				dsts[i] = d
  2404  				i++
  2405  			}
  2406  		}
  2407  		if i < len(dsts) {
  2408  			// Made some progress. Go around again.
  2409  			dsts = dsts[:i]
  2410  
  2411  			// Append any extra destinations we generated.
  2412  			dsts = append(dsts, e.extra...)
  2413  			e.extra = e.extra[:0]
  2414  			continue
  2415  		}
  2416  
  2417  		// We made no progress. That means that any
  2418  		// remaining unsatisfied moves are in simple cycles.
  2419  		// For example, A -> B -> C -> D -> A.
  2420  		//   A ----> B
  2421  		//   ^       |
  2422  		//   |       |
  2423  		//   |       v
  2424  		//   D <---- C
  2425  
  2426  		// To break the cycle, we pick an unused register, say R,
  2427  		// and put a copy of B there.
  2428  		//   A ----> B
  2429  		//   ^       |
  2430  		//   |       |
  2431  		//   |       v
  2432  		//   D <---- C <---- R=copyofB
  2433  		// When we resume the outer loop, the A->B move can now proceed,
  2434  		// and eventually the whole cycle completes.
  2435  
  2436  		// Copy any cycle location to a temp register. This duplicates
  2437  		// one of the cycle entries, allowing the just duplicated value
  2438  		// to be overwritten and the cycle to proceed.
  2439  		d := dsts[0]
  2440  		loc := d.loc
  2441  		vid := e.contents[loc].vid
  2442  		c := e.contents[loc].c
  2443  		r := e.findRegFor(c.Type)
  2444  		if e.s.f.pass.debug > regDebug {
  2445  			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
  2446  		}
  2447  		e.erase(r)
  2448  		pos := d.pos.WithNotStmt()
  2449  		if _, isReg := loc.(*Register); isReg {
  2450  			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2451  		} else {
  2452  			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2453  		}
  2454  		e.set(r, vid, c, false, pos)
  2455  		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
  2456  			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
  2457  		}
  2458  	}
  2459  }
  2460  
  2461  // processDest generates code to put value vid into location loc. Returns true
  2462  // if progress was made.
  2463  func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
  2464  	pos = pos.WithNotStmt()
  2465  	occupant := e.contents[loc]
  2466  	if occupant.vid == vid {
  2467  		// Value is already in the correct place.
  2468  		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
  2469  		if splice != nil {
  2470  			(*splice).Uses--
  2471  			*splice = occupant.c
  2472  			occupant.c.Uses++
  2473  		}
  2474  		// Note: if splice==nil then c will appear dead. This is
  2475  		// non-SSA-form code, so be careful after this pass not to run
  2476  		// deadcode elimination.
  2477  		if _, ok := e.s.copies[occupant.c]; ok {
  2478  			// The copy at occupant.c was used to avoid spill.
  2479  			e.s.copies[occupant.c] = true
  2480  		}
  2481  		return true
  2482  	}
  2483  
  2484  	// Check if we're allowed to clobber the destination location.
  2485  	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
  2486  		// We can't overwrite the last copy
  2487  		// of a value that needs to survive.
  2488  		return false
  2489  	}
  2490  
  2491  	// Copy from a source of v, register preferred.
  2492  	v := e.s.orig[vid]
  2493  	var c *Value
  2494  	var src Location
  2495  	if e.s.f.pass.debug > regDebug {
  2496  		fmt.Printf("moving v%d to %s\n", vid, loc)
  2497  		fmt.Printf("sources of v%d:", vid)
  2498  	}
  2499  	if opcodeTable[v.Op].fixedReg {
  2500  		c = v
  2501  		src = e.s.f.getHome(v.ID)
  2502  	} else {
  2503  		for _, w := range e.cache[vid] {
  2504  			h := e.s.f.getHome(w.ID)
  2505  			if e.s.f.pass.debug > regDebug {
  2506  				fmt.Printf(" %s:%s", h, w)
  2507  			}
  2508  			_, isreg := h.(*Register)
  2509  			if src == nil || isreg {
  2510  				c = w
  2511  				src = h
  2512  			}
  2513  		}
  2514  	}
  2515  	if e.s.f.pass.debug > regDebug {
  2516  		if src != nil {
  2517  			fmt.Printf(" [use %s]\n", src)
  2518  		} else {
  2519  			fmt.Printf(" [no source]\n")
  2520  		}
  2521  	}
  2522  	_, dstReg := loc.(*Register)
  2523  
  2524  	// Pre-clobber destination. This avoids the
  2525  	// following situation:
  2526  	//   - v is currently held in R0 and stacktmp0.
  2527  	//   - We want to copy stacktmp1 to stacktmp0.
  2528  	//   - We choose R0 as the temporary register.
  2529  	// During the copy, both R0 and stacktmp0 are
  2530  	// clobbered, losing both copies of v. Oops!
  2531  	// Erasing the destination early means R0 will not
  2532  	// be chosen as the temp register, as it will then
  2533  	// be the last copy of v.
  2534  	e.erase(loc)
  2535  	var x *Value
  2536  	if c == nil || e.s.values[vid].rematerializeable {
  2537  		if !e.s.values[vid].rematerializeable {
  2538  			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
  2539  		}
  2540  		if dstReg {
  2541  			x = v.copyInto(e.p)
  2542  		} else {
  2543  			// Rematerialize into stack slot. Need a free
  2544  			// register to accomplish this.
  2545  			r := e.findRegFor(v.Type)
  2546  			e.erase(r)
  2547  			x = v.copyIntoWithXPos(e.p, pos)
  2548  			e.set(r, vid, x, false, pos)
  2549  			// Make sure we spill with the size of the slot, not the
  2550  			// size of x (which might be wider due to our dropping
  2551  			// of narrowing conversions).
  2552  			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
  2553  		}
  2554  	} else {
  2555  		// Emit move from src to dst.
  2556  		_, srcReg := src.(*Register)
  2557  		if srcReg {
  2558  			if dstReg {
  2559  				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2560  			} else {
  2561  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
  2562  			}
  2563  		} else {
  2564  			if dstReg {
  2565  				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2566  			} else {
  2567  				// mem->mem. Use temp register.
  2568  				r := e.findRegFor(c.Type)
  2569  				e.erase(r)
  2570  				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2571  				e.set(r, vid, t, false, pos)
  2572  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
  2573  			}
  2574  		}
  2575  	}
  2576  	e.set(loc, vid, x, true, pos)
  2577  	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
  2578  		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
  2579  	}
  2580  	if splice != nil {
  2581  		(*splice).Uses--
  2582  		*splice = x
  2583  		x.Uses++
  2584  	}
  2585  	return true
  2586  }
  2587  
  2588  // set changes the contents of location loc to hold the given value and its cached representative.
  2589  func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
  2590  	e.s.f.setHome(c, loc)
  2591  	e.contents[loc] = contentRecord{vid, c, final, pos}
  2592  	a := e.cache[vid]
  2593  	if len(a) == 0 {
  2594  		e.cachedVals = append(e.cachedVals, vid)
  2595  	}
  2596  	a = append(a, c)
  2597  	e.cache[vid] = a
  2598  	if r, ok := loc.(*Register); ok {
  2599  		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
  2600  			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
  2601  		}
  2602  		e.usedRegs |= regMask(1) << uint(r.num)
  2603  		if final {
  2604  			e.finalRegs |= regMask(1) << uint(r.num)
  2605  		}
  2606  		if len(a) == 1 {
  2607  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2608  		}
  2609  		if len(a) == 2 {
  2610  			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2611  				e.uniqueRegs &^= regMask(1) << uint(t.num)
  2612  			}
  2613  		}
  2614  		if e.s.values[vid].rematerializeable {
  2615  			e.rematerializeableRegs |= regMask(1) << uint(r.num)
  2616  		}
  2617  	}
  2618  	if e.s.f.pass.debug > regDebug {
  2619  		fmt.Printf("%s\n", c.LongString())
  2620  		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
  2621  	}
  2622  }
  2623  
  2624  // erase removes any user of loc.
  2625  func (e *edgeState) erase(loc Location) {
  2626  	cr := e.contents[loc]
  2627  	if cr.c == nil {
  2628  		return
  2629  	}
  2630  	vid := cr.vid
  2631  
  2632  	if cr.final {
  2633  		// Add a destination to move this value back into place.
  2634  		// Make sure it gets added to the tail of the destination queue
  2635  		// so we make progress on other moves first.
  2636  		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
  2637  	}
  2638  
  2639  	// Remove c from the list of cached values.
  2640  	a := e.cache[vid]
  2641  	for i, c := range a {
  2642  		if e.s.f.getHome(c.ID) == loc {
  2643  			if e.s.f.pass.debug > regDebug {
  2644  				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
  2645  			}
  2646  			a[i], a = a[len(a)-1], a[:len(a)-1]
  2647  			break
  2648  		}
  2649  	}
  2650  	e.cache[vid] = a
  2651  
  2652  	// Update register masks.
  2653  	if r, ok := loc.(*Register); ok {
  2654  		e.usedRegs &^= regMask(1) << uint(r.num)
  2655  		if cr.final {
  2656  			e.finalRegs &^= regMask(1) << uint(r.num)
  2657  		}
  2658  		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
  2659  	}
  2660  	if len(a) == 1 {
  2661  		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2662  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2663  		}
  2664  	}
  2665  }
  2666  
  2667  // findRegFor finds a register we can use to make a temp copy of type typ.
  2668  func (e *edgeState) findRegFor(typ *types.Type) Location {
  2669  	// Which registers are possibilities.
  2670  	types := &e.s.f.Config.Types
  2671  	m := e.s.compatRegs(typ)
  2672  
  2673  	// Pick a register. In priority order:
  2674  	// 1) an unused register
  2675  	// 2) a non-unique register not holding a final value
  2676  	// 3) a non-unique register
  2677  	// 4) a register holding a rematerializeable value
  2678  	x := m &^ e.usedRegs
  2679  	if x != 0 {
  2680  		return &e.s.registers[pickReg(x)]
  2681  	}
  2682  	x = m &^ e.uniqueRegs &^ e.finalRegs
  2683  	if x != 0 {
  2684  		return &e.s.registers[pickReg(x)]
  2685  	}
  2686  	x = m &^ e.uniqueRegs
  2687  	if x != 0 {
  2688  		return &e.s.registers[pickReg(x)]
  2689  	}
  2690  	x = m & e.rematerializeableRegs
  2691  	if x != 0 {
  2692  		return &e.s.registers[pickReg(x)]
  2693  	}
  2694  
  2695  	// No register is available.
  2696  	// Pick a register to spill.
  2697  	for _, vid := range e.cachedVals {
  2698  		a := e.cache[vid]
  2699  		for _, c := range a {
  2700  			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
  2701  				if !c.rematerializeable() {
  2702  					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
  2703  					// Allocate a temp location to spill a register to.
  2704  					// The type of the slot is immaterial - it will not be live across
  2705  					// any safepoint. Just use a type big enough to hold any register.
  2706  					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, types.Int64), Type: types.Int64}
  2707  					// TODO: reuse these slots. They'll need to be erased first.
  2708  					e.set(t, vid, x, false, c.Pos)
  2709  					if e.s.f.pass.debug > regDebug {
  2710  						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
  2711  					}
  2712  				}
  2713  				// r will now be overwritten by the caller. At some point
  2714  				// later, the newly saved value will be moved back to its
  2715  				// final destination in processDest.
  2716  				return r
  2717  			}
  2718  		}
  2719  	}
  2720  
  2721  	fmt.Printf("m:%d unique:%d final:%d rematerializeable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
  2722  	for _, vid := range e.cachedVals {
  2723  		a := e.cache[vid]
  2724  		for _, c := range a {
  2725  			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
  2726  		}
  2727  	}
  2728  	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
  2729  	return nil
  2730  }
  2731  
  2732  // rematerializeable reports whether the register allocator should recompute
  2733  // a value instead of spilling/restoring it.
  2734  func (v *Value) rematerializeable() bool {
  2735  	if !opcodeTable[v.Op].rematerializeable {
  2736  		return false
  2737  	}
  2738  	for _, a := range v.Args {
  2739  		// Fixed-register allocations (SP, SB, etc.) are always available.
  2740  		// Any other argument of an opcode makes it not rematerializeable.
  2741  		if !opcodeTable[a.Op].fixedReg {
  2742  			return false
  2743  		}
  2744  	}
  2745  	return true
  2746  }
  2747  
  2748  type liveInfo struct {
  2749  	ID   ID       // ID of value
  2750  	dist int32    // # of instructions before next use
  2751  	pos  src.XPos // source position of next use
  2752  }
  2753  
  2754  // computeLive computes a map from block ID to a list of value IDs live at the end
  2755  // of that block. Together with the value ID is a count of how many instructions
  2756  // to the next use of that value. The resulting map is stored in s.live.
  2757  // computeLive also computes the desired register information at the end of each block.
  2758  // This desired register information is stored in s.desired.
  2759  // TODO: this could be quadratic if lots of variables are live across lots of
  2760  // basic blocks. Figure out a way to make this function (or, more precisely, the user
  2761  // of this function) require only linear size & time.
  2762  func (s *regAllocState) computeLive() {
  2763  	f := s.f
  2764  	s.live = make([][]liveInfo, f.NumBlocks())
  2765  	s.desired = make([]desiredState, f.NumBlocks())
  2766  	var phis []*Value
  2767  
  2768  	live := f.newSparseMapPos(f.NumValues())
  2769  	defer f.retSparseMapPos(live)
  2770  	t := f.newSparseMapPos(f.NumValues())
  2771  	defer f.retSparseMapPos(t)
  2772  
  2773  	// Keep track of which value we want in each register.
  2774  	var desired desiredState
  2775  
  2776  	// Instead of iterating over f.Blocks, iterate over their postordering.
  2777  	// Liveness information flows backward, so starting at the end
  2778  	// increases the probability that we will stabilize quickly.
  2779  	// TODO: Do a better job yet. Here's one possibility:
  2780  	// Calculate the dominator tree and locate all strongly connected components.
  2781  	// If a value is live in one block of an SCC, it is live in all.
  2782  	// Walk the dominator tree from end to beginning, just once, treating SCC
  2783  	// components as single blocks, duplicating the calculated liveness information
  2784  	// out to all of them.
  2785  	po := f.postorder()
  2786  	s.loopnest = f.loopnest()
  2787  	s.loopnest.computeUnavoidableCalls()
  2788  	for {
  2789  		changed := false
  2790  
  2791  		for _, b := range po {
  2792  			// Start with known live values at the end of the block.
  2793  			// Add len(b.Values) to adjust from end-of-block distance
  2794  			// to beginning-of-block distance.
  2795  			live.clear()
  2796  			for _, e := range s.live[b.ID] {
  2797  				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
  2798  			}
  2799  
  2800  			// Mark control values as live
  2801  			for _, c := range b.ControlValues() {
  2802  				if s.values[c.ID].needReg {
  2803  					live.set(c.ID, int32(len(b.Values)), b.Pos)
  2804  				}
  2805  			}
  2806  
  2807  			// Propagate backwards to the start of the block
  2808  			// Assumes Values have been scheduled.
  2809  			phis = phis[:0]
  2810  			for i := len(b.Values) - 1; i >= 0; i-- {
  2811  				v := b.Values[i]
  2812  				live.remove(v.ID)
  2813  				if v.Op == OpPhi {
  2814  					// save phi ops for later
  2815  					phis = append(phis, v)
  2816  					continue
  2817  				}
  2818  				if opcodeTable[v.Op].call {
  2819  					c := live.contents()
  2820  					for i := range c {
  2821  						c[i].val += unlikelyDistance
  2822  					}
  2823  				}
  2824  				for _, a := range v.Args {
  2825  					if s.values[a.ID].needReg {
  2826  						live.set(a.ID, int32(i), v.Pos)
  2827  					}
  2828  				}
  2829  			}
  2830  			// Propagate desired registers backwards.
  2831  			desired.copy(&s.desired[b.ID])
  2832  			for i := len(b.Values) - 1; i >= 0; i-- {
  2833  				v := b.Values[i]
  2834  				prefs := desired.remove(v.ID)
  2835  				if v.Op == OpPhi {
  2836  					// TODO: if v is a phi, save desired register for phi inputs.
  2837  					// For now, we just drop it and don't propagate
  2838  				// desired registers back through phi nodes.
  2839  					continue
  2840  				}
  2841  				regspec := s.regspec(v)
  2842  				// Cancel desired registers if they get clobbered.
  2843  				desired.clobber(regspec.clobbers)
  2844  				// Update desired registers if there are any fixed register inputs.
  2845  				for _, j := range regspec.inputs {
  2846  					if countRegs(j.regs) != 1 {
  2847  						continue
  2848  					}
  2849  					desired.clobber(j.regs)
  2850  					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  2851  				}
  2852  				// Set desired register of input 0 if this is a 2-operand instruction.
  2853  				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  2854  					// ADDQconst is added here because we want to treat it as resultInArg0 for
  2855  					// the purposes of desired registers, even though it is not an absolute requirement.
  2856  					// This is because we'd rather implement it as ADDQ instead of LEAQ.
  2857  					// Same for ADDLconst.
  2858  					// Select0 is added here to propagate the desired register to the tuple-generating instruction.
  2859  					if opcodeTable[v.Op].commutative {
  2860  						desired.addList(v.Args[1].ID, prefs)
  2861  					}
  2862  					desired.addList(v.Args[0].ID, prefs)
  2863  				}
  2864  			}
  2865  
  2866  			// For each predecessor of b, expand its list of live-at-end values.
  2867  			// invariant: live contains the values live at the start of b (excluding phi inputs)
  2868  			for i, e := range b.Preds {
  2869  				p := e.b
  2870  				// Compute additional distance for the edge.
  2871  				// Note: delta must be at least 1 to distinguish the control
  2872  				// value use from the first user in a successor block.
  2873  				delta := int32(normalDistance)
  2874  				if len(p.Succs) == 2 {
  2875  					if p.Succs[0].b == b && p.Likely == BranchLikely ||
  2876  						p.Succs[1].b == b && p.Likely == BranchUnlikely {
  2877  						delta = likelyDistance
  2878  					}
  2879  					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
  2880  						p.Succs[1].b == b && p.Likely == BranchLikely {
  2881  						delta = unlikelyDistance
  2882  					}
  2883  				}
  2884  
  2885  				// Update any desired registers at the end of p.
  2886  				s.desired[p.ID].merge(&desired)
  2887  
  2888  				// Start t off with the previously known live values at the end of p.
  2889  				t.clear()
  2890  				for _, e := range s.live[p.ID] {
  2891  					t.set(e.ID, e.dist, e.pos)
  2892  				}
  2893  				update := false
  2894  
  2895  				// Add new live values from scanning this block.
  2896  				for _, e := range live.contents() {
  2897  					d := e.val + delta
  2898  					if !t.contains(e.key) || d < t.get(e.key) {
  2899  						update = true
  2900  						t.set(e.key, d, e.pos)
  2901  					}
  2902  				}
  2903  				// Also add the correct arg from the saved phi values.
  2904  				// All phis are at distance delta (we consider them
  2905  				// simultaneously happening at the start of the block).
  2906  				for _, v := range phis {
  2907  					id := v.Args[i].ID
  2908  					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
  2909  						update = true
  2910  						t.set(id, delta, v.Pos)
  2911  					}
  2912  				}
  2913  
  2914  				if !update {
  2915  					continue
  2916  				}
  2917  				// The live set has changed, update it.
  2918  				l := s.live[p.ID][:0]
  2919  				if cap(l) < t.size() {
  2920  					l = make([]liveInfo, 0, t.size())
  2921  				}
  2922  				for _, e := range t.contents() {
  2923  					l = append(l, liveInfo{e.key, e.val, e.pos})
  2924  				}
  2925  				s.live[p.ID] = l
  2926  				changed = true
  2927  			}
  2928  		}
  2929  
  2930  		if !changed {
  2931  			break
  2932  		}
  2933  	}
  2934  	if f.pass.debug > regDebug {
  2935  		fmt.Println("live values at end of each block")
  2936  		for _, b := range f.Blocks {
  2937  			fmt.Printf("  %s:", b)
  2938  			for _, x := range s.live[b.ID] {
  2939  				fmt.Printf(" v%d(%d)", x.ID, x.dist)
  2940  				for _, e := range s.desired[b.ID].entries {
  2941  					if e.ID != x.ID {
  2942  						continue
  2943  					}
  2944  					fmt.Printf("[")
  2945  					first := true
  2946  					for _, r := range e.regs {
  2947  						if r == noRegister {
  2948  							continue
  2949  						}
  2950  						if !first {
  2951  							fmt.Printf(",")
  2952  						}
  2953  						fmt.Print(&s.registers[r])
  2954  						first = false
  2955  					}
  2956  					fmt.Printf("]")
  2957  				}
  2958  			}
  2959  			if avoid := s.desired[b.ID].avoid; avoid != 0 {
  2960  				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
  2961  			}
  2962  			fmt.Println()
  2963  		}
  2964  	}
  2965  }
  2966  
  2967  // A desiredState represents desired register assignments.
  2968  type desiredState struct {
  2969  	// Desired assignments will be small, so we just use a list
  2970  	// of valueID+registers entries.
  2971  	entries []desiredStateEntry
  2972  	// Registers that other values want to be in.  This value will
  2973  	// contain at least the union of the regs fields of entries, but
  2974  	// may also contain registers desired by values that were once in
  2975  	// this data structure but have since been removed.
  2976  	avoid regMask
  2977  }
  2978  type desiredStateEntry struct {
  2979  	// (pre-regalloc) value
  2980  	ID ID
  2981  	// Registers it would like to be in, in priority order.
  2982  	// Unused slots are filled with noRegister.
  2983  	// For opcodes that return tuples, we track desired registers only
  2984  	// for the first element of the tuple (see desiredSecondReg for
  2985  	// tracking the desired register for the second part of a tuple).
  2986  	regs [4]register
  2987  }
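        // For example (illustrative, with arbitrary register numbers), an entry
        // {ID: 7, regs: [3, 5, noRegister, noRegister]} records that value v7 would
        // prefer register 3, then register 5; the enclosing desiredState's avoid mask
        // then includes at least the bits 1<<3 and 1<<5.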
  2988  
  2989  // get returns a list of desired registers for value vid.
  2990  func (d *desiredState) get(vid ID) [4]register {
  2991  	for _, e := range d.entries {
  2992  		if e.ID == vid {
  2993  			return e.regs
  2994  		}
  2995  	}
  2996  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  2997  }
  2998  
  2999  // add records that we'd like value vid to be in register r.
  3000  func (d *desiredState) add(vid ID, r register) {
  3001  	d.avoid |= regMask(1) << r
  3002  	for i := range d.entries {
  3003  		e := &d.entries[i]
  3004  		if e.ID != vid {
  3005  			continue
  3006  		}
  3007  		if e.regs[0] == r {
  3008  			// Already known and highest priority
  3009  			return
  3010  		}
  3011  		for j := 1; j < len(e.regs); j++ {
  3012  			if e.regs[j] == r {
  3013  				// Move from lower priority to top priority
  3014  				copy(e.regs[1:], e.regs[:j])
  3015  				e.regs[0] = r
  3016  				return
  3017  			}
  3018  		}
  3019  		copy(e.regs[1:], e.regs[:])
  3020  		e.regs[0] = r
  3021  		return
  3022  	}
  3023  	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
  3024  }
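        // An illustrative sketch of add's priority behavior (register numbers are
        // arbitrary; not part of the allocator itself):
        //
        //	var d desiredState
        //	d.add(7, 3) // v7: [3, noRegister, noRegister, noRegister]
        //	d.add(7, 5) // v7: [5, 3, noRegister, noRegister]; the newest register takes top priority
        //	d.add(7, 3) // v7: [3, 5, noRegister, noRegister]; re-adding 3 moves it back to the top
        //	_ = d.get(7) // returns [3, 5, noRegister, noRegister]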
  3025  
  3026  func (d *desiredState) addList(vid ID, regs [4]register) {
  3027  	// regs is in priority order, so iterate in reverse: the highest-priority register is added last and ends up first in d.
  3028  	for i := len(regs) - 1; i >= 0; i-- {
  3029  		r := regs[i]
  3030  		if r != noRegister {
  3031  			d.add(vid, r)
  3032  		}
  3033  	}
  3034  }
  3035  
  3036  // clobber erases any desired registers in the set m.
  3037  func (d *desiredState) clobber(m regMask) {
  3038  	for i := 0; i < len(d.entries); {
  3039  		e := &d.entries[i]
  3040  		j := 0
  3041  		for _, r := range e.regs {
  3042  			if r != noRegister && m>>r&1 == 0 {
  3043  				e.regs[j] = r
  3044  				j++
  3045  			}
  3046  		}
  3047  		if j == 0 {
  3048  			// No more desired registers for this value.
  3049  			d.entries[i] = d.entries[len(d.entries)-1]
  3050  			d.entries = d.entries[:len(d.entries)-1]
  3051  			continue
  3052  		}
  3053  		for ; j < len(e.regs); j++ {
  3054  			e.regs[j] = noRegister
  3055  		}
  3056  		i++
  3057  	}
  3058  	d.avoid &^= m
  3059  }
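        // For example (illustrative), if v7's desired registers are [3, 5, noRegister,
        // noRegister], then clobber(1<<3) leaves [5, noRegister, noRegister, noRegister]
        // and clears bit 3 from avoid; clobbering register 5 as well would drop the
        // entry for v7 entirely.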
  3060  
  3061  // copy copies a desired state from another desiredState x.
  3062  func (d *desiredState) copy(x *desiredState) {
  3063  	d.entries = append(d.entries[:0], x.entries...)
  3064  	d.avoid = x.avoid
  3065  }
  3066  
  3067  // remove removes the desired registers for vid and returns them.
  3068  func (d *desiredState) remove(vid ID) [4]register {
  3069  	for i := range d.entries {
  3070  		if d.entries[i].ID == vid {
  3071  			regs := d.entries[i].regs
  3072  			d.entries[i] = d.entries[len(d.entries)-1]
  3073  			d.entries = d.entries[:len(d.entries)-1]
  3074  			return regs
  3075  		}
  3076  	}
  3077  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  3078  }
  3079  
  3080  // merge merges another desired state x into d.
  3081  func (d *desiredState) merge(x *desiredState) {
  3082  	d.avoid |= x.avoid
  3083  	// There should only be a few desired registers, so
  3084  	// linear insert is ok.
  3085  	for _, e := range x.entries {
  3086  		d.addList(e.ID, e.regs)
  3087  	}
  3088  }
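        // For example (illustrative), if d records v7 -> [3, ...] and x records
        // v7 -> [5, ...], then after d.merge(x) v7's list in d is [5, 3, ...]:
        // the merged-in registers are re-added via addList, so they end up at the
        // highest priority.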
  3089  
  3090  // computeUnavoidableCalls computes the containsUnavoidableCall fields in the loop nest.
  3091  func (loopnest *loopnest) computeUnavoidableCalls() {
  3092  	f := loopnest.f
  3093  
  3094  	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
  3095  	defer f.Cache.freeBoolSlice(hasCall)
  3096  	for _, b := range f.Blocks {
  3097  		if b.containsCall() {
  3098  			hasCall[b.ID] = true
  3099  		}
  3100  	}
  3101  	found := f.Cache.allocSparseSet(f.NumBlocks())
  3102  	defer f.Cache.freeSparseSet(found)
  3103  	// Run a DFS to find a path through the loop that avoids all calls.
  3104  	// Such a path either escapes the loop or returns to the header.
  3105  	// It isn't enough that some exit is not dominated by any call, for example:
  3106  	// ... some loop
  3107  	// call1    call2
  3108  	//   \       /
  3109  	//     block
  3110  	// ...
  3111  	// block is not dominated by any single call, but there is no call-free path to it.
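        	// Conversely, if only one side of a branch in the loop body contains a call
        	// (e.g. a rarely taken slow path), the DFS below reaches the header without
        	// passing through a call and the loop is left unmarked.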
  3112  loopLoop:
  3113  	for _, l := range loopnest.loops {
  3114  		found.clear()
  3115  		tovisit := make([]*Block, 0, 8)
  3116  		tovisit = append(tovisit, l.header)
  3117  		for len(tovisit) > 0 {
  3118  			cur := tovisit[len(tovisit)-1]
  3119  			tovisit = tovisit[:len(tovisit)-1]
  3120  			if hasCall[cur.ID] {
  3121  				continue
  3122  			}
  3123  			for _, s := range cur.Succs {
  3124  				nb := s.Block()
  3125  				if nb == l.header {
  3126  					// Found a call-free path around the loop.
  3127  					continue loopLoop
  3128  				}
  3129  				if found.contains(nb.ID) {
  3130  					// Already found via another path.
  3131  					continue
  3132  				}
  3133  				nl := loopnest.b2l[nb.ID]
  3134  				if nl == nil || (nl.depth <= l.depth && nl != l) {
  3135  					// Left the loop.
  3136  					continue
  3137  				}
  3138  				tovisit = append(tovisit, nb)
  3139  				found.add(nb.ID)
  3140  			}
  3141  		}
  3142  		// No call-free path was found.
  3143  		l.containsUnavoidableCall = true
  3144  	}
  3145  }
  3146  
  3147  func (b *Block) containsCall() bool {
  3148  	if b.Kind == BlockDefer {
  3149  		return true
  3150  	}
  3151  	for _, v := range b.Values {
  3152  		if opcodeTable[v.Op].call {
  3153  			return true
  3154  		}
  3155  	}
  3156  	return false
  3157  }
  3158  
