Source file src/cmd/compile/internal/ssa/regalloc.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Register allocation.
     6  //
     7  // We use a version of a linear scan register allocator. We treat the
     8  // whole function as a single long basic block and run through
     9  // it using a greedy register allocator. Then all merge edges
    10  // (those targeting a block with len(Preds)>1) are processed to
    11  // shuffle data into the place that the target of the edge expects.
    12  //
    13  // The greedy allocator moves values into registers just before they
    14  // are used, spills registers only when necessary, and spills the
    15  // value whose next use is farthest in the future.
    16  //
    17  // The register allocator requires that a block is not scheduled until
    18  // at least one of its predecessors has been scheduled. The most recent
    19  // such predecessor provides the starting register state for a block.
    20  //
    21  // It also requires that there are no critical edges (critical =
    22  // comes from a block with >1 successor and goes to a block with >1
    23  // predecessor).  This makes it easy to add fixup code on merge edges -
    24  // the source of a merge edge has only one successor, so we can add
    25  // fixup code to the end of that block.
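        //
        // For example (hypothetical blocks): an edge b1->b3 is critical if b1 has
        // two successors and b3 has two predecessors. The critical pass splits it
        // by inserting an empty block b4:
        //
        //     before:  b1 -> {b2, b3}          after:  b1 -> {b2, b4}
        //              b3 <- {b1, b5}                  b4 -> b3
        //                                              b3 <- {b4, b5}
        //
        // Any fixup moves for the merge edge into b3 can then go at the end of b4.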
    26  
    27  // Spilling
    28  //
    29  // During the normal course of the allocator, we might throw a still-live
    30  // value out of all registers. When that value is subsequently used, we must
    31  // load it from a slot on the stack. We must also issue an instruction to
    32  // initialize that stack location with a copy of v.
    33  //
    34  // pre-regalloc:
    35  //   (1) v = Op ...
    36  //   (2) x = Op ...
    37  //   (3) ... = Op v ...
    38  //
    39  // post-regalloc:
    40  //   (1) v = Op ...    : AX // computes v, store result in AX
    41  //       s = StoreReg v     // spill v to a stack slot
    42  //   (2) x = Op ...    : AX // some other op uses AX
    43  //       c = LoadReg s : CX // restore v from stack slot
    44  //   (3) ... = Op c ...     // use the restored value
    45  //
    46  // Allocation occurs normally until we reach (3) and we realize we have
    47  // a use of v and it isn't in any register. At that point, we allocate
    48  // a spill (a StoreReg) for v. We can't determine the correct place for
    49  // the spill at this point, so we allocate the spill as blockless initially.
    50  // The restore is then generated to load v back into a register so it can
    51  // be used. Subsequent uses of v will use the restored value c instead.
    52  //
    53  // What remains is the question of where to schedule the spill.
    54  // During allocation, we keep track of the dominator of all restores of v.
    55  // The spill of v must dominate that block. The spill must also be issued at
    56  // a point where v is still in a register.
    57  //
    58  // To find the right place, start at b, the block which dominates all restores.
    59  //  - If b is v.Block, then issue the spill right after v.
    60  //    It is known to be in a register at that point, and dominates any restores.
    61  //  - Otherwise, if v is in a register at the start of b,
    62  //    put the spill of v at the start of b.
    63  //  - Otherwise, set b = immediate dominator of b, and repeat.
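        //
        // For example (a hypothetical CFG): if v is defined in b1 and restored in
        // b4 and b7, and b2 is the block dominating both restores, we start at b2.
        // If v is still in a register at the start of b2, the spill goes there;
        // otherwise we try b2's immediate dominator, walking up until we reach
        // v.Block, where the spill can always be placed right after v.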
    64  //
    65  // Phi values are special, as always. We define two kinds of phis, those
    66  // where the merge happens in a register (a "register" phi) and those where
    67  // the merge happens in a stack location (a "stack" phi).
    68  //
    69  // A register phi must have the phi and all of its inputs allocated to the
    70  // same register. Register phis are spilled similarly to regular ops.
    71  //
    72  // A stack phi must have the phi and all of its inputs allocated to the same
    73  // stack location. Stack phis start out life already spilled - each phi
    74  // input must be a store (using StoreReg) at the end of the corresponding
    75  // predecessor block.
    76  //     b1: y = ... : AX        b2: z = ... : BX
    77  //         y2 = StoreReg y         z2 = StoreReg z
    78  //         goto b3                 goto b3
    79  //     b3: x = phi(y2, z2)
    80  // The stack allocator knows that StoreReg args of stack-allocated phis
    81  // must be allocated to the same stack slot as the phi that uses them.
    82  // x is now a spilled value and a restore must appear before its first use.
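        //
        // For contrast, a sketch of a register phi (illustrative only):
        //     b1: y = ... : AX        b2: z = ... : AX
        //         goto b3                 goto b3
        //     b3: x = phi(y, z) : AX
        // Here the phi and both inputs share AX; if an input is not already in AX
        // at the end of its predecessor, the merge-edge shuffle inserts a copy.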
    83  
    84  // TODO
    85  
    86  // Use an affinity graph to mark two values which should use the
    87  // same register. This affinity graph will be used to prefer certain
    88  // registers for allocation. This affinity helps eliminate moves that
    89  // are required for phi implementations and helps generate allocations
    90  // for 2-register architectures.
    91  
    92  // Note: regalloc generates a not-quite-SSA output. If we have:
    93  //
    94  //             b1: x = ... : AX
    95  //                 x2 = StoreReg x
    96  //                 ... AX gets reused for something else ...
    97  //                 if ... goto b3 else b4
    98  //
    99  //   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
   100  //       ... use x3 ...                 ... use x4 ...
   101  //
   102  //             b2: ... use x3 ...
   103  //
   104  // If b3 is the primary predecessor of b2, then we use x3 in b2 and
   105  // add an x4:CX->BX copy at the end of b4.
   106  // But the definition of x3 doesn't dominate b2.  We should really
   107  // insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
   108  // SSA form. For now, we ignore this problem as remaining in strict
   109  // SSA form isn't needed after regalloc. We'll just leave the use
   110  // of x3 not dominated by the definition of x3, and the CX->BX copy
   111  // will have no use (so don't run deadcode after regalloc!).
   112  // TODO: maybe we should introduce these extra phis?
   113  
   114  package ssa
   115  
   116  import (
   117  	"cmd/compile/internal/base"
   118  	"cmd/compile/internal/ir"
   119  	"cmd/compile/internal/types"
   120  	"cmd/internal/src"
   121  	"cmd/internal/sys"
   122  	"fmt"
   123  	"internal/buildcfg"
   124  	"math"
   125  	"math/bits"
   126  	"unsafe"
   127  )
   128  
   129  const (
   130  	moveSpills = iota
   131  	logSpills
   132  	regDebug
   133  	stackDebug
   134  )
   135  
   136  // distance is a measure of how far into the future values are used.
   137  // distance is measured in units of instructions.
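        // When liveness is propagated backwards across a block edge, one of these
        // constants is added to the use distance depending on how likely that edge
        // is to be taken. Roughly speaking, uses reachable only via an unlikely
        // branch look farther away, making their values better spill candidates.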
   138  const (
   139  	likelyDistance   = 1
   140  	normalDistance   = 10
   141  	unlikelyDistance = 100
   142  )
   143  
   144  // regalloc performs register allocation on f. It sets f.RegAlloc
   145  // to the resulting allocation.
   146  func regalloc(f *Func) {
   147  	var s regAllocState
   148  	s.init(f)
   149  	s.regalloc(f)
   150  	s.close()
   151  }
   152  
   153  type register uint8
   154  
   155  const noRegister register = 255
   156  
   157  // For bulk initializing
   158  var noRegisters [32]register = [32]register{
   159  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   160  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   161  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   162  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   163  }
   164  
   165  // A regMask encodes a set of machine registers.
   166  // TODO: regMask -> regSet?
   167  type regMask uint64
   168  
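        // String returns the registers in m as a space-separated list of the form
        // "r0 r3 r5", one entry per set bit.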
   169  func (m regMask) String() string {
   170  	s := ""
   171  	for r := register(0); m != 0; r++ {
   172  		if m>>r&1 == 0 {
   173  			continue
   174  		}
   175  		m &^= regMask(1) << r
   176  		if s != "" {
   177  			s += " "
   178  		}
   179  		s += fmt.Sprintf("r%d", r)
   180  	}
   181  	return s
   182  }
   183  
   184  func (m regMask) contains(r register) bool {
   185  	return m>>r&1 != 0
   186  }
   187  
   188  func (s *regAllocState) RegMaskString(m regMask) string {
   189  	str := ""
   190  	for r := register(0); m != 0; r++ {
   191  		if m>>r&1 == 0 {
   192  			continue
   193  		}
   194  		m &^= regMask(1) << r
   195  		if str != "" {
   196  			str += " "
   197  		}
   198  		str += s.registers[r].String()
   199  	}
   200  	return str
   201  }
   202  
   203  // countRegs returns the number of set bits in the register mask.
   204  func countRegs(r regMask) int {
   205  	return bits.OnesCount64(uint64(r))
   206  }
   207  
   208  // pickReg picks an arbitrary register from the register mask.
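        // For example, for a mask with (only) bits 2 and 5 set, pickReg returns
        // register 2, the lowest set bit.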
   209  func pickReg(r regMask) register {
   210  	if r == 0 {
   211  		panic("can't pick a register from an empty set")
   212  	}
   213  	// pick the lowest one
   214  	return register(bits.TrailingZeros64(uint64(r)))
   215  }
   216  
   217  type use struct {
   218  	// distance from start of the block to a use of a value
   219  	//   dist == 0                 used by first instruction in block
   220  	//   dist == len(b.Values)-1   used by last instruction in block
   221  	//   dist == len(b.Values)     used by block's control value
   222  	//   dist  > len(b.Values)     used by a subsequent block
   223  	dist int32
   224  	pos  src.XPos // source position of the use
   225  	next *use     // linked list of uses of a value in nondecreasing dist order
   226  }
   227  
   228  // A valState records the register allocation state for a (pre-regalloc) value.
   229  type valState struct {
   230  	regs              regMask // the set of registers holding a Value (usually just one)
   231  	uses              *use    // list of uses in this block
   232  	spill             *Value  // spilled copy of the Value (if any)
   233  	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
   234  	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
   235  	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
   236  	rematerializeable bool    // cached value of v.rematerializeable()
   237  }
   238  
   239  type regState struct {
   240  	v *Value // Original (preregalloc) Value stored in this register.
   241  	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
   242  	// If a register is unused, v==c==nil
   243  }
   244  
   245  type regAllocState struct {
   246  	f *Func
   247  
   248  	sdom        SparseTree
   249  	registers   []Register
   250  	numRegs     register
   251  	SPReg       register
   252  	SBReg       register
   253  	GReg        register
   254  	ZeroIntReg  register
   255  	allocatable regMask
   256  
   257  	// live values at the end of each block.  live[b.ID] is a list of value IDs
   258  	// which are live at the end of b, together with the distance (in instructions)
   259  	// to each value's next use.
   260  	live [][]liveInfo
   261  	// desired register assignments at the end of each block.
   262  	// Note that this is a static map computed before allocation occurs. Dynamic
   263  	// register desires (from partially completed allocations) will trump
   264  	// this information.
   265  	desired []desiredState
   266  
   267  	// current state of each (preregalloc) Value
   268  	values []valState
   269  
   270  	// ID of SP, SB values
   271  	sp, sb ID
   272  
   273  	// For each Value, map from its value ID back to the
   274  	// preregalloc Value it was derived from.
   275  	orig []*Value
   276  
   277  	// current state of each register.
   278  	// Includes only registers in allocatable.
   279  	regs []regState
   280  
   281  	// registers that contain values which can't be kicked out
   282  	nospill regMask
   283  
   284  	// mask of registers currently in use
   285  	used regMask
   286  
   287  	// mask of registers used since the start of the current block
   288  	usedSinceBlockStart regMask
   289  
   290  	// mask of registers used in the current instruction
   291  	tmpused regMask
   292  
   293  	// current block we're working on
   294  	curBlock *Block
   295  
   296  	// cache of use records
   297  	freeUseRecords *use
   298  
   299  	// endRegs[blockid] is the register state at the end of each block.
   300  	// encoded as a set of endReg records.
   301  	endRegs [][]endReg
   302  
   303  	// startRegs[blockid] is the register state at the start of merge blocks.
   304  	// saved state does not include the state of phi ops in the block.
   305  	startRegs [][]startReg
   306  
   307  	// startRegsMask is a mask of the registers in startRegs[curBlock.ID].
   308  	// Registers dropped from startRegsMask are later synchronized back to
   309  	// startRegs by dropping from there as well.
   310  	startRegsMask regMask
   311  
   312  	// spillLive[blockid] is the set of live spills at the end of each block
   313  	spillLive [][]ID
   314  
   315  	// a set of copies we generated to move things around, and
   316  	// whether each one is used in shuffle. Unused copies will be deleted.
   317  	copies map[*Value]bool
   318  
   319  	loopnest *loopnest
   320  
   321  	// choose a good order in which to visit blocks for allocation purposes.
   322  	visitOrder []*Block
   323  
   324  	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
   325  	blockOrder []int32
   326  
   327  	// whether to insert instructions that clobber dead registers at call sites
   328  	doClobber bool
   329  
   330  	// For each instruction index in a basic block, the index of the next call
   331  	// at or after that instruction index.
   332  	// If there is no next call, the entry is math.MaxInt32.
   333  	// nextCall for a call instruction points to itself.
   334  	// (Indexes and results are pre-regalloc.)
   335  	nextCall []int32
   336  
   337  	// Index of the instruction we're currently working on.
   338  	// Index is expressed in terms of the pre-regalloc b.Values list.
   339  	curIdx int
   340  }
   341  
   342  type endReg struct {
   343  	r register
   344  	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
   345  	c *Value // cached version of the value
   346  }
   347  
   348  type startReg struct {
   349  	r   register
   350  	v   *Value   // pre-regalloc value needed in this register
   351  	c   *Value   // cached version of the value
   352  	pos src.XPos // source position of use of this register
   353  }
   354  
   355  // freeReg frees up register r. Any current user of r is kicked out.
   356  func (s *regAllocState) freeReg(r register) {
   357  	if !s.allocatable.contains(r) && !s.isGReg(r) {
   358  		return
   359  	}
   360  	v := s.regs[r].v
   361  	if v == nil {
   362  		s.f.Fatalf("tried to free an already free register %d\n", r)
   363  	}
   364  
   365  	// Mark r as unused.
   366  	if s.f.pass.debug > regDebug {
   367  		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
   368  	}
   369  	s.regs[r] = regState{}
   370  	s.values[v.ID].regs &^= regMask(1) << r
   371  	s.used &^= regMask(1) << r
   372  }
   373  
   374  // freeRegs frees up all registers listed in m.
   375  func (s *regAllocState) freeRegs(m regMask) {
   376  	for m&s.used != 0 {
   377  		s.freeReg(pickReg(m & s.used))
   378  	}
   379  }
   380  
   381  // clobberRegs inserts instructions that clobber registers listed in m.
   382  func (s *regAllocState) clobberRegs(m regMask) {
   383  	m &= s.allocatable & s.f.Config.gpRegMask // only integer registers can contain pointers, so only clobber them
   384  	for m != 0 {
   385  		r := pickReg(m)
   386  		m &^= 1 << r
   387  		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
   388  		s.f.setHome(x, &s.registers[r])
   389  	}
   390  }
   391  
   392  // setOrig records that c's original value is the same as
   393  // v's original value.
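        // Regalloc introduces copies, loads, and spills of pre-regalloc values;
        // s.orig lets later code map any such derived value back to the original
        // value whose contents it carries.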
   394  func (s *regAllocState) setOrig(c *Value, v *Value) {
   395  	if int(c.ID) >= cap(s.orig) {
   396  		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
   397  		copy(x, s.orig)
   398  		s.f.Cache.freeValueSlice(s.orig)
   399  		s.orig = x
   400  	}
   401  	for int(c.ID) >= len(s.orig) {
   402  		s.orig = append(s.orig, nil)
   403  	}
   404  	if s.orig[c.ID] != nil {
   405  		s.f.Fatalf("orig value set twice %s %s", c, v)
   406  	}
   407  	s.orig[c.ID] = s.orig[v.ID]
   408  }
   409  
   410  // assignReg assigns register r to hold c, a copy of v.
   411  // r must be unused.
   412  func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
   413  	if s.f.pass.debug > regDebug {
   414  		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
   415  	}
   416  	// Allocate v to r.
   417  	s.values[v.ID].regs |= regMask(1) << r
   418  	s.f.setHome(c, &s.registers[r])
   419  
   420  	// Allocate r to v.
   421  	if !s.allocatable.contains(r) && !s.isGReg(r) {
   422  		return
   423  	}
   424  	if s.regs[r].v != nil {
   425  		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
   426  	}
   427  	s.regs[r] = regState{v, c}
   428  	s.used |= regMask(1) << r
   429  }
   430  
   431  // allocReg chooses a register from the set of registers in mask.
   432  // If there is no unused register, a Value will be kicked out of
   433  // a register to make room.
   434  func (s *regAllocState) allocReg(mask regMask, v *Value) register {
   435  	if v.OnWasmStack {
   436  		return noRegister
   437  	}
   438  
   439  	mask &= s.allocatable
   440  	mask &^= s.nospill
   441  	if mask == 0 {
   442  		s.f.Fatalf("no register available for %s", v.LongString())
   443  	}
   444  
   445  	// Pick an unused register if one is available.
   446  	if mask&^s.used != 0 {
   447  		r := pickReg(mask &^ s.used)
   448  		s.usedSinceBlockStart |= regMask(1) << r
   449  		return r
   450  	}
   451  
   452  	// Pick a value to spill. Spill the value with the
   453  	// farthest-in-the-future use.
   454  	// TODO: Prefer registers with already spilled Values?
   455  	// TODO: Modify preference using affinity graph.
   456  	// TODO: if a single value is in multiple registers, spill one of them
   457  	// before spilling a value in just a single register.
   458  
   459  	// Find a register to spill. We spill the register containing the value
   460  	// whose next use is as far in the future as possible.
   461  	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
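        	// For example (hypothetical distances): if the candidate registers hold
        	// values whose next uses are 3, 7, and 12 instructions away, we evict the
        	// register whose value is next used 12 instructions from now.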
   462  	var r register
   463  	maxuse := int32(-1)
   464  	for t := register(0); t < s.numRegs; t++ {
   465  		if mask>>t&1 == 0 {
   466  			continue
   467  		}
   468  		v := s.regs[t].v
   469  		if n := s.values[v.ID].uses.dist; n > maxuse {
   470  			// v's next use is farther in the future than any value
   471  			// we've seen so far. A new best spill candidate.
   472  			r = t
   473  			maxuse = n
   474  		}
   475  	}
   476  	if maxuse == -1 {
   477  		s.f.Fatalf("couldn't find register to spill")
   478  	}
   479  
   480  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   481  		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
   482  		// So if there is still a free register, the allocation should have picked that one in the first place instead of
   483  		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
   484  		s.freeReg(r)
   485  		return r
   486  	}
   487  
   488  	// Try to move it around before kicking out, if there is a free register.
   489  	// We generate a Copy and record it. It will be deleted if never used.
   490  	v2 := s.regs[r].v
   491  	m := s.compatRegs(v2.Type) &^ s.used &^ s.tmpused &^ (regMask(1) << r)
   492  	if m != 0 && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
   493  		s.usedSinceBlockStart |= regMask(1) << r
   494  		r2 := pickReg(m)
   495  		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
   496  		s.copies[c] = false
   497  		if s.f.pass.debug > regDebug {
   498  			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
   499  		}
   500  		s.setOrig(c, v2)
   501  		s.assignReg(r2, v2, c)
   502  	}
   503  
   504  	// If the evicted register isn't used between the start of the block
   505  	// and now then there is no reason to even request it on entry. We can
   506  	// drop from startRegs in that case.
   507  	if s.usedSinceBlockStart&(regMask(1)<<r) == 0 {
   508  		if s.startRegsMask&(regMask(1)<<r) != 0 {
   509  			if s.f.pass.debug > regDebug {
   510  				fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
   511  			}
   512  			s.startRegsMask &^= regMask(1) << r
   513  		}
   514  	}
   515  
   516  	s.freeReg(r)
   517  	s.usedSinceBlockStart |= regMask(1) << r
   518  	return r
   519  }
   520  
   521  // makeSpill returns a Value which represents the spilled value of v.
   522  // b is the block in which the spill is used.
   523  func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
   524  	vi := &s.values[v.ID]
   525  	if vi.spill != nil {
   526  		// Final block not known - keep track of subtree where restores reside.
   527  		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
   528  		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
   529  		return vi.spill
   530  	}
   531  	// Make a spill for v. We don't know where we want
   532  	// to put it yet, so we leave it blockless for now.
   533  	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
   534  	// We also don't know what the spill's arg will be.
   535  	// Leave it argless for now.
   536  	s.setOrig(spill, v)
   537  	vi.spill = spill
   538  	vi.restoreMin = s.sdom[b.ID].entry
   539  	vi.restoreMax = s.sdom[b.ID].exit
   540  	return spill
   541  }
   542  
   543  // allocValToReg allocates v to a register selected from regMask and
   544  // returns the register copy of v. Any previous user is kicked out and spilled
   545  // (if necessary). Load code is added at the current pc. If nospill is set the
   546  // allocated register is marked nospill so the assignment cannot be
   547  // undone until the caller allows it by clearing nospill. Returns a
   548  // *Value which is either v or a copy of v allocated to the chosen register.
   549  func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
   550  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
   551  		c := v.copyIntoWithXPos(s.curBlock, pos)
   552  		c.OnWasmStack = true
   553  		s.setOrig(c, v)
   554  		return c
   555  	}
   556  	if v.OnWasmStack {
   557  		return v
   558  	}
   559  
   560  	vi := &s.values[v.ID]
   561  	pos = pos.WithNotStmt()
   562  	// Check if v is already in a requested register.
   563  	if mask&vi.regs != 0 {
   564  		mask &= vi.regs
   565  		r := pickReg(mask)
   566  		if mask.contains(s.SPReg) {
   567  			// Prefer the stack pointer if it is allowed.
   568  			// (Needed because the op might have an Aux symbol
   569  			// that needs SP as its base.)
   570  			r = s.SPReg
   571  		}
   572  		if !s.allocatable.contains(r) {
   573  			return v // v is in a fixed register
   574  		}
   575  		if s.regs[r].v != v || s.regs[r].c == nil {
   576  			panic("bad register state")
   577  		}
   578  		if nospill {
   579  			s.nospill |= regMask(1) << r
   580  		}
   581  		s.usedSinceBlockStart |= regMask(1) << r
   582  		return s.regs[r].c
   583  	}
   584  
   585  	var r register
   586  	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
   587  	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
   588  	if !onWasmStack {
   589  		// Allocate a register.
   590  		r = s.allocReg(mask, v)
   591  	}
   592  
   593  	// Allocate v to the new register.
   594  	var c *Value
   595  	if vi.regs != 0 {
   596  		// Copy from a register that v is already in.
   597  		r2 := pickReg(vi.regs)
   598  		var current *Value
   599  		if !s.allocatable.contains(r2) {
   600  			current = v // v is in a fixed register
   601  		} else {
   602  			if s.regs[r2].v != v {
   603  				panic("bad register state")
   604  			}
   605  			current = s.regs[r2].c
   606  		}
   607  		s.usedSinceBlockStart |= regMask(1) << r2
   608  		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
   609  	} else if v.rematerializeable() {
   610  		// Rematerialize instead of loading from the spill location.
   611  		c = v.copyIntoWithXPos(s.curBlock, pos)
   612  		// We need to consider its output mask and potentially issue a Copy
   613  		// if there are register mask conflicts.
   614  		// Currently this only happens for the SIMD package, between the GP and FP
   615  		// registers: Intel's vector extensions can put an integer value into an
   616  		// FP register, where it is seen as a vector. Example instruction: VPSLL[BWDQ].
   617  		// Because the GP and FP masks do not overlap, mask & outputMask == 0
   618  		// detects this situation reliably.
   619  		sourceMask := s.regspec(c).outputs[0].regs
   620  		if mask&sourceMask == 0 && !onWasmStack {
   621  			s.setOrig(c, v)
   622  			s.assignReg(s.allocReg(sourceMask, v), v, c)
   623  			// v.Type for the new OpCopy is likely wrong, which may delay the problem
   624  			// until SSA-to-asm lowering, which might need the type to generate the right
   625  			// assembly for OpCopy. For Intel's GP-to-FP move it happens to work out,
   626  			// because the MOV instruction has such a variant.
   627  			// But it's unclear for other architectures or situations, and the problem
   628  			// might be exposed when the assembler sees illegal instructions.
   629  			// For now we still pick v.Type, because at least its size should be correct
   630  			// for the rematerialization case the amd64 SIMD package exposed.
   631  			// TODO: We might need to figure out a way to find the correct type, or make
   632  			// the asm lowering use reg info only for OpCopy.
   633  			c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
   634  		}
   635  	} else {
   636  		// Load v from its spill location.
   637  		spill := s.makeSpill(v, s.curBlock)
   638  		if s.f.pass.debug > logSpills {
   639  			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
   640  		}
   641  		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
   642  	}
   643  
   644  	s.setOrig(c, v)
   645  
   646  	if onWasmStack {
   647  		c.OnWasmStack = true
   648  		return c
   649  	}
   650  
   651  	s.assignReg(r, v, c)
   652  	if c.Op == OpLoadReg && s.isGReg(r) {
   653  		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
   654  	}
   655  	if nospill {
   656  		s.nospill |= regMask(1) << r
   657  	}
   658  	return c
   659  }
   660  
   661  // isLeaf reports whether f performs no calls other than tail calls.
   662  func isLeaf(f *Func) bool {
   663  	for _, b := range f.Blocks {
   664  		for _, v := range b.Values {
   665  			if v.Op.IsCall() && !v.Op.IsTailCall() {
   666  				// tail call is not counted as it does not save the return PC or need a frame
   667  				return false
   668  			}
   669  		}
   670  	}
   671  	return true
   672  }
   673  
   674  // needRegister reports whether v needs a register.
   675  func (v *Value) needRegister() bool {
   676  	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
   677  }
   678  
   679  func (s *regAllocState) init(f *Func) {
   680  	s.f = f
   681  	s.f.RegAlloc = s.f.Cache.locs[:0]
   682  	s.registers = f.Config.registers
   683  	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask(0))*8) {
   684  		s.f.Fatalf("bad number of registers: %d", nr)
   685  	} else {
   686  		s.numRegs = register(nr)
   687  	}
   688  	// Locate SP, SB, and g registers.
   689  	s.SPReg = noRegister
   690  	s.SBReg = noRegister
   691  	s.GReg = noRegister
   692  	s.ZeroIntReg = noRegister
   693  	for r := register(0); r < s.numRegs; r++ {
   694  		switch s.registers[r].String() {
   695  		case "SP":
   696  			s.SPReg = r
   697  		case "SB":
   698  			s.SBReg = r
   699  		case "g":
   700  			s.GReg = r
   701  		case "ZERO": // TODO: arch-specific?
   702  			s.ZeroIntReg = r
   703  		}
   704  	}
   705  	// Make sure we found all required registers.
   706  	switch noRegister {
   707  	case s.SPReg:
   708  		s.f.Fatalf("no SP register found")
   709  	case s.SBReg:
   710  		s.f.Fatalf("no SB register found")
   711  	case s.GReg:
   712  		if f.Config.hasGReg {
   713  			s.f.Fatalf("no g register found")
   714  		}
   715  	}
   716  
   717  	// Figure out which registers we're allowed to use.
   718  	s.allocatable = s.f.Config.gpRegMask | s.f.Config.fpRegMask | s.f.Config.specialRegMask
   719  	s.allocatable &^= 1 << s.SPReg
   720  	s.allocatable &^= 1 << s.SBReg
   721  	if s.f.Config.hasGReg {
   722  		s.allocatable &^= 1 << s.GReg
   723  	}
   724  	if s.ZeroIntReg != noRegister {
   725  		s.allocatable &^= 1 << s.ZeroIntReg
   726  	}
   727  	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
   728  		s.allocatable &^= 1 << uint(s.f.Config.FPReg)
   729  	}
   730  	if s.f.Config.LinkReg != -1 {
   731  		if isLeaf(f) {
   732  			// Leaf functions don't save/restore the link register.
   733  			s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
   734  		}
   735  	}
   736  	if s.f.Config.ctxt.Flag_dynlink {
   737  		switch s.f.Config.arch {
   738  		case "386":
   739  			// nothing to do.
   740  			// Note that for Flag_shared (position independent code)
   741  			// we do need to be careful, but that carefulness is hidden
   742  			// in the rewrite rules so we always have a free register
   743  			// available for global load/stores. See _gen/386.rules (search for Flag_shared).
   744  		case "amd64":
   745  			s.allocatable &^= 1 << 15 // R15
   746  		case "arm":
   747  			s.allocatable &^= 1 << 9 // R9
   748  		case "arm64":
   749  			// nothing to do
   750  		case "loong64": // R2 (aka TP) already reserved.
   751  			// nothing to do
   752  		case "ppc64le": // R2 already reserved.
   753  			// nothing to do
   754  		case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved.
   755  			// nothing to do
   756  		case "s390x":
   757  			s.allocatable &^= 1 << 11 // R11
   758  		default:
   759  			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
   760  		}
   761  	}
   762  
   763  	// Linear scan register allocation can be influenced by the order in which blocks appear.
   764  	// Decouple the register allocation order from the generated block order.
   765  	// This also creates an opportunity for experiments to find a better order.
   766  	s.visitOrder = layoutRegallocOrder(f)
   767  
   768  	// Compute block order. This array allows us to distinguish forward edges
   769  	// from backward edges and compute how far they go.
   770  	s.blockOrder = make([]int32, f.NumBlocks())
   771  	for i, b := range s.visitOrder {
   772  		s.blockOrder[b.ID] = int32(i)
   773  	}
   774  
   775  	s.regs = make([]regState, s.numRegs)
   776  	nv := f.NumValues()
   777  	if cap(s.f.Cache.regallocValues) >= nv {
   778  		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
   779  	} else {
   780  		s.f.Cache.regallocValues = make([]valState, nv)
   781  	}
   782  	s.values = s.f.Cache.regallocValues
   783  	s.orig = s.f.Cache.allocValueSlice(nv)
   784  	s.copies = make(map[*Value]bool)
   785  	for _, b := range s.visitOrder {
   786  		for _, v := range b.Values {
   787  			if v.needRegister() {
   788  				s.values[v.ID].needReg = true
   789  				s.values[v.ID].rematerializeable = v.rematerializeable()
   790  				s.orig[v.ID] = v
   791  			}
   792  			// Note: needReg is false for values returning Tuple types.
   793  			// Instead, we mark the corresponding Selects as needReg.
   794  		}
   795  	}
   796  	s.computeLive()
   797  
   798  	s.endRegs = make([][]endReg, f.NumBlocks())
   799  	s.startRegs = make([][]startReg, f.NumBlocks())
   800  	s.spillLive = make([][]ID, f.NumBlocks())
   801  	s.sdom = f.Sdom()
   802  
   803  	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
   804  	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   805  		canLiveOnStack := f.newSparseSet(f.NumValues())
   806  		defer f.retSparseSet(canLiveOnStack)
   807  		for _, b := range f.Blocks {
   808  			// New block. Clear candidate set.
   809  			canLiveOnStack.clear()
   810  			for _, c := range b.ControlValues() {
   811  				if c.Uses == 1 && !opcodeTable[c.Op].generic {
   812  					canLiveOnStack.add(c.ID)
   813  				}
   814  			}
   815  			// Walking backwards.
   816  			for i := len(b.Values) - 1; i >= 0; i-- {
   817  				v := b.Values[i]
   818  				if canLiveOnStack.contains(v.ID) {
   819  					v.OnWasmStack = true
   820  				} else {
   821  				// Value cannot live on the stack. Values are not allowed to be reordered, so clear candidate set.
   822  					canLiveOnStack.clear()
   823  				}
   824  				for _, arg := range v.Args {
   825  					// Value can live on the stack if:
   826  					// - it is only used once
   827  					// - it is used in the same basic block
   828  					// - it is not a "mem" value
   829  					// - it is a WebAssembly op
   830  					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
   831  						canLiveOnStack.add(arg.ID)
   832  					}
   833  				}
   834  			}
   835  		}
   836  	}
   837  
   838  	// The clobberdeadreg experiment inserts code to clobber dead registers
   839  	// at call sites.
   840  	// Ignore huge functions to avoid doing too much work.
   841  	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
   842  		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
   843  		s.doClobber = true
   844  	}
   845  }
   846  
   847  func (s *regAllocState) close() {
   848  	s.f.Cache.freeValueSlice(s.orig)
   849  }
   850  
   851  // addUse adds a use record for id at distance dist from the start of the block.
   852  // All calls to addUse must happen with nonincreasing dist.
   853  func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
   854  	r := s.freeUseRecords
   855  	if r != nil {
   856  		s.freeUseRecords = r.next
   857  	} else {
   858  		r = &use{}
   859  	}
   860  	r.dist = dist
   861  	r.pos = pos
   862  	r.next = s.values[id].uses
   863  	s.values[id].uses = r
   864  	if r.next != nil && dist > r.next.dist {
   865  		s.f.Fatalf("uses added in wrong order")
   866  	}
   867  }
   868  
   869  // advanceUses advances the uses of v's args from the state before v to the state after v.
   870  // Any values which have no more uses are deallocated from registers.
   871  func (s *regAllocState) advanceUses(v *Value) {
   872  	for _, a := range v.Args {
   873  		if !s.values[a.ID].needReg {
   874  			continue
   875  		}
   876  		ai := &s.values[a.ID]
   877  		r := ai.uses
   878  		ai.uses = r.next
   879  		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
   880  			// Value is dead (or is not used again until after a call), free all registers that hold it.
   881  			s.freeRegs(ai.regs)
   882  		}
   883  		r.next = s.freeUseRecords
   884  		s.freeUseRecords = r
   885  	}
   886  	s.dropIfUnused(v)
   887  }
   888  
   889  // Drop v from registers if it isn't used again, or its only uses are after
   890  // a call instruction.
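        // (A call clobbers all allocatable registers, so a value whose next use is
        // after a call would have to be restored from its spill anyway; values in
        // fixed registers such as SP and SB are exempt.)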
   891  func (s *regAllocState) dropIfUnused(v *Value) {
   892  	if !s.values[v.ID].needReg {
   893  		return
   894  	}
   895  	vi := &s.values[v.ID]
   896  	r := vi.uses
   897  	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > s.nextCall[s.curIdx]) {
   898  		s.freeRegs(vi.regs)
   899  	}
   900  }
   901  
   902  // liveAfterCurrentInstruction reports whether v is live after
   903  // the current instruction is completed.  v must be used by the
   904  // current instruction.
   905  func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
   906  	u := s.values[v.ID].uses
   907  	if u == nil {
   908  		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
   909  	}
   910  	d := u.dist
   911  	for u != nil && u.dist == d {
   912  		u = u.next
   913  	}
   914  	return u != nil && u.dist > d
   915  }
   916  
   917  // setState sets the state of the registers to that encoded in regs.
   918  func (s *regAllocState) setState(regs []endReg) {
   919  	s.freeRegs(s.used)
   920  	for _, x := range regs {
   921  		s.assignReg(x.r, x.v, x.c)
   922  	}
   923  }
   924  
   925  // compatRegs returns the set of registers which can store a type t.
   926  func (s *regAllocState) compatRegs(t *types.Type) regMask {
   927  	var m regMask
   928  	if t.IsTuple() || t.IsFlags() {
   929  		return 0
   930  	}
   931  	if t.IsFloat() || t == types.TypeInt128 {
   932  		if t.Kind() == types.TFLOAT32 && s.f.Config.fp32RegMask != 0 {
   933  			m = s.f.Config.fp32RegMask
   934  		} else if t.Kind() == types.TFLOAT64 && s.f.Config.fp64RegMask != 0 {
   935  			m = s.f.Config.fp64RegMask
   936  		} else {
   937  			m = s.f.Config.fpRegMask
   938  		}
   939  	} else {
   940  		m = s.f.Config.gpRegMask
   941  	}
   942  	return m & s.allocatable
   943  }
   944  
   945  // regspec returns the regInfo for operation op.
   946  func (s *regAllocState) regspec(v *Value) regInfo {
   947  	op := v.Op
   948  	if op == OpConvert {
   949  		// OpConvert is a generic op, so it doesn't have a
   950  		// register set in the static table. It can use any
   951  		// allocatable integer register.
   952  		m := s.allocatable & s.f.Config.gpRegMask
   953  		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
   954  	}
   955  	if op == OpArgIntReg {
   956  		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
   957  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   958  	}
   959  	if op == OpArgFloatReg {
   960  		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
   961  		return regInfo{outputs: []outputInfo{{regs: 1 << uint(reg)}}}
   962  	}
   963  	if op.IsCall() {
   964  		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
   965  			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
   966  		}
   967  	}
   968  	if op == OpMakeResult && s.f.OwnAux.reg != nil {
   969  		return *s.f.OwnAux.ResultReg(s.f.Config)
   970  	}
   971  	return opcodeTable[op].reg
   972  }
   973  
   974  func (s *regAllocState) isGReg(r register) bool {
   975  	return s.f.Config.hasGReg && s.GReg == r
   976  }
   977  
   978  // Dummy value used to represent the value being held in a temporary register.
   979  var tmpVal Value
   980  
   981  func (s *regAllocState) regalloc(f *Func) {
   982  	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
   983  	defer f.retSparseSet(regValLiveSet)
   984  	var oldSched []*Value
   985  	var phis []*Value
   986  	var phiRegs []register
   987  	var args []*Value
   988  
   989  	// Data structure used for computing desired registers.
   990  	var desired desiredState
   991  	desiredSecondReg := map[ID][4]register{} // desired register allocation for 2nd part of a tuple
   992  
   993  	// Desired registers for inputs & outputs for each instruction in the block.
   994  	type dentry struct {
   995  		out [4]register    // desired output registers
   996  		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
   997  	}
   998  	var dinfo []dentry
   999  
  1000  	if f.Entry != f.Blocks[0] {
  1001  		f.Fatalf("entry block must be first")
  1002  	}
  1003  
  1004  	for _, b := range s.visitOrder {
  1005  		if s.f.pass.debug > regDebug {
  1006  			fmt.Printf("Begin processing block %v\n", b)
  1007  		}
  1008  		s.curBlock = b
  1009  		s.startRegsMask = 0
  1010  		s.usedSinceBlockStart = 0
  1011  		clear(desiredSecondReg)
  1012  
  1013  		// Initialize regValLiveSet and uses fields for this block.
  1014  		// Walk backwards through the block doing liveness analysis.
  1015  		regValLiveSet.clear()
  1016  		for _, e := range s.live[b.ID] {
  1017  			s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
  1018  			regValLiveSet.add(e.ID)
  1019  		}
  1020  		for _, v := range b.ControlValues() {
  1021  			if s.values[v.ID].needReg {
  1022  				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
  1023  				regValLiveSet.add(v.ID)
  1024  			}
  1025  		}
  1026  		if len(s.nextCall) < len(b.Values) {
  1027  			s.nextCall = append(s.nextCall, make([]int32, len(b.Values)-len(s.nextCall))...)
  1028  		}
  1029  		var nextCall int32 = math.MaxInt32
  1030  		for i := len(b.Values) - 1; i >= 0; i-- {
  1031  			v := b.Values[i]
  1032  			regValLiveSet.remove(v.ID)
  1033  			if v.Op == OpPhi {
  1034  				// Remove v from the live set, but don't add
  1035  				// any inputs. This is the state the len(b.Preds)>1
  1036  				// case below desires; it wants to process phis specially.
  1037  				s.nextCall[i] = nextCall
  1038  				continue
  1039  			}
  1040  			if opcodeTable[v.Op].call {
  1041  				// Function call clobbers all the registers but SP and SB.
  1042  				regValLiveSet.clear()
  1043  				if s.sp != 0 && s.values[s.sp].uses != nil {
  1044  					regValLiveSet.add(s.sp)
  1045  				}
  1046  				if s.sb != 0 && s.values[s.sb].uses != nil {
  1047  					regValLiveSet.add(s.sb)
  1048  				}
  1049  				nextCall = int32(i)
  1050  			}
  1051  			for _, a := range v.Args {
  1052  				if !s.values[a.ID].needReg {
  1053  					continue
  1054  				}
  1055  				s.addUse(a.ID, int32(i), v.Pos)
  1056  				regValLiveSet.add(a.ID)
  1057  			}
  1058  			s.nextCall[i] = nextCall
  1059  		}
  1060  		if s.f.pass.debug > regDebug {
  1061  			fmt.Printf("use distances for %s\n", b)
  1062  			for i := range s.values {
  1063  				vi := &s.values[i]
  1064  				u := vi.uses
  1065  				if u == nil {
  1066  					continue
  1067  				}
  1068  				fmt.Printf("  v%d:", i)
  1069  				for u != nil {
  1070  					fmt.Printf(" %d", u.dist)
  1071  					u = u.next
  1072  				}
  1073  				fmt.Println()
  1074  			}
  1075  		}
  1076  
  1077  		// Make a copy of the block schedule so we can generate a new one in place.
  1078  		// We make a separate copy for phis and regular values.
  1079  		nphi := 0
  1080  		for _, v := range b.Values {
  1081  			if v.Op != OpPhi {
  1082  				break
  1083  			}
  1084  			nphi++
  1085  		}
  1086  		phis = append(phis[:0], b.Values[:nphi]...)
  1087  		oldSched = append(oldSched[:0], b.Values[nphi:]...)
  1088  		b.Values = b.Values[:0]
  1089  
  1090  		// Initialize start state of block.
  1091  		if b == f.Entry {
  1092  			// Regalloc state is empty to start.
  1093  			if nphi > 0 {
  1094  				f.Fatalf("phis in entry block")
  1095  			}
  1096  		} else if len(b.Preds) == 1 {
  1097  			// Start regalloc state with the end state of the previous block.
  1098  			s.setState(s.endRegs[b.Preds[0].b.ID])
  1099  			if nphi > 0 {
  1100  				f.Fatalf("phis in single-predecessor block")
  1101  			}
  1102  			// Drop any values which are no longer live.
  1103  			// This may happen because at the end of p, a value may be
  1104  			// live but only used by some other successor of p.
  1105  			for r := register(0); r < s.numRegs; r++ {
  1106  				v := s.regs[r].v
  1107  				if v != nil && !regValLiveSet.contains(v.ID) {
  1108  					s.freeReg(r)
  1109  				}
  1110  			}
  1111  		} else {
  1112  			// This is the complicated case. We have more than one predecessor,
  1113  			// which means we may have Phi ops.
  1114  
  1115  			// Start with the final register state of the predecessor that has the fewest live spills.
  1116  			// This is based on the following points:
  1117  			// 1. Fewer live spills indicate that the register pressure on that path is lower,
  1118  			//    so the values of this block are more likely to be allocated to registers.
  1119  			// 2. It avoids predecessors that contain function calls, because such predecessors
  1120  			//    usually generate a lot of spills and lose the previous allocation state.
  1122  			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
  1123  			// an impact on the code size and compiler speed. But it is not easy to find a simple
  1124  			// and efficient method that combines multiple factors.
  1125  			idx := -1
  1126  			for i, p := range b.Preds {
  1127  				// If the predecessor has not been visited yet, skip it because its end state
  1128  				// (endRegs and spillLive) has not been computed yet.
  1129  				pb := p.b
  1130  				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
  1131  					continue
  1132  				}
  1133  				if idx == -1 {
  1134  					idx = i
  1135  					continue
  1136  				}
  1137  				pSel := b.Preds[idx].b
  1138  				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
  1139  					idx = i
  1140  				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
  1141  					// Use a bit of branch-likelihood information. After the critical-edge pass,
  1142  					// pb and pSel must be plain blocks, so check the edges from their predecessors
  1143  					// into pb and pSel instead of the edge pb->b.
  1143  					// TODO: improve the prediction of the likely predecessor. The following
  1144  					// method is only suitable for the simplest cases. For complex cases,
  1145  					// the prediction may be inaccurate, but this does not affect the
  1146  					// correctness of the program.
  1147  					// According to the layout algorithm, the predecessor with the
  1148  					// smaller blockOrder is the true branch, and the test results show
  1149  					// that it is better to choose the predecessor with a smaller
  1150  					// blockOrder than no choice.
  1151  					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
  1152  						idx = i
  1153  					}
  1154  				}
  1155  			}
  1156  			if idx < 0 {
  1157  				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
  1158  			}
  1159  			p := b.Preds[idx].b
  1160  			s.setState(s.endRegs[p.ID])
  1161  
  1162  			if s.f.pass.debug > regDebug {
  1163  				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
  1164  				for _, x := range s.endRegs[p.ID] {
  1165  					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
  1166  				}
  1167  			}
  1168  
  1169  			// Decide on registers for phi ops. Use the registers determined
  1170  			// by the primary predecessor if we can.
  1171  			// TODO: pick best of (already processed) predecessors?
  1172  			// Majority vote? Deepest nesting level?
  1173  			phiRegs = phiRegs[:0]
  1174  			var phiUsed regMask
  1175  
  1176  			for _, v := range phis {
  1177  				if !s.values[v.ID].needReg {
  1178  					phiRegs = append(phiRegs, noRegister)
  1179  					continue
  1180  				}
  1181  				a := v.Args[idx]
  1182  				// Some instructions target not-allocatable registers.
  1183  				// They're not suitable for further (phi-function) allocation.
  1184  				m := s.values[a.ID].regs &^ phiUsed & s.allocatable
  1185  				if m != 0 {
  1186  					r := pickReg(m)
  1187  					phiUsed |= regMask(1) << r
  1188  					phiRegs = append(phiRegs, r)
  1189  				} else {
  1190  					phiRegs = append(phiRegs, noRegister)
  1191  				}
  1192  			}
  1193  
  1194  			// Second pass - deallocate all in-register phi inputs.
  1195  			for i, v := range phis {
  1196  				if !s.values[v.ID].needReg {
  1197  					continue
  1198  				}
  1199  				a := v.Args[idx]
  1200  				r := phiRegs[i]
  1201  				if r == noRegister {
  1202  					continue
  1203  				}
  1204  				if regValLiveSet.contains(a.ID) {
  1205  					// Input value is still live (it is used by something other than Phi).
  1206  					// Try to move it around before kicking out, if there is a free register.
  1207  					// We generate a Copy in the predecessor block and record it. It will be
  1208  					// deleted later if never used.
  1209  					//
  1210  					// Pick a free register. At this point some registers used in the predecessor
  1211  					// block may have been deallocated. Those are the ones used for Phis. Exclude
  1212  					// them (and they are not going to be helpful anyway).
  1213  					m := s.compatRegs(a.Type) &^ s.used &^ phiUsed
  1214  					if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
  1215  						r2 := pickReg(m)
  1216  						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
  1217  						s.copies[c] = false
  1218  						if s.f.pass.debug > regDebug {
  1219  							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
  1220  						}
  1221  						s.setOrig(c, a)
  1222  						s.assignReg(r2, a, c)
  1223  						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
  1224  					}
  1225  				}
  1226  				s.freeReg(r)
  1227  			}
  1228  
  1229  			// Copy phi ops into new schedule.
  1230  			b.Values = append(b.Values, phis...)
  1231  
  1232  			// Third pass - pick registers for phis whose input
  1233  			// was not in a register in the primary predecessor.
  1234  			for i, v := range phis {
  1235  				if !s.values[v.ID].needReg {
  1236  					continue
  1237  				}
  1238  				if phiRegs[i] != noRegister {
  1239  					continue
  1240  				}
  1241  				m := s.compatRegs(v.Type) &^ phiUsed &^ s.used
  1242  				// If one of the other inputs of v is in a register, and the register is available,
  1243  				// select this register, which can save some unnecessary copies.
  1244  				for i, pe := range b.Preds {
  1245  					if i == idx {
  1246  						continue
  1247  					}
  1248  					ri := noRegister
  1249  					for _, er := range s.endRegs[pe.b.ID] {
  1250  						if er.v == s.orig[v.Args[i].ID] {
  1251  							ri = er.r
  1252  							break
  1253  						}
  1254  					}
  1255  					if ri != noRegister && m>>ri&1 != 0 {
  1256  						m = regMask(1) << ri
  1257  						break
  1258  					}
  1259  				}
  1260  				if m != 0 {
  1261  					r := pickReg(m)
  1262  					phiRegs[i] = r
  1263  					phiUsed |= regMask(1) << r
  1264  				}
  1265  			}
  1266  
  1267  			// Set registers for phis. Add phi spill code.
  1268  			for i, v := range phis {
  1269  				if !s.values[v.ID].needReg {
  1270  					continue
  1271  				}
  1272  				r := phiRegs[i]
  1273  				if r == noRegister {
  1274  					// stack-based phi
  1275  					// Spills will be inserted in all the predecessors below.
  1276  					s.values[v.ID].spill = v // v starts life spilled
  1277  					continue
  1278  				}
  1279  				// register-based phi
  1280  				s.assignReg(r, v, v)
  1281  			}
  1282  
  1283  			// Deallocate any values which are no longer live. Phis are excluded.
  1284  			for r := register(0); r < s.numRegs; r++ {
  1285  				if phiUsed>>r&1 != 0 {
  1286  					continue
  1287  				}
  1288  				v := s.regs[r].v
  1289  				if v != nil && !regValLiveSet.contains(v.ID) {
  1290  					s.freeReg(r)
  1291  				}
  1292  			}
  1293  
  1294  			// Save the starting state for use by merge edges.
  1295  			// We append to a stack allocated variable that we'll
  1296  			// later copy into s.startRegs in one fell swoop, to save
  1297  			// on allocations.
  1298  			regList := make([]startReg, 0, 32)
  1299  			for r := register(0); r < s.numRegs; r++ {
  1300  				v := s.regs[r].v
  1301  				if v == nil {
  1302  					continue
  1303  				}
  1304  				if phiUsed>>r&1 != 0 {
  1305  					// Skip registers that phis used, we'll handle those
  1306  					// specially during merge edge processing.
  1307  					continue
  1308  				}
  1309  				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
  1310  				s.startRegsMask |= regMask(1) << r
  1311  			}
  1312  			s.startRegs[b.ID] = make([]startReg, len(regList))
  1313  			copy(s.startRegs[b.ID], regList)
  1314  
  1315  			if s.f.pass.debug > regDebug {
  1316  				fmt.Printf("after phis\n")
  1317  				for _, x := range s.startRegs[b.ID] {
  1318  					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
  1319  				}
  1320  			}
  1321  		}
  1322  
  1323  		// Drop phis from registers if they immediately go dead.
  1324  		for i, v := range phis {
  1325  			s.curIdx = i
  1326  			s.dropIfUnused(v)
  1327  		}
  1328  
  1329  		// Allocate space to record the desired registers for each value.
  1330  		if l := len(oldSched); cap(dinfo) < l {
  1331  			dinfo = make([]dentry, l)
  1332  		} else {
  1333  			dinfo = dinfo[:l]
  1334  			clear(dinfo)
  1335  		}
  1336  
  1337  		// Load static desired register info at the end of the block.
  1338  		desired.copy(&s.desired[b.ID])
  1339  
  1340  		// Check actual assigned registers at the start of the next block(s).
  1341  		// Dynamically assigned registers will trump the static
  1342  		// desired registers computed during liveness analysis.
  1343  		// Note that we do this phase after startRegs is set above, so that
  1344  		// we get the right behavior for a block which branches to itself.
  1345  		for _, e := range b.Succs {
  1346  			succ := e.b
  1347  			// TODO: prioritize likely successor?
  1348  			for _, x := range s.startRegs[succ.ID] {
  1349  				desired.add(x.v.ID, x.r)
  1350  			}
  1351  			// Process phi ops in succ.
  1352  			pidx := e.i
  1353  			for _, v := range succ.Values {
  1354  				if v.Op != OpPhi {
  1355  					break
  1356  				}
  1357  				if !s.values[v.ID].needReg {
  1358  					continue
  1359  				}
  1360  				rp, ok := s.f.getHome(v.ID).(*Register)
  1361  				if !ok {
  1362  					// If v is not assigned a register, pick a register assigned to one of v's inputs.
  1363  					// Hopefully v will get assigned that register later.
  1364  					// If the inputs have allocated register information, add it to desired,
  1365  					// which may reduce spill or copy operations when the register is available.
  1366  					for _, a := range v.Args {
  1367  						rp, ok = s.f.getHome(a.ID).(*Register)
  1368  						if ok {
  1369  							break
  1370  						}
  1371  					}
  1372  					if !ok {
  1373  						continue
  1374  					}
  1375  				}
  1376  				desired.add(v.Args[pidx].ID, register(rp.num))
  1377  			}
  1378  		}
  1379  		// Walk values backwards computing desired register info.
  1380  		// See computeLive for more comments.
  1381  		for i := len(oldSched) - 1; i >= 0; i-- {
  1382  			v := oldSched[i]
  1383  			prefs := desired.remove(v.ID)
  1384  			regspec := s.regspec(v)
  1385  			desired.clobber(regspec.clobbers)
  1386  			for _, j := range regspec.inputs {
  1387  				if countRegs(j.regs) != 1 {
  1388  					continue
  1389  				}
  1390  				desired.clobber(j.regs)
  1391  				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  1392  			}
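        			// For ops that must (or strongly prefer to) produce their result in the
        			// same register as their first input, the input wants to live wherever
        			// the output wants to live, so propagate v's desired registers to arg 0
        			// (and to arg 1 as well if the op is commutative).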
  1393  			if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  1394  				if opcodeTable[v.Op].commutative {
  1395  					desired.addList(v.Args[1].ID, prefs)
  1396  				}
  1397  				desired.addList(v.Args[0].ID, prefs)
  1398  			}
  1399  			// Save desired registers for this value.
  1400  			dinfo[i].out = prefs
  1401  			for j, a := range v.Args {
  1402  				if j >= len(dinfo[i].in) {
  1403  					break
  1404  				}
  1405  				dinfo[i].in[j] = desired.get(a.ID)
  1406  			}
  1407  			if v.Op == OpSelect1 && prefs[0] != noRegister {
  1408  				// Save desired registers of select1 for
  1409  				// use by the tuple generating instruction.
  1410  				desiredSecondReg[v.Args[0].ID] = prefs
  1411  			}
  1412  		}
  1413  
  1414  		// Process all the non-phi values.
  1415  		for idx, v := range oldSched {
  1416  			s.curIdx = nphi + idx
  1417  			tmpReg := noRegister
  1418  			if s.f.pass.debug > regDebug {
  1419  				fmt.Printf("  processing %s\n", v.LongString())
  1420  			}
  1421  			regspec := s.regspec(v)
  1422  			if v.Op == OpPhi {
  1423  				f.Fatalf("phi %s not at start of block", v)
  1424  			}
  1425  			if opcodeTable[v.Op].fixedReg {
  1426  				switch v.Op {
  1427  				case OpSP:
  1428  					s.assignReg(s.SPReg, v, v)
  1429  					s.sp = v.ID
  1430  				case OpSB:
  1431  					s.assignReg(s.SBReg, v, v)
  1432  					s.sb = v.ID
  1433  				case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
  1434  					s.assignReg(s.ZeroIntReg, v, v)
  1435  				default:
  1436  					f.Fatalf("unknown fixed-register op %s", v)
  1437  				}
  1438  				b.Values = append(b.Values, v)
  1439  				s.advanceUses(v)
  1440  				continue
  1441  			}
  1442  			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
  1443  				if s.values[v.ID].needReg {
  1444  					if v.Op == OpSelectN {
  1445  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
  1446  					} else {
  1447  						i := 0
  1448  						if v.Op == OpSelect1 {
  1449  							i = 1
  1450  						}
  1451  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
  1452  					}
  1453  				}
  1454  				b.Values = append(b.Values, v)
  1455  				s.advanceUses(v)
  1456  				continue
  1457  			}
  1458  			if v.Op == OpGetG && s.f.Config.hasGReg {
  1459  				// use hardware g register
  1460  				if s.regs[s.GReg].v != nil {
  1461  					s.freeReg(s.GReg) // kick out the old value
  1462  				}
  1463  				s.assignReg(s.GReg, v, v)
  1464  				b.Values = append(b.Values, v)
  1465  				s.advanceUses(v)
  1466  				continue
  1467  			}
  1468  			if v.Op == OpArg {
  1469  				// Args are "pre-spilled" values. We don't allocate
  1470  				// any register here. We just set the spill pointer to point
  1471  				// at the value itself; any later use will issue a restore to load it.
  1472  				s.values[v.ID].spill = v
  1473  				b.Values = append(b.Values, v)
  1474  				s.advanceUses(v)
  1475  				continue
  1476  			}
  1477  			if v.Op == OpKeepAlive {
  1478  				// Make sure the argument to v is still live here.
  1479  				s.advanceUses(v)
  1480  				a := v.Args[0]
  1481  				vi := &s.values[a.ID]
  1482  				if vi.regs == 0 && !vi.rematerializeable {
  1483  					// Use the spill location.
  1484  					// This forces later liveness analysis to make the
  1485  					// value live at this point.
  1486  					v.SetArg(0, s.makeSpill(a, b))
  1487  				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
  1488  					// Rematerializeable value with a *ir.Name. This is the address of
  1489  					// a stack object (e.g. an LEAQ). Keep the object live.
  1490  					// Change it to VarLive, which is what plive expects for locals.
  1491  					v.Op = OpVarLive
  1492  					v.SetArgs1(v.Args[1])
  1493  					v.Aux = a.Aux
  1494  				} else {
  1495  					// In-register and rematerializeable values are already live.
  1496  					// These are typically rematerializeable constants like nil,
  1497  					// or values of a variable that were modified since the last call.
  1498  					v.Op = OpCopy
  1499  					v.SetArgs1(v.Args[1])
  1500  				}
  1501  				b.Values = append(b.Values, v)
  1502  				continue
  1503  			}
  1504  			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
  1505  				// No register allocation required (or none specified yet)
  1506  				if s.doClobber && v.Op.IsCall() {
  1507  					s.clobberRegs(regspec.clobbers)
  1508  				}
  1509  				s.freeRegs(regspec.clobbers)
  1510  				b.Values = append(b.Values, v)
  1511  				s.advanceUses(v)
  1512  				continue
  1513  			}
  1514  
  1515  			if s.values[v.ID].rematerializeable {
  1516  				// Value is rematerializeable, don't issue it here.
  1517  				// It will get issued just before each use (see
  1518  				// allocValueToReg).
  1519  				for _, a := range v.Args {
  1520  					a.Uses--
  1521  				}
  1522  				s.advanceUses(v)
  1523  				continue
  1524  			}
  1525  
  1526  			if s.f.pass.debug > regDebug {
  1527  				fmt.Printf("value %s\n", v.LongString())
  1528  				fmt.Printf("  out:")
  1529  				for _, r := range dinfo[idx].out {
  1530  					if r != noRegister {
  1531  						fmt.Printf(" %s", &s.registers[r])
  1532  					}
  1533  				}
  1534  				fmt.Println()
  1535  				for i := 0; i < len(v.Args) && i < 3; i++ {
  1536  					fmt.Printf("  in%d:", i)
  1537  					for _, r := range dinfo[idx].in[i] {
  1538  						if r != noRegister {
  1539  							fmt.Printf(" %s", &s.registers[r])
  1540  						}
  1541  					}
  1542  					fmt.Println()
  1543  				}
  1544  			}
  1545  
  1546  			// Move arguments to registers.
  1547  			// First, if an arg must be in a specific register and it is already
  1548  			// in place, keep it.
  1549  			args = append(args[:0], make([]*Value, len(v.Args))...)
  1550  			for i, a := range v.Args {
  1551  				if !s.values[a.ID].needReg {
  1552  					args[i] = a
  1553  				}
  1554  			}
  1555  			for _, i := range regspec.inputs {
  1556  				mask := i.regs
  1557  				if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 {
  1558  					args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1559  				}
  1560  			}
  1561  			// Then, if an arg must be in a specific register and that
  1562  			// register is free, allocate that one. Otherwise when processing
  1563  			// another input we may kick a value into the free register, which
  1564  			// then will be kicked out again.
  1565  			// This is a common case for passing-in-register arguments for
  1566  			// function calls.
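        			// For example, suppose a call needs one argument in AX and another
        			// in any register, and AX is currently free. Placing the AX-constrained
        			// argument first prevents the unconstrained argument from landing in AX
        			// and being immediately evicted.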
  1567  			for {
  1568  				freed := false
  1569  				for _, i := range regspec.inputs {
  1570  					if args[i.idx] != nil {
  1571  						continue // already allocated
  1572  					}
  1573  					mask := i.regs
  1574  					if countRegs(mask) == 1 && mask&^s.used != 0 {
  1575  						args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1576  						// If the input is in other registers that will be clobbered by v,
  1577  						// or the input is dead, free the registers. This may make room
  1578  						// for other inputs.
  1579  						oldregs := s.values[v.Args[i.idx].ID].regs
  1580  						if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
  1581  							s.freeRegs(oldregs &^ mask &^ s.nospill)
  1582  							freed = true
  1583  						}
  1584  					}
  1585  				}
  1586  				if !freed {
  1587  					break
  1588  				}
  1589  			}
  1590  			// Last, allocate remaining ones, in an ordering defined
  1591  			// by the register specification (most constrained first).
  1592  			for _, i := range regspec.inputs {
  1593  				if args[i.idx] != nil {
  1594  					continue // already allocated
  1595  				}
  1596  				mask := i.regs
  1597  				if mask&s.values[v.Args[i.idx].ID].regs == 0 {
  1598  					// Need a new register for the input.
  1599  					mask &= s.allocatable
  1600  					mask &^= s.nospill
  1601  					// Use the desired register if available.
  1602  					if i.idx < 3 {
  1603  						for _, r := range dinfo[idx].in[i.idx] {
  1604  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1605  								// Desired register is allowed and unused.
  1606  								mask = regMask(1) << r
  1607  								break
  1608  							}
  1609  						}
  1610  					}
  1611  					// Avoid registers we're saving for other values.
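        					// (Only drop the avoided registers if doing so still
        					// leaves at least one candidate register in the mask.)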
  1612  					if mask&^desired.avoid != 0 {
  1613  						mask &^= desired.avoid
  1614  					}
  1615  				}
  1616  				if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
  1617  					// Prefer SP register. This ensures that local variables
  1618  					// use SP as their base register (instead of a copy of the
  1619  					// stack pointer living in another register). See issue 74836.
  1620  					mask = 1 << s.SPReg
  1621  				}
  1622  				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1623  			}
  1624  
  1625  			// If the output clobbers the input register, make sure we have
  1626  			// at least two copies of the input register so we don't
  1627  			// have to reload the value from the spill location.
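        			// For example, a two-operand op like v = ADDQ x y overwrites x's
        			// register with the result; if x is still live after v, we want a
        			// second register holding x so later uses don't need a reload.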
  1628  			if opcodeTable[v.Op].resultInArg0 {
  1629  				var m regMask
  1630  				if !s.liveAfterCurrentInstruction(v.Args[0]) {
  1631  					// arg0 is dead.  We can clobber its register.
  1632  					goto ok
  1633  				}
  1634  				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
  1635  					args[0], args[1] = args[1], args[0]
  1636  					goto ok
  1637  				}
  1638  				if s.values[v.Args[0].ID].rematerializeable {
  1639  					// We can rematerialize the input, don't worry about clobbering it.
  1640  					goto ok
  1641  				}
  1642  				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
  1643  					args[0], args[1] = args[1], args[0]
  1644  					goto ok
  1645  				}
  1646  				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
  1647  					// we have at least 2 copies of arg0.  We can afford to clobber one.
  1648  					goto ok
  1649  				}
  1650  				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
  1651  					args[0], args[1] = args[1], args[0]
  1652  					goto ok
  1653  				}
  1654  
  1655  				// We can't overwrite arg0 (or arg1, if commutative).  So we
  1656  				// need to make a copy of an input so we have a register we can modify.
  1657  
  1658  				// Possible new registers to copy into.
  1659  				m = s.compatRegs(v.Args[0].Type) &^ s.used
  1660  				if m == 0 {
  1661  					// No free registers.  In this case we'll just clobber
  1662  					// an input and future uses of that input must use a restore.
  1663  					// TODO(khr): We should really do this like allocReg does it,
  1664  					// spilling the value with the most distant next use.
  1665  					goto ok
  1666  				}
  1667  
  1668  				// Try to move an input to the desired output, if allowed.
  1669  				for _, r := range dinfo[idx].out {
  1670  					if r != noRegister && (m&regspec.outputs[0].regs)>>r&1 != 0 {
  1671  						m = regMask(1) << r
  1672  						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
  1673  						// Note: we update args[0] so the instruction will
  1674  						// use the register copy we just made.
  1675  						goto ok
  1676  					}
  1677  				}
  1678  				// Try to copy input to its desired location & use its old
  1679  				// location as the result register.
  1680  				for _, r := range dinfo[idx].in[0] {
  1681  					if r != noRegister && m>>r&1 != 0 {
  1682  						m = regMask(1) << r
  1683  						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1684  						s.copies[c] = false
  1685  						// Note: no update to args[0] so the instruction will
  1686  						// use the original copy.
  1687  						goto ok
  1688  					}
  1689  				}
  1690  				if opcodeTable[v.Op].commutative {
  1691  					for _, r := range dinfo[idx].in[1] {
  1692  						if r != noRegister && m>>r&1 != 0 {
  1693  							m = regMask(1) << r
  1694  							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
  1695  							s.copies[c] = false
  1696  							args[0], args[1] = args[1], args[0]
  1697  							goto ok
  1698  						}
  1699  					}
  1700  				}
  1701  
  1702  				// Avoid future fixed uses if we can.
  1703  				if m&^desired.avoid != 0 {
  1704  					m &^= desired.avoid
  1705  				}
  1706  				// Save input 0 to a new register so we can clobber it.
  1707  				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1708  				s.copies[c] = false
  1709  
  1710  				// Normally we use the register of the old copy of input 0 as the target.
  1711  				// However, if input 0 is already in its desired register then we use
  1712  				// the register of the new copy instead.
  1713  				if regspec.outputs[0].regs>>s.f.getHome(c.ID).(*Register).num&1 != 0 {
  1714  					if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
  1715  						r := register(rp.num)
  1716  						for _, r2 := range dinfo[idx].in[0] {
  1717  							if r == r2 {
  1718  								args[0] = c
  1719  								break
  1720  							}
  1721  						}
  1722  					}
  1723  				}
  1724  			}
  1725  		ok:
  1726  			for i := 0; i < 2; i++ {
  1727  				if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
  1728  					continue
  1729  				}
  1730  				if !s.liveAfterCurrentInstruction(v.Args[i]) {
  1731  					// arg is dead.  We can clobber its register.
  1732  					continue
  1733  				}
  1734  				if s.values[v.Args[i].ID].rematerializeable {
  1735  					// We can rematerialize the input, don't worry about clobbering it.
  1736  					continue
  1737  				}
  1738  				if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
  1739  					// We have at least 2 copies of arg.  We can afford to clobber one.
  1740  					continue
  1741  				}
  1742  				// Possible new registers to copy into.
  1743  				m := s.compatRegs(v.Args[i].Type) &^ s.used
  1744  				if m == 0 {
  1745  					// No free registers.  In this case we'll just clobber the
  1746  					// input and future uses of that input must use a restore.
  1747  					// TODO(khr): We should really do this like allocReg does it,
  1748  					// spilling the value with the most distant next use.
  1749  					continue
  1750  				}
  1751  				// Copy input to a different register that won't be clobbered.
  1752  				c := s.allocValToReg(v.Args[i], m, true, v.Pos)
  1753  				s.copies[c] = false
  1754  			}
  1755  
  1756  			// Pick a temporary register if needed.
  1757  			// It should be distinct from all the input registers, so we
  1758  			// allocate it after all the input registers, but before
  1759  			// the input registers are freed via advanceUses below.
  1760  			// (Not all instructions need this distinctness, but it is a safe, conservative choice.)
  1761  			// We also ensure it is not any of the single-choice output registers.
  1762  			if opcodeTable[v.Op].needIntTemp {
  1763  				m := s.allocatable & s.f.Config.gpRegMask
  1764  				for _, out := range regspec.outputs {
  1765  					if countRegs(out.regs) == 1 {
  1766  						m &^= out.regs
  1767  					}
  1768  				}
  1769  				if m&^desired.avoid&^s.nospill != 0 {
  1770  					m &^= desired.avoid
  1771  				}
  1772  				tmpReg = s.allocReg(m, &tmpVal)
  1773  				s.nospill |= regMask(1) << tmpReg
  1774  				s.tmpused |= regMask(1) << tmpReg
  1775  			}
  1776  
  1777  			if regspec.clobbersArg0 {
  1778  				s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
  1779  			}
  1780  			if regspec.clobbersArg1 {
  1781  				s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
  1782  			}
  1783  
  1784  			// Now that all args are in regs, we're ready to issue the value itself.
  1785  			// Before we pick a register for the output value, allow input registers
  1786  			// to be deallocated. We do this here so that the output can use the
  1787  			// same register as a dying input.
  1788  			if !opcodeTable[v.Op].resultNotInArgs {
  1789  				s.tmpused = s.nospill
  1790  				s.nospill = 0
  1791  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1792  			}
  1793  
  1794  			// Dump any registers which will be clobbered
  1795  			if s.doClobber && v.Op.IsCall() {
  1796  				// clobber registers that are marked as clobber in regmask, but
  1797  				// don't clobber inputs.
  1798  				s.clobberRegs(regspec.clobbers &^ s.tmpused &^ s.nospill)
  1799  			}
  1800  			s.freeRegs(regspec.clobbers)
  1801  			s.tmpused |= regspec.clobbers
  1802  
  1803  			// Pick registers for outputs.
  1804  			{
  1805  				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
  1806  				maxOutIdx := -1
  1807  				var used regMask
  1808  				if tmpReg != noRegister {
  1809  					// Ensure output registers are distinct from the temporary register.
  1810  					// (Not all instructions need this distinctness, but it is a safe, conservative choice.)
  1811  					used |= regMask(1) << tmpReg
  1812  				}
  1813  				for _, out := range regspec.outputs {
  1814  					if out.regs == 0 {
  1815  						continue
  1816  					}
  1817  					mask := out.regs & s.allocatable &^ used
  1818  					if mask == 0 {
  1819  						s.f.Fatalf("can't find any output register %s", v.LongString())
  1820  					}
  1821  					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
  1822  						if !opcodeTable[v.Op].commutative {
  1823  							// Output must use the same register as input 0.
  1824  							r := register(s.f.getHome(args[0].ID).(*Register).num)
  1825  							if mask>>r&1 == 0 {
  1826  								s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
  1827  							}
  1828  							mask = regMask(1) << r
  1829  						} else {
  1830  							// Output must use the same register as input 0 or 1.
  1831  							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
  1832  							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
  1833  							// Check r0 and r1 for desired output register.
  1834  							found := false
  1835  							for _, r := range dinfo[idx].out {
  1836  								if (r == r0 || r == r1) && (mask&^s.used)>>r&1 != 0 {
  1837  									mask = regMask(1) << r
  1838  									found = true
  1839  									if r == r1 {
  1840  										args[0], args[1] = args[1], args[0]
  1841  									}
  1842  									break
  1843  								}
  1844  							}
  1845  							if !found {
  1846  								// Neither are desired, pick r0.
  1847  								mask = regMask(1) << r0
  1848  							}
  1849  						}
  1850  					}
  1851  					if out.idx == 0 { // desired registers only apply to the first element of a tuple result
  1852  						for _, r := range dinfo[idx].out {
  1853  							if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1854  								// Desired register is allowed and unused.
  1855  								mask = regMask(1) << r
  1856  								break
  1857  							}
  1858  						}
  1859  					}
  1860  					if out.idx == 1 {
  1861  						if prefs, ok := desiredSecondReg[v.ID]; ok {
  1862  							for _, r := range prefs {
  1863  								if r != noRegister && (mask&^s.used)>>r&1 != 0 {
  1864  									// Desired register is allowed and unused.
  1865  									mask = regMask(1) << r
  1866  									break
  1867  								}
  1868  							}
  1869  						}
  1870  					}
  1871  					// Avoid registers we're saving for other values.
  1872  					if mask&^desired.avoid&^s.nospill&^s.used != 0 {
  1873  						mask &^= desired.avoid
  1874  					}
  1875  					r := s.allocReg(mask, v)
  1876  					if out.idx > maxOutIdx {
  1877  						maxOutIdx = out.idx
  1878  					}
  1879  					outRegs[out.idx] = r
  1880  					used |= regMask(1) << r
  1881  					s.tmpused |= regMask(1) << r
  1882  				}
  1883  				// Record register choices
  1884  				if v.Type.IsTuple() {
  1885  					var outLocs LocPair
  1886  					if r := outRegs[0]; r != noRegister {
  1887  						outLocs[0] = &s.registers[r]
  1888  					}
  1889  					if r := outRegs[1]; r != noRegister {
  1890  						outLocs[1] = &s.registers[r]
  1891  					}
  1892  					s.f.setHome(v, outLocs)
  1893  					// Note that subsequent SelectX instructions will do the assignReg calls.
  1894  				} else if v.Type.IsResults() {
  1895  					// preallocate outLocs to the right size, which is maxOutIdx+1
  1896  					outLocs := make(LocResults, maxOutIdx+1)
  1897  					for i := 0; i <= maxOutIdx; i++ {
  1898  						if r := outRegs[i]; r != noRegister {
  1899  							outLocs[i] = &s.registers[r]
  1900  						}
  1901  					}
  1902  					s.f.setHome(v, outLocs)
  1903  				} else {
  1904  					if r := outRegs[0]; r != noRegister {
  1905  						s.assignReg(r, v, v)
  1906  					}
  1907  				}
  1908  				if tmpReg != noRegister {
  1909  					// Remember the temp register allocation, if any.
  1910  					if s.f.tempRegs == nil {
  1911  						s.f.tempRegs = map[ID]*Register{}
  1912  					}
  1913  					s.f.tempRegs[v.ID] = &s.registers[tmpReg]
  1914  				}
  1915  			}
  1916  
  1917  			// deallocate dead args, if we have not done so
  1918  			if opcodeTable[v.Op].resultNotInArgs {
  1919  				s.nospill = 0
  1920  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1921  			}
  1922  			s.tmpused = 0
  1923  
  1924  			// Issue the Value itself.
  1925  			for i, a := range args {
  1926  				v.SetArg(i, a) // use register version of arguments
  1927  			}
  1928  			b.Values = append(b.Values, v)
  1929  			s.dropIfUnused(v)
  1930  		}
  1931  
  1932  		// Copy the control values - we need this so we can reduce the
  1933  		// uses property of these values later.
  1934  		controls := append(make([]*Value, 0, 2), b.ControlValues()...)
  1935  
  1936  		// Load control values into registers.
  1937  		for i, v := range b.ControlValues() {
  1938  			if !s.values[v.ID].needReg {
  1939  				continue
  1940  			}
  1941  			if s.f.pass.debug > regDebug {
  1942  				fmt.Printf("  processing control %s\n", v.LongString())
  1943  			}
  1944  			// We assume that a control input can be passed in any
  1945  			// type-compatible register. If this turns out not to be true,
  1946  			// we'll need to introduce a regspec for a block's control value.
  1947  			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
  1948  		}
  1949  
  1950  		// Reduce the uses of the control values once registers have been loaded.
  1951  		// This loop is equivalent to the advanceUses method.
  1952  		for _, v := range controls {
  1953  			vi := &s.values[v.ID]
  1954  			if !vi.needReg {
  1955  				continue
  1956  			}
  1957  			// Remove this use from the uses list.
  1958  			u := vi.uses
  1959  			vi.uses = u.next
  1960  			if u.next == nil {
  1961  				s.freeRegs(vi.regs) // value is dead
  1962  			}
  1963  			u.next = s.freeUseRecords
  1964  			s.freeUseRecords = u
  1965  		}
  1966  
  1967  		// If we are approaching a merge point and we are the primary
  1968  		// predecessor of it, find live values that we use soon after
  1969  		// the merge point and promote them to registers now.
  1970  		if len(b.Succs) == 1 {
  1971  			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
  1972  				s.freeReg(s.GReg) // Spill value in G register before any merge.
  1973  			}
  1974  			if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
  1975  				// No point if we've already regalloc'd the destination.
  1976  				goto badloop
  1977  			}
  1978  			// For this to be worthwhile, the loop must have no calls in it.
  1979  			top := b.Succs[0].b
  1980  			loop := s.loopnest.b2l[top.ID]
  1981  			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
  1982  				goto badloop
  1983  			}
  1984  
  1985  			// TODO: sort by distance, pick the closest ones?
  1986  			for _, live := range s.live[b.ID] {
  1987  				if live.dist >= unlikelyDistance {
  1988  					// Don't preload anything live after the loop.
  1989  					continue
  1990  				}
  1991  				vid := live.ID
  1992  				vi := &s.values[vid]
  1993  				if vi.regs != 0 {
  1994  					continue
  1995  				}
  1996  				if vi.rematerializeable {
  1997  					continue
  1998  				}
  1999  				v := s.orig[vid]
  2000  				m := s.compatRegs(v.Type) &^ s.used
  2001  				// Use the desired register if available.
  2002  			outerloop:
  2003  				for _, e := range desired.entries {
  2004  					if e.ID != v.ID {
  2005  						continue
  2006  					}
  2007  					for _, r := range e.regs {
  2008  						if r != noRegister && m>>r&1 != 0 {
  2009  							m = regMask(1) << r
  2010  							break outerloop
  2011  						}
  2012  					}
  2013  				}
  2014  				if m&^desired.avoid != 0 {
  2015  					m &^= desired.avoid
  2016  				}
  2017  				if m != 0 {
  2018  					s.allocValToReg(v, m, false, b.Pos)
  2019  				}
  2020  			}
  2021  		}
  2022  	badloop:
  2023  		;
  2024  
  2025  		// Save end-of-block register state.
  2026  		// First count how many; this cuts allocations in half.
  2027  		k := 0
  2028  		for r := register(0); r < s.numRegs; r++ {
  2029  			v := s.regs[r].v
  2030  			if v == nil {
  2031  				continue
  2032  			}
  2033  			k++
  2034  		}
  2035  		regList := make([]endReg, 0, k)
  2036  		for r := register(0); r < s.numRegs; r++ {
  2037  			v := s.regs[r].v
  2038  			if v == nil {
  2039  				continue
  2040  			}
  2041  			regList = append(regList, endReg{r, v, s.regs[r].c})
  2042  		}
  2043  		s.endRegs[b.ID] = regList
  2044  
  2045  		if checkEnabled {
  2046  			regValLiveSet.clear()
  2047  			for _, x := range s.live[b.ID] {
  2048  				regValLiveSet.add(x.ID)
  2049  			}
  2050  			for r := register(0); r < s.numRegs; r++ {
  2051  				v := s.regs[r].v
  2052  				if v == nil {
  2053  					continue
  2054  				}
  2055  				if !regValLiveSet.contains(v.ID) {
  2056  					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
  2057  				}
  2058  			}
  2059  		}
  2060  
  2061  		// If a value is live at the end of the block and
  2062  		// isn't in a register, generate a use for the spill location.
  2063  		// We need to remember this information so that
  2064  		// the liveness analysis in stackalloc is correct.
  2065  		for _, e := range s.live[b.ID] {
  2066  			vi := &s.values[e.ID]
  2067  			if vi.regs != 0 {
  2068  				// in a register, we'll use that source for the merge.
  2069  				continue
  2070  			}
  2071  			if vi.rematerializeable {
  2072  				// we'll rematerialize during the merge.
  2073  				continue
  2074  			}
  2075  			if s.f.pass.debug > regDebug {
  2076  				fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
  2077  			}
  2078  			spill := s.makeSpill(s.orig[e.ID], b)
  2079  			s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
  2080  		}
  2081  
  2082  		// Clear any final uses.
  2083  		// All that is left should be the pseudo-uses added for values which
  2084  		// are live at the end of b.
  2085  		for _, e := range s.live[b.ID] {
  2086  			u := s.values[e.ID].uses
  2087  			if u == nil {
  2088  				f.Fatalf("live at end, no uses v%d", e.ID)
  2089  			}
  2090  			if u.next != nil {
  2091  				f.Fatalf("live at end, too many uses v%d", e.ID)
  2092  			}
  2093  			s.values[e.ID].uses = nil
  2094  			u.next = s.freeUseRecords
  2095  			s.freeUseRecords = u
  2096  		}
  2097  
  2098  		// allocReg may have dropped registers from startRegsMask that
  2099  		// aren't actually needed in startRegs. Synchronize back to
  2100  		// startRegs.
  2101  		//
  2102  		// This must be done before placing spills, which will look at
  2103  		// startRegs to decide if a block is a valid block for a spill.
  2104  		if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
  2105  			regs := make([]startReg, 0, c)
  2106  			for _, sr := range s.startRegs[b.ID] {
  2107  				if s.startRegsMask&(regMask(1)<<sr.r) == 0 {
  2108  					continue
  2109  				}
  2110  				regs = append(regs, sr)
  2111  			}
  2112  			s.startRegs[b.ID] = regs
  2113  		}
  2114  	}
  2115  
  2116  	// Decide where the spills we generated will go.
  2117  	s.placeSpills()
  2118  
  2119  	// Anything that didn't get a register gets a stack location here.
  2120  	// (StoreReg, stack-based phis, inputs, ...)
  2121  	stacklive := stackalloc(s.f, s.spillLive)
  2122  
  2123  	// Fix up all merge edges.
  2124  	s.shuffle(stacklive)
  2125  
  2126  	// Erase any copies we never used.
  2127  	// Also, an unused copy might be the only use of another copy,
  2128  	// so continue erasing until we reach a fixed point.
  2129  	for {
  2130  		progress := false
  2131  		for c, used := range s.copies {
  2132  			if !used && c.Uses == 0 {
  2133  				if s.f.pass.debug > regDebug {
  2134  					fmt.Printf("delete copied value %s\n", c.LongString())
  2135  				}
  2136  				c.resetArgs()
  2137  				f.freeValue(c)
  2138  				delete(s.copies, c)
  2139  				progress = true
  2140  			}
  2141  		}
  2142  		if !progress {
  2143  			break
  2144  		}
  2145  	}
  2146  
  2147  	for _, b := range s.visitOrder {
  2148  		i := 0
  2149  		for _, v := range b.Values {
  2150  			if v.Op == OpInvalid {
  2151  				continue
  2152  			}
  2153  			b.Values[i] = v
  2154  			i++
  2155  		}
  2156  		b.Values = b.Values[:i]
  2157  	}
  2158  }
  2159  
  2160  func (s *regAllocState) placeSpills() {
  2161  	mustBeFirst := func(op Op) bool {
  2162  		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
  2163  	}
  2164  
  2165  	// Start maps block IDs to the list of spills
  2166  	// that go at the start of the block (but after any phis).
  2167  	start := map[ID][]*Value{}
  2168  	// After maps value IDs to the list of spills
  2169  	// that go immediately after that value ID.
  2170  	after := map[ID][]*Value{}
  2171  
  2172  	for i := range s.values {
  2173  		vi := s.values[i]
  2174  		spill := vi.spill
  2175  		if spill == nil {
  2176  			continue
  2177  		}
  2178  		if spill.Block != nil {
  2179  			// Some spills are already fully set up,
  2180  			// like OpArgs and stack-based phis.
  2181  			continue
  2182  		}
  2183  		v := s.orig[i]
  2184  
  2185  		// Walk down the dominator tree looking for a good place to
  2186  		// put the spill of v.  At the start "best" is the best place
  2187  		// we have found so far.
  2188  		// TODO: find a way to make this O(1) without arbitrary cutoffs.
  2189  		if v == nil {
  2190  			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
  2191  		}
  2192  		best := v.Block
  2193  		bestArg := v
  2194  		var bestDepth int16
  2195  		if l := s.loopnest.b2l[best.ID]; l != nil {
  2196  			bestDepth = l.depth
  2197  		}
  2198  		b := best
  2199  		const maxSpillSearch = 100
  2200  		for i := 0; i < maxSpillSearch; i++ {
  2201  			// Find the child of b in the dominator tree which
  2202  			// dominates all restores.
  2203  			p := b
  2204  			b = nil
  2205  			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
  2206  				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
  2207  					// c also dominates all restores.  Walk down into c.
  2208  					b = c
  2209  					break
  2210  				}
  2211  			}
  2212  			if b == nil {
  2213  				// Ran out of blocks which dominate all restores.
  2214  				break
  2215  			}
  2216  
  2217  			var depth int16
  2218  			if l := s.loopnest.b2l[b.ID]; l != nil {
  2219  				depth = l.depth
  2220  			}
  2221  			if depth > bestDepth {
  2222  				// Don't push the spill into a deeper loop.
  2223  				continue
  2224  			}
  2225  
  2226  			// If v is in a register at the start of b, we can
  2227  			// place the spill here (after the phis).
  2228  			if len(b.Preds) == 1 {
  2229  				for _, e := range s.endRegs[b.Preds[0].b.ID] {
  2230  					if e.v == v {
  2231  						// Found a better spot for the spill.
  2232  						best = b
  2233  						bestArg = e.c
  2234  						bestDepth = depth
  2235  						break
  2236  					}
  2237  				}
  2238  			} else {
  2239  				for _, e := range s.startRegs[b.ID] {
  2240  					if e.v == v {
  2241  						// Found a better spot for the spill.
  2242  						best = b
  2243  						bestArg = e.c
  2244  						bestDepth = depth
  2245  						break
  2246  					}
  2247  				}
  2248  			}
  2249  		}
  2250  
  2251  		// Put the spill in the best block we found.
  2252  		spill.Block = best
  2253  		spill.AddArg(bestArg)
  2254  		if best == v.Block && !mustBeFirst(v.Op) {
  2255  			// Place immediately after v.
  2256  			after[v.ID] = append(after[v.ID], spill)
  2257  		} else {
  2258  			// Place at the start of best block.
  2259  			start[best.ID] = append(start[best.ID], spill)
  2260  		}
  2261  	}
  2262  
  2263  	// Insert spill instructions into the block schedules.
  2264  	var oldSched []*Value
  2265  	for _, b := range s.visitOrder {
  2266  		nfirst := 0
  2267  		for _, v := range b.Values {
  2268  			if !mustBeFirst(v.Op) {
  2269  				break
  2270  			}
  2271  			nfirst++
  2272  		}
  2273  		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
  2274  		b.Values = b.Values[:nfirst]
  2275  		b.Values = append(b.Values, start[b.ID]...)
  2276  		for _, v := range oldSched {
  2277  			b.Values = append(b.Values, v)
  2278  			b.Values = append(b.Values, after[v.ID]...)
  2279  		}
  2280  	}
  2281  }
  2282  
  2283  // shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
  2284  func (s *regAllocState) shuffle(stacklive [][]ID) {
  2285  	var e edgeState
  2286  	e.s = s
  2287  	e.cache = map[ID][]*Value{}
  2288  	e.contents = map[Location]contentRecord{}
  2289  	if s.f.pass.debug > regDebug {
  2290  		fmt.Printf("shuffle %s\n", s.f.Name)
  2291  		fmt.Println(s.f.String())
  2292  	}
  2293  
  2294  	for _, b := range s.visitOrder {
  2295  		if len(b.Preds) <= 1 {
  2296  			continue
  2297  		}
  2298  		e.b = b
  2299  		for i, edge := range b.Preds {
  2300  			p := edge.b
  2301  			e.p = p
  2302  			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
  2303  			e.process()
  2304  		}
  2305  	}
  2306  
  2307  	if s.f.pass.debug > regDebug {
  2308  		fmt.Printf("post shuffle %s\n", s.f.Name)
  2309  		fmt.Println(s.f.String())
  2310  	}
  2311  }
  2312  
  2313  type edgeState struct {
  2314  	s    *regAllocState
  2315  	p, b *Block // edge goes from p->b.
  2316  
  2317  	// for each pre-regalloc value, a list of equivalent cached values
  2318  	cache      map[ID][]*Value
  2319  	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration
  2320  
  2321  	// map from location to the value it contains
  2322  	contents map[Location]contentRecord
  2323  
  2324  	// desired destination locations
  2325  	destinations []dstRecord
  2326  	extra        []dstRecord
  2327  
  2328  	usedRegs              regMask // registers currently holding something
  2329  	uniqueRegs            regMask // registers holding the only copy of a value
  2330  	finalRegs             regMask // registers holding final target
  2331  	rematerializeableRegs regMask // registers that hold rematerializeable values
  2332  }
  2333  
  2334  type contentRecord struct {
  2335  	vid   ID       // pre-regalloc value
  2336  	c     *Value   // cached value
  2337  	final bool     // this is a satisfied destination
  2338  	pos   src.XPos // source position of use of the value
  2339  }
  2340  
  2341  type dstRecord struct {
  2342  	loc    Location // register or stack slot
  2343  	vid    ID       // pre-regalloc value it should contain
  2344  	splice **Value  // place to store reference to the generating instruction
  2345  	pos    src.XPos // source position of use of this location
  2346  }
  2347  
  2348  // setup initializes the edge state for shuffling.
  2349  func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
  2350  	if e.s.f.pass.debug > regDebug {
  2351  		fmt.Printf("edge %s->%s\n", e.p, e.b)
  2352  	}
  2353  
  2354  	// Clear state.
  2355  	clear(e.cache)
  2356  	e.cachedVals = e.cachedVals[:0]
  2357  	clear(e.contents)
  2358  	e.usedRegs = 0
  2359  	e.uniqueRegs = 0
  2360  	e.finalRegs = 0
  2361  	e.rematerializeableRegs = 0
  2362  
  2363  	// Live registers can be sources.
  2364  	for _, x := range srcReg {
  2365  		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // the source's position doesn't matter
  2366  	}
  2367  	// So can all of the spill locations.
  2368  	for _, spillID := range stacklive {
  2369  		v := e.s.orig[spillID]
  2370  		spill := e.s.values[v.ID].spill
  2371  		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
  2372  			// Spills were placed that only dominate the uses found
  2373  			// during the first regalloc pass. The edge fixup code
  2374  			// can't use a spill location if the spill doesn't dominate
  2375  			// the edge.
  2376  			// We are guaranteed that if the spill doesn't dominate this edge,
  2377  			// then the value is available in a register (because we called
  2378  			// makeSpill for every value not in a register at the start
  2379  			// of an edge).
  2380  			continue
  2381  		}
  2382  		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // the source's position doesn't matter
  2383  	}
  2384  
  2385  	// Figure out all the destinations we need.
  2386  	dsts := e.destinations[:0]
  2387  	for _, x := range dstReg {
  2388  		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
  2389  	}
  2390  	// Phis need their args to end up in a specific location.
  2391  	for _, v := range e.b.Values {
  2392  		if v.Op != OpPhi {
  2393  			break
  2394  		}
  2395  		loc := e.s.f.getHome(v.ID)
  2396  		if loc == nil {
  2397  			continue
  2398  		}
  2399  		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
  2400  	}
  2401  	e.destinations = dsts
  2402  
  2403  	if e.s.f.pass.debug > regDebug {
  2404  		for _, vid := range e.cachedVals {
  2405  			a := e.cache[vid]
  2406  			for _, c := range a {
  2407  				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
  2408  			}
  2409  		}
  2410  		for _, d := range e.destinations {
  2411  			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
  2412  		}
  2413  	}
  2414  }
  2415  
  2416  // process generates code to move all the values to the right destination locations.
  2417  func (e *edgeState) process() {
  2418  	dsts := e.destinations
  2419  
  2420  	// Process the destinations until they are all satisfied.
  2421  	for len(dsts) > 0 {
  2422  		i := 0
  2423  		for _, d := range dsts {
  2424  			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
  2425  				// Failed - save for next iteration.
  2426  				dsts[i] = d
  2427  				i++
  2428  			}
  2429  		}
  2430  		if i < len(dsts) {
  2431  			// Made some progress. Go around again.
  2432  			dsts = dsts[:i]
  2433  
  2434  			// Append any extra destinations we generated.
  2435  			dsts = append(dsts, e.extra...)
  2436  			e.extra = e.extra[:0]
  2437  			continue
  2438  		}
  2439  
  2440  		// We made no progress. That means that any
  2441  		// remaining unsatisfied moves are in simple cycles.
  2442  		// For example, A -> B -> C -> D -> A.
  2443  		//   A ----> B
  2444  		//   ^       |
  2445  		//   |       |
  2446  		//   |       v
  2447  		//   D <---- C
  2448  
  2449  		// To break the cycle, we pick an unused register, say R,
  2450  		// and put a copy of B there.
  2451  		//   A ----> B
  2452  		//   ^       |
  2453  		//   |       |
  2454  		//   |       v
  2455  		//   D <---- C <---- R=copyofB
  2456  		// When we resume the outer loop, the A->B move can now proceed,
  2457  		// and eventually the whole cycle completes.
  2458  
  2459  		// Copy any cycle location to a temp register. This duplicates
  2460  		// one of the cycle entries, allowing the just duplicated value
  2461  		// to be overwritten and the cycle to proceed.
  2462  		d := dsts[0]
  2463  		loc := d.loc
  2464  		vid := e.contents[loc].vid
  2465  		c := e.contents[loc].c
  2466  		r := e.findRegFor(c.Type)
  2467  		if e.s.f.pass.debug > regDebug {
  2468  			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
  2469  		}
  2470  		e.erase(r)
  2471  		pos := d.pos.WithNotStmt()
  2472  		if _, isReg := loc.(*Register); isReg {
  2473  			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2474  		} else {
  2475  			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2476  		}
  2477  		e.set(r, vid, c, false, pos)
  2478  		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
  2479  			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
  2480  		}
  2481  	}
  2482  }
  2483  
  2484  // processDest generates code to put value vid into location loc. Returns true
  2485  // if progress was made.
  2486  func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
  2487  	pos = pos.WithNotStmt()
  2488  	occupant := e.contents[loc]
  2489  	if occupant.vid == vid {
  2490  		// Value is already in the correct place.
  2491  		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
  2492  		if splice != nil {
  2493  			(*splice).Uses--
  2494  			*splice = occupant.c
  2495  			occupant.c.Uses++
  2496  		}
  2497  		// Note: if splice==nil then c will appear dead. This is
  2498  		// non-SSA formed code, so be careful after this pass not to run
  2499  		// deadcode elimination.
  2500  		if _, ok := e.s.copies[occupant.c]; ok {
  2501  			// The copy at occupant.c was used to avoid spill.
  2502  			e.s.copies[occupant.c] = true
  2503  		}
  2504  		return true
  2505  	}
  2506  
  2507  	// Check if we're allowed to clobber the destination location.
  2508  	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
  2509  		// We can't overwrite the last copy
  2510  		// of a value that needs to survive.
  2511  		return false
  2512  	}
  2513  
  2514  	// Copy from a source of v, register preferred.
  2515  	v := e.s.orig[vid]
  2516  	var c *Value
  2517  	var src Location
  2518  	if e.s.f.pass.debug > regDebug {
  2519  		fmt.Printf("moving v%d to %s\n", vid, loc)
  2520  		fmt.Printf("sources of v%d:", vid)
  2521  	}
  2522  	if opcodeTable[v.Op].fixedReg {
  2523  		c = v
  2524  		src = e.s.f.getHome(v.ID)
  2525  	} else {
  2526  		for _, w := range e.cache[vid] {
  2527  			h := e.s.f.getHome(w.ID)
  2528  			if e.s.f.pass.debug > regDebug {
  2529  				fmt.Printf(" %s:%s", h, w)
  2530  			}
  2531  			_, isreg := h.(*Register)
  2532  			if src == nil || isreg {
  2533  				c = w
  2534  				src = h
  2535  			}
  2536  		}
  2537  	}
  2538  	if e.s.f.pass.debug > regDebug {
  2539  		if src != nil {
  2540  			fmt.Printf(" [use %s]\n", src)
  2541  		} else {
  2542  			fmt.Printf(" [no source]\n")
  2543  		}
  2544  	}
  2545  	_, dstReg := loc.(*Register)
  2546  
  2547  	// Pre-clobber destination. This avoids the
  2548  	// following situation:
  2549  	//   - v is currently held in R0 and stacktmp0.
  2550  	//   - We want to copy stacktmp1 to stacktmp0.
  2551  	//   - We choose R0 as the temporary register.
  2552  	// During the copy, both R0 and stacktmp0 are
  2553  	// clobbered, losing both copies of v. Oops!
  2554  	// Erasing the destination early means R0 will not
  2555  	// be chosen as the temp register, as it will then
  2556  	// be the last copy of v.
  2557  	e.erase(loc)
  2558  	var x *Value
  2559  	if c == nil || e.s.values[vid].rematerializeable {
  2560  		if !e.s.values[vid].rematerializeable {
  2561  			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
  2562  		}
  2563  		if dstReg {
  2564  			// We want to rematerialize v into a register that is incompatible with v's op's register mask.
  2565  			// Instead of setting the wrong register for the rematerialized v, we should find the right register
  2566  			// for it and emit an additional copy to move to the desired register.
  2567  			// For #70451.
  2568  			if e.s.regspec(v).outputs[0].regs&regMask(1<<register(loc.(*Register).num)) == 0 {
  2569  				_, srcReg := src.(*Register)
  2570  				if srcReg {
  2571  					// It exists in a valid register already, so just copy it to the desired register
  2572  					// If src is a Register, c must have already been set.
  2573  					x = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2574  				} else {
  2575  					// We need a tmp register
  2576  					x = v.copyInto(e.p)
  2577  					r := e.findRegFor(x.Type)
  2578  					e.erase(r)
  2579  					// Rematerialize to the tmp register
  2580  					e.set(r, vid, x, false, pos)
  2581  					// Copy from tmp to the desired register
  2582  					x = e.p.NewValue1(pos, OpCopy, x.Type, x)
  2583  				}
  2584  			} else {
  2585  				x = v.copyInto(e.p)
  2586  			}
  2587  		} else {
  2588  			// Rematerialize into stack slot. Need a free
  2589  			// register to accomplish this.
  2590  			r := e.findRegFor(v.Type)
  2591  			e.erase(r)
  2592  			x = v.copyIntoWithXPos(e.p, pos)
  2593  			e.set(r, vid, x, false, pos)
  2594  			// Make sure we spill with the size of the slot, not the
  2595  			// size of x (which might be wider due to our dropping
  2596  			// of narrowing conversions).
  2597  			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
  2598  		}
  2599  	} else {
  2600  		// Emit move from src to dst.
  2601  		_, srcReg := src.(*Register)
  2602  		if srcReg {
  2603  			if dstReg {
  2604  				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2605  			} else {
  2606  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
  2607  			}
  2608  		} else {
  2609  			if dstReg {
  2610  				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2611  			} else {
  2612  				// mem->mem. Use temp register.
  2613  				r := e.findRegFor(c.Type)
  2614  				e.erase(r)
  2615  				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2616  				e.set(r, vid, t, false, pos)
  2617  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
  2618  			}
  2619  		}
  2620  	}
  2621  	e.set(loc, vid, x, true, pos)
  2622  	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
  2623  		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
  2624  	}
  2625  	if splice != nil {
  2626  		(*splice).Uses--
  2627  		*splice = x
  2628  		x.Uses++
  2629  	}
  2630  	return true
  2631  }
  2632  
  2633  // set changes the contents of location loc to hold the given value and its cached representative.
  2634  func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
  2635  	e.s.f.setHome(c, loc)
  2636  	e.contents[loc] = contentRecord{vid, c, final, pos}
  2637  	a := e.cache[vid]
  2638  	if len(a) == 0 {
  2639  		e.cachedVals = append(e.cachedVals, vid)
  2640  	}
  2641  	a = append(a, c)
  2642  	e.cache[vid] = a
  2643  	if r, ok := loc.(*Register); ok {
  2644  		if e.usedRegs&(regMask(1)<<uint(r.num)) != 0 {
  2645  			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
  2646  		}
  2647  		e.usedRegs |= regMask(1) << uint(r.num)
  2648  		if final {
  2649  			e.finalRegs |= regMask(1) << uint(r.num)
  2650  		}
  2651  		if len(a) == 1 {
  2652  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2653  		}
  2654  		if len(a) == 2 {
  2655  			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2656  				e.uniqueRegs &^= regMask(1) << uint(t.num)
  2657  			}
  2658  		}
  2659  		if e.s.values[vid].rematerializeable {
  2660  			e.rematerializeableRegs |= regMask(1) << uint(r.num)
  2661  		}
  2662  	}
  2663  	if e.s.f.pass.debug > regDebug {
  2664  		fmt.Printf("%s\n", c.LongString())
  2665  		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
  2666  	}
  2667  }
  2668  
  2669  // erase removes any user of loc.
  2670  func (e *edgeState) erase(loc Location) {
  2671  	cr := e.contents[loc]
  2672  	if cr.c == nil {
  2673  		return
  2674  	}
  2675  	vid := cr.vid
  2676  
  2677  	if cr.final {
  2678  		// Add a destination to move this value back into place.
  2679  		// Make sure it gets added to the tail of the destination queue
  2680  		// so we make progress on other moves first.
  2681  		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
  2682  	}
  2683  
  2684  	// Remove c from the list of cached values.
  2685  	a := e.cache[vid]
  2686  	for i, c := range a {
  2687  		if e.s.f.getHome(c.ID) == loc {
  2688  			if e.s.f.pass.debug > regDebug {
  2689  				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
  2690  			}
  2691  			a[i], a = a[len(a)-1], a[:len(a)-1]
  2692  			break
  2693  		}
  2694  	}
  2695  	e.cache[vid] = a
  2696  
  2697  	// Update register masks.
  2698  	if r, ok := loc.(*Register); ok {
  2699  		e.usedRegs &^= regMask(1) << uint(r.num)
  2700  		if cr.final {
  2701  			e.finalRegs &^= regMask(1) << uint(r.num)
  2702  		}
  2703  		e.rematerializeableRegs &^= regMask(1) << uint(r.num)
  2704  	}
  2705  	if len(a) == 1 {
  2706  		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2707  			e.uniqueRegs |= regMask(1) << uint(r.num)
  2708  		}
  2709  	}
  2710  }
  2711  
  2712  // findRegFor finds a register we can use to make a temp copy of type typ.
  2713  func (e *edgeState) findRegFor(typ *types.Type) Location {
  2714  	// Which registers are possibilities.
  2715  	m := e.s.compatRegs(typ)
  2716  
  2717  	// Pick a register. In priority order:
  2718  	// 1) an unused register
  2719  	// 2) a non-unique register not holding a final value
  2720  	// 3) a non-unique register
  2721  	// 4) a register holding a rematerializeable value
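        	// Roughly: an unused register costs nothing to take; a non-unique
        	// register still leaves another copy of its value somewhere; a final
        	// register already holds a satisfied destination, so disturbing it
        	// forces extra fixup moves; and a rematerializeable value can be
        	// recomputed cheaply if evicted.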
  2722  	x := m &^ e.usedRegs
  2723  	if x != 0 {
  2724  		return &e.s.registers[pickReg(x)]
  2725  	}
  2726  	x = m &^ e.uniqueRegs &^ e.finalRegs
  2727  	if x != 0 {
  2728  		return &e.s.registers[pickReg(x)]
  2729  	}
  2730  	x = m &^ e.uniqueRegs
  2731  	if x != 0 {
  2732  		return &e.s.registers[pickReg(x)]
  2733  	}
  2734  	x = m & e.rematerializeableRegs
  2735  	if x != 0 {
  2736  		return &e.s.registers[pickReg(x)]
  2737  	}
  2738  
  2739  	// No register is available.
  2740  	// Pick a register to spill.
  2741  	for _, vid := range e.cachedVals {
  2742  		a := e.cache[vid]
  2743  		for _, c := range a {
  2744  			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m>>uint(r.num)&1 != 0 {
  2745  				if !c.rematerializeable() {
  2746  					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
  2747  					// Allocate a temp location to spill a register to.
  2748  					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, c.Type), Type: c.Type}
  2749  					// TODO: reuse these slots. They'll need to be erased first.
  2750  					e.set(t, vid, x, false, c.Pos)
  2751  					if e.s.f.pass.debug > regDebug {
  2752  						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
  2753  					}
  2754  				}
  2755  				// r will now be overwritten by the caller. At some point
  2756  				// later, the newly saved value will be moved back to its
  2757  				// final destination in processDest.
  2758  				return r
  2759  			}
  2760  		}
  2761  	}
  2762  
  2763  	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
  2764  	for _, vid := range e.cachedVals {
  2765  		a := e.cache[vid]
  2766  		for _, c := range a {
  2767  			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
  2768  		}
  2769  	}
  2770  	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
  2771  	return nil
  2772  }
  2773  
  2774  // rematerializeable reports whether the register allocator should recompute
  2775  // a value instead of spilling/restoring it.
  2776  func (v *Value) rematerializeable() bool {
  2777  	if !opcodeTable[v.Op].rematerializeable {
  2778  		return false
  2779  	}
  2780  	for _, a := range v.Args {
  2781  		// Fixed-register allocations (SP, SB, etc.) are always available.
  2782  		// Any other argument of an opcode makes it not rematerializeable.
  2783  		if !opcodeTable[a.Op].fixedReg {
  2784  			return false
  2785  		}
  2786  	}
  2787  	return true
  2788  }
  2789  
  2790  type liveInfo struct {
  2791  	ID   ID       // ID of value
  2792  	dist int32    // # of instructions before next use
  2793  	pos  src.XPos // source position of next use
  2794  }
  2795  
  2796  // computeLive computes a map from block ID to a list of value IDs live at the end
  2797  // of that block. Together with the value ID is a count of how many instructions
  2798  // to the next use of that value. The resulting map is stored in s.live.
  2799  // computeLive also computes the desired register information at the end of each block.
  2800  // This desired register information is stored in s.desired.
  2801  // TODO: this could be quadratic if lots of variables are live across lots of
  2802  // basic blocks. Figure out a way to make this function (or, more precisely, the user
  2803  // of this function) require only linear size & time.
  2804  func (s *regAllocState) computeLive() {
  2805  	f := s.f
  2806  	s.live = make([][]liveInfo, f.NumBlocks())
  2807  	s.desired = make([]desiredState, f.NumBlocks())
  2808  	var phis []*Value
  2809  	rematIDs := make([]ID, 0, 64)
  2810  
  2811  	live := f.newSparseMapPos(f.NumValues())
  2812  	defer f.retSparseMapPos(live)
  2813  	t := f.newSparseMapPos(f.NumValues())
  2814  	defer f.retSparseMapPos(t)
  2815  
  2816  	// Keep track of which value we want in each register.
  2817  	var desired desiredState
  2818  
  2819  	// Instead of iterating over f.Blocks, iterate over their postordering.
  2820  	// Liveness information flows backward, so starting at the end
  2821  	// increases the probability that we will stabilize quickly.
  2822  	// TODO: Do a better job yet. Here's one possibility:
  2823  	// Calculate the dominator tree and locate all strongly connected components.
  2824  	// If a value is live in one block of an SCC, it is live in all.
  2825  	// Walk the dominator tree from end to beginning, just once, treating SCC
  2826  	// components as single blocks, duplicating calculated liveness information
  2827  	// out to all of them.
  2828  	po := f.postorder()
  2829  	s.loopnest = f.loopnest()
  2830  	s.loopnest.computeUnavoidableCalls()
  2831  	for {
  2832  		changed := false
  2833  
  2834  		for _, b := range po {
  2835  			// Start with known live values at the end of the block.
  2836  			// Add len(b.Values) to adjust from end-of-block distance
  2837  			// to beginning-of-block distance.
  2838  			live.clear()
  2839  			for _, e := range s.live[b.ID] {
  2840  				live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
  2841  			}
  2842  
  2843  			// Mark control values as live
  2844  			for _, c := range b.ControlValues() {
  2845  				if s.values[c.ID].needReg {
  2846  					live.set(c.ID, int32(len(b.Values)), b.Pos)
  2847  				}
  2848  			}
  2849  
  2850  			// Propagate backwards to the start of the block
  2851  			// Assumes Values have been scheduled.
  2852  			phis = phis[:0]
  2853  			for i := len(b.Values) - 1; i >= 0; i-- {
  2854  				v := b.Values[i]
  2855  				live.remove(v.ID)
  2856  				if v.Op == OpPhi {
  2857  					// save phi ops for later
  2858  					phis = append(phis, v)
  2859  					continue
  2860  				}
  2861  				if opcodeTable[v.Op].call {
  2862  					rematIDs = rematIDs[:0]
  2863  					c := live.contents()
  2864  					for i := range c {
  2865  						c[i].val += unlikelyDistance
  2866  						vid := c[i].key
  2867  						if s.values[vid].rematerializeable {
  2868  							rematIDs = append(rematIDs, vid)
  2869  						}
  2870  					}
  2871  					// We don't spill rematerializeable values, and assuming they
  2872  					// are live across a call would only force shuffle to add some
  2873  					// (dead) constant rematerialization. Remove them.
  2874  					for _, r := range rematIDs {
  2875  						live.remove(r)
  2876  					}
  2877  				}
  2878  				for _, a := range v.Args {
  2879  					if s.values[a.ID].needReg {
  2880  						live.set(a.ID, int32(i), v.Pos)
  2881  					}
  2882  				}
  2883  			}
  2884  			// Propagate desired registers backwards.
  2885  			desired.copy(&s.desired[b.ID])
  2886  			for i := len(b.Values) - 1; i >= 0; i-- {
  2887  				v := b.Values[i]
  2888  				prefs := desired.remove(v.ID)
  2889  				if v.Op == OpPhi {
  2890  					// TODO: if v is a phi, save desired register for phi inputs.
  2891  					// For now, we just drop it and don't propagate
  2892  					// desired registers back though phi nodes.
  2893  					continue
  2894  				}
  2895  				regspec := s.regspec(v)
  2896  				// Cancel desired registers if they get clobbered.
  2897  				desired.clobber(regspec.clobbers)
  2898  				// Update desired registers if there are any fixed register inputs.
  2899  				for _, j := range regspec.inputs {
  2900  					if countRegs(j.regs) != 1 {
  2901  						continue
  2902  					}
  2903  					desired.clobber(j.regs)
  2904  					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  2905  				}
  2906  				// Set desired register of input 0 if this is a 2-operand instruction.
  2907  				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  2908  					// ADDQconst is added here because we want to treat it as resultInArg0 for
  2909  					// the purposes of desired registers, even though it is not an absolute requirement.
  2910  					// This is because we'd rather implement it as ADDQ instead of LEAQ.
  2911  					// Same for ADDLconst.
  2912  					// Select0 is added here to propagate the desired register to the tuple-generating instruction.
  2913  					if opcodeTable[v.Op].commutative {
  2914  						desired.addList(v.Args[1].ID, prefs)
  2915  					}
  2916  					desired.addList(v.Args[0].ID, prefs)
  2917  				}
  2918  			}
  2919  
  2920  			// For each predecessor of b, expand its list of live-at-end values.
  2921  			// invariant: live contains the values live at the start of b (excluding phi inputs)
  2922  			for i, e := range b.Preds {
  2923  				p := e.b
  2924  				// Compute additional distance for the edge.
  2925  				// Note: delta must be at least 1 to distinguish the control
  2926  				// value use from the first user in a successor block.
  2927  				delta := int32(normalDistance)
  2928  				if len(p.Succs) == 2 {
  2929  					if p.Succs[0].b == b && p.Likely == BranchLikely ||
  2930  						p.Succs[1].b == b && p.Likely == BranchUnlikely {
  2931  						delta = likelyDistance
  2932  					}
  2933  					if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
  2934  						p.Succs[1].b == b && p.Likely == BranchLikely {
  2935  						delta = unlikelyDistance
  2936  					}
  2937  				}
  2938  
  2939  				// Update any desired registers at the end of p.
  2940  				s.desired[p.ID].merge(&desired)
  2941  
  2942  				// Start t off with the previously known live values at the end of p.
  2943  				t.clear()
  2944  				for _, e := range s.live[p.ID] {
  2945  					t.set(e.ID, e.dist, e.pos)
  2946  				}
  2947  				update := false
  2948  
  2949  				// Add new live values from scanning this block.
  2950  				for _, e := range live.contents() {
  2951  					d := e.val + delta
  2952  					if !t.contains(e.key) || d < t.get(e.key) {
  2953  						update = true
  2954  						t.set(e.key, d, e.pos)
  2955  					}
  2956  				}
  2957  				// Also add the correct arg from the saved phi values.
  2958  				// All phis are at distance delta (we consider them
  2959  				// simultaneously happening at the start of the block).
  2960  				for _, v := range phis {
  2961  					id := v.Args[i].ID
  2962  					if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
  2963  						update = true
  2964  						t.set(id, delta, v.Pos)
  2965  					}
  2966  				}
  2967  
  2968  				if !update {
  2969  					continue
  2970  				}
  2971  				// The live set has changed, update it.
  2972  				l := s.live[p.ID][:0]
  2973  				if cap(l) < t.size() {
  2974  					l = make([]liveInfo, 0, t.size())
  2975  				}
  2976  				for _, e := range t.contents() {
  2977  					l = append(l, liveInfo{e.key, e.val, e.pos})
  2978  				}
  2979  				s.live[p.ID] = l
  2980  				changed = true
  2981  			}
  2982  		}
  2983  
  2984  		if !changed {
  2985  			break
  2986  		}
  2987  	}
  2988  	if f.pass.debug > regDebug {
  2989  		fmt.Println("live values at end of each block")
  2990  		for _, b := range f.Blocks {
  2991  			fmt.Printf("  %s:", b)
  2992  			for _, x := range s.live[b.ID] {
  2993  				fmt.Printf(" v%d(%d)", x.ID, x.dist)
  2994  				for _, e := range s.desired[b.ID].entries {
  2995  					if e.ID != x.ID {
  2996  						continue
  2997  					}
  2998  					fmt.Printf("[")
  2999  					first := true
  3000  					for _, r := range e.regs {
  3001  						if r == noRegister {
  3002  							continue
  3003  						}
  3004  						if !first {
  3005  							fmt.Printf(",")
  3006  						}
  3007  						fmt.Print(&s.registers[r])
  3008  						first = false
  3009  					}
  3010  					fmt.Printf("]")
  3011  				}
  3012  			}
  3013  			if avoid := s.desired[b.ID].avoid; avoid != 0 {
  3014  				fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
  3015  			}
  3016  			fmt.Println()
  3017  		}
  3018  	}
  3019  }
  3020  
  3021  // A desiredState represents desired register assignments.
  3022  type desiredState struct {
  3023  	// Desired assignments will be small, so we just use a list
  3024  	// of valueID+registers entries.
  3025  	entries []desiredStateEntry
  3026  	// Registers that other values want to be in. This mask contains
  3027  	// at least the union of the regs fields of entries, but may also
  3028  	// have bits set for registers desired by values that were once in
  3029  	// this data structure but are no longer.
  3030  	avoid regMask
  3031  }
  3032  type desiredStateEntry struct {
  3033  	// (pre-regalloc) value
  3034  	ID ID
  3035  	// Registers it would like to be in, in priority order.
  3036  	// Unused slots are filled with noRegister.
  3037  	// For opcodes that return tuples, we track desired registers only
  3038  	// for the first element of the tuple (see desiredSecondReg for
  3039  	// tracking the desired register for the second part of a tuple).
  3040  	regs [4]register
  3041  }
  3042  
  3043  // get returns a list of desired registers for value vid.
  3044  func (d *desiredState) get(vid ID) [4]register {
  3045  	for _, e := range d.entries {
  3046  		if e.ID == vid {
  3047  			return e.regs
  3048  		}
  3049  	}
  3050  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  3051  }
  3052  
  3053  // add records that we'd like value vid to be in register r.
  3054  func (d *desiredState) add(vid ID, r register) {
  3055  	d.avoid |= regMask(1) << r
  3056  	for i := range d.entries {
  3057  		e := &d.entries[i]
  3058  		if e.ID != vid {
  3059  			continue
  3060  		}
  3061  		if e.regs[0] == r {
  3062  			// Already known and highest priority
  3063  			return
  3064  		}
  3065  		for j := 1; j < len(e.regs); j++ {
  3066  			if e.regs[j] == r {
  3067  				// Move from lower priority to top priority
  3068  				copy(e.regs[1:], e.regs[:j])
  3069  				e.regs[0] = r
  3070  				return
  3071  			}
  3072  		}
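        		// r is not in the list yet: shift the existing registers down one
        		// slot (dropping the lowest-priority one) and put r at the top.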
  3073  		copy(e.regs[1:], e.regs[:])
  3074  		e.regs[0] = r
  3075  		return
  3076  	}
  3077  	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
  3078  }
  3079  
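        // addList records that we'd like value vid to be in each of the
        // registers in regs, in priority order.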
  3080  func (d *desiredState) addList(vid ID, regs [4]register) {
  3081  	// regs is in priority order, so iterate in reverse order.
  3082  	for i := len(regs) - 1; i >= 0; i-- {
  3083  		r := regs[i]
  3084  		if r != noRegister {
  3085  			d.add(vid, r)
  3086  		}
  3087  	}
  3088  }
  3089  
  3090  // clobber erases any desired registers in the set m.
  3091  func (d *desiredState) clobber(m regMask) {
  3092  	for i := 0; i < len(d.entries); {
  3093  		e := &d.entries[i]
  3094  		j := 0
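        		// Compact the surviving (non-clobbered) registers to the front,
        		// preserving their priority order.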
  3095  		for _, r := range e.regs {
  3096  			if r != noRegister && m>>r&1 == 0 {
  3097  				e.regs[j] = r
  3098  				j++
  3099  			}
  3100  		}
  3101  		if j == 0 {
  3102  			// No more desired registers for this value.
  3103  			d.entries[i] = d.entries[len(d.entries)-1]
  3104  			d.entries = d.entries[:len(d.entries)-1]
  3105  			continue
  3106  		}
  3107  		for ; j < len(e.regs); j++ {
  3108  			e.regs[j] = noRegister
  3109  		}
  3110  		i++
  3111  	}
  3112  	d.avoid &^= m
  3113  }
  3114  
  3115  // copy copies a desired state from another desiredState x.
  3116  func (d *desiredState) copy(x *desiredState) {
  3117  	d.entries = append(d.entries[:0], x.entries...)
  3118  	d.avoid = x.avoid
  3119  }
  3120  
  3121  // remove removes the desired registers for vid and returns them.
  3122  func (d *desiredState) remove(vid ID) [4]register {
  3123  	for i := range d.entries {
  3124  		if d.entries[i].ID == vid {
  3125  			regs := d.entries[i].regs
  3126  			d.entries[i] = d.entries[len(d.entries)-1]
  3127  			d.entries = d.entries[:len(d.entries)-1]
  3128  			return regs
  3129  		}
  3130  	}
  3131  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  3132  }
  3133  
  3134  // merge merges another desired state x into d.
  3135  func (d *desiredState) merge(x *desiredState) {
  3136  	d.avoid |= x.avoid
  3137  	// There should only be a few desired registers, so
  3138  	// linear insert is ok.
  3139  	for _, e := range x.entries {
  3140  		d.addList(e.ID, e.regs)
  3141  	}
  3142  }
  3143  
  3144  // computeUnavoidableCalls computes the containsUnavoidableCall fields in the loop nest.
  3145  func (loopnest *loopnest) computeUnavoidableCalls() {
  3146  	f := loopnest.f
  3147  
  3148  	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
  3149  	defer f.Cache.freeBoolSlice(hasCall)
  3150  	for _, b := range f.Blocks {
  3151  		if b.containsCall() {
  3152  			hasCall[b.ID] = true
  3153  		}
  3154  	}
  3155  	found := f.Cache.allocSparseSet(f.NumBlocks())
  3156  	defer f.Cache.freeSparseSet(found)
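        	// found tracks the blocks already pushed onto the worklist, so each
        	// block is visited at most once per loop.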
  3157  	// Run a DFS to find a path through the loop that avoids all calls.
  3158  	// Such a path either escapes the loop or returns to the header.
  3159  	// It isn't enough for an exit to not be dominated by any call; for example:
  3160  	// ... some loop
  3161  	// call1    call2
  3162  	//   \       /
  3163  	//     block
  3164  	// ...
  3165  	// block is not dominated by any single call, but there is no call-free path to it.
  3166  loopLoop:
  3167  	for _, l := range loopnest.loops {
  3168  		found.clear()
  3169  		tovisit := make([]*Block, 0, 8)
  3170  		tovisit = append(tovisit, l.header)
  3171  		for len(tovisit) > 0 {
  3172  			cur := tovisit[len(tovisit)-1]
  3173  			tovisit = tovisit[:len(tovisit)-1]
  3174  			if hasCall[cur.ID] {
  3175  				continue
  3176  			}
  3177  			for _, s := range cur.Succs {
  3178  				nb := s.Block()
  3179  				if nb == l.header {
  3180  					// Found a call-free path around the loop.
  3181  					continue loopLoop
  3182  				}
  3183  				if found.contains(nb.ID) {
  3184  					// Already found via another path.
  3185  					continue
  3186  				}
  3187  				nl := loopnest.b2l[nb.ID]
  3188  				if nl == nil || (nl.depth <= l.depth && nl != l) {
  3189  					// Left the loop.
  3190  					continue
  3191  				}
  3192  				tovisit = append(tovisit, nb)
  3193  				found.add(nb.ID)
  3194  			}
  3195  		}
  3196  		// No call-free path was found.
  3197  		l.containsUnavoidableCall = true
  3198  	}
  3199  }
  3200  
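        // containsCall reports whether block b is a defer block or contains
        // a call operation.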
  3201  func (b *Block) containsCall() bool {
  3202  	if b.Kind == BlockDefer {
  3203  		return true
  3204  	}
  3205  	for _, v := range b.Values {
  3206  		if opcodeTable[v.Op].call {
  3207  			return true
  3208  		}
  3209  	}
  3210  	return false
  3211  }
  3212  
