Source file src/cmd/compile/internal/ssa/regalloc.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Register allocation.
     6  //
     7  // We use a version of a linear scan register allocator. We treat the
     8  // whole function as a single long basic block and run through
     9  // it using a greedy register allocator. Then all merge edges
    10  // (those targeting a block with len(Preds)>1) are processed to
    11  // shuffle data into the place that the target of the edge expects.
    12  //
    13  // The greedy allocator moves values into registers just before they
    14  // are used, spills registers only when necessary, and spills the
    15  // value whose next use is farthest in the future.
    16  //
    17  // The register allocator requires that a block is not scheduled until
    18  // at least one of its predecessors have been scheduled. The most recent
    19  // such predecessor provides the starting register state for a block.
    20  //
    21  // It also requires that there are no critical edges (critical =
    22  // comes from a block with >1 successor and goes to a block with >1
    23  // predecessor).  This makes it easy to add fixup code on merge edges -
    24  // the source of a merge edge has only one successor, so we can add
    25  // fixup code to the end of that block.
    26  
    27  // Spilling
    28  //
    29  // During the normal course of the allocator, we might throw a still-live
    30  // value out of all registers. When that value is subsequently used, we must
    31  // load it from a slot on the stack. We must also issue an instruction to
    32  // initialize that stack location with a copy of v.
    33  //
    34  // pre-regalloc:
    35  //   (1) v = Op ...
    36  //   (2) x = Op ...
    37  //   (3) ... = Op v ...
    38  //
    39  // post-regalloc:
    40  //   (1) v = Op ...    : AX // computes v, store result in AX
    41  //       s = StoreReg v     // spill v to a stack slot
    42  //   (2) x = Op ...    : AX // some other op uses AX
    43  //       c = LoadReg s : CX // restore v from stack slot
    44  //   (3) ... = Op c ...     // use the restored value
    45  //
    46  // Allocation occurs normally until we reach (3) and we realize we have
    47  // a use of v and it isn't in any register. At that point, we allocate
    48  // a spill (a StoreReg) for v. We can't determine the correct place for
    49  // the spill at this point, so we allocate the spill as blockless initially.
    50  // The restore is then generated to load v back into a register so it can
    51  // be used. Subsequent uses of v will use the restored value c instead.
    52  //
    53  // What remains is the question of where to schedule the spill.
    54  // During allocation, we keep track of the dominator of all restores of v.
    55  // The spill of v must dominate that block. The spill must also be issued at
    56  // a point where v is still in a register.
    57  //
    58  // To find the right place, start at b, the block which dominates all restores.
    59  //  - If b is v.Block, then issue the spill right after v.
    60  //    It is known to be in a register at that point, and dominates any restores.
    61  //  - Otherwise, if v is in a register at the start of b,
    62  //    put the spill of v at the start of b.
    63  //  - Otherwise, set b = immediate dominator of b, and repeat.
    64  //
    65  // Phi values are special, as always. We define two kinds of phis, those
    66  // where the merge happens in a register (a "register" phi) and those where
    67  // the merge happens in a stack location (a "stack" phi).
    68  //
    69  // A register phi must have the phi and all of its inputs allocated to the
    70  // same register. Register phis are spilled similarly to regular ops.
    71  //
    72  // A stack phi must have the phi and all of its inputs allocated to the same
    73  // stack location. Stack phis start out life already spilled - each phi
    74  // input must be a store (using StoreReg) at the end of the corresponding
    75  // predecessor block.
    76  //     b1: y = ... : AX        b2: z = ... : BX
    77  //         y2 = StoreReg y         z2 = StoreReg z
    78  //         goto b3                 goto b3
    79  //     b3: x = phi(y2, z2)
    80  // The stack allocator knows that StoreReg args of stack-allocated phis
    81  // must be allocated to the same stack slot as the phi that uses them.
    82  // x is now a spilled value and a restore must appear before its first use.
    83  
    84  // TODO
    85  
    86  // Use an affinity graph to mark two values which should use the
    87  // same register. This affinity graph will be used to prefer certain
    88  // registers for allocation. This affinity helps eliminate moves that
    89  // are required for phi implementations and helps generate allocations
    90  // for 2-register architectures.
    91  
    92  // Note: regalloc generates a not-quite-SSA output. If we have:
    93  //
    94  //             b1: x = ... : AX
    95  //                 x2 = StoreReg x
    96  //                 ... AX gets reused for something else ...
    97  //                 if ... goto b3 else b4
    98  //
    99  //   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
   100  //       ... use x3 ...                 ... use x4 ...
   101  //
   102  //             b2: ... use x3 ...
   103  //
   104  // If b3 is the primary predecessor of b2, then we use x3 in b2 and
   105  // add a x4:CX->BX copy at the end of b4.
   106  // But the definition of x3 doesn't dominate b2.  We should really
   107  // insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
   108  // SSA form. For now, we ignore this problem as remaining in strict
   109  // SSA form isn't needed after regalloc. We'll just leave the use
   110  // of x3 not dominated by the definition of x3, and the CX->BX copy
   111  // will have no use (so don't run deadcode after regalloc!).
   112  // TODO: maybe we should introduce these extra phis?
   113  
   114  package ssa
   115  
   116  import (
   117  	"cmd/compile/internal/base"
   118  	"cmd/compile/internal/ir"
   119  	"cmd/compile/internal/types"
   120  	"cmd/internal/src"
   121  	"cmd/internal/sys"
   122  	"cmp"
   123  	"fmt"
   124  	"internal/buildcfg"
   125  	"math"
   126  	"math/bits"
   127  	"slices"
   128  	"unsafe"
   129  )
   130  
   131  const (
   132  	moveSpills = iota
   133  	logSpills
   134  	regDebug
   135  	stackDebug
   136  )
   137  
   138  // distance is a measure of how far into the future values are used.
   139  // distance is measured in units of instructions.
   140  const (
   141  	likelyDistance   = 1
   142  	normalDistance   = 10
   143  	unlikelyDistance = 100
   144  )
   145  
   146  // regalloc performs register allocation on f. It sets f.RegAlloc
   147  // to the resulting allocation.
   148  func regalloc(f *Func) {
   149  	var s regAllocState
   150  	s.init(f)
   151  	s.regalloc(f)
   152  	s.close()
   153  }
   154  
   155  type register uint8
   156  
   157  const noRegister register = 255
   158  
   159  // For bulk initializing
   160  var noRegisters [32]register = [32]register{
   161  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   162  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   163  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   164  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   165  }
   166  
   167  // A regMask encodes a set of machine registers.
   168  type regMask struct {
   169  	v1, v2 uint64
   170  }
   171  
   172  func (r regMask) intersect(s regMask) regMask {
   173  	return regMask{r.v1 & s.v1, r.v2 & s.v2}
   174  }
   175  
   176  func (r regMask) union(s regMask) regMask {
   177  	return regMask{r.v1 | s.v1, r.v2 | s.v2}
   178  }
   179  
   180  func (r regMask) minus(s regMask) regMask {
   181  	return regMask{r.v1 &^ s.v1, r.v2 &^ s.v2}
   182  }
   183  
   184  func (r regMask) empty() bool {
   185  	return r.v1 == 0 && r.v2 == 0
   186  }
   187  
   188  func (r regMask) pickReg() register {
   189  	if r.empty() {
   190  		panic("can't pick a register from an empty set")
   191  	}
   192  	// pick the lowest one
   193  	if r.v1 != 0 {
   194  		return register(bits.TrailingZeros64(r.v1))
   195  	}
   196  	return register(bits.TrailingZeros64(r.v2) + 64)
   197  }
   198  
   199  func regMaskAt(i register) regMask {
   200  	if i < 64 {
   201  		return regMask{v1: 1 << i}
   202  	}
   203  	return regMask{v2: 1 << (i - 64)}
   204  }
   205  
   206  func (r regMask) addReg(i register) regMask {
   207  	if i < 64 {
   208  		return regMask{r.v1 | 1<<i, r.v2}
   209  	}
   210  	return regMask{r.v1, r.v2 | 1<<(i-64)}
   211  }
   212  
   213  func (r regMask) removeReg(i register) regMask {
   214  	if i < 64 {
   215  		return regMask{r.v1 &^ (1 << i), r.v2}
   216  	}
   217  	return regMask{r.v1, r.v2 &^ (1 << (i - 64))}
   218  }
   219  
   220  func (r regMask) hasReg(i register) bool {
   221  	if i < 64 {
   222  		return (r.v1>>i)&1 != 0
   223  	}
   224  	return (r.v2>>(i-64))&1 != 0
   225  }
   226  
   227  func (m regMask) String() string {
   228  	s := ""
   229  	for r := register(0); !m.empty(); r++ {
   230  		if !m.hasReg(r) {
   231  			continue
   232  		}
   233  		m = m.removeReg(r)
   234  		if s != "" {
   235  			s += " "
   236  		}
   237  		s += fmt.Sprintf("r%d", r)
   238  	}
   239  	return s
   240  }
   241  
   242  func (s *regAllocState) RegMaskString(m regMask) string {
   243  	str := ""
   244  	for r := register(0); !m.empty(); r++ {
   245  		if !m.hasReg(r) {
   246  			continue
   247  		}
   248  		m = m.removeReg(r)
   249  		if str != "" {
   250  			str += " "
   251  		}
   252  		str += s.registers[r].String()
   253  	}
   254  	return str
   255  }
   256  
   257  // countRegs returns the number of set bits in the register mask.
   258  func countRegs(r regMask) int {
   259  	return bits.OnesCount64(r.v1) + bits.OnesCount64(r.v2)
   260  }
   261  
   262  // pickReg picks a register from the register mask.
   263  func (s *regAllocState) pickReg(rm regMask) register {
   264  	if s.f.Config.ctxt.Arch.Arch == sys.ArchRISCV64 {
   265  		// Prefer x8-x15 and f8-f15 to enable increased use of compressed instructions.
   266  		riscv64CompressedMask := rm.intersect(regMask{v1: 0x0000ff000000ff00})
   267  		if !riscv64CompressedMask.empty() {
   268  			rm = riscv64CompressedMask
   269  		}
   270  	}
   271  	return rm.pickReg()
   272  }
   273  
   274  type use struct {
   275  	// distance from start of the block to a use of a value
   276  	//   dist == 0                 used by first instruction in block
   277  	//   dist == len(b.Values)-1   used by last instruction in block
   278  	//   dist == len(b.Values)     used by block's control value
   279  	//   dist  > len(b.Values)     used by a subsequent block
   280  	dist int32
   281  	pos  src.XPos // source position of the use
   282  	next *use     // linked list of uses of a value in nondecreasing dist order
   283  }
   284  
   285  // A valState records the register allocation state for a (pre-regalloc) value.
   286  type valState struct {
   287  	regs              regMask // the set of registers holding a Value (usually just one)
   288  	uses              *use    // list of uses in this block
   289  	spill             *Value  // spilled copy of the Value (if any)
   290  	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
   291  	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
   292  	needReg           bool    // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags()
   293  	rematerializeable bool    // cached value of v.rematerializeable()
   294  }
   295  
   296  type regState struct {
   297  	v *Value // Original (preregalloc) Value stored in this register.
   298  	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
   299  	// If a register is unused, v==c==nil
   300  }
   301  
   302  type regAllocState struct {
   303  	f *Func
   304  
   305  	sdom        SparseTree
   306  	registers   []Register
   307  	numRegs     register
   308  	SPReg       register
   309  	SBReg       register
   310  	GReg        register
   311  	ZeroIntReg  register
   312  	allocatable regMask
   313  
   314  	// live values at the end of each block.  live[b.ID] is a list of value IDs
   315  	// which are live at the end of b, together with a count of how many instructions
   316  	// forward to the next use.
   317  	live [][]liveInfo
   318  	// desired register assignments at the end of each block.
   319  	// Note that this is a static map computed before allocation occurs. Dynamic
   320  	// register desires (from partially completed allocations) will trump
   321  	// this information.
   322  	desired []desiredState
   323  
   324  	// current state of each (preregalloc) Value
   325  	values []valState
   326  
   327  	// ID of SP, SB values
   328  	sp, sb ID
   329  
   330  	// For each Value, map from its value ID back to the
   331  	// preregalloc Value it was derived from.
   332  	orig []*Value
   333  
   334  	// current state of each register.
   335  	// Includes only registers in allocatable.
   336  	regs []regState
   337  
   338  	// registers that contain values which can't be kicked out
   339  	nospill regMask
   340  
   341  	// mask of registers currently in use
   342  	used regMask
   343  
   344  	// mask of registers used since the start of the current block
   345  	usedSinceBlockStart regMask
   346  
   347  	// mask of registers used in the current instruction
   348  	tmpused regMask
   349  
   350  	// current block we're working on
   351  	curBlock *Block
   352  
   353  	// cache of use records
   354  	freeUseRecords *use
   355  
   356  	// endRegs[blockid] is the register state at the end of each block.
   357  	// encoded as a set of endReg records.
   358  	endRegs [][]endReg
   359  
   360  	// startRegs[blockid] is the register state at the start of merge blocks.
   361  	// saved state does not include the state of phi ops in the block.
   362  	startRegs [][]startReg
   363  
   364  	// startRegsMask is a mask of the registers in startRegs[curBlock.ID].
   365  	// Registers dropped from startRegsMask are later synchronoized back to
   366  	// startRegs by dropping from there as well.
   367  	startRegsMask regMask
   368  
   369  	// spillLive[blockid] is the set of live spills at the end of each block
   370  	spillLive [][]ID
   371  
   372  	// a set of copies we generated to move things around, and
   373  	// whether it is used in shuffle. Unused copies will be deleted.
   374  	copies map[*Value]bool
   375  
   376  	loopnest *loopnest
   377  
   378  	// choose a good order in which to visit blocks for allocation purposes.
   379  	visitOrder []*Block
   380  
   381  	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
   382  	blockOrder []int32
   383  
   384  	// whether to insert instructions that clobber dead registers at call sites
   385  	doClobber bool
   386  
   387  	// For each instruction index in a basic block, the index of the next call
   388  	// at or after that instruction index.
   389  	// If there is no next call, returns maxInt32.
   390  	// nextCall for a call instruction points to itself.
   391  	// (Indexes and results are pre-regalloc.)
   392  	nextCall []int32
   393  
   394  	// Index of the instruction we're currently working on.
   395  	// Index is expressed in terms of the pre-regalloc b.Values list.
   396  	curIdx int
   397  }
   398  
   399  type endReg struct {
   400  	r register
   401  	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
   402  	c *Value // cached version of the value
   403  }
   404  
   405  type startReg struct {
   406  	r   register
   407  	v   *Value   // pre-regalloc value needed in this register
   408  	c   *Value   // cached version of the value
   409  	pos src.XPos // source position of use of this register
   410  }
   411  
   412  // freeReg frees up register r. Any current user of r is kicked out.
   413  func (s *regAllocState) freeReg(r register) {
   414  	if !s.allocatable.hasReg(r) && !s.isGReg(r) {
   415  		return
   416  	}
   417  	v := s.regs[r].v
   418  	if v == nil {
   419  		s.f.Fatalf("tried to free an already free register %d\n", r)
   420  	}
   421  
   422  	// Mark r as unused.
   423  	if s.f.pass.debug > regDebug {
   424  		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
   425  	}
   426  	s.regs[r] = regState{}
   427  	s.values[v.ID].regs = s.values[v.ID].regs.removeReg(r)
   428  	s.used = s.used.removeReg(r)
   429  }
   430  
   431  // freeRegs frees up all registers listed in m.
   432  func (s *regAllocState) freeRegs(m regMask) {
   433  	for !m.intersect(s.used).empty() {
   434  		s.freeReg(s.pickReg(m.intersect(s.used)))
   435  	}
   436  }
   437  
   438  // clobberRegs inserts instructions that clobber registers listed in m.
   439  func (s *regAllocState) clobberRegs(m regMask) {
   440  	m = m.intersect(s.allocatable.intersect(s.f.Config.gpRegMask)) // only integer register can contain pointers, only clobber them
   441  	for !m.empty() {
   442  		r := s.pickReg(m)
   443  		m = m.removeReg(r)
   444  		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
   445  		s.f.setHome(x, &s.registers[r])
   446  	}
   447  }
   448  
   449  // setOrig records that c's original value is the same as
   450  // v's original value.
   451  func (s *regAllocState) setOrig(c *Value, v *Value) {
   452  	if int(c.ID) >= cap(s.orig) {
   453  		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
   454  		copy(x, s.orig)
   455  		s.f.Cache.freeValueSlice(s.orig)
   456  		s.orig = x
   457  	}
   458  	for int(c.ID) >= len(s.orig) {
   459  		s.orig = append(s.orig, nil)
   460  	}
   461  	if s.orig[c.ID] != nil {
   462  		s.f.Fatalf("orig value set twice %s %s", c, v)
   463  	}
   464  	s.orig[c.ID] = s.orig[v.ID]
   465  }
   466  
   467  // assignReg assigns register r to hold c, a copy of v.
   468  // r must be unused.
   469  func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
   470  	if s.f.pass.debug > regDebug {
   471  		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
   472  	}
   473  	// Allocate v to r.
   474  	s.values[v.ID].regs = s.values[v.ID].regs.addReg(r)
   475  	s.f.setHome(c, &s.registers[r])
   476  
   477  	// Allocate r to v.
   478  	if !s.allocatable.hasReg(r) && !s.isGReg(r) {
   479  		return
   480  	}
   481  	if s.regs[r].v != nil {
   482  		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
   483  	}
   484  	s.regs[r] = regState{v, c}
   485  	s.used = s.used.addReg(r)
   486  }
   487  
   488  // allocReg chooses a register from the set of registers in mask.
   489  // If there is no unused register, a Value will be kicked out of
   490  // a register to make room.
   491  func (s *regAllocState) allocReg(mask regMask, v *Value) register {
   492  	if v.OnWasmStack {
   493  		return noRegister
   494  	}
   495  
   496  	mask = mask.intersect(s.allocatable)
   497  	mask = mask.minus(s.nospill)
   498  	if mask.empty() {
   499  		s.f.Fatalf("no register available for %s", v.LongString())
   500  	}
   501  
   502  	// Pick an unused register if one is available.
   503  	if !mask.minus(s.used).empty() {
   504  		r := s.pickReg(mask.minus(s.used))
   505  		s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r)
   506  		return r
   507  	}
   508  
   509  	// Pick a value to spill. Spill the value with the
   510  	// farthest-in-the-future use.
   511  	// TODO: Prefer registers with already spilled Values?
   512  	// TODO: Modify preference using affinity graph.
   513  	// TODO: if a single value is in multiple registers, spill one of them
   514  	// before spilling a value in just a single register.
   515  
   516  	// Find a register to spill. We spill the register containing the value
   517  	// whose next use is as far in the future as possible.
   518  	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
   519  	var r register
   520  	maxuse := int32(-1)
   521  	for t := register(0); t < s.numRegs; t++ {
   522  		if !mask.hasReg(t) {
   523  			continue
   524  		}
   525  		v := s.regs[t].v
   526  		if n := s.values[v.ID].uses.dist; n > maxuse {
   527  			// v's next use is farther in the future than any value
   528  			// we've seen so far. A new best spill candidate.
   529  			r = t
   530  			maxuse = n
   531  		}
   532  	}
   533  	if maxuse == -1 {
   534  		s.f.Fatalf("couldn't find register to spill")
   535  	}
   536  
   537  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   538  		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
   539  		// So if there is still a free register, the allocation should have picked that one in the first place instead of
   540  		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
   541  		s.freeReg(r)
   542  		return r
   543  	}
   544  
   545  	// Try to move it around before kicking out, if there is a free register.
   546  	// We generate a Copy and record it. It will be deleted if never used.
   547  	v2 := s.regs[r].v
   548  	m := s.compatRegs(v2.Type).minus(s.used).minus(s.tmpused).removeReg(r)
   549  	if !m.empty() && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
   550  		s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r)
   551  		r2 := s.pickReg(m)
   552  		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
   553  		s.copies[c] = false
   554  		if s.f.pass.debug > regDebug {
   555  			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
   556  		}
   557  		s.setOrig(c, v2)
   558  		s.assignReg(r2, v2, c)
   559  	}
   560  
   561  	// If the evicted register isn't used between the start of the block
   562  	// and now then there is no reason to even request it on entry. We can
   563  	// drop from startRegs in that case.
   564  	if !s.usedSinceBlockStart.hasReg(r) {
   565  		if s.startRegsMask.hasReg(r) {
   566  			if s.f.pass.debug > regDebug {
   567  				fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
   568  			}
   569  			s.startRegsMask = s.startRegsMask.removeReg(r)
   570  		}
   571  	}
   572  
   573  	s.freeReg(r)
   574  	s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r)
   575  	return r
   576  }
   577  
   578  // makeSpill returns a Value which represents the spilled value of v.
   579  // b is the block in which the spill is used.
   580  func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
   581  	vi := &s.values[v.ID]
   582  	if vi.spill != nil {
   583  		// Final block not known - keep track of subtree where restores reside.
   584  		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
   585  		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
   586  		return vi.spill
   587  	}
   588  	// Make a spill for v. We don't know where we want
   589  	// to put it yet, so we leave it blockless for now.
   590  	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
   591  	// We also don't know what the spill's arg will be.
   592  	// Leave it argless for now.
   593  	s.setOrig(spill, v)
   594  	vi.spill = spill
   595  	vi.restoreMin = s.sdom[b.ID].entry
   596  	vi.restoreMax = s.sdom[b.ID].exit
   597  	return spill
   598  }
   599  
   600  // allocValToReg allocates v to a register selected from regMask and
   601  // returns the register copy of v. Any previous user is kicked out and spilled
   602  // (if necessary). Load code is added at the current pc. If nospill is set the
   603  // allocated register is marked nospill so the assignment cannot be
   604  // undone until the caller allows it by clearing nospill. Returns a
   605  // *Value which is either v or a copy of v allocated to the chosen register.
   606  func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
   607  	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
   608  		c := v.copyIntoWithXPos(s.curBlock, pos)
   609  		c.OnWasmStack = true
   610  		s.setOrig(c, v)
   611  		return c
   612  	}
   613  	if v.OnWasmStack {
   614  		return v
   615  	}
   616  
   617  	vi := &s.values[v.ID]
   618  	pos = pos.WithNotStmt()
   619  	// Check if v is already in a requested register.
   620  	if !mask.intersect(vi.regs).empty() {
   621  		mask = mask.intersect(vi.regs)
   622  		r := s.pickReg(mask)
   623  		if mask.hasReg(s.SPReg) {
   624  			// Prefer the stack pointer if it is allowed.
   625  			// (Needed because the op might have an Aux symbol
   626  			// that needs SP as its base.)
   627  			r = s.SPReg
   628  		}
   629  		if !s.allocatable.hasReg(r) {
   630  			return v // v is in a fixed register
   631  		}
   632  		if s.regs[r].v != v || s.regs[r].c == nil {
   633  			panic("bad register state")
   634  		}
   635  		if nospill {
   636  			s.nospill = s.nospill.addReg(r)
   637  		}
   638  		s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r)
   639  		return s.regs[r].c
   640  	}
   641  
   642  	var r register
   643  	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
   644  	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
   645  	if !onWasmStack {
   646  		// Allocate a register.
   647  		r = s.allocReg(mask, v)
   648  	}
   649  
   650  	// Allocate v to the new register.
   651  	var c *Value
   652  	if !vi.regs.empty() {
   653  		// Copy from a register that v is already in.
   654  		var current *Value
   655  		if !vi.regs.minus(s.allocatable).empty() {
   656  			// v is in a fixed register, prefer that
   657  			current = v
   658  		} else {
   659  			r2 := s.pickReg(vi.regs)
   660  			if s.regs[r2].v != v {
   661  				panic("bad register state")
   662  			}
   663  			current = s.regs[r2].c
   664  			s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r2)
   665  		}
   666  		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
   667  	} else if v.rematerializeable() {
   668  		// Rematerialize instead of loading from the spill location.
   669  		c = v.copyIntoWithXPos(s.curBlock, pos)
   670  		// We need to consider its output mask and potentially issue a Copy
   671  		// if there are register mask conflicts.
   672  		// This currently happens for the SIMD package only between GP and FP
   673  		// register. Because Intel's vector extension can put integer value into
   674  		// FP, which is seen as a vector. Example instruction: VPSLL[BWDQ]
   675  		// Because GP and FP masks do not overlap, mask & outputMask == 0
   676  		// detects this situation thoroughly.
   677  		sourceMask := s.regspec(c).outputs[0].regs
   678  		if mask.intersect(sourceMask).empty() && !onWasmStack {
   679  			s.setOrig(c, v)
   680  			s.assignReg(s.allocReg(sourceMask, v), v, c)
   681  			// v.Type for the new OpCopy is likely wrong and it might delay the problem
   682  			// until ssa to asm lowering, which might need the types to generate the right
   683  			// assembly for OpCopy. For Intel's GP to FP move, it happens to be that
   684  			// MOV instruction has such a variant so it happens to be right.
   685  			// But it's unclear for other architectures or situations, and the problem
   686  			// might be exposed when the assembler sees illegal instructions.
   687  			// Right now make we still pick v.Type, because at least its size should be correct
   688  			// for the rematerialization case the amd64 SIMD package exposed.
   689  			// TODO: We might need to figure out a way to find the correct type or make
   690  			// the asm lowering use reg info only for OpCopy.
   691  			c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
   692  		}
   693  	} else {
   694  		// Load v from its spill location.
   695  		spill := s.makeSpill(v, s.curBlock)
   696  		if s.f.pass.debug > logSpills {
   697  			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
   698  		}
   699  		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
   700  	}
   701  
   702  	s.setOrig(c, v)
   703  
   704  	if onWasmStack {
   705  		c.OnWasmStack = true
   706  		return c
   707  	}
   708  
   709  	s.assignReg(r, v, c)
   710  	if c.Op == OpLoadReg && s.isGReg(r) {
   711  		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
   712  	}
   713  	if nospill {
   714  		s.nospill = s.nospill.addReg(r)
   715  	}
   716  	return c
   717  }
   718  
   719  // isLeaf reports whether f performs any calls.
   720  func isLeaf(f *Func) bool {
   721  	for _, b := range f.Blocks {
   722  		for _, v := range b.Values {
   723  			if v.Op.IsCall() && !v.Op.IsTailCall() {
   724  				// tail call is not counted as it does not save the return PC or need a frame
   725  				return false
   726  			}
   727  		}
   728  	}
   729  	return true
   730  }
   731  
   732  // needRegister reports whether v needs a register.
   733  func (v *Value) needRegister() bool {
   734  	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
   735  }
   736  
   737  func (s *regAllocState) init(f *Func) {
   738  	s.f = f
   739  	s.f.RegAlloc = s.f.Cache.locs[:0]
   740  	s.registers = f.Config.registers
   741  	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask{})*8) {
   742  		s.f.Fatalf("bad number of registers: %d", nr)
   743  	} else {
   744  		s.numRegs = register(nr)
   745  	}
   746  	// Locate SP, SB, and g registers.
   747  	s.SPReg = noRegister
   748  	s.SBReg = noRegister
   749  	s.GReg = noRegister
   750  	s.ZeroIntReg = noRegister
   751  	for r := register(0); r < s.numRegs; r++ {
   752  		switch s.registers[r].String() {
   753  		case "SP":
   754  			s.SPReg = r
   755  		case "SB":
   756  			s.SBReg = r
   757  		case "g":
   758  			s.GReg = r
   759  		case "ZERO": // TODO: arch-specific?
   760  			s.ZeroIntReg = r
   761  		}
   762  	}
   763  	// Make sure we found all required registers.
   764  	switch noRegister {
   765  	case s.SPReg:
   766  		s.f.Fatalf("no SP register found")
   767  	case s.SBReg:
   768  		s.f.Fatalf("no SB register found")
   769  	case s.GReg:
   770  		if f.Config.hasGReg {
   771  			s.f.Fatalf("no g register found")
   772  		}
   773  	}
   774  
   775  	// Figure out which registers we're allowed to use.
   776  	s.allocatable = s.f.Config.gpRegMask.union(s.f.Config.fpRegMask).union(s.f.Config.specialRegMask)
   777  	s.allocatable = s.allocatable.removeReg(s.SPReg)
   778  	s.allocatable = s.allocatable.removeReg(s.SBReg)
   779  	if s.f.Config.hasGReg {
   780  		s.allocatable = s.allocatable.removeReg(s.GReg)
   781  	}
   782  	if s.ZeroIntReg != noRegister {
   783  		s.allocatable = s.allocatable.removeReg(s.ZeroIntReg)
   784  	}
   785  	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
   786  		s.allocatable = s.allocatable.removeReg(register(s.f.Config.FPReg))
   787  	}
   788  	if s.f.Config.LinkReg != -1 {
   789  		if isLeaf(f) {
   790  			// Leaf functions don't save/restore the link register.
   791  			s.allocatable = s.allocatable.removeReg(register(s.f.Config.LinkReg))
   792  		}
   793  	}
   794  	if s.f.Config.ctxt.Flag_dynlink {
   795  		switch s.f.Config.arch {
   796  		case "386":
   797  			// nothing to do.
   798  			// Note that for Flag_shared (position independent code)
   799  			// we do need to be careful, but that carefulness is hidden
   800  			// in the rewrite rules so we always have a free register
   801  			// available for global load/stores. See _gen/386.rules (search for Flag_shared).
   802  		case "amd64":
   803  			s.allocatable = s.allocatable.removeReg(15) // R15
   804  		case "arm":
   805  			s.allocatable = s.allocatable.removeReg(9) // R9
   806  		case "arm64":
   807  			// nothing to do
   808  		case "loong64": // R2 (aka TP) already reserved.
   809  			// nothing to do
   810  		case "ppc64", "ppc64le": // R2 already reserved.
   811  			// nothing to do
   812  		case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved.
   813  			// nothing to do
   814  		case "s390x":
   815  			s.allocatable = s.allocatable.removeReg(11) // R11
   816  		default:
   817  			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
   818  		}
   819  	}
   820  
   821  	// Linear scan register allocation can be influenced by the order in which blocks appear.
   822  	// Decouple the register allocation order from the generated block order.
   823  	// This also creates an opportunity for experiments to find a better order.
   824  	s.visitOrder = layoutRegallocOrder(f)
   825  
   826  	// Compute block order. This array allows us to distinguish forward edges
   827  	// from backward edges and compute how far they go.
   828  	s.blockOrder = make([]int32, f.NumBlocks())
   829  	for i, b := range s.visitOrder {
   830  		s.blockOrder[b.ID] = int32(i)
   831  	}
   832  
   833  	s.regs = make([]regState, s.numRegs)
   834  	nv := f.NumValues()
   835  	if cap(s.f.Cache.regallocValues) >= nv {
   836  		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
   837  	} else {
   838  		s.f.Cache.regallocValues = make([]valState, nv)
   839  	}
   840  	s.values = s.f.Cache.regallocValues
   841  	s.orig = s.f.Cache.allocValueSlice(nv)
   842  	s.copies = make(map[*Value]bool)
   843  	for _, b := range s.visitOrder {
   844  		for _, v := range b.Values {
   845  			if v.needRegister() {
   846  				s.values[v.ID].needReg = true
   847  				s.values[v.ID].rematerializeable = v.rematerializeable()
   848  				s.orig[v.ID] = v
   849  			}
   850  			// Note: needReg is false for values returning Tuple types.
   851  			// Instead, we mark the corresponding Selects as needReg.
   852  		}
   853  	}
   854  	s.computeLive()
   855  
   856  	s.endRegs = make([][]endReg, f.NumBlocks())
   857  	s.startRegs = make([][]startReg, f.NumBlocks())
   858  	s.spillLive = make([][]ID, f.NumBlocks())
   859  	s.sdom = f.Sdom()
   860  
   861  	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
   862  	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   863  		canLiveOnStack := f.newSparseSet(f.NumValues())
   864  		defer f.retSparseSet(canLiveOnStack)
   865  		for _, b := range f.Blocks {
   866  			// New block. Clear candidate set.
   867  			canLiveOnStack.clear()
   868  			for _, c := range b.ControlValues() {
   869  				if c.Uses == 1 && !opcodeTable[c.Op].generic {
   870  					canLiveOnStack.add(c.ID)
   871  				}
   872  			}
   873  			// Walking backwards.
   874  			for i := len(b.Values) - 1; i >= 0; i-- {
   875  				v := b.Values[i]
   876  				if canLiveOnStack.contains(v.ID) {
   877  					v.OnWasmStack = true
   878  				} else {
   879  					// Value can not live on stack. Values are not allowed to be reordered, so clear candidate set.
   880  					canLiveOnStack.clear()
   881  				}
   882  				for _, arg := range v.Args {
   883  					// Value can live on the stack if:
   884  					// - it is only used once
   885  					// - it is used in the same basic block
   886  					// - it is not a "mem" value
   887  					// - it is a WebAssembly op
   888  					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
   889  						canLiveOnStack.add(arg.ID)
   890  					}
   891  				}
   892  			}
   893  		}
   894  	}
   895  
   896  	// The clobberdeadreg experiment inserts code to clobber dead registers
   897  	// at call sites.
   898  	// Ignore huge functions to avoid doing too much work.
   899  	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
   900  		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
   901  		s.doClobber = true
   902  	}
   903  }
   904  
   905  func (s *regAllocState) close() {
   906  	s.f.Cache.freeValueSlice(s.orig)
   907  }
   908  
   909  // Adds a use record for id at distance dist from the start of the block.
   910  // All calls to addUse must happen with nonincreasing dist.
   911  func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
   912  	r := s.freeUseRecords
   913  	if r != nil {
   914  		s.freeUseRecords = r.next
   915  	} else {
   916  		r = &use{}
   917  	}
   918  	r.dist = dist
   919  	r.pos = pos
   920  	r.next = s.values[id].uses
   921  	s.values[id].uses = r
   922  	if r.next != nil && dist > r.next.dist {
   923  		s.f.Fatalf("uses added in wrong order")
   924  	}
   925  }
   926  
   927  // advanceUses advances the uses of v's args from the state before v to the state after v.
   928  // Any values which have no more uses are deallocated from registers.
   929  func (s *regAllocState) advanceUses(v *Value) {
   930  	for _, a := range v.Args {
   931  		if !s.values[a.ID].needReg {
   932  			continue
   933  		}
   934  		ai := &s.values[a.ID]
   935  		r := ai.uses
   936  		ai.uses = r.next
   937  		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
   938  			// Value is dead (or is not used again until after a call), free all registers that hold it.
   939  			s.freeRegs(ai.regs)
   940  		}
   941  		r.next = s.freeUseRecords
   942  		s.freeUseRecords = r
   943  	}
   944  	s.dropIfUnused(v)
   945  }
   946  
   947  // Drop v from registers if it isn't used again, or its only uses are after
   948  // a call instruction.
   949  func (s *regAllocState) dropIfUnused(v *Value) {
   950  	if !s.values[v.ID].needReg {
   951  		return
   952  	}
   953  	vi := &s.values[v.ID]
   954  	r := vi.uses
   955  	nextCall := s.nextCall[s.curIdx]
   956  	if opcodeTable[v.Op].call {
   957  		if s.curIdx == len(s.nextCall)-1 {
   958  			nextCall = math.MaxInt32
   959  		} else {
   960  			nextCall = s.nextCall[s.curIdx+1]
   961  		}
   962  	}
   963  	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > nextCall) {
   964  		s.freeRegs(vi.regs)
   965  	}
   966  }
   967  
   968  // liveAfterCurrentInstruction reports whether v is live after
   969  // the current instruction is completed.  v must be used by the
   970  // current instruction.
   971  func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
   972  	u := s.values[v.ID].uses
   973  	if u == nil {
   974  		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
   975  	}
   976  	d := u.dist
   977  	for u != nil && u.dist == d {
   978  		u = u.next
   979  	}
   980  	return u != nil && u.dist > d
   981  }
   982  
   983  // Sets the state of the registers to that encoded in regs.
   984  func (s *regAllocState) setState(regs []endReg) {
   985  	s.freeRegs(s.used)
   986  	for _, x := range regs {
   987  		s.assignReg(x.r, x.v, x.c)
   988  	}
   989  }
   990  
   991  // compatRegs returns the set of registers which can store a type t.
   992  func (s *regAllocState) compatRegs(t *types.Type) regMask {
   993  	var m regMask
   994  	if t.IsTuple() || t.IsFlags() {
   995  		return regMask{}
   996  	}
   997  	if t.IsSIMD() {
   998  		if t.Size() > 8 {
   999  			return s.f.Config.fpRegMask.intersect(s.allocatable)
  1000  		} else {
  1001  			// K mask
  1002  			return s.f.Config.gpRegMask.intersect(s.allocatable)
  1003  		}
  1004  	}
  1005  	if t.IsFloat() || t == types.TypeInt128 {
  1006  		if t.Kind() == types.TFLOAT32 && !s.f.Config.fp32RegMask.empty() {
  1007  			m = s.f.Config.fp32RegMask
  1008  		} else if t.Kind() == types.TFLOAT64 && !s.f.Config.fp64RegMask.empty() {
  1009  			m = s.f.Config.fp64RegMask
  1010  		} else {
  1011  			m = s.f.Config.fpRegMask
  1012  		}
  1013  	} else {
  1014  		m = s.f.Config.gpRegMask
  1015  	}
  1016  	return m.intersect(s.allocatable)
  1017  }
  1018  
  1019  // regspec returns the regInfo for operation op.
  1020  func (s *regAllocState) regspec(v *Value) regInfo {
  1021  	op := v.Op
  1022  	if op == OpConvert {
  1023  		// OpConvert is a generic op, so it doesn't have a
  1024  		// register set in the static table. It can use any
  1025  		// allocatable integer register.
  1026  		m := s.allocatable.intersect(s.f.Config.gpRegMask)
  1027  		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
  1028  	}
  1029  	if op == OpArgIntReg {
  1030  		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
  1031  		return regInfo{outputs: []outputInfo{{regs: regMaskAt(register(reg))}}}
  1032  	}
  1033  	if op == OpArgFloatReg {
  1034  		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
  1035  		return regInfo{outputs: []outputInfo{{regs: regMaskAt(register(reg))}}}
  1036  	}
  1037  	if op.IsCall() {
  1038  		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
  1039  			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
  1040  		}
  1041  	}
  1042  	if op == OpMakeResult && s.f.OwnAux.reg != nil {
  1043  		return *s.f.OwnAux.ResultReg(s.f.Config)
  1044  	}
  1045  	return opcodeTable[op].reg
  1046  }
  1047  
  1048  func (s *regAllocState) isGReg(r register) bool {
  1049  	return s.f.Config.hasGReg && s.GReg == r
  1050  }
  1051  
  1052  // Dummy value used to represent the value being held in a temporary register.
  1053  var tmpVal Value
  1054  
  1055  func (s *regAllocState) regalloc(f *Func) {
  1056  	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
  1057  	defer f.retSparseSet(regValLiveSet)
  1058  	var oldSched []*Value
  1059  	var phis []*Value
  1060  	var phiRegs []register
  1061  	var args []*Value
  1062  
  1063  	// Data structure used for computing desired registers.
  1064  	var desired desiredState
  1065  	desiredSecondReg := map[ID][4]register{} // desired register allocation for 2nd part of a tuple
  1066  
  1067  	// Desired registers for inputs & outputs for each instruction in the block.
  1068  	type dentry struct {
  1069  		out [4]register    // desired output registers
  1070  		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
  1071  	}
  1072  	var dinfo []dentry
  1073  
  1074  	if f.Entry != f.Blocks[0] {
  1075  		f.Fatalf("entry block must be first")
  1076  	}
  1077  
  1078  	for _, b := range s.visitOrder {
  1079  		if s.f.pass.debug > regDebug {
  1080  			fmt.Printf("Begin processing block %v\n", b)
  1081  		}
  1082  		s.curBlock = b
  1083  		s.startRegsMask = regMask{}
  1084  		s.usedSinceBlockStart = regMask{}
  1085  		clear(desiredSecondReg)
  1086  
  1087  		// Initialize regValLiveSet and uses fields for this block.
  1088  		// Walk backwards through the block doing liveness analysis.
  1089  		regValLiveSet.clear()
  1090  		if s.live != nil {
  1091  			for _, e := range s.live[b.ID] {
  1092  				s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
  1093  				regValLiveSet.add(e.ID)
  1094  			}
  1095  		}
  1096  		for _, v := range b.ControlValues() {
  1097  			if s.values[v.ID].needReg {
  1098  				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
  1099  				regValLiveSet.add(v.ID)
  1100  			}
  1101  		}
  1102  		if cap(s.nextCall) < len(b.Values) {
  1103  			c := cap(s.nextCall)
  1104  			s.nextCall = append(s.nextCall[:c], make([]int32, len(b.Values)-c)...)
  1105  		} else {
  1106  			s.nextCall = s.nextCall[:len(b.Values)]
  1107  		}
  1108  		var nextCall int32 = math.MaxInt32
  1109  		for i := len(b.Values) - 1; i >= 0; i-- {
  1110  			v := b.Values[i]
  1111  			regValLiveSet.remove(v.ID)
  1112  			if v.Op == OpPhi {
  1113  				// Remove v from the live set, but don't add
  1114  				// any inputs. This is the state the len(b.Preds)>1
  1115  				// case below desires; it wants to process phis specially.
  1116  				s.nextCall[i] = nextCall
  1117  				continue
  1118  			}
  1119  			if opcodeTable[v.Op].call {
  1120  				// Function call clobbers all the registers but SP and SB.
  1121  				regValLiveSet.clear()
  1122  				if s.sp != 0 && s.values[s.sp].uses != nil {
  1123  					regValLiveSet.add(s.sp)
  1124  				}
  1125  				if s.sb != 0 && s.values[s.sb].uses != nil {
  1126  					regValLiveSet.add(s.sb)
  1127  				}
  1128  				nextCall = int32(i)
  1129  			}
  1130  			for _, a := range v.Args {
  1131  				if !s.values[a.ID].needReg {
  1132  					continue
  1133  				}
  1134  				s.addUse(a.ID, int32(i), v.Pos)
  1135  				regValLiveSet.add(a.ID)
  1136  			}
  1137  			s.nextCall[i] = nextCall
  1138  		}
  1139  		if s.f.pass.debug > regDebug {
  1140  			fmt.Printf("use distances for %s\n", b)
  1141  			for i := range s.values {
  1142  				vi := &s.values[i]
  1143  				u := vi.uses
  1144  				if u == nil {
  1145  					continue
  1146  				}
  1147  				fmt.Printf("  v%d:", i)
  1148  				for u != nil {
  1149  					fmt.Printf(" %d", u.dist)
  1150  					u = u.next
  1151  				}
  1152  				fmt.Println()
  1153  			}
  1154  		}
  1155  
  1156  		// Make a copy of the block schedule so we can generate a new one in place.
  1157  		// We make a separate copy for phis and regular values.
  1158  		nphi := 0
  1159  		for _, v := range b.Values {
  1160  			if v.Op != OpPhi {
  1161  				break
  1162  			}
  1163  			nphi++
  1164  		}
  1165  		phis = append(phis[:0], b.Values[:nphi]...)
  1166  		oldSched = append(oldSched[:0], b.Values[nphi:]...)
  1167  		b.Values = b.Values[:0]
  1168  
  1169  		// Initialize start state of block.
  1170  		if b == f.Entry {
  1171  			// Regalloc state is empty to start.
  1172  			if nphi > 0 {
  1173  				f.Fatalf("phis in entry block")
  1174  			}
  1175  		} else if len(b.Preds) == 1 {
  1176  			// Start regalloc state with the end state of the previous block.
  1177  			s.setState(s.endRegs[b.Preds[0].b.ID])
  1178  			if nphi > 0 {
  1179  				f.Fatalf("phis in single-predecessor block")
  1180  			}
  1181  			// Drop any values which are no longer live.
  1182  			// This may happen because at the end of p, a value may be
  1183  			// live but only used by some other successor of p.
  1184  			for r := register(0); r < s.numRegs; r++ {
  1185  				v := s.regs[r].v
  1186  				if v != nil && !regValLiveSet.contains(v.ID) {
  1187  					s.freeReg(r)
  1188  				}
  1189  			}
  1190  		} else {
  1191  			// This is the complicated case. We have more than one predecessor,
  1192  			// which means we may have Phi ops.
  1193  
  1194  			// Start with the final register state of the predecessor with least spill values.
  1195  			// This is based on the following points:
  1196  			// 1, The less spill value indicates that the register pressure of this path is smaller,
  1197  			//    so the values of this block are more likely to be allocated to registers.
  1198  			// 2, Avoid the predecessor that contains the function call, because the predecessor that
  1199  			//    contains the function call usually generates a lot of spills and lose the previous
  1200  			//    allocation state.
  1201  			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
  1202  			// an impact on the code size and compiler speed. But it is not easy to find a simple
  1203  			// and efficient method that combines multiple factors.
  1204  			idx := -1
  1205  			for i, p := range b.Preds {
  1206  				// If the predecessor has not been visited yet, skip it because its end state
  1207  				// (redRegs and spillLive) has not been computed yet.
  1208  				pb := p.b
  1209  				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
  1210  					continue
  1211  				}
  1212  				if idx == -1 {
  1213  					idx = i
  1214  					continue
  1215  				}
  1216  				pSel := b.Preds[idx].b
  1217  				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
  1218  					idx = i
  1219  				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
  1220  					// Use a bit of likely information. After critical pass, pb and pSel must
  1221  					// be plain blocks, so check edge pb->pb.Preds instead of edge pb->b.
  1222  					// TODO: improve the prediction of the likely predecessor. The following
  1223  					// method is only suitable for the simplest cases. For complex cases,
  1224  					// the prediction may be inaccurate, but this does not affect the
  1225  					// correctness of the program.
  1226  					// According to the layout algorithm, the predecessor with the
  1227  					// smaller blockOrder is the true branch, and the test results show
  1228  					// that it is better to choose the predecessor with a smaller
  1229  					// blockOrder than no choice.
  1230  					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
  1231  						idx = i
  1232  					}
  1233  				}
  1234  			}
  1235  			if idx < 0 {
  1236  				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
  1237  			}
  1238  			p := b.Preds[idx].b
  1239  			s.setState(s.endRegs[p.ID])
  1240  
  1241  			if s.f.pass.debug > regDebug {
  1242  				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
  1243  				for _, x := range s.endRegs[p.ID] {
  1244  					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
  1245  				}
  1246  			}
  1247  
  1248  			// Decide on registers for phi ops. Use the registers determined
  1249  			// by the primary predecessor if we can.
  1250  			// TODO: pick best of (already processed) predecessors?
  1251  			// Majority vote? Deepest nesting level?
  1252  			phiRegs = phiRegs[:0]
  1253  			var phiUsed regMask
  1254  
  1255  			for _, v := range phis {
  1256  				if !s.values[v.ID].needReg {
  1257  					phiRegs = append(phiRegs, noRegister)
  1258  					continue
  1259  				}
  1260  				a := v.Args[idx]
  1261  				// Some instructions target not-allocatable registers.
  1262  				// They're not suitable for further (phi-function) allocation.
  1263  				m := s.values[a.ID].regs.minus(phiUsed).intersect(s.allocatable)
  1264  				if !m.empty() {
  1265  					r := s.pickReg(m)
  1266  					phiUsed = phiUsed.addReg(r)
  1267  					phiRegs = append(phiRegs, r)
  1268  				} else {
  1269  					phiRegs = append(phiRegs, noRegister)
  1270  				}
  1271  			}
  1272  
  1273  			// Second pass - deallocate all in-register phi inputs.
  1274  			for i, v := range phis {
  1275  				if !s.values[v.ID].needReg {
  1276  					continue
  1277  				}
  1278  				a := v.Args[idx]
  1279  				r := phiRegs[i]
  1280  				if r == noRegister {
  1281  					continue
  1282  				}
  1283  				if regValLiveSet.contains(a.ID) {
  1284  					// Input value is still live (it is used by something other than Phi).
  1285  					// Try to move it around before kicking out, if there is a free register.
  1286  					// We generate a Copy in the predecessor block and record it. It will be
  1287  					// deleted later if never used.
  1288  					//
  1289  					// Pick a free register. At this point some registers used in the predecessor
  1290  					// block may have been deallocated. Those are the ones used for Phis. Exclude
  1291  					// them (and they are not going to be helpful anyway).
  1292  					m := s.compatRegs(a.Type).minus(s.used).minus(phiUsed)
  1293  					if !m.empty() && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
  1294  						r2 := s.pickReg(m)
  1295  						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
  1296  						s.copies[c] = false
  1297  						if s.f.pass.debug > regDebug {
  1298  							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
  1299  						}
  1300  						s.setOrig(c, a)
  1301  						s.assignReg(r2, a, c)
  1302  						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
  1303  					}
  1304  				}
  1305  				s.freeReg(r)
  1306  			}
  1307  
  1308  			// Copy phi ops into new schedule.
  1309  			b.Values = append(b.Values, phis...)
  1310  
  1311  			// Third pass - pick registers for phis whose input
  1312  			// was not in a register in the primary predecessor.
  1313  			for i, v := range phis {
  1314  				if !s.values[v.ID].needReg {
  1315  					continue
  1316  				}
  1317  				if phiRegs[i] != noRegister {
  1318  					continue
  1319  				}
  1320  				m := s.compatRegs(v.Type).minus(phiUsed).minus(s.used)
  1321  				// If one of the other inputs of v is in a register, and the register is available,
  1322  				// select this register, which can save some unnecessary copies.
  1323  				for i, pe := range b.Preds {
  1324  					if i == idx {
  1325  						continue
  1326  					}
  1327  					ri := noRegister
  1328  					for _, er := range s.endRegs[pe.b.ID] {
  1329  						if er.v == s.orig[v.Args[i].ID] {
  1330  							ri = er.r
  1331  							break
  1332  						}
  1333  					}
  1334  					if ri != noRegister && m.hasReg(ri) {
  1335  						m = regMaskAt(ri)
  1336  						break
  1337  					}
  1338  				}
  1339  				if !m.empty() {
  1340  					r := s.pickReg(m)
  1341  					phiRegs[i] = r
  1342  					phiUsed = phiUsed.addReg(r)
  1343  				}
  1344  			}
  1345  
  1346  			// Set registers for phis. Add phi spill code.
  1347  			for i, v := range phis {
  1348  				if !s.values[v.ID].needReg {
  1349  					continue
  1350  				}
  1351  				r := phiRegs[i]
  1352  				if r == noRegister {
  1353  					// stack-based phi
  1354  					// Spills will be inserted in all the predecessors below.
  1355  					s.values[v.ID].spill = v // v starts life spilled
  1356  					continue
  1357  				}
  1358  				// register-based phi
  1359  				s.assignReg(r, v, v)
  1360  			}
  1361  
  1362  			// Deallocate any values which are no longer live. Phis are excluded.
  1363  			for r := register(0); r < s.numRegs; r++ {
  1364  				if phiUsed.hasReg(r) {
  1365  					continue
  1366  				}
  1367  				v := s.regs[r].v
  1368  				if v != nil && !regValLiveSet.contains(v.ID) {
  1369  					s.freeReg(r)
  1370  				}
  1371  			}
  1372  
  1373  			// Save the starting state for use by merge edges.
  1374  			// We append to a stack allocated variable that we'll
  1375  			// later copy into s.startRegs in one fell swoop, to save
  1376  			// on allocations.
  1377  			regList := make([]startReg, 0, 32)
  1378  			for r := register(0); r < s.numRegs; r++ {
  1379  				v := s.regs[r].v
  1380  				if v == nil {
  1381  					continue
  1382  				}
  1383  				if phiUsed.hasReg(r) {
  1384  					// Skip registers that phis used, we'll handle those
  1385  					// specially during merge edge processing.
  1386  					continue
  1387  				}
  1388  				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
  1389  				s.startRegsMask = s.startRegsMask.addReg(r)
  1390  			}
  1391  			s.startRegs[b.ID] = make([]startReg, len(regList))
  1392  			copy(s.startRegs[b.ID], regList)
  1393  
  1394  			if s.f.pass.debug > regDebug {
  1395  				fmt.Printf("after phis\n")
  1396  				for _, x := range s.startRegs[b.ID] {
  1397  					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
  1398  				}
  1399  			}
  1400  		}
  1401  
  1402  		// Drop phis from registers if they immediately go dead.
  1403  		for i, v := range phis {
  1404  			s.curIdx = i
  1405  			s.dropIfUnused(v)
  1406  		}
  1407  
  1408  		// Allocate space to record the desired registers for each value.
  1409  		if l := len(oldSched); cap(dinfo) < l {
  1410  			dinfo = make([]dentry, l)
  1411  		} else {
  1412  			dinfo = dinfo[:l]
  1413  			clear(dinfo)
  1414  		}
  1415  
  1416  		// Load static desired register info at the end of the block.
  1417  		if s.desired != nil {
  1418  			desired.copy(&s.desired[b.ID])
  1419  		}
  1420  
  1421  		// Check actual assigned registers at the start of the next block(s).
  1422  		// Dynamically assigned registers will trump the static
  1423  		// desired registers computed during liveness analysis.
  1424  		// Note that we do this phase after startRegs is set above, so that
  1425  		// we get the right behavior for a block which branches to itself.
  1426  		for _, e := range b.Succs {
  1427  			succ := e.b
  1428  			// TODO: prioritize likely successor?
  1429  			for _, x := range s.startRegs[succ.ID] {
  1430  				desired.add(x.v.ID, x.r)
  1431  			}
  1432  			// Process phi ops in succ.
  1433  			pidx := e.i
  1434  			for _, v := range succ.Values {
  1435  				if v.Op != OpPhi {
  1436  					break
  1437  				}
  1438  				if !s.values[v.ID].needReg {
  1439  					continue
  1440  				}
  1441  				rp, ok := s.f.getHome(v.ID).(*Register)
  1442  				if !ok {
  1443  					// If v is not assigned a register, pick a register assigned to one of v's inputs.
  1444  					// Hopefully v will get assigned that register later.
  1445  					// If the inputs have allocated register information, add it to desired,
  1446  					// which may reduce spill or copy operations when the register is available.
  1447  					for _, a := range v.Args {
  1448  						rp, ok = s.f.getHome(a.ID).(*Register)
  1449  						if ok {
  1450  							break
  1451  						}
  1452  					}
  1453  					if !ok {
  1454  						continue
  1455  					}
  1456  				}
  1457  				desired.add(v.Args[pidx].ID, register(rp.num))
  1458  			}
  1459  		}
  1460  		// Walk values backwards computing desired register info.
  1461  		// See computeDesired for more comments.
  1462  		for i := len(oldSched) - 1; i >= 0; i-- {
  1463  			v := oldSched[i]
  1464  			prefs := desired.remove(v.ID)
  1465  			regspec := s.regspec(v)
  1466  			desired.clobber(regspec.clobbers)
  1467  			for _, j := range regspec.inputs {
  1468  				if countRegs(j.regs) != 1 {
  1469  					continue
  1470  				}
  1471  				desired.clobber(j.regs)
  1472  				desired.add(v.Args[j.idx].ID, s.pickReg(j.regs))
  1473  			}
  1474  			if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  1475  				if opcodeTable[v.Op].commutative {
  1476  					desired.addList(v.Args[1].ID, prefs)
  1477  				}
  1478  				desired.addList(v.Args[0].ID, prefs)
  1479  			}
  1480  			// Save desired registers for this value.
  1481  			dinfo[i].out = prefs
  1482  			for j, a := range v.Args {
  1483  				if j >= len(dinfo[i].in) {
  1484  					break
  1485  				}
  1486  				dinfo[i].in[j] = desired.get(a.ID)
  1487  			}
  1488  			if v.Op == OpSelect1 && prefs[0] != noRegister {
  1489  				// Save desired registers of select1 for
  1490  				// use by the tuple generating instruction.
  1491  				desiredSecondReg[v.Args[0].ID] = prefs
  1492  			}
  1493  		}
  1494  
  1495  		// Process all the non-phi values.
  1496  		for idx, v := range oldSched {
  1497  			s.curIdx = nphi + idx
  1498  			tmpReg := noRegister
  1499  			if s.f.pass.debug > regDebug {
  1500  				fmt.Printf("  processing %s\n", v.LongString())
  1501  			}
  1502  			regspec := s.regspec(v)
  1503  			if v.Op == OpPhi {
  1504  				f.Fatalf("phi %s not at start of block", v)
  1505  			}
  1506  			if opcodeTable[v.Op].fixedReg {
  1507  				switch v.Op {
  1508  				case OpSP:
  1509  					s.assignReg(s.SPReg, v, v)
  1510  					s.sp = v.ID
  1511  				case OpSB:
  1512  					s.assignReg(s.SBReg, v, v)
  1513  					s.sb = v.ID
  1514  				case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
  1515  					s.assignReg(s.ZeroIntReg, v, v)
  1516  				case OpAMD64Zero128, OpAMD64Zero256, OpAMD64Zero512:
  1517  					regspec := s.regspec(v)
  1518  					m := regspec.outputs[0].regs
  1519  					if countRegs(m) != 1 {
  1520  						f.Fatalf("bad fixed-register op %s", v)
  1521  					}
  1522  					s.assignReg(s.pickReg(m), v, v)
  1523  				default:
  1524  					f.Fatalf("unknown fixed-register op %s", v)
  1525  				}
  1526  				b.Values = append(b.Values, v)
  1527  				s.advanceUses(v)
  1528  				continue
  1529  			}
  1530  			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
  1531  				if s.values[v.ID].needReg {
  1532  					if v.Op == OpSelectN {
  1533  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
  1534  					} else {
  1535  						var i = 0
  1536  						if v.Op == OpSelect1 {
  1537  							i = 1
  1538  						}
  1539  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
  1540  					}
  1541  				}
  1542  				b.Values = append(b.Values, v)
  1543  				s.advanceUses(v)
  1544  				continue
  1545  			}
  1546  			if v.Op == OpGetG && s.f.Config.hasGReg {
  1547  				// use hardware g register
  1548  				if s.regs[s.GReg].v != nil {
  1549  					s.freeReg(s.GReg) // kick out the old value
  1550  				}
  1551  				s.assignReg(s.GReg, v, v)
  1552  				b.Values = append(b.Values, v)
  1553  				s.advanceUses(v)
  1554  				continue
  1555  			}
  1556  			if v.Op == OpArg {
  1557  				// Args are "pre-spilled" values. We don't allocate
  1558  				// any register here. We just set up the spill pointer to
  1559  				// point at itself and any later user will restore it to use it.
  1560  				s.values[v.ID].spill = v
  1561  				b.Values = append(b.Values, v)
  1562  				s.advanceUses(v)
  1563  				continue
  1564  			}
  1565  			if v.Op == OpKeepAlive {
  1566  				// Make sure the argument to v is still live here.
  1567  				s.advanceUses(v)
  1568  				a := v.Args[0]
  1569  				vi := &s.values[a.ID]
  1570  				if vi.regs.empty() && !vi.rematerializeable {
  1571  					// Use the spill location.
  1572  					// This forces later liveness analysis to make the
  1573  					// value live at this point.
  1574  					v.SetArg(0, s.makeSpill(a, b))
  1575  				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
  1576  					// Rematerializeable value with a *ir.Name. This is the address of
  1577  					// a stack object (e.g. an LEAQ). Keep the object live.
  1578  					// Change it to VarLive, which is what plive expects for locals.
  1579  					v.Op = OpVarLive
  1580  					v.SetArgs1(v.Args[1])
  1581  					v.Aux = a.Aux
  1582  				} else {
  1583  					// In-register and rematerializeable values are already live.
  1584  					// These are typically rematerializeable constants like nil,
  1585  					// or values of a variable that were modified since the last call.
  1586  					v.Op = OpCopy
  1587  					v.SetArgs1(v.Args[1])
  1588  				}
  1589  				b.Values = append(b.Values, v)
  1590  				continue
  1591  			}
  1592  			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
  1593  				// No register allocation required (or none specified yet)
  1594  				if s.doClobber && v.Op.IsCall() {
  1595  					s.clobberRegs(regspec.clobbers)
  1596  				}
  1597  				s.freeRegs(regspec.clobbers)
  1598  				b.Values = append(b.Values, v)
  1599  				s.advanceUses(v)
  1600  				continue
  1601  			}
  1602  
  1603  			if s.values[v.ID].rematerializeable {
  1604  				// Value is rematerializeable, don't issue it here.
  1605  				// It will get issued just before each use (see
  1606  				// allocValueToReg).
  1607  				for _, a := range v.Args {
  1608  					a.Uses--
  1609  				}
  1610  				s.advanceUses(v)
  1611  				continue
  1612  			}
  1613  
  1614  			if s.f.pass.debug > regDebug {
  1615  				fmt.Printf("value %s\n", v.LongString())
  1616  				fmt.Printf("  out:")
  1617  				for _, r := range dinfo[idx].out {
  1618  					if r != noRegister {
  1619  						fmt.Printf(" %s", &s.registers[r])
  1620  					}
  1621  				}
  1622  				fmt.Println()
  1623  				for i := 0; i < len(v.Args) && i < 3; i++ {
  1624  					fmt.Printf("  in%d:", i)
  1625  					for _, r := range dinfo[idx].in[i] {
  1626  						if r != noRegister {
  1627  							fmt.Printf(" %s", &s.registers[r])
  1628  						}
  1629  					}
  1630  					fmt.Println()
  1631  				}
  1632  			}
  1633  
  1634  			// Move arguments to registers.
  1635  			// First, if an arg must be in a specific register and it is already
  1636  			// in place, keep it.
  1637  			args = append(args[:0], make([]*Value, len(v.Args))...)
  1638  			for i, a := range v.Args {
  1639  				if !s.values[a.ID].needReg {
  1640  					args[i] = a
  1641  				}
  1642  			}
  1643  			for _, i := range regspec.inputs {
  1644  				mask := i.regs
  1645  				if countRegs(mask) == 1 && !mask.intersect(s.values[v.Args[i.idx].ID].regs).empty() {
  1646  					args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1647  				}
  1648  			}
  1649  			// Then, if an arg must be in a specific register and that
  1650  			// register is free, allocate that one. Otherwise when processing
  1651  			// another input we may kick a value into the free register, which
  1652  			// then will be kicked out again.
  1653  			// This is a common case for passing-in-register arguments for
  1654  			// function calls.
  1655  			for {
  1656  				freed := false
  1657  				for _, i := range regspec.inputs {
  1658  					if args[i.idx] != nil {
  1659  						continue // already allocated
  1660  					}
  1661  					mask := i.regs
  1662  					if countRegs(mask) == 1 && !mask.minus(s.used).empty() {
  1663  						args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1664  						// If the input is in other registers that will be clobbered by v,
  1665  						// or the input is dead, free the registers. This may make room
  1666  						// for other inputs.
  1667  						oldregs := s.values[v.Args[i.idx].ID].regs
  1668  						if oldregs.minus(regspec.clobbers).empty() || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
  1669  							s.freeRegs(oldregs.minus(mask).minus(s.nospill))
  1670  							freed = true
  1671  						}
  1672  					}
  1673  				}
  1674  				if !freed {
  1675  					break
  1676  				}
  1677  			}
  1678  			// Last, allocate remaining ones, in an ordering defined
  1679  			// by the register specification (most constrained first).
  1680  			for _, i := range regspec.inputs {
  1681  				if args[i.idx] != nil {
  1682  					continue // already allocated
  1683  				}
  1684  				mask := i.regs
  1685  				if mask.intersect(s.values[v.Args[i.idx].ID].regs).empty() {
  1686  					// Need a new register for the input.
  1687  					mask = mask.intersect(s.allocatable)
  1688  					mask = mask.minus(s.nospill)
  1689  					// Used desired register if available.
  1690  					if i.idx < 3 {
  1691  						for _, r := range dinfo[idx].in[i.idx] {
  1692  							if r != noRegister && mask.minus(s.used).hasReg(r) {
  1693  								// Desired register is allowed and unused.
  1694  								mask = regMaskAt(r)
  1695  								break
  1696  							}
  1697  						}
  1698  					}
  1699  					// Avoid registers we're saving for other values.
  1700  					if !mask.minus(desired.avoid).empty() {
  1701  						mask = mask.minus(desired.avoid)
  1702  					}
  1703  				}
  1704  				if mask.intersect(s.values[v.Args[i.idx].ID].regs).hasReg(s.SPReg) {
  1705  					// Prefer SP register. This ensures that local variables
  1706  					// use SP as their base register (instead of a copy of the
  1707  					// stack pointer living in another register). See issue 74836.
  1708  					mask = regMaskAt(s.SPReg)
  1709  				}
  1710  				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1711  			}
  1712  
  1713  			// If the output clobbers the input register, make sure we have
  1714  			// at least two copies of the input register so we don't
  1715  			// have to reload the value from the spill location.
  1716  			if opcodeTable[v.Op].resultInArg0 {
  1717  				var m regMask
  1718  				if !s.liveAfterCurrentInstruction(v.Args[0]) {
  1719  					// arg0 is dead.  We can clobber its register.
  1720  					goto ok
  1721  				}
  1722  				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
  1723  					args[0], args[1] = args[1], args[0]
  1724  					goto ok
  1725  				}
  1726  				if s.values[v.Args[0].ID].rematerializeable {
  1727  					// We can rematerialize the input, don't worry about clobbering it.
  1728  					goto ok
  1729  				}
  1730  				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
  1731  					args[0], args[1] = args[1], args[0]
  1732  					goto ok
  1733  				}
  1734  				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
  1735  					// we have at least 2 copies of arg0.  We can afford to clobber one.
  1736  					goto ok
  1737  				}
  1738  				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
  1739  					args[0], args[1] = args[1], args[0]
  1740  					goto ok
  1741  				}
  1742  
  1743  				// We can't overwrite arg0 (or arg1, if commutative).  So we
  1744  				// need to make a copy of an input so we have a register we can modify.
  1745  
  1746  				// Possible new registers to copy into.
  1747  				m = s.compatRegs(v.Args[0].Type).minus(s.used)
  1748  				if m.empty() {
  1749  					// No free registers.  In this case we'll just clobber
  1750  					// an input and future uses of that input must use a restore.
  1751  					// TODO(khr): We should really do this like allocReg does it,
  1752  					// spilling the value with the most distant next use.
  1753  					goto ok
  1754  				}
  1755  
  1756  				// Try to move an input to the desired output, if allowed.
  1757  				for _, r := range dinfo[idx].out {
  1758  					if r != noRegister && m.intersect(regspec.outputs[0].regs).hasReg(r) {
  1759  						m = regMaskAt(r)
  1760  						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
  1761  						// Note: we update args[0] so the instruction will
  1762  						// use the register copy we just made.
  1763  						goto ok
  1764  					}
  1765  				}
  1766  				// Try to copy input to its desired location & use its old
  1767  				// location as the result register.
  1768  				for _, r := range dinfo[idx].in[0] {
  1769  					if r != noRegister && m.hasReg(r) {
  1770  						m = regMaskAt(r)
  1771  						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1772  						s.copies[c] = false
  1773  						// Note: no update to args[0] so the instruction will
  1774  						// use the original copy.
  1775  						goto ok
  1776  					}
  1777  				}
  1778  				if opcodeTable[v.Op].commutative {
  1779  					for _, r := range dinfo[idx].in[1] {
  1780  						if r != noRegister && m.hasReg(r) {
  1781  							m = regMaskAt(r)
  1782  							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
  1783  							s.copies[c] = false
  1784  							args[0], args[1] = args[1], args[0]
  1785  							goto ok
  1786  						}
  1787  					}
  1788  				}
  1789  
  1790  				// Avoid future fixed uses if we can.
  1791  				if !m.minus(desired.avoid).empty() {
  1792  					m = m.minus(desired.avoid)
  1793  				}
  1794  				// Save input 0 to a new register so we can clobber it.
  1795  				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1796  				s.copies[c] = false
  1797  
  1798  				// Normally we use the register of the old copy of input 0 as the target.
  1799  				// However, if input 0 is already in its desired register then we use
  1800  				// the register of the new copy instead.
  1801  				if regspec.outputs[0].regs.hasReg(register(s.f.getHome(c.ID).(*Register).num)) {
  1802  					if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
  1803  						r := register(rp.num)
  1804  						for _, r2 := range dinfo[idx].in[0] {
  1805  							if r == r2 {
  1806  								args[0] = c
  1807  								break
  1808  							}
  1809  						}
  1810  					}
  1811  				}
  1812  			}
  1813  		ok:
  1814  			for i := 0; i < 2; i++ {
  1815  				if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
  1816  					continue
  1817  				}
  1818  				if !s.liveAfterCurrentInstruction(v.Args[i]) {
  1819  					// arg is dead.  We can clobber its register.
  1820  					continue
  1821  				}
  1822  				if s.values[v.Args[i].ID].rematerializeable {
  1823  					// We can rematerialize the input, don't worry about clobbering it.
  1824  					continue
  1825  				}
  1826  				if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
  1827  					// We have at least 2 copies of arg.  We can afford to clobber one.
  1828  					continue
  1829  				}
  1830  				// Possible new registers to copy into.
  1831  				m := s.compatRegs(v.Args[i].Type).minus(s.used)
  1832  				if m.empty() {
  1833  					// No free registers.  In this case we'll just clobber the
  1834  					// input and future uses of that input must use a restore.
  1835  					// TODO(khr): We should really do this like allocReg does it,
  1836  					// spilling the value with the most distant next use.
  1837  					continue
  1838  				}
  1839  				// Copy input to a different register that won't be clobbered.
  1840  				c := s.allocValToReg(v.Args[i], m, true, v.Pos)
  1841  				s.copies[c] = false
  1842  			}
  1843  
  1844  			// Pick a temporary register if needed.
  1845  			// It should be distinct from all the input registers, so we
  1846  			// allocate it after all the input registers, but before
  1847  			// the input registers are freed via advanceUses below.
  1848  			// (Not all instructions need that distinct part, but it is conservative.)
  1849  			// We also ensure it is not any of the single-choice output registers.
  1850  			if opcodeTable[v.Op].needIntTemp {
  1851  				m := s.allocatable.intersect(s.f.Config.gpRegMask)
  1852  				for _, out := range regspec.outputs {
  1853  					if countRegs(out.regs) == 1 {
  1854  						m = m.minus(out.regs)
  1855  					}
  1856  				}
  1857  				if !m.minus(desired.avoid).minus(s.nospill).empty() {
  1858  					m = m.minus(desired.avoid)
  1859  				}
  1860  				tmpReg = s.allocReg(m, &tmpVal)
  1861  				s.nospill = s.nospill.addReg(tmpReg)
  1862  				s.tmpused = s.tmpused.addReg(tmpReg)
  1863  			}
  1864  
  1865  			if regspec.clobbersArg0 {
  1866  				s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
  1867  			}
  1868  			if regspec.clobbersArg1 && !(regspec.clobbersArg0 && s.f.getHome(args[0].ID) == s.f.getHome(args[1].ID)) {
  1869  				s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
  1870  			}
  1871  
  1872  			// Now that all args are in regs, we're ready to issue the value itself.
  1873  			// Before we pick a register for the output value, allow input registers
  1874  			// to be deallocated. We do this here so that the output can use the
  1875  			// same register as a dying input.
  1876  			if !opcodeTable[v.Op].resultNotInArgs {
  1877  				s.tmpused = s.nospill
  1878  				s.nospill = regMask{}
  1879  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1880  			}
  1881  
  1882  			// Dump any registers which will be clobbered
  1883  			if s.doClobber && v.Op.IsCall() {
  1884  				// clobber registers that are marked as clobber in regmask, but
  1885  				// don't clobber inputs.
  1886  				s.clobberRegs(regspec.clobbers.minus(s.tmpused).minus(s.nospill))
  1887  			}
  1888  			s.freeRegs(regspec.clobbers)
  1889  			s.tmpused = s.tmpused.union(regspec.clobbers)
  1890  
  1891  			// Pick registers for outputs.
  1892  			{
  1893  				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
  1894  				maxOutIdx := -1
  1895  				var used regMask
  1896  				if tmpReg != noRegister {
  1897  					// Ensure output registers are distinct from the temporary register.
  1898  					// (Not all instructions need that distinct part, but it is conservative.)
  1899  					used = used.addReg(tmpReg)
  1900  				}
  1901  				for _, out := range regspec.outputs {
  1902  					if out.regs.empty() {
  1903  						continue
  1904  					}
  1905  					mask := out.regs.intersect(s.allocatable).minus(used)
  1906  					if mask.empty() {
  1907  						s.f.Fatalf("can't find any output register %s", v.LongString())
  1908  					}
  1909  					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
  1910  						if !opcodeTable[v.Op].commutative {
  1911  							// Output must use the same register as input 0.
  1912  							r := register(s.f.getHome(args[0].ID).(*Register).num)
  1913  							if !mask.hasReg(r) {
  1914  								s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
  1915  							}
  1916  							mask = regMaskAt(r)
  1917  						} else {
  1918  							// Output must use the same register as input 0 or 1.
  1919  							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
  1920  							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
  1921  							// Check r0 and r1 for desired output register.
  1922  							found := false
  1923  							for _, r := range dinfo[idx].out {
  1924  								if (r == r0 || r == r1) && mask.minus(s.used).hasReg(r) {
  1925  									mask = regMaskAt(r)
  1926  									found = true
  1927  									if r == r1 {
  1928  										args[0], args[1] = args[1], args[0]
  1929  									}
  1930  									break
  1931  								}
  1932  							}
  1933  							if !found {
  1934  								// Neither are desired, pick r0.
  1935  								mask = regMaskAt(r0)
  1936  							}
  1937  						}
  1938  					}
  1939  					if out.idx == 0 { // desired registers only apply to the first element of a tuple result
  1940  						for _, r := range dinfo[idx].out {
  1941  							if r != noRegister && mask.minus(s.used).hasReg(r) {
  1942  								// Desired register is allowed and unused.
  1943  								mask = regMaskAt(r)
  1944  								break
  1945  							}
  1946  						}
  1947  					}
  1948  					if out.idx == 1 {
  1949  						if prefs, ok := desiredSecondReg[v.ID]; ok {
  1950  							for _, r := range prefs {
  1951  								if r != noRegister && mask.minus(s.used).hasReg(r) {
  1952  									// Desired register is allowed and unused.
  1953  									mask = regMaskAt(r)
  1954  									break
  1955  								}
  1956  							}
  1957  						}
  1958  					}
  1959  					// Avoid registers we're saving for other values.
  1960  					if !mask.minus(desired.avoid).minus(s.nospill).minus(s.used).empty() {
  1961  						mask = mask.minus(desired.avoid)
  1962  					}
  1963  					r := s.allocReg(mask, v)
  1964  					if out.idx > maxOutIdx {
  1965  						maxOutIdx = out.idx
  1966  					}
  1967  					outRegs[out.idx] = r
  1968  					used = used.addReg(r)
  1969  					s.tmpused = s.tmpused.addReg(r)
  1970  				}
  1971  				// Record register choices
  1972  				if v.Type.IsTuple() {
  1973  					var outLocs LocPair
  1974  					if r := outRegs[0]; r != noRegister {
  1975  						outLocs[0] = &s.registers[r]
  1976  					}
  1977  					if r := outRegs[1]; r != noRegister {
  1978  						outLocs[1] = &s.registers[r]
  1979  					}
  1980  					s.f.setHome(v, outLocs)
  1981  					// Note that subsequent SelectX instructions will do the assignReg calls.
  1982  				} else if v.Type.IsResults() {
  1983  					// preallocate outLocs to the right size, which is maxOutIdx+1
  1984  					outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
  1985  					for i := 0; i <= maxOutIdx; i++ {
  1986  						if r := outRegs[i]; r != noRegister {
  1987  							outLocs[i] = &s.registers[r]
  1988  						}
  1989  					}
  1990  					s.f.setHome(v, outLocs)
  1991  				} else {
  1992  					if r := outRegs[0]; r != noRegister {
  1993  						s.assignReg(r, v, v)
  1994  					}
  1995  				}
  1996  				if tmpReg != noRegister {
  1997  					// Remember the temp register allocation, if any.
  1998  					if s.f.tempRegs == nil {
  1999  						s.f.tempRegs = map[ID]*Register{}
  2000  					}
  2001  					s.f.tempRegs[v.ID] = &s.registers[tmpReg]
  2002  				}
  2003  			}
  2004  
  2005  			// deallocate dead args, if we have not done so
  2006  			if opcodeTable[v.Op].resultNotInArgs {
  2007  				s.nospill = regMask{}
  2008  				s.advanceUses(v) // frees any registers holding args that are no longer live
  2009  			}
  2010  			s.tmpused = regMask{}
  2011  
  2012  			// Issue the Value itself.
  2013  			for i, a := range args {
  2014  				v.SetArg(i, a) // use register version of arguments
  2015  			}
  2016  			b.Values = append(b.Values, v)
  2017  			s.dropIfUnused(v)
  2018  		}
  2019  
  2020  		// Copy the control values - we need this so we can reduce the
  2021  		// uses property of these values later.
  2022  		controls := append(make([]*Value, 0, 2), b.ControlValues()...)
  2023  
  2024  		// Load control values into registers.
  2025  		for i, v := range b.ControlValues() {
  2026  			if !s.values[v.ID].needReg {
  2027  				continue
  2028  			}
  2029  			if s.f.pass.debug > regDebug {
  2030  				fmt.Printf("  processing control %s\n", v.LongString())
  2031  			}
  2032  			// We assume that a control input can be passed in any
  2033  			// type-compatible register. If this turns out not to be true,
  2034  			// we'll need to introduce a regspec for a block's control value.
  2035  			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
  2036  		}
  2037  
  2038  		// Reduce the uses of the control values once registers have been loaded.
  2039  		// This loop is equivalent to the advanceUses method.
  2040  		for _, v := range controls {
  2041  			vi := &s.values[v.ID]
  2042  			if !vi.needReg {
  2043  				continue
  2044  			}
  2045  			// Remove this use from the uses list.
  2046  			u := vi.uses
  2047  			vi.uses = u.next
  2048  			if u.next == nil {
  2049  				s.freeRegs(vi.regs) // value is dead
  2050  			}
  2051  			u.next = s.freeUseRecords
  2052  			s.freeUseRecords = u
  2053  		}
  2054  
  2055  		// If we are approaching a merge point and we are the primary
  2056  		// predecessor of it, find live values that we use soon after
  2057  		// the merge point and promote them to registers now.
  2058  		if len(b.Succs) == 1 {
  2059  			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
  2060  				s.freeReg(s.GReg) // Spill value in G register before any merge.
  2061  			}
  2062  			if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
  2063  				// No point if we've already regalloc'd the destination.
  2064  				goto badloop
  2065  			}
  2066  			// For this to be worthwhile, the loop must have no calls in it.
  2067  			top := b.Succs[0].b
  2068  			loop := s.loopnest.b2l[top.ID]
  2069  			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
  2070  				goto badloop
  2071  			}
  2072  
  2073  			// Look into target block, find Phi arguments that come from b.
  2074  			phiArgs := regValLiveSet // reuse this space
  2075  			phiArgs.clear()
  2076  			for _, v := range b.Succs[0].b.Values {
  2077  				if v.Op == OpPhi {
  2078  					phiArgs.add(v.Args[b.Succs[0].i].ID)
  2079  				}
  2080  			}
  2081  
  2082  			// Get mask of all registers that might be used soon in the destination.
  2083  			// We don't want to kick values out of these registers, but we will
  2084  			// kick out an unlikely-to-be-used value for a likely-to-be-used one.
  2085  			var likelyUsedRegs regMask
  2086  			for _, live := range s.live[b.ID] {
  2087  				if live.dist < unlikelyDistance {
  2088  					likelyUsedRegs = likelyUsedRegs.union(s.values[live.ID].regs)
  2089  				}
  2090  			}
  2091  			// Promote values we're going to use soon in the destination to registers.
  2092  			// Note that this iterates nearest-use first, as we sorted
  2093  			// live lists by distance in computeLive.
  2094  			for _, live := range s.live[b.ID] {
  2095  				if live.dist >= unlikelyDistance {
  2096  					// Don't preload anything live after the loop.
  2097  					continue
  2098  				}
  2099  				vid := live.ID
  2100  				vi := &s.values[vid]
  2101  				v := s.orig[vid]
  2102  				if phiArgs.contains(vid) {
  2103  					// A phi argument needs its value in a regular register,
  2104  					// as returned by compatRegs. Being in a fixed register
  2105  					// (e.g. the zero register) or being easily
  2106  					// rematerializeable isn't enough.
  2107  					if !vi.regs.intersect(s.compatRegs(v.Type)).empty() {
  2108  						continue
  2109  					}
  2110  				} else {
  2111  					if !vi.regs.empty() {
  2112  						continue
  2113  					}
  2114  					if vi.rematerializeable {
  2115  						// TODO: maybe we should not skip rematerializeable
  2116  						// values here. One rematerialization outside the loop
  2117  						// is better than N in the loop. But rematerializations
  2118  						// are cheap, and spilling another value may not be.
  2119  						// And we don't want to materialize the zero register
  2120  						// into a different register when it is just the
  2121  						// argument to a store.
  2122  						continue
  2123  					}
  2124  				}
  2125  				if vi.rematerializeable && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
  2126  					continue
  2127  				}
  2128  				// Registers we could load v into.
  2129  				// Don't kick out other likely-used values.
  2130  				m := s.compatRegs(v.Type).minus(likelyUsedRegs)
  2131  				if m.empty() {
  2132  					// To many likely-used values to give them all a register.
  2133  					continue
  2134  				}
  2135  
  2136  				// Used desired register if available.
  2137  			outerloop:
  2138  				for _, e := range desired.entries {
  2139  					if e.ID != v.ID {
  2140  						continue
  2141  					}
  2142  					for _, r := range e.regs {
  2143  						if r != noRegister && m.hasReg(r) {
  2144  							m = regMaskAt(r)
  2145  							break outerloop
  2146  						}
  2147  					}
  2148  				}
  2149  				if !m.minus(desired.avoid).empty() {
  2150  					m = m.minus(desired.avoid)
  2151  				}
  2152  				s.allocValToReg(v, m, false, b.Pos)
  2153  				likelyUsedRegs = likelyUsedRegs.union(s.values[v.ID].regs)
  2154  			}
  2155  		}
  2156  	badloop:
  2157  		;
  2158  
  2159  		// Save end-of-block register state.
  2160  		// First count how many, this cuts allocations in half.
  2161  		k := 0
  2162  		for r := register(0); r < s.numRegs; r++ {
  2163  			v := s.regs[r].v
  2164  			if v == nil {
  2165  				continue
  2166  			}
  2167  			k++
  2168  		}
  2169  		regList := make([]endReg, 0, k)
  2170  		for r := register(0); r < s.numRegs; r++ {
  2171  			v := s.regs[r].v
  2172  			if v == nil {
  2173  				continue
  2174  			}
  2175  			regList = append(regList, endReg{r, v, s.regs[r].c})
  2176  		}
  2177  		s.endRegs[b.ID] = regList
  2178  
  2179  		if checkEnabled {
  2180  			regValLiveSet.clear()
  2181  			if s.live != nil {
  2182  				for _, x := range s.live[b.ID] {
  2183  					regValLiveSet.add(x.ID)
  2184  				}
  2185  			}
  2186  			for r := register(0); r < s.numRegs; r++ {
  2187  				v := s.regs[r].v
  2188  				if v == nil {
  2189  					continue
  2190  				}
  2191  				if !regValLiveSet.contains(v.ID) {
  2192  					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
  2193  				}
  2194  			}
  2195  		}
  2196  
  2197  		// If a value is live at the end of the block and
  2198  		// isn't in a register, generate a use for the spill location.
  2199  		// We need to remember this information so that
  2200  		// the liveness analysis in stackalloc is correct.
  2201  		if s.live != nil {
  2202  			for _, e := range s.live[b.ID] {
  2203  				vi := &s.values[e.ID]
  2204  				if !vi.regs.empty() {
  2205  					// in a register, we'll use that source for the merge.
  2206  					continue
  2207  				}
  2208  				if vi.rematerializeable {
  2209  					// we'll rematerialize during the merge.
  2210  					continue
  2211  				}
  2212  				if s.f.pass.debug > regDebug {
  2213  					fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
  2214  				}
  2215  				spill := s.makeSpill(s.orig[e.ID], b)
  2216  				s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
  2217  			}
  2218  
  2219  			// Clear any final uses.
  2220  			// All that is left should be the pseudo-uses added for values which
  2221  			// are live at the end of b.
  2222  			for _, e := range s.live[b.ID] {
  2223  				u := s.values[e.ID].uses
  2224  				if u == nil {
  2225  					f.Fatalf("live at end, no uses v%d", e.ID)
  2226  				}
  2227  				if u.next != nil {
  2228  					f.Fatalf("live at end, too many uses v%d", e.ID)
  2229  				}
  2230  				s.values[e.ID].uses = nil
  2231  				u.next = s.freeUseRecords
  2232  				s.freeUseRecords = u
  2233  			}
  2234  		}
  2235  
  2236  		// allocReg may have dropped registers from startRegsMask that
  2237  		// aren't actually needed in startRegs. Synchronize back to
  2238  		// startRegs.
  2239  		//
  2240  		// This must be done before placing spills, which will look at
  2241  		// startRegs to decide if a block is a valid block for a spill.
  2242  		if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
  2243  			regs := make([]startReg, 0, c)
  2244  			for _, sr := range s.startRegs[b.ID] {
  2245  				if !s.startRegsMask.hasReg(sr.r) {
  2246  					continue
  2247  				}
  2248  				regs = append(regs, sr)
  2249  			}
  2250  			s.startRegs[b.ID] = regs
  2251  		}
  2252  	}
  2253  
  2254  	// Decide where the spills we generated will go.
  2255  	s.placeSpills()
  2256  
  2257  	// Anything that didn't get a register gets a stack location here.
  2258  	// (StoreReg, stack-based phis, inputs, ...)
  2259  	stacklive := stackalloc(s.f, s.spillLive)
  2260  
  2261  	// Fix up all merge edges.
  2262  	s.shuffle(stacklive)
  2263  
  2264  	// Erase any copies we never used.
  2265  	// Also, an unused copy might be the only use of another copy,
  2266  	// so continue erasing until we reach a fixed point.
  2267  	for {
  2268  		progress := false
  2269  		for c, used := range s.copies {
  2270  			if !used && c.Uses == 0 {
  2271  				if s.f.pass.debug > regDebug {
  2272  					fmt.Printf("delete copied value %s\n", c.LongString())
  2273  				}
  2274  				c.resetArgs()
  2275  				f.freeValue(c)
  2276  				delete(s.copies, c)
  2277  				progress = true
  2278  			}
  2279  		}
  2280  		if !progress {
  2281  			break
  2282  		}
  2283  	}
  2284  
  2285  	for _, b := range s.visitOrder {
  2286  		i := 0
  2287  		for _, v := range b.Values {
  2288  			if v.Op == OpInvalid {
  2289  				continue
  2290  			}
  2291  			b.Values[i] = v
  2292  			i++
  2293  		}
  2294  		b.Values = b.Values[:i]
  2295  	}
  2296  }
  2297  
  2298  func (s *regAllocState) placeSpills() {
  2299  	mustBeFirst := func(op Op) bool {
  2300  		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
  2301  	}
  2302  
  2303  	// Start maps block IDs to the list of spills
  2304  	// that go at the start of the block (but after any phis).
  2305  	start := map[ID][]*Value{}
  2306  	// After maps value IDs to the list of spills
  2307  	// that go immediately after that value ID.
  2308  	after := map[ID][]*Value{}
  2309  
  2310  	for i := range s.values {
  2311  		vi := s.values[i]
  2312  		spill := vi.spill
  2313  		if spill == nil {
  2314  			continue
  2315  		}
  2316  		if spill.Block != nil {
  2317  			// Some spills are already fully set up,
  2318  			// like OpArgs and stack-based phis.
  2319  			continue
  2320  		}
  2321  		v := s.orig[i]
  2322  
  2323  		// Walk down the dominator tree looking for a good place to
  2324  		// put the spill of v.  At the start "best" is the best place
  2325  		// we have found so far.
  2326  		// TODO: find a way to make this O(1) without arbitrary cutoffs.
  2327  		if v == nil {
  2328  			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
  2329  		}
  2330  		best := v.Block
  2331  		bestArg := v
  2332  		var bestDepth int16
  2333  		if s.loopnest != nil && s.loopnest.b2l[best.ID] != nil {
  2334  			bestDepth = s.loopnest.b2l[best.ID].depth
  2335  		}
  2336  		b := best
  2337  		const maxSpillSearch = 100
  2338  		for i := 0; i < maxSpillSearch; i++ {
  2339  			// Find the child of b in the dominator tree which
  2340  			// dominates all restores.
  2341  			p := b
  2342  			b = nil
  2343  			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
  2344  				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
  2345  					// c also dominates all restores.  Walk down into c.
  2346  					b = c
  2347  					break
  2348  				}
  2349  			}
  2350  			if b == nil {
  2351  				// Ran out of blocks which dominate all restores.
  2352  				break
  2353  			}
  2354  
  2355  			var depth int16
  2356  			if s.loopnest != nil && s.loopnest.b2l[b.ID] != nil {
  2357  				depth = s.loopnest.b2l[b.ID].depth
  2358  			}
  2359  			if depth > bestDepth {
  2360  				// Don't push the spill into a deeper loop.
  2361  				continue
  2362  			}
  2363  
  2364  			// If v is in a register at the start of b, we can
  2365  			// place the spill here (after the phis).
  2366  			if len(b.Preds) == 1 {
  2367  				for _, e := range s.endRegs[b.Preds[0].b.ID] {
  2368  					if e.v == v {
  2369  						// Found a better spot for the spill.
  2370  						best = b
  2371  						bestArg = e.c
  2372  						bestDepth = depth
  2373  						break
  2374  					}
  2375  				}
  2376  			} else {
  2377  				for _, e := range s.startRegs[b.ID] {
  2378  					if e.v == v {
  2379  						// Found a better spot for the spill.
  2380  						best = b
  2381  						bestArg = e.c
  2382  						bestDepth = depth
  2383  						break
  2384  					}
  2385  				}
  2386  			}
  2387  		}
  2388  
  2389  		// Put the spill in the best block we found.
  2390  		spill.Block = best
  2391  		spill.AddArg(bestArg)
  2392  		if best == v.Block && !mustBeFirst(v.Op) {
  2393  			// Place immediately after v.
  2394  			after[v.ID] = append(after[v.ID], spill)
  2395  		} else {
  2396  			// Place at the start of best block.
  2397  			start[best.ID] = append(start[best.ID], spill)
  2398  		}
  2399  	}
  2400  
  2401  	// Insert spill instructions into the block schedules.
  2402  	var oldSched []*Value
  2403  	for _, b := range s.visitOrder {
  2404  		nfirst := 0
  2405  		for _, v := range b.Values {
  2406  			if !mustBeFirst(v.Op) {
  2407  				break
  2408  			}
  2409  			nfirst++
  2410  		}
  2411  		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
  2412  		b.Values = b.Values[:nfirst]
  2413  		b.Values = append(b.Values, start[b.ID]...)
  2414  		for _, v := range oldSched {
  2415  			b.Values = append(b.Values, v)
  2416  			b.Values = append(b.Values, after[v.ID]...)
  2417  		}
  2418  	}
  2419  }
  2420  
  2421  // shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
  2422  func (s *regAllocState) shuffle(stacklive [][]ID) {
  2423  	var e edgeState
  2424  	e.s = s
  2425  	e.cache = map[ID][]*Value{}
  2426  	e.contents = map[Location]contentRecord{}
  2427  	if s.f.pass.debug > regDebug {
  2428  		fmt.Printf("shuffle %s\n", s.f.Name)
  2429  		fmt.Println(s.f.String())
  2430  	}
  2431  
  2432  	for _, b := range s.visitOrder {
  2433  		if len(b.Preds) <= 1 {
  2434  			continue
  2435  		}
  2436  		e.b = b
  2437  		for i, edge := range b.Preds {
  2438  			p := edge.b
  2439  			e.p = p
  2440  			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
  2441  			e.process()
  2442  		}
  2443  	}
  2444  
  2445  	if s.f.pass.debug > regDebug {
  2446  		fmt.Printf("post shuffle %s\n", s.f.Name)
  2447  		fmt.Println(s.f.String())
  2448  	}
  2449  }
  2450  
  2451  type edgeState struct {
  2452  	s    *regAllocState
  2453  	p, b *Block // edge goes from p->b.
  2454  
  2455  	// for each pre-regalloc value, a list of equivalent cached values
  2456  	cache      map[ID][]*Value
  2457  	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration
  2458  
  2459  	// map from location to the value it contains
  2460  	contents map[Location]contentRecord
  2461  
  2462  	// desired destination locations
  2463  	destinations []dstRecord
  2464  	extra        []dstRecord
  2465  
  2466  	usedRegs              regMask // registers currently holding something
  2467  	uniqueRegs            regMask // registers holding the only copy of a value
  2468  	finalRegs             regMask // registers holding final target
  2469  	rematerializeableRegs regMask // registers that hold rematerializeable values
  2470  }
  2471  
  2472  type contentRecord struct {
  2473  	vid   ID       // pre-regalloc value
  2474  	c     *Value   // cached value
  2475  	final bool     // this is a satisfied destination
  2476  	pos   src.XPos // source position of use of the value
  2477  }
  2478  
  2479  type dstRecord struct {
  2480  	loc    Location // register or stack slot
  2481  	vid    ID       // pre-regalloc value it should contain
  2482  	splice **Value  // place to store reference to the generating instruction
  2483  	pos    src.XPos // source position of use of this location
  2484  }
  2485  
  2486  // setup initializes the edge state for shuffling.
  2487  func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
  2488  	if e.s.f.pass.debug > regDebug {
  2489  		fmt.Printf("edge %s->%s\n", e.p, e.b)
  2490  	}
  2491  
  2492  	// Clear state.
  2493  	clear(e.cache)
  2494  	e.cachedVals = e.cachedVals[:0]
  2495  	clear(e.contents)
  2496  	e.usedRegs = regMask{}
  2497  	e.uniqueRegs = regMask{}
  2498  	e.finalRegs = regMask{}
  2499  	e.rematerializeableRegs = regMask{}
  2500  
  2501  	// Live registers can be sources.
  2502  	for _, x := range srcReg {
  2503  		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care the position of the source
  2504  	}
  2505  	// So can all of the spill locations.
  2506  	for _, spillID := range stacklive {
  2507  		v := e.s.orig[spillID]
  2508  		spill := e.s.values[v.ID].spill
  2509  		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
  2510  			// Spills were placed that only dominate the uses found
  2511  			// during the first regalloc pass. The edge fixup code
  2512  			// can't use a spill location if the spill doesn't dominate
  2513  			// the edge.
  2514  			// We are guaranteed that if the spill doesn't dominate this edge,
  2515  			// then the value is available in a register (because we called
  2516  			// makeSpill for every value not in a register at the start
  2517  			// of an edge).
  2518  			continue
  2519  		}
  2520  		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care the position of the source
  2521  	}
  2522  
  2523  	// Figure out all the destinations we need.
  2524  	dsts := e.destinations[:0]
  2525  	for _, x := range dstReg {
  2526  		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
  2527  	}
  2528  	// Phis need their args to end up in a specific location.
  2529  	for _, v := range e.b.Values {
  2530  		if v.Op != OpPhi {
  2531  			break
  2532  		}
  2533  		loc := e.s.f.getHome(v.ID)
  2534  		if loc == nil {
  2535  			continue
  2536  		}
  2537  		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
  2538  	}
  2539  	e.destinations = dsts
  2540  
  2541  	if e.s.f.pass.debug > regDebug {
  2542  		for _, vid := range e.cachedVals {
  2543  			a := e.cache[vid]
  2544  			for _, c := range a {
  2545  				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
  2546  			}
  2547  		}
  2548  		for _, d := range e.destinations {
  2549  			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
  2550  		}
  2551  	}
  2552  }
  2553  
  2554  // process generates code to move all the values to the right destination locations.
  2555  func (e *edgeState) process() {
  2556  	dsts := e.destinations
  2557  
  2558  	// Process the destinations until they are all satisfied.
  2559  	for len(dsts) > 0 {
  2560  		i := 0
  2561  		for _, d := range dsts {
  2562  			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
  2563  				// Failed - save for next iteration.
  2564  				dsts[i] = d
  2565  				i++
  2566  			}
  2567  		}
  2568  		if i < len(dsts) {
  2569  			// Made some progress. Go around again.
  2570  			dsts = dsts[:i]
  2571  
  2572  			// Append any extras destinations we generated.
  2573  			dsts = append(dsts, e.extra...)
  2574  			e.extra = e.extra[:0]
  2575  			continue
  2576  		}
  2577  
  2578  		// We made no progress. That means that any
  2579  		// remaining unsatisfied moves are in simple cycles.
  2580  		// For example, A -> B -> C -> D -> A.
  2581  		//   A ----> B
  2582  		//   ^       |
  2583  		//   |       |
  2584  		//   |       v
  2585  		//   D <---- C
  2586  
  2587  		// To break the cycle, we pick an unused register, say R,
  2588  		// and put a copy of B there.
  2589  		//   A ----> B
  2590  		//   ^       |
  2591  		//   |       |
  2592  		//   |       v
  2593  		//   D <---- C <---- R=copyofB
  2594  		// When we resume the outer loop, the A->B move can now proceed,
  2595  		// and eventually the whole cycle completes.
  2596  
  2597  		// Copy any cycle location to a temp register. This duplicates
  2598  		// one of the cycle entries, allowing the just duplicated value
  2599  		// to be overwritten and the cycle to proceed.
  2600  		d := dsts[0]
  2601  		loc := d.loc
  2602  		vid := e.contents[loc].vid
  2603  		c := e.contents[loc].c
  2604  		r := e.findRegFor(c.Type)
  2605  		if e.s.f.pass.debug > regDebug {
  2606  			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
  2607  		}
  2608  		e.erase(r)
  2609  		pos := d.pos.WithNotStmt()
  2610  		if _, isReg := loc.(*Register); isReg {
  2611  			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2612  		} else {
  2613  			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2614  		}
  2615  		e.set(r, vid, c, false, pos)
  2616  		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
  2617  			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
  2618  		}
  2619  	}
  2620  }
  2621  
  2622  // processDest generates code to put value vid into location loc. Returns true
  2623  // if progress was made.
  2624  func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
  2625  	pos = pos.WithNotStmt()
  2626  	occupant := e.contents[loc]
  2627  	if occupant.vid == vid {
  2628  		// Value is already in the correct place.
  2629  		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
  2630  		if splice != nil {
  2631  			(*splice).Uses--
  2632  			*splice = occupant.c
  2633  			occupant.c.Uses++
  2634  		}
  2635  		// Note: if splice==nil then c will appear dead. This is
  2636  		// non-SSA formed code, so be careful after this pass not to run
  2637  		// deadcode elimination.
  2638  		if _, ok := e.s.copies[occupant.c]; ok {
  2639  			// The copy at occupant.c was used to avoid spill.
  2640  			e.s.copies[occupant.c] = true
  2641  		}
  2642  		return true
  2643  	}
  2644  
  2645  	// Check if we're allowed to clobber the destination location.
  2646  	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
  2647  		// We can't overwrite the last copy
  2648  		// of a value that needs to survive.
  2649  		return false
  2650  	}
  2651  
  2652  	// Copy from a source of v, register preferred.
  2653  	v := e.s.orig[vid]
  2654  	var c *Value
  2655  	var src Location
  2656  	if e.s.f.pass.debug > regDebug {
  2657  		fmt.Printf("moving v%d to %s\n", vid, loc)
  2658  		fmt.Printf("sources of v%d:", vid)
  2659  	}
  2660  	if opcodeTable[v.Op].fixedReg {
  2661  		c = v
  2662  		src = e.s.f.getHome(v.ID)
  2663  	} else {
  2664  		for _, w := range e.cache[vid] {
  2665  			h := e.s.f.getHome(w.ID)
  2666  			if e.s.f.pass.debug > regDebug {
  2667  				fmt.Printf(" %s:%s", h, w)
  2668  			}
  2669  			_, isreg := h.(*Register)
  2670  			if src == nil || isreg {
  2671  				c = w
  2672  				src = h
  2673  			}
  2674  		}
  2675  	}
  2676  	if e.s.f.pass.debug > regDebug {
  2677  		if src != nil {
  2678  			fmt.Printf(" [use %s]\n", src)
  2679  		} else {
  2680  			fmt.Printf(" [no source]\n")
  2681  		}
  2682  	}
  2683  	_, dstReg := loc.(*Register)
  2684  
  2685  	// Pre-clobber destination. This avoids the
  2686  	// following situation:
  2687  	//   - v is currently held in R0 and stacktmp0.
  2688  	//   - We want to copy stacktmp1 to stacktmp0.
  2689  	//   - We choose R0 as the temporary register.
  2690  	// During the copy, both R0 and stacktmp0 are
  2691  	// clobbered, losing both copies of v. Oops!
  2692  	// Erasing the destination early means R0 will not
  2693  	// be chosen as the temp register, as it will then
  2694  	// be the last copy of v.
  2695  	e.erase(loc)
  2696  	var x *Value
  2697  	if c == nil || e.s.values[vid].rematerializeable {
  2698  		if !e.s.values[vid].rematerializeable {
  2699  			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
  2700  		}
  2701  		if dstReg {
  2702  			// We want to rematerialize v into a register that is incompatible with v's op's register mask.
  2703  			// Instead of setting the wrong register for the rematerialized v, we should find the right register
  2704  			// for it and emit an additional copy to move to the desired register.
  2705  			// For #70451.
  2706  			if !e.s.regspec(v).outputs[0].regs.hasReg(register(loc.(*Register).num)) {
  2707  				_, srcReg := src.(*Register)
  2708  				if srcReg {
  2709  					// It exists in a valid register already, so just copy it to the desired register
  2710  					// If src is a Register, c must have already been set.
  2711  					x = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2712  				} else {
  2713  					// We need a tmp register
  2714  					x = v.copyInto(e.p)
  2715  					r := e.findRegFor(x.Type)
  2716  					e.erase(r)
  2717  					// Rematerialize to the tmp register
  2718  					e.set(r, vid, x, false, pos)
  2719  					// Copy from tmp to the desired register
  2720  					x = e.p.NewValue1(pos, OpCopy, x.Type, x)
  2721  				}
  2722  			} else {
  2723  				x = v.copyInto(e.p)
  2724  			}
  2725  		} else {
  2726  			// Rematerialize into stack slot. Need a free
  2727  			// register to accomplish this.
  2728  			r := e.findRegFor(v.Type)
  2729  			e.erase(r)
  2730  			x = v.copyIntoWithXPos(e.p, pos)
  2731  			e.set(r, vid, x, false, pos)
  2732  			// Make sure we spill with the size of the slot, not the
  2733  			// size of x (which might be wider due to our dropping
  2734  			// of narrowing conversions).
  2735  			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
  2736  		}
  2737  	} else {
  2738  		// Emit move from src to dst.
  2739  		_, srcReg := src.(*Register)
  2740  		if srcReg {
  2741  			if dstReg {
  2742  				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
  2743  			} else {
  2744  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
  2745  			}
  2746  		} else {
  2747  			if dstReg {
  2748  				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2749  			} else {
  2750  				// mem->mem. Use temp register.
  2751  				r := e.findRegFor(c.Type)
  2752  				e.erase(r)
  2753  				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
  2754  				e.set(r, vid, t, false, pos)
  2755  				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
  2756  			}
  2757  		}
  2758  	}
  2759  	e.set(loc, vid, x, true, pos)
  2760  	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
  2761  		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
  2762  	}
  2763  	if splice != nil {
  2764  		(*splice).Uses--
  2765  		*splice = x
  2766  		x.Uses++
  2767  	}
  2768  	return true
  2769  }
  2770  
  2771  // set changes the contents of location loc to hold the given value and its cached representative.
  2772  func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
  2773  	e.s.f.setHome(c, loc)
  2774  	e.contents[loc] = contentRecord{vid, c, final, pos}
  2775  	a := e.cache[vid]
  2776  	if len(a) == 0 {
  2777  		e.cachedVals = append(e.cachedVals, vid)
  2778  	}
  2779  	a = append(a, c)
  2780  	e.cache[vid] = a
  2781  	if r, ok := loc.(*Register); ok {
  2782  		if e.usedRegs.hasReg(register(r.num)) {
  2783  			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
  2784  		}
  2785  		e.usedRegs = e.usedRegs.addReg(register(r.num))
  2786  		if final {
  2787  			e.finalRegs = e.finalRegs.addReg(register(r.num))
  2788  		}
  2789  		if len(a) == 1 {
  2790  			e.uniqueRegs = e.uniqueRegs.addReg(register(r.num))
  2791  		}
  2792  		if len(a) == 2 {
  2793  			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2794  				e.uniqueRegs = e.uniqueRegs.removeReg(register(t.num))
  2795  			}
  2796  		}
  2797  		if e.s.values[vid].rematerializeable {
  2798  			e.rematerializeableRegs = e.rematerializeableRegs.addReg(register(r.num))
  2799  		}
  2800  	}
  2801  	if e.s.f.pass.debug > regDebug {
  2802  		fmt.Printf("%s\n", c.LongString())
  2803  		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
  2804  	}
  2805  }
  2806  
  2807  // erase removes any user of loc.
  2808  func (e *edgeState) erase(loc Location) {
  2809  	cr := e.contents[loc]
  2810  	if cr.c == nil {
  2811  		return
  2812  	}
  2813  	vid := cr.vid
  2814  
  2815  	if cr.final {
  2816  		// Add a destination to move this value back into place.
  2817  		// Make sure it gets added to the tail of the destination queue
  2818  		// so we make progress on other moves first.
  2819  		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
  2820  	}
  2821  
  2822  	// Remove c from the list of cached values.
  2823  	a := e.cache[vid]
  2824  	for i, c := range a {
  2825  		if e.s.f.getHome(c.ID) == loc {
  2826  			if e.s.f.pass.debug > regDebug {
  2827  				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
  2828  			}
  2829  			a[i], a = a[len(a)-1], a[:len(a)-1]
  2830  			break
  2831  		}
  2832  	}
  2833  	e.cache[vid] = a
  2834  
  2835  	// Update register masks.
  2836  	if r, ok := loc.(*Register); ok {
  2837  		e.usedRegs = e.usedRegs.removeReg(register(r.num))
  2838  		if cr.final {
  2839  			e.finalRegs = e.finalRegs.removeReg(register(r.num))
  2840  		}
  2841  		e.rematerializeableRegs = e.rematerializeableRegs.removeReg(register(r.num))
  2842  	}
  2843  	if len(a) == 1 {
  2844  		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
  2845  			e.uniqueRegs = e.uniqueRegs.addReg(register(r.num))
  2846  		}
  2847  	}
  2848  }
  2849  
  2850  // findRegFor finds a register we can use to make a temp copy of type typ.
  2851  func (e *edgeState) findRegFor(typ *types.Type) Location {
  2852  	// Which registers are possibilities.
  2853  	m := e.s.compatRegs(typ)
  2854  
  2855  	// Pick a register. In priority order:
  2856  	// 1) an unused register
  2857  	// 2) a non-unique register not holding a final value
  2858  	// 3) a non-unique register
  2859  	// 4) a register holding a rematerializeable value
  2860  	x := m.minus(e.usedRegs)
  2861  	if !x.empty() {
  2862  		return &e.s.registers[e.s.pickReg(x)]
  2863  	}
  2864  	x = m.minus(e.uniqueRegs).minus(e.finalRegs)
  2865  	if !x.empty() {
  2866  		return &e.s.registers[e.s.pickReg(x)]
  2867  	}
  2868  	x = m.minus(e.uniqueRegs)
  2869  	if !x.empty() {
  2870  		return &e.s.registers[e.s.pickReg(x)]
  2871  	}
  2872  	x = m.intersect(e.rematerializeableRegs)
  2873  	if !x.empty() {
  2874  		return &e.s.registers[e.s.pickReg(x)]
  2875  	}
  2876  
  2877  	// No register is available.
  2878  	// Pick a register to spill.
  2879  	for _, vid := range e.cachedVals {
  2880  		a := e.cache[vid]
  2881  		for _, c := range a {
  2882  			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m.hasReg(register(r.num)) {
  2883  				if !c.rematerializeable() {
  2884  					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
  2885  					// Allocate a temp location to spill a register to.
  2886  					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, c.Type), Type: c.Type}
  2887  					// TODO: reuse these slots. They'll need to be erased first.
  2888  					e.set(t, vid, x, false, c.Pos)
  2889  					if e.s.f.pass.debug > regDebug {
  2890  						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
  2891  					}
  2892  				}
  2893  				// r will now be overwritten by the caller. At some point
  2894  				// later, the newly saved value will be moved back to its
  2895  				// final destination in processDest.
  2896  				return r
  2897  			}
  2898  		}
  2899  	}
  2900  
  2901  	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
  2902  	for _, vid := range e.cachedVals {
  2903  		a := e.cache[vid]
  2904  		for _, c := range a {
  2905  			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
  2906  		}
  2907  	}
  2908  	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
  2909  	return nil
  2910  }
  2911  
  2912  // rematerializeable reports whether the register allocator should recompute
  2913  // a value instead of spilling/restoring it.
  2914  func (v *Value) rematerializeable() bool {
  2915  	if !opcodeTable[v.Op].rematerializeable {
  2916  		return false
  2917  	}
  2918  	for _, a := range v.Args {
  2919  		// Fixed-register allocations (SP, SB, etc.) are always available.
  2920  		// Any other argument of an opcode makes it not rematerializeable.
  2921  		if !opcodeTable[a.Op].fixedReg {
  2922  			return false
  2923  		}
  2924  	}
  2925  	return true
  2926  }
  2927  
  2928  type liveInfo struct {
  2929  	ID   ID       // ID of value
  2930  	dist int32    // # of instructions before next use
  2931  	pos  src.XPos // source position of next use
  2932  }
  2933  
  2934  // computeLive computes a map from block ID to a list of value IDs live at the end
  2935  // of that block. Together with the value ID is a count of how many instructions
  2936  // to the next use of that value. The resulting map is stored in s.live.
  2937  func (s *regAllocState) computeLive() {
  2938  	f := s.f
  2939  	// single block functions do not have variables that are live across
  2940  	// branches
  2941  	if len(f.Blocks) == 1 {
  2942  		return
  2943  	}
  2944  	po := f.postorder()
  2945  	s.live = make([][]liveInfo, f.NumBlocks())
  2946  	s.desired = make([]desiredState, f.NumBlocks())
  2947  	s.loopnest = f.loopnest()
  2948  
  2949  	rematIDs := make([]ID, 0, 64)
  2950  
  2951  	live := f.newSparseMapPos(f.NumValues())
  2952  	defer f.retSparseMapPos(live)
  2953  	t := f.newSparseMapPos(f.NumValues())
  2954  	defer f.retSparseMapPos(t)
  2955  
  2956  	s.loopnest.computeUnavoidableCalls()
  2957  
  2958  	// Liveness analysis.
  2959  	// This is an adapted version of the algorithm described in chapter 2.4.2
  2960  	// of Fabrice Rastello's On Sparse Intermediate Representations.
  2961  	//   https://web.archive.org/web/20240417212122if_/https://inria.hal.science/hal-00761555/file/habilitation.pdf#section.50
  2962  	//
  2963  	// For our implementation, we fall back to a traditional iterative algorithm when we encounter
  2964  	// Irreducible CFGs. They are very uncommon in Go code because they need to be constructed with
  2965  	// gotos and our current loopnest definition does not compute all the information that
  2966  	// we'd need to compute the loop ancestors for that step of the algorithm.
  2967  	//
  2968  	// Additionally, instead of only considering non-loop successors in the initial DFS phase,
  2969  	// we compute the liveout as the union of all successors. This larger liveout set is a subset
  2970  	// of the final liveout for the block and adding this information in the DFS phase means that
  2971  	// we get slightly more accurate distance information.
  2972  	var loopLiveIn map[*loop][]liveInfo
  2973  	var numCalls []int32
  2974  	if len(s.loopnest.loops) > 0 && !s.loopnest.hasIrreducible {
  2975  		loopLiveIn = make(map[*loop][]liveInfo)
  2976  		numCalls = f.Cache.allocInt32Slice(f.NumBlocks())
  2977  		defer f.Cache.freeInt32Slice(numCalls)
  2978  	}
  2979  
  2980  	for {
  2981  		changed := false
  2982  
  2983  		for _, b := range po {
  2984  			// Start with known live values at the end of the block.
  2985  			live.clear()
  2986  			for _, e := range s.live[b.ID] {
  2987  				live.set(e.ID, e.dist, e.pos)
  2988  			}
  2989  			update := false
  2990  			// arguments to phi nodes are live at this blocks out
  2991  			for _, e := range b.Succs {
  2992  				succ := e.b
  2993  				delta := branchDistance(b, succ)
  2994  				for _, v := range succ.Values {
  2995  					if v.Op != OpPhi {
  2996  						break
  2997  					}
  2998  					arg := v.Args[e.i]
  2999  					if s.values[arg.ID].needReg && (!live.contains(arg.ID) || delta < live.get(arg.ID)) {
  3000  						live.set(arg.ID, delta, v.Pos)
  3001  						update = true
  3002  					}
  3003  				}
  3004  			}
  3005  			if update {
  3006  				s.live[b.ID] = updateLive(live, s.live[b.ID])
  3007  			}
  3008  			// Add len(b.Values) to adjust from end-of-block distance
  3009  			// to beginning-of-block distance.
  3010  			c := live.contents()
  3011  			for i := range c {
  3012  				c[i].val += int32(len(b.Values))
  3013  			}
  3014  
  3015  			// Mark control values as live
  3016  			for _, c := range b.ControlValues() {
  3017  				if s.values[c.ID].needReg {
  3018  					live.set(c.ID, int32(len(b.Values)), b.Pos)
  3019  				}
  3020  			}
  3021  
  3022  			for i := len(b.Values) - 1; i >= 0; i-- {
  3023  				v := b.Values[i]
  3024  				live.remove(v.ID)
  3025  				if v.Op == OpPhi {
  3026  					continue
  3027  				}
  3028  				if opcodeTable[v.Op].call {
  3029  					if numCalls != nil {
  3030  						numCalls[b.ID]++
  3031  					}
  3032  					rematIDs = rematIDs[:0]
  3033  					c := live.contents()
  3034  					for i := range c {
  3035  						c[i].val += unlikelyDistance
  3036  						vid := c[i].key
  3037  						if s.values[vid].rematerializeable {
  3038  							rematIDs = append(rematIDs, vid)
  3039  						}
  3040  					}
  3041  					// We don't spill rematerializeable values, and assuming they
  3042  					// are live across a call would only force shuffle to add some
  3043  					// (dead) constant rematerialization. Remove them.
  3044  					for _, r := range rematIDs {
  3045  						live.remove(r)
  3046  					}
  3047  				}
  3048  				for _, a := range v.Args {
  3049  					if s.values[a.ID].needReg {
  3050  						live.set(a.ID, int32(i), v.Pos)
  3051  					}
  3052  				}
  3053  			}
  3054  			// This is a loop header, save our live-in so that
  3055  			// we can use it to fill in the loop bodies later
  3056  			if loopLiveIn != nil {
  3057  				loop := s.loopnest.b2l[b.ID]
  3058  				if loop != nil && loop.header.ID == b.ID {
  3059  					loopLiveIn[loop] = updateLive(live, nil)
  3060  				}
  3061  			}
  3062  			// For each predecessor of b, expand its list of live-at-end values.
  3063  			// invariant: live contains the values live at the start of b
  3064  			for _, e := range b.Preds {
  3065  				p := e.b
  3066  				delta := branchDistance(p, b)
  3067  
  3068  				// Start t off with the previously known live values at the end of p.
  3069  				t.clear()
  3070  				for _, e := range s.live[p.ID] {
  3071  					t.set(e.ID, e.dist, e.pos)
  3072  				}
  3073  				update := false
  3074  
  3075  				// Add new live values from scanning this block.
  3076  				for _, e := range live.contents() {
  3077  					d := e.val + delta
  3078  					if !t.contains(e.key) || d < t.get(e.key) {
  3079  						update = true
  3080  						t.set(e.key, d, e.pos)
  3081  					}
  3082  				}
  3083  
  3084  				if !update {
  3085  					continue
  3086  				}
  3087  				s.live[p.ID] = updateLive(t, s.live[p.ID])
  3088  				changed = true
  3089  			}
  3090  		}
  3091  
  3092  		// Doing a traditional iterative algorithm and have run
  3093  		// out of changes
  3094  		if !changed {
  3095  			break
  3096  		}
  3097  
  3098  		// Doing a pre-pass and will fill in the liveness information
  3099  		// later
  3100  		if loopLiveIn != nil {
  3101  			break
  3102  		}
  3103  		// For loopless code, we have full liveness info after a single
  3104  		// iteration
  3105  		if len(s.loopnest.loops) == 0 {
  3106  			break
  3107  		}
  3108  	}
  3109  	if f.pass.debug > regDebug {
  3110  		s.debugPrintLive("after dfs walk", f, s.live, s.desired)
  3111  	}
  3112  
  3113  	// irreducible CFGs and functions without loops are already
  3114  	// done, compute their desired registers and return
  3115  	if loopLiveIn == nil {
  3116  		s.computeDesired()
  3117  		return
  3118  	}
  3119  
  3120  	// Walk the loopnest from outer to inner, adding
  3121  	// all live-in values from their parent. Instead of
  3122  	// a recursive algorithm, iterate in depth order.
  3123  	// TODO(dmo): can we permute the loopnest? can we avoid this copy?
  3124  	loops := slices.Clone(s.loopnest.loops)
  3125  	slices.SortFunc(loops, func(a, b *loop) int {
  3126  		return cmp.Compare(a.depth, b.depth)
  3127  	})
  3128  
  3129  	loopset := f.newSparseMapPos(f.NumValues())
  3130  	defer f.retSparseMapPos(loopset)
  3131  	for _, loop := range loops {
  3132  		if loop.outer == nil {
  3133  			continue
  3134  		}
  3135  		livein := loopLiveIn[loop]
  3136  		loopset.clear()
  3137  		for _, l := range livein {
  3138  			loopset.set(l.ID, l.dist, l.pos)
  3139  		}
  3140  		update := false
  3141  		for _, l := range loopLiveIn[loop.outer] {
  3142  			if !loopset.contains(l.ID) {
  3143  				loopset.set(l.ID, l.dist, l.pos)
  3144  				update = true
  3145  			}
  3146  		}
  3147  		if update {
  3148  			loopLiveIn[loop] = updateLive(loopset, livein)
  3149  		}
  3150  	}
  3151  	// unknownDistance is a sentinel value for when we know a variable
  3152  	// is live at any given block, but we do not yet know how far until it's next
  3153  	// use. The distance will be computed later.
  3154  	const unknownDistance = -1
  3155  
  3156  	// add live-in values of the loop headers to their children.
  3157  	// This includes the loop headers themselves, since they can have values
  3158  	// that die in the middle of the block and aren't live-out
  3159  	for _, b := range po {
  3160  		loop := s.loopnest.b2l[b.ID]
  3161  		if loop == nil {
  3162  			continue
  3163  		}
  3164  		headerLive := loopLiveIn[loop]
  3165  		loopset.clear()
  3166  		for _, l := range s.live[b.ID] {
  3167  			loopset.set(l.ID, l.dist, l.pos)
  3168  		}
  3169  		update := false
  3170  		for _, l := range headerLive {
  3171  			if !loopset.contains(l.ID) {
  3172  				loopset.set(l.ID, unknownDistance, src.NoXPos)
  3173  				update = true
  3174  			}
  3175  		}
  3176  		if update {
  3177  			s.live[b.ID] = updateLive(loopset, s.live[b.ID])
  3178  		}
  3179  	}
  3180  	if f.pass.debug > regDebug {
  3181  		s.debugPrintLive("after live loop prop", f, s.live, s.desired)
  3182  	}
  3183  	// Filling in liveness from loops leaves some blocks with no distance information
  3184  	// Run over them and fill in the information from their successors.
  3185  	// To stabilize faster, we quit when no block has missing values and we only
  3186  	// look at blocks that still have missing values in subsequent iterations
  3187  	unfinishedBlocks := f.Cache.allocBlockSlice(len(po))
  3188  	defer f.Cache.freeBlockSlice(unfinishedBlocks)
  3189  	copy(unfinishedBlocks, po)
  3190  
  3191  	for len(unfinishedBlocks) > 0 {
  3192  		n := 0
  3193  		for _, b := range unfinishedBlocks {
  3194  			live.clear()
  3195  			unfinishedValues := 0
  3196  			for _, l := range s.live[b.ID] {
  3197  				if l.dist == unknownDistance {
  3198  					unfinishedValues++
  3199  				}
  3200  				live.set(l.ID, l.dist, l.pos)
  3201  			}
  3202  			update := false
  3203  			for _, e := range b.Succs {
  3204  				succ := e.b
  3205  				for _, l := range s.live[succ.ID] {
  3206  					if !live.contains(l.ID) || l.dist == unknownDistance {
  3207  						continue
  3208  					}
  3209  					dist := int32(len(succ.Values)) + l.dist + branchDistance(b, succ)
  3210  					dist += numCalls[succ.ID] * unlikelyDistance
  3211  					val := live.get(l.ID)
  3212  					switch {
  3213  					case val == unknownDistance:
  3214  						unfinishedValues--
  3215  						fallthrough
  3216  					case dist < val:
  3217  						update = true
  3218  						live.set(l.ID, dist, l.pos)
  3219  					}
  3220  				}
  3221  			}
  3222  			if update {
  3223  				s.live[b.ID] = updateLive(live, s.live[b.ID])
  3224  			}
  3225  			if unfinishedValues > 0 {
  3226  				unfinishedBlocks[n] = b
  3227  				n++
  3228  			}
  3229  		}
  3230  		unfinishedBlocks = unfinishedBlocks[:n]
  3231  	}
  3232  
  3233  	// Sort live values in order of their nearest next use.
  3234  	// Useful for promoting values to registers, nearest use first.
  3235  	for _, b := range f.Blocks {
  3236  		slices.SortFunc(s.live[b.ID], func(a, b liveInfo) int {
  3237  			if a.dist != b.dist {
  3238  				return cmp.Compare(a.dist, b.dist)
  3239  			}
  3240  			return cmp.Compare(a.ID, b.ID) // for deterministic sorting
  3241  		})
  3242  	}
  3243  
  3244  	s.computeDesired()
  3245  
  3246  	if f.pass.debug > regDebug {
  3247  		s.debugPrintLive("final", f, s.live, s.desired)
  3248  	}
  3249  }
  3250  
  3251  // computeDesired computes the desired register information at the end of each block.
  3252  // It is essentially a liveness analysis on machine registers instead of SSA values
  3253  // The desired register information is stored in s.desired.
  3254  func (s *regAllocState) computeDesired() {
  3255  
  3256  	// TODO: Can we speed this up using the liveness information we have already
  3257  	// from computeLive?
  3258  	var desired desiredState
  3259  	f := s.f
  3260  	po := f.postorder()
  3261  	maxPreds := 0
  3262  	for _, b := range f.Blocks {
  3263  		maxPreds = max(maxPreds, len(b.Preds))
  3264  	}
  3265  	// phiPrefs[i] collects desired registers for phi inputs coming from b.Preds[i].
  3266  	phiPrefs := make([]desiredState, maxPreds)
  3267  	for {
  3268  		changed := false
  3269  		for _, b := range po {
  3270  			desired.copy(&s.desired[b.ID])
  3271  			for i := range b.Preds {
  3272  				phiPrefs[i].reset()
  3273  			}
  3274  			var headerLoop *loop // loop whose header is b, if any
  3275  			if l := s.loopnest.b2l[b.ID]; l != nil && l.header == b {
  3276  				headerLoop = l
  3277  			}
  3278  			// Process non-phis, then phis.
  3279  			i := len(b.Values) - 1
  3280  			for ; i >= 0; i-- {
  3281  				v := b.Values[i]
  3282  				if v.Op == OpPhi {
  3283  					break
  3284  				}
  3285  				prefs := desired.remove(v.ID)
  3286  				regspec := s.regspec(v)
  3287  				// Cancel desired registers if they get clobbered.
  3288  				desired.clobber(regspec.clobbers)
  3289  				// Update desired registers if there are any fixed register inputs.
  3290  				for _, j := range regspec.inputs {
  3291  					if countRegs(j.regs) != 1 {
  3292  						continue
  3293  					}
  3294  					desired.clobber(j.regs)
  3295  					desired.add(v.Args[j.idx].ID, s.pickReg(j.regs))
  3296  				}
  3297  				// Set desired register of input 0 if this is a 2-operand instruction.
  3298  				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  3299  					// ADDQconst is added here because we want to treat it as resultInArg0 for
  3300  					// the purposes of desired registers, even though it is not an absolute requirement.
  3301  					// This is because we'd rather implement it as ADDQ instead of LEAQ.
  3302  					// Same for ADDLconst
  3303  					// Select0 is added here to propagate the desired register to the tuple-generating instruction.
  3304  					if opcodeTable[v.Op].commutative {
  3305  						desired.addList(v.Args[1].ID, prefs)
  3306  					}
  3307  					desired.addList(v.Args[0].ID, prefs)
  3308  				}
  3309  			}
  3310  			for ; i >= 0; i-- {
  3311  				v := b.Values[i]
  3312  				prefs := desired.remove(v.ID)
  3313  				if prefs[0] == noRegister {
  3314  					continue
  3315  				}
  3316  				// Phi desires go to phiPrefs (per-pred), so drop them from desired.avoid.
  3317  				// The merge below re-adds any bits other entries still need.
  3318  				for _, r := range prefs {
  3319  					if r != noRegister {
  3320  						desired.avoid = desired.avoid.minus(regMaskAt(r))
  3321  					}
  3322  				}
  3323  				// Propagate v's desired registers back to its args.
  3324  				for pidx, a := range v.Args {
  3325  					if headerLoop != nil && s.loopnest.b2l[b.Preds[pidx].b.ID] == headerLoop {
  3326  						// Skip direct back-edges to avoid pessimizing the loop body to skip a single reg-reg move.
  3327  						// We check only the immediate loop; it is simple and empirically sufficient.
  3328  						continue
  3329  					}
  3330  					phiPrefs[pidx].addList(a.ID, prefs)
  3331  				}
  3332  			}
  3333  			for pidx, e := range b.Preds {
  3334  				p := e.b
  3335  				changed = s.desired[p.ID].merge(&desired) || changed
  3336  				changed = s.desired[p.ID].merge(&phiPrefs[pidx]) || changed
  3337  			}
  3338  		}
  3339  		if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
  3340  			break
  3341  		}
  3342  	}
  3343  }
  3344  
  3345  // updateLive updates a given liveInfo slice with the contents of t
  3346  func updateLive(t *sparseMapPos, live []liveInfo) []liveInfo {
  3347  	live = live[:0]
  3348  	if cap(live) < t.size() {
  3349  		live = make([]liveInfo, 0, t.size())
  3350  	}
  3351  	for _, e := range t.contents() {
  3352  		live = append(live, liveInfo{e.key, e.val, e.pos})
  3353  	}
  3354  	return live
  3355  }
  3356  
  3357  // branchDistance calculates the distance between a block and a
  3358  // successor in pseudo-instructions. This is used to indicate
  3359  // likeliness
  3360  func branchDistance(b *Block, s *Block) int32 {
  3361  	if len(b.Succs) == 2 {
  3362  		if b.Succs[0].b == s && b.Likely == BranchLikely ||
  3363  			b.Succs[1].b == s && b.Likely == BranchUnlikely {
  3364  			return likelyDistance
  3365  		}
  3366  		if b.Succs[0].b == s && b.Likely == BranchUnlikely ||
  3367  			b.Succs[1].b == s && b.Likely == BranchLikely {
  3368  			return unlikelyDistance
  3369  		}
  3370  	}
  3371  	// Note: the branch distance must be at least 1 to distinguish the control
  3372  	// value use from the first user in a successor block.
  3373  	return normalDistance
  3374  }
  3375  
  3376  func (s *regAllocState) debugPrintLive(stage string, f *Func, live [][]liveInfo, desired []desiredState) {
  3377  	fmt.Printf("%s: live values at end of each block: %s\n", stage, f.Name)
  3378  	for _, b := range f.Blocks {
  3379  		s.debugPrintLiveBlock(b, live[b.ID], &desired[b.ID])
  3380  	}
  3381  }
  3382  
  3383  func (s *regAllocState) debugPrintLiveBlock(b *Block, live []liveInfo, desired *desiredState) {
  3384  	fmt.Printf("  %s:", b)
  3385  	slices.SortFunc(live, func(a, b liveInfo) int {
  3386  		return cmp.Compare(a.ID, b.ID)
  3387  	})
  3388  	for _, x := range live {
  3389  		fmt.Printf(" v%d(%d)", x.ID, x.dist)
  3390  		for _, e := range desired.entries {
  3391  			if e.ID != x.ID {
  3392  				continue
  3393  			}
  3394  			fmt.Printf("[")
  3395  			first := true
  3396  			for _, r := range e.regs {
  3397  				if r == noRegister {
  3398  					continue
  3399  				}
  3400  				if !first {
  3401  					fmt.Printf(",")
  3402  				}
  3403  				fmt.Print(&s.registers[r])
  3404  				first = false
  3405  			}
  3406  			fmt.Printf("]")
  3407  		}
  3408  	}
  3409  	if avoid := desired.avoid; !avoid.empty() {
  3410  		fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
  3411  	}
  3412  	fmt.Println()
  3413  }
  3414  
  3415  // A desiredState represents desired register assignments.
  3416  type desiredState struct {
  3417  	// Desired assignments will be small, so we just use a list
  3418  	// of valueID+registers entries.
  3419  	entries []desiredStateEntry
  3420  	// Registers that other values want to be in.  This value will
  3421  	// contain at least the union of the regs fields of entries, but
  3422  	// may contain additional entries for values that were once in
  3423  	// this data structure but are no longer.
  3424  	avoid regMask
  3425  }
  3426  type desiredStateEntry struct {
  3427  	// (pre-regalloc) value
  3428  	ID ID
  3429  	// Registers it would like to be in, in priority order.
  3430  	// Unused slots are filled with noRegister.
  3431  	// For opcodes that return tuples, we track desired registers only
  3432  	// for the first element of the tuple (see desiredSecondReg for
  3433  	// tracking the desired register for second part of a tuple).
  3434  	regs [4]register
  3435  }
  3436  
  3437  // get returns a list of desired registers for value vid.
  3438  func (d *desiredState) get(vid ID) [4]register {
  3439  	for _, e := range d.entries {
  3440  		if e.ID == vid {
  3441  			return e.regs
  3442  		}
  3443  	}
  3444  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  3445  }
  3446  
  3447  // add records that we'd like value vid to be in register r.
  3448  func (d *desiredState) add(vid ID, r register) {
  3449  	d.avoid = d.avoid.addReg(r)
  3450  	for i := range d.entries {
  3451  		e := &d.entries[i]
  3452  		if e.ID != vid {
  3453  			continue
  3454  		}
  3455  		if e.regs[0] == r {
  3456  			// Already known and highest priority
  3457  			return
  3458  		}
  3459  		for j := 1; j < len(e.regs); j++ {
  3460  			if e.regs[j] == r {
  3461  				// Move from lower priority to top priority
  3462  				copy(e.regs[1:], e.regs[:j])
  3463  				e.regs[0] = r
  3464  				return
  3465  			}
  3466  		}
  3467  		copy(e.regs[1:], e.regs[:])
  3468  		e.regs[0] = r
  3469  		return
  3470  	}
  3471  	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
  3472  }
  3473  
  3474  func (d *desiredState) addList(vid ID, regs [4]register) {
  3475  	// regs is in priority order, so iterate in reverse order.
  3476  	for i := len(regs) - 1; i >= 0; i-- {
  3477  		r := regs[i]
  3478  		if r != noRegister {
  3479  			d.add(vid, r)
  3480  		}
  3481  	}
  3482  }
  3483  
  3484  // clobber erases any desired registers in the set m.
  3485  func (d *desiredState) clobber(m regMask) {
  3486  	for i := 0; i < len(d.entries); {
  3487  		e := &d.entries[i]
  3488  		j := 0
  3489  		for _, r := range e.regs {
  3490  			if r != noRegister && !m.hasReg(r) {
  3491  				e.regs[j] = r
  3492  				j++
  3493  			}
  3494  		}
  3495  		if j == 0 {
  3496  			// No more desired registers for this value.
  3497  			d.entries[i] = d.entries[len(d.entries)-1]
  3498  			d.entries = d.entries[:len(d.entries)-1]
  3499  			continue
  3500  		}
  3501  		for ; j < len(e.regs); j++ {
  3502  			e.regs[j] = noRegister
  3503  		}
  3504  		i++
  3505  	}
  3506  	d.avoid = d.avoid.minus(m)
  3507  }
  3508  
  3509  // reset prepares d for re-use.
  3510  func (d *desiredState) reset() {
  3511  	d.entries = d.entries[:0]
  3512  	d.avoid = regMask{}
  3513  }
  3514  
  3515  // copy copies a desired state from another desiredState x.
  3516  func (d *desiredState) copy(x *desiredState) {
  3517  	d.entries = append(d.entries[:0], x.entries...)
  3518  	d.avoid = x.avoid
  3519  }
  3520  
  3521  // remove removes the desired registers for vid and returns them.
  3522  func (d *desiredState) remove(vid ID) [4]register {
  3523  	for i := range d.entries {
  3524  		if d.entries[i].ID == vid {
  3525  			regs := d.entries[i].regs
  3526  			d.entries[i] = d.entries[len(d.entries)-1]
  3527  			d.entries = d.entries[:len(d.entries)-1]
  3528  			return regs
  3529  		}
  3530  	}
  3531  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  3532  }
  3533  
  3534  // merge merges another desired state x into d. Returns whether the set has
  3535  // changed
  3536  func (d *desiredState) merge(x *desiredState) bool {
  3537  	oldAvoid := d.avoid
  3538  	d.avoid = d.avoid.union(x.avoid)
  3539  	// There should only be a few desired registers, so
  3540  	// linear insert is ok.
  3541  	for _, e := range x.entries {
  3542  		d.addList(e.ID, e.regs)
  3543  	}
  3544  	return oldAvoid != d.avoid
  3545  }
  3546  
  3547  // computeUnavoidableCalls computes the containsUnavoidableCall fields in the loop nest.
  3548  func (loopnest *loopnest) computeUnavoidableCalls() {
  3549  	f := loopnest.f
  3550  
  3551  	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
  3552  	defer f.Cache.freeBoolSlice(hasCall)
  3553  	for _, b := range f.Blocks {
  3554  		if b.containsCall() {
  3555  			hasCall[b.ID] = true
  3556  		}
  3557  	}
  3558  	found := f.Cache.allocSparseSet(f.NumBlocks())
  3559  	defer f.Cache.freeSparseSet(found)
  3560  	// Run dfs to find path through the loop that avoids all calls.
  3561  	// Such path either escapes the loop or returns back to the header.
  3562  	// It isn't enough to have exit not dominated by any call, for example:
  3563  	// ... some loop
  3564  	// call1    call2
  3565  	//   \       /
  3566  	//     block
  3567  	// ...
  3568  	// block is not dominated by any single call, but we don't have call-free path to it.
  3569  loopLoop:
  3570  	for _, l := range loopnest.loops {
  3571  		found.clear()
  3572  		tovisit := make([]*Block, 0, 8)
  3573  		tovisit = append(tovisit, l.header)
  3574  		for len(tovisit) > 0 {
  3575  			cur := tovisit[len(tovisit)-1]
  3576  			tovisit = tovisit[:len(tovisit)-1]
  3577  			if hasCall[cur.ID] {
  3578  				continue
  3579  			}
  3580  			for _, s := range cur.Succs {
  3581  				nb := s.Block()
  3582  				if nb == l.header {
  3583  					// Found a call-free path around the loop.
  3584  					continue loopLoop
  3585  				}
  3586  				if found.contains(nb.ID) {
  3587  					// Already found via another path.
  3588  					continue
  3589  				}
  3590  				nl := loopnest.b2l[nb.ID]
  3591  				if nl == nil || (nl.depth <= l.depth && nl != l) {
  3592  					// Left the loop.
  3593  					continue
  3594  				}
  3595  				tovisit = append(tovisit, nb)
  3596  				found.add(nb.ID)
  3597  			}
  3598  		}
  3599  		// No call-free path was found.
  3600  		l.containsUnavoidableCall = true
  3601  	}
  3602  }
  3603  
  3604  func (b *Block) containsCall() bool {
  3605  	if b.Kind == BlockDefer {
  3606  		return true
  3607  	}
  3608  	for _, v := range b.Values {
  3609  		if opcodeTable[v.Op].call {
  3610  			return true
  3611  		}
  3612  	}
  3613  	return false
  3614  }
  3615  

View as plain text