Source file src/cmd/compile/internal/ssa/regalloc.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Register allocation.
     6  //
     7  // We use a version of a linear scan register allocator. We treat the
     8  // whole function as a single long basic block and run through
     9  // it using a greedy register allocator. Then all merge edges
    10  // (those targeting a block with len(Preds)>1) are processed to
    11  // shuffle data into the place that the target of the edge expects.
    12  //
    13  // The greedy allocator moves values into registers just before they
    14  // are used, spills registers only when necessary, and spills the
    15  // value whose next use is farthest in the future.
    16  //
    17  // The register allocator requires that a block is not scheduled until
// at least one of its predecessors has been scheduled. The most recent
    19  // such predecessor provides the starting register state for a block.
    20  //
    21  // It also requires that there are no critical edges (critical =
    22  // comes from a block with >1 successor and goes to a block with >1
    23  // predecessor).  This makes it easy to add fixup code on merge edges -
    24  // the source of a merge edge has only one successor, so we can add
    25  // fixup code to the end of that block.
    26  
    27  // Spilling
    28  //
    29  // During the normal course of the allocator, we might throw a still-live
    30  // value out of all registers. When that value is subsequently used, we must
    31  // load it from a slot on the stack. We must also issue an instruction to
    32  // initialize that stack location with a copy of v.
    33  //
    34  // pre-regalloc:
    35  //   (1) v = Op ...
    36  //   (2) x = Op ...
    37  //   (3) ... = Op v ...
    38  //
    39  // post-regalloc:
    40  //   (1) v = Op ...    : AX // computes v, store result in AX
    41  //       s = StoreReg v     // spill v to a stack slot
    42  //   (2) x = Op ...    : AX // some other op uses AX
    43  //       c = LoadReg s : CX // restore v from stack slot
    44  //   (3) ... = Op c ...     // use the restored value
    45  //
    46  // Allocation occurs normally until we reach (3) and we realize we have
    47  // a use of v and it isn't in any register. At that point, we allocate
    48  // a spill (a StoreReg) for v. We can't determine the correct place for
    49  // the spill at this point, so we allocate the spill as blockless initially.
    50  // The restore is then generated to load v back into a register so it can
    51  // be used. Subsequent uses of v will use the restored value c instead.
    52  //
    53  // What remains is the question of where to schedule the spill.
    54  // During allocation, we keep track of the dominator of all restores of v.
    55  // The spill of v must dominate that block. The spill must also be issued at
    56  // a point where v is still in a register.
    57  //
    58  // To find the right place, start at b, the block which dominates all restores.
    59  //  - If b is v.Block, then issue the spill right after v.
    60  //    It is known to be in a register at that point, and dominates any restores.
    61  //  - Otherwise, if v is in a register at the start of b,
    62  //    put the spill of v at the start of b.
    63  //  - Otherwise, set b = immediate dominator of b, and repeat.
    64  //
    65  // Phi values are special, as always. We define two kinds of phis, those
    66  // where the merge happens in a register (a "register" phi) and those where
    67  // the merge happens in a stack location (a "stack" phi).
    68  //
    69  // A register phi must have the phi and all of its inputs allocated to the
    70  // same register. Register phis are spilled similarly to regular ops.
    71  //
    72  // A stack phi must have the phi and all of its inputs allocated to the same
    73  // stack location. Stack phis start out life already spilled - each phi
    74  // input must be a store (using StoreReg) at the end of the corresponding
    75  // predecessor block.
    76  //     b1: y = ... : AX        b2: z = ... : BX
    77  //         y2 = StoreReg y         z2 = StoreReg z
    78  //         goto b3                 goto b3
    79  //     b3: x = phi(y2, z2)
    80  // The stack allocator knows that StoreReg args of stack-allocated phis
    81  // must be allocated to the same stack slot as the phi that uses them.
    82  // x is now a spilled value and a restore must appear before its first use.
    83  
    84  // TODO
    85  
    86  // Use an affinity graph to mark two values which should use the
    87  // same register. This affinity graph will be used to prefer certain
    88  // registers for allocation. This affinity helps eliminate moves that
    89  // are required for phi implementations and helps generate allocations
    90  // for 2-register architectures.
    91  
    92  // Note: regalloc generates a not-quite-SSA output. If we have:
    93  //
    94  //             b1: x = ... : AX
    95  //                 x2 = StoreReg x
    96  //                 ... AX gets reused for something else ...
    97  //                 if ... goto b3 else b4
    98  //
    99  //   b3: x3 = LoadReg x2 : BX       b4: x4 = LoadReg x2 : CX
   100  //       ... use x3 ...                 ... use x4 ...
   101  //
   102  //             b2: ... use x3 ...
   103  //
   104  // If b3 is the primary predecessor of b2, then we use x3 in b2 and
   105  // add a x4:CX->BX copy at the end of b4.
   106  // But the definition of x3 doesn't dominate b2.  We should really
   107  // insert an extra phi at the start of b2 (x5=phi(x3,x4):BX) to keep
   108  // SSA form. For now, we ignore this problem as remaining in strict
   109  // SSA form isn't needed after regalloc. We'll just leave the use
   110  // of x3 not dominated by the definition of x3, and the CX->BX copy
   111  // will have no use (so don't run deadcode after regalloc!).
   112  // TODO: maybe we should introduce these extra phis?
   113  
   114  package ssa
   115  
   116  import (
   117  	"cmd/compile/internal/base"
   118  	"cmd/compile/internal/ir"
   119  	"cmd/compile/internal/types"
   120  	"cmd/internal/src"
   121  	"cmd/internal/sys"
   122  	"cmp"
   123  	"fmt"
   124  	"internal/buildcfg"
   125  	"math"
   126  	"math/bits"
   127  	"slices"
   128  	"unsafe"
   129  )
   130  
// Verbosity thresholds for this pass's debug output; code compares
// f.pass.debug against these (e.g. s.f.pass.debug > regDebug), so
// larger settings include the output of all smaller ones.
const (
	moveSpills = iota
	logSpills
	regDebug
	stackDebug
)
   137  
// distance is a measure of how far into the future values are used.
// distance is measured in units of instructions.
// The three levels are presumably keyed to branch likelihood
// (likely/normal/unlikely edges) — confirm at use sites.
const (
	likelyDistance   = 1
	normalDistance   = 10
	unlikelyDistance = 100
)
   145  
   146  // regalloc performs register allocation on f. It sets f.RegAlloc
   147  // to the resulting allocation.
   148  func regalloc(f *Func) {
   149  	var s regAllocState
   150  	s.init(f)
   151  	s.regalloc(f)
   152  	s.close()
   153  }
   154  
   155  type register uint8
   156  
   157  const noRegister register = 255
   158  
   159  // For bulk initializing
   160  var noRegisters [32]register = [32]register{
   161  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   162  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   163  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   164  	noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister, noRegister,
   165  }
   166  
   167  // A regMask encodes a set of machine registers.
   168  type regMask struct {
   169  	v1, v2 uint64
   170  }
   171  
   172  func (r regMask) intersect(s regMask) regMask {
   173  	return regMask{r.v1 & s.v1, r.v2 & s.v2}
   174  }
   175  
   176  func (r regMask) union(s regMask) regMask {
   177  	return regMask{r.v1 | s.v1, r.v2 | s.v2}
   178  }
   179  
   180  func (r regMask) minus(s regMask) regMask {
   181  	return regMask{r.v1 &^ s.v1, r.v2 &^ s.v2}
   182  }
   183  
   184  func (r regMask) empty() bool {
   185  	return r.v1 == 0 && r.v2 == 0
   186  }
   187  
   188  func regMaskAt(i register) regMask {
   189  	if i < 64 {
   190  		return regMask{v1: 1 << i}
   191  	}
   192  	return regMask{v2: 1 << (i - 64)}
   193  }
   194  
   195  func (r regMask) addReg(i register) regMask {
   196  	if i < 64 {
   197  		return regMask{r.v1 | 1<<i, r.v2}
   198  	}
   199  	return regMask{r.v1, r.v2 | 1<<(i-64)}
   200  }
   201  
   202  func (r regMask) removeReg(i register) regMask {
   203  	if i < 64 {
   204  		return regMask{r.v1 &^ (1 << i), r.v2}
   205  	}
   206  	return regMask{r.v1, r.v2 &^ (1 << (i - 64))}
   207  }
   208  
   209  func (r regMask) hasReg(i register) bool {
   210  	if i < 64 {
   211  		return (r.v1>>i)&1 != 0
   212  	}
   213  	return (r.v2>>(i-64))&1 != 0
   214  }
   215  
   216  func (m regMask) String() string {
   217  	s := ""
   218  	for r := register(0); !m.empty(); r++ {
   219  		if !m.hasReg(r) {
   220  			continue
   221  		}
   222  		m = m.removeReg(r)
   223  		if s != "" {
   224  			s += " "
   225  		}
   226  		s += fmt.Sprintf("r%d", r)
   227  	}
   228  	return s
   229  }
   230  
   231  func (s *regAllocState) RegMaskString(m regMask) string {
   232  	str := ""
   233  	for r := register(0); !m.empty(); r++ {
   234  		if !m.hasReg(r) {
   235  			continue
   236  		}
   237  		m = m.removeReg(r)
   238  		if str != "" {
   239  			str += " "
   240  		}
   241  		str += s.registers[r].String()
   242  	}
   243  	return str
   244  }
   245  
   246  // countRegs returns the number of set bits in the register mask.
   247  func countRegs(r regMask) int {
   248  	return bits.OnesCount64(r.v1) + bits.OnesCount64(r.v2)
   249  }
   250  
   251  // pickReg picks an arbitrary register from the register mask.
   252  func pickReg(r regMask) register {
   253  	if r.empty() {
   254  		panic("can't pick a register from an empty set")
   255  	}
   256  	// pick the lowest one
   257  	if r.v1 != 0 {
   258  		return register(bits.TrailingZeros64(r.v1))
   259  	}
   260  	return register(bits.TrailingZeros64(r.v2) + 64)
   261  }
   262  
// A use records one upcoming use of a value, as a node in a per-value
// linked list ordered by distance from the start of the block.
type use struct {
	// distance from start of the block to a use of a value
	//   dist == 0                 used by first instruction in block
	//   dist == len(b.Values)-1   used by last instruction in block
	//   dist == len(b.Values)     used by block's control value
	//   dist  > len(b.Values)     used by a subsequent block
	dist int32
	pos  src.XPos // source position of the use
	next *use     // linked list of uses of a value in nondecreasing dist order
}
   273  
// A valState records the register allocation state for a (pre-regalloc) value.
type valState struct {
	regs              regMask // the set of registers holding a Value (usually just one)
	uses              *use    // list of uses in this block
	spill             *Value  // spilled copy of the Value (if any)
	restoreMin        int32   // minimum of all restores' blocks' sdom.entry
	restoreMax        int32   // maximum of all restores' blocks' sdom.exit
	needReg           bool    // cached value of v.needRegister()
	rematerializeable bool    // cached value of v.rematerializeable()
}
   284  
// regState records the contents of a single machine register.
type regState struct {
	v *Value // Original (preregalloc) Value stored in this register.
	c *Value // A Value equal to v which is currently in a register.  Might be v or a copy of it.
	// If a register is unused, v==c==nil
}
   290  
// regAllocState holds all the state for one run of the register
// allocator over a single function.
type regAllocState struct {
	f *Func // the function being allocated

	sdom        SparseTree // sparse dominator tree, used for spill placement
	registers   []Register // description of the machine's registers
	numRegs     register   // len(registers)
	SPReg       register   // index of the stack pointer register ("SP")
	SBReg       register   // index of the static base register ("SB")
	GReg        register   // index of the goroutine register ("g"), if the arch has one
	ZeroIntReg  register   // index of the hardwired-zero register ("ZERO"), if the arch has one
	allocatable regMask    // the set of registers we are allowed to allocate

	// live values at the end of each block.  live[b.ID] is a list of value IDs
	// which are live at the end of b, together with a count of how many instructions
	// forward to the next use.
	live [][]liveInfo
	// desired register assignments at the end of each block.
	// Note that this is a static map computed before allocation occurs. Dynamic
	// register desires (from partially completed allocations) will trump
	// this information.
	desired []desiredState

	// current state of each (preregalloc) Value
	values []valState

	// ID of SP, SB values
	sp, sb ID

	// For each Value, map from its value ID back to the
	// preregalloc Value it was derived from.
	orig []*Value

	// current state of each register.
	// Includes only registers in allocatable.
	regs []regState

	// registers that contain values which can't be kicked out
	nospill regMask

	// mask of registers currently in use
	used regMask

	// mask of registers used since the start of the current block
	usedSinceBlockStart regMask

	// mask of registers used in the current instruction
	tmpused regMask

	// current block we're working on
	curBlock *Block

	// cache of use records
	freeUseRecords *use

	// endRegs[blockid] is the register state at the end of each block.
	// encoded as a set of endReg records.
	endRegs [][]endReg

	// startRegs[blockid] is the register state at the start of merge blocks.
	// saved state does not include the state of phi ops in the block.
	startRegs [][]startReg

	// startRegsMask is a mask of the registers in startRegs[curBlock.ID].
	// Registers dropped from startRegsMask are later synchronized back to
	// startRegs by dropping from there as well.
	startRegsMask regMask

	// spillLive[blockid] is the set of live spills at the end of each block
	spillLive [][]ID

	// a set of copies we generated to move things around, and
	// whether it is used in shuffle. Unused copies will be deleted.
	copies map[*Value]bool

	// loop structure of f
	loopnest *loopnest

	// choose a good order in which to visit blocks for allocation purposes.
	visitOrder []*Block

	// blockOrder[b.ID] corresponds to the index of block b in visitOrder.
	blockOrder []int32

	// whether to insert instructions that clobber dead registers at call sites
	doClobber bool

	// For each instruction index in a basic block, the index of the next call
	// at or after that instruction index.
	// If there is no next call, returns maxInt32.
	// nextCall for a call instruction points to itself.
	// (Indexes and results are pre-regalloc.)
	nextCall []int32

	// Index of the instruction we're currently working on.
	// Index is expressed in terms of the pre-regalloc b.Values list.
	curIdx int
}
   387  
// endReg records that register r held value v (cached as c) at the end
// of a block; endRegs[b.ID] is a set of these records.
type endReg struct {
	r register
	v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
	c *Value // cached version of the value
}
   393  
// startReg records that register r must hold value v (cached as c) on
// entry to a merge block; startRegs[b.ID] is a set of these records.
type startReg struct {
	r   register
	v   *Value   // pre-regalloc value needed in this register
	c   *Value   // cached version of the value
	pos src.XPos // source position of use of this register
}
   400  
   401  // freeReg frees up register r. Any current user of r is kicked out.
   402  func (s *regAllocState) freeReg(r register) {
   403  	if !s.allocatable.hasReg(r) && !s.isGReg(r) {
   404  		return
   405  	}
   406  	v := s.regs[r].v
   407  	if v == nil {
   408  		s.f.Fatalf("tried to free an already free register %d\n", r)
   409  	}
   410  
   411  	// Mark r as unused.
   412  	if s.f.pass.debug > regDebug {
   413  		fmt.Printf("freeReg %s (dump %s/%s)\n", &s.registers[r], v, s.regs[r].c)
   414  	}
   415  	s.regs[r] = regState{}
   416  	s.values[v.ID].regs = s.values[v.ID].regs.removeReg(r)
   417  	s.used = s.used.removeReg(r)
   418  }
   419  
   420  // freeRegs frees up all registers listed in m.
   421  func (s *regAllocState) freeRegs(m regMask) {
   422  	for !m.intersect(s.used).empty() {
   423  		s.freeReg(pickReg(m.intersect(s.used)))
   424  	}
   425  }
   426  
   427  // clobberRegs inserts instructions that clobber registers listed in m.
   428  func (s *regAllocState) clobberRegs(m regMask) {
   429  	m = m.intersect(s.allocatable.intersect(s.f.Config.gpRegMask)) // only integer register can contain pointers, only clobber them
   430  	for !m.empty() {
   431  		r := pickReg(m)
   432  		m = m.removeReg(r)
   433  		x := s.curBlock.NewValue0(src.NoXPos, OpClobberReg, types.TypeVoid)
   434  		s.f.setHome(x, &s.registers[r])
   435  	}
   436  }
   437  
   438  // setOrig records that c's original value is the same as
   439  // v's original value.
   440  func (s *regAllocState) setOrig(c *Value, v *Value) {
   441  	if int(c.ID) >= cap(s.orig) {
   442  		x := s.f.Cache.allocValueSlice(int(c.ID) + 1)
   443  		copy(x, s.orig)
   444  		s.f.Cache.freeValueSlice(s.orig)
   445  		s.orig = x
   446  	}
   447  	for int(c.ID) >= len(s.orig) {
   448  		s.orig = append(s.orig, nil)
   449  	}
   450  	if s.orig[c.ID] != nil {
   451  		s.f.Fatalf("orig value set twice %s %s", c, v)
   452  	}
   453  	s.orig[c.ID] = s.orig[v.ID]
   454  }
   455  
   456  // assignReg assigns register r to hold c, a copy of v.
   457  // r must be unused.
   458  func (s *regAllocState) assignReg(r register, v *Value, c *Value) {
   459  	if s.f.pass.debug > regDebug {
   460  		fmt.Printf("assignReg %s %s/%s\n", &s.registers[r], v, c)
   461  	}
   462  	// Allocate v to r.
   463  	s.values[v.ID].regs = s.values[v.ID].regs.addReg(r)
   464  	s.f.setHome(c, &s.registers[r])
   465  
   466  	// Allocate r to v.
   467  	if !s.allocatable.hasReg(r) && !s.isGReg(r) {
   468  		return
   469  	}
   470  	if s.regs[r].v != nil {
   471  		s.f.Fatalf("tried to assign register %d to %s/%s but it is already used by %s", r, v, c, s.regs[r].v)
   472  	}
   473  	s.regs[r] = regState{v, c}
   474  	s.used = s.used.addReg(r)
   475  }
   476  
// allocReg chooses a register from the set of registers in mask.
// If there is no unused register, a Value will be kicked out of
// a register to make room.
func (s *regAllocState) allocReg(mask regMask, v *Value) register {
	if v.OnWasmStack {
		// Values living on the Wasm operand stack take no register.
		return noRegister
	}

	// Restrict to registers we may allocate and that aren't pinned
	// by the current instruction.
	mask = mask.intersect(s.allocatable)
	mask = mask.minus(s.nospill)
	if mask.empty() {
		s.f.Fatalf("no register available for %s", v.LongString())
	}

	// Pick an unused register if one is available.
	if !mask.minus(s.used).empty() {
		r := pickReg(mask.minus(s.used))
		s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r)
		return r
	}

	// Pick a value to spill. Spill the value with the
	// farthest-in-the-future use.
	// TODO: Prefer registers with already spilled Values?
	// TODO: Modify preference using affinity graph.
	// TODO: if a single value is in multiple registers, spill one of them
	// before spilling a value in just a single register.

	// Find a register to spill. We spill the register containing the value
	// whose next use is as far in the future as possible.
	// https://en.wikipedia.org/wiki/Page_replacement_algorithm#The_theoretically_optimal_page_replacement_algorithm
	var r register
	maxuse := int32(-1)
	for t := register(0); t < s.numRegs; t++ {
		if !mask.hasReg(t) {
			continue
		}
		v := s.regs[t].v
		if n := s.values[v.ID].uses.dist; n > maxuse {
			// v's next use is farther in the future than any value
			// we've seen so far. A new best spill candidate.
			r = t
			maxuse = n
		}
	}
	if maxuse == -1 {
		s.f.Fatalf("couldn't find register to spill")
	}

	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
		// TODO(neelance): In theory this should never happen, because all wasm registers are equal.
		// So if there is still a free register, the allocation should have picked that one in the first place instead of
		// trying to kick some other value out. In practice, this case does happen and it breaks the stack optimization.
		s.freeReg(r)
		return r
	}

	// Try to move it around before kicking out, if there is a free register.
	// We generate a Copy and record it. It will be deleted if never used.
	// Only worth doing for a value that is in exactly this one register
	// and can't simply be rematerialized.
	v2 := s.regs[r].v
	m := s.compatRegs(v2.Type).minus(s.used).minus(s.tmpused).removeReg(r)
	if !m.empty() && !s.values[v2.ID].rematerializeable && countRegs(s.values[v2.ID].regs) == 1 {
		s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r)
		r2 := pickReg(m)
		c := s.curBlock.NewValue1(v2.Pos, OpCopy, v2.Type, s.regs[r].c)
		s.copies[c] = false // not yet used by shuffle; deleted later if never used
		if s.f.pass.debug > regDebug {
			fmt.Printf("copy %s to %s : %s\n", v2, c, &s.registers[r2])
		}
		s.setOrig(c, v2)
		s.assignReg(r2, v2, c)
	}

	// If the evicted register isn't used between the start of the block
	// and now then there is no reason to even request it on entry. We can
	// drop from startRegs in that case.
	if !s.usedSinceBlockStart.hasReg(r) {
		if s.startRegsMask.hasReg(r) {
			if s.f.pass.debug > regDebug {
				fmt.Printf("dropped from startRegs: %s\n", &s.registers[r])
			}
			s.startRegsMask = s.startRegsMask.removeReg(r)
		}
	}

	s.freeReg(r)
	s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r)
	return r
}
   566  
   567  // makeSpill returns a Value which represents the spilled value of v.
   568  // b is the block in which the spill is used.
   569  func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
   570  	vi := &s.values[v.ID]
   571  	if vi.spill != nil {
   572  		// Final block not known - keep track of subtree where restores reside.
   573  		vi.restoreMin = min(vi.restoreMin, s.sdom[b.ID].entry)
   574  		vi.restoreMax = max(vi.restoreMax, s.sdom[b.ID].exit)
   575  		return vi.spill
   576  	}
   577  	// Make a spill for v. We don't know where we want
   578  	// to put it yet, so we leave it blockless for now.
   579  	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
   580  	// We also don't know what the spill's arg will be.
   581  	// Leave it argless for now.
   582  	s.setOrig(spill, v)
   583  	vi.spill = spill
   584  	vi.restoreMin = s.sdom[b.ID].entry
   585  	vi.restoreMax = s.sdom[b.ID].exit
   586  	return spill
   587  }
   588  
// allocValToReg allocates v to a register selected from regMask and
// returns the register copy of v. Any previous user is kicked out and spilled
// (if necessary). Load code is added at the current pc. If nospill is set the
// allocated register is marked nospill so the assignment cannot be
// undone until the caller allows it by clearing nospill. Returns a
// *Value which is either v or a copy of v allocated to the chosen register.
func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos src.XPos) *Value {
	if s.f.Config.ctxt.Arch.Arch == sys.ArchWasm && v.rematerializeable() {
		// On wasm, rematerializeable values are recomputed at the point
		// of use and live on the operand stack, not in a register.
		c := v.copyIntoWithXPos(s.curBlock, pos)
		c.OnWasmStack = true
		s.setOrig(c, v)
		return c
	}
	if v.OnWasmStack {
		return v
	}

	vi := &s.values[v.ID]
	pos = pos.WithNotStmt()
	// Check if v is already in a requested register.
	if !mask.intersect(vi.regs).empty() {
		mask = mask.intersect(vi.regs)
		r := pickReg(mask)
		if mask.hasReg(s.SPReg) {
			// Prefer the stack pointer if it is allowed.
			// (Needed because the op might have an Aux symbol
			// that needs SP as its base.)
			r = s.SPReg
		}
		if !s.allocatable.hasReg(r) {
			return v // v is in a fixed register
		}
		if s.regs[r].v != v || s.regs[r].c == nil {
			panic("bad register state")
		}
		if nospill {
			s.nospill = s.nospill.addReg(r)
		}
		s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r)
		return s.regs[r].c
	}

	var r register
	// If nospill is set, the value is used immediately, so it can live on the WebAssembly stack.
	onWasmStack := nospill && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm
	if !onWasmStack {
		// Allocate a register.
		r = s.allocReg(mask, v)
	}

	// Allocate v to the new register.
	var c *Value
	if !vi.regs.empty() {
		// Copy from a register that v is already in.
		var current *Value
		if !vi.regs.minus(s.allocatable).empty() {
			// v is in a fixed register, prefer that
			current = v
		} else {
			r2 := pickReg(vi.regs)
			if s.regs[r2].v != v {
				panic("bad register state")
			}
			current = s.regs[r2].c
			s.usedSinceBlockStart = s.usedSinceBlockStart.addReg(r2)
		}
		c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
	} else if v.rematerializeable() {
		// Rematerialize instead of loading from the spill location.
		c = v.copyIntoWithXPos(s.curBlock, pos)
		// We need to consider its output mask and potentially issue a Copy
		// if there are register mask conflicts.
		// This currently happens for the SIMD package only between GP and FP
		// registers: Intel's vector extension can put an integer value into
		// an FP register, where it is seen as a vector. Example instruction: VPSLL[BWDQ]
		// Because GP and FP masks do not overlap, mask & outputMask == 0
		// detects this situation thoroughly.
		sourceMask := s.regspec(c).outputs[0].regs
		if mask.intersect(sourceMask).empty() && !onWasmStack {
			s.setOrig(c, v)
			s.assignReg(s.allocReg(sourceMask, v), v, c)
			// v.Type for the new OpCopy is likely wrong and it might delay the problem
			// until ssa to asm lowering, which might need the types to generate the right
			// assembly for OpCopy. For Intel's GP to FP move, it happens to be that
			// MOV instruction has such a variant so it happens to be right.
			// But it's unclear for other architectures or situations, and the problem
			// might be exposed when the assembler sees illegal instructions.
			// Right now we still pick v.Type, because at least its size should be correct
			// for the rematerialization case the amd64 SIMD package exposed.
			// TODO: We might need to figure out a way to find the correct type or make
			// the asm lowering use reg info only for OpCopy.
			c = s.curBlock.NewValue1(pos, OpCopy, v.Type, c)
		}
	} else {
		// Load v from its spill location.
		spill := s.makeSpill(v, s.curBlock)
		if s.f.pass.debug > logSpills {
			s.f.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
		}
		c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
	}

	s.setOrig(c, v)

	if onWasmStack {
		c.OnWasmStack = true
		return c
	}

	s.assignReg(r, v, c)
	if c.Op == OpLoadReg && s.isGReg(r) {
		s.f.Fatalf("allocValToReg.OpLoadReg targeting g: " + c.LongString())
	}
	if nospill {
		s.nospill = s.nospill.addReg(r)
	}
	return c
}
   707  
   708  // isLeaf reports whether f performs any calls.
   709  func isLeaf(f *Func) bool {
   710  	for _, b := range f.Blocks {
   711  		for _, v := range b.Values {
   712  			if v.Op.IsCall() && !v.Op.IsTailCall() {
   713  				// tail call is not counted as it does not save the return PC or need a frame
   714  				return false
   715  			}
   716  		}
   717  	}
   718  	return true
   719  }
   720  
   721  // needRegister reports whether v needs a register.
   722  func (v *Value) needRegister() bool {
   723  	return !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags() && !v.Type.IsTuple()
   724  }
   725  
   726  func (s *regAllocState) init(f *Func) {
   727  	s.f = f
   728  	s.f.RegAlloc = s.f.Cache.locs[:0]
   729  	s.registers = f.Config.registers
   730  	if nr := len(s.registers); nr == 0 || nr > int(noRegister) || nr > int(unsafe.Sizeof(regMask{})*8) {
   731  		s.f.Fatalf("bad number of registers: %d", nr)
   732  	} else {
   733  		s.numRegs = register(nr)
   734  	}
   735  	// Locate SP, SB, and g registers.
   736  	s.SPReg = noRegister
   737  	s.SBReg = noRegister
   738  	s.GReg = noRegister
   739  	s.ZeroIntReg = noRegister
   740  	for r := register(0); r < s.numRegs; r++ {
   741  		switch s.registers[r].String() {
   742  		case "SP":
   743  			s.SPReg = r
   744  		case "SB":
   745  			s.SBReg = r
   746  		case "g":
   747  			s.GReg = r
   748  		case "ZERO": // TODO: arch-specific?
   749  			s.ZeroIntReg = r
   750  		}
   751  	}
   752  	// Make sure we found all required registers.
   753  	switch noRegister {
   754  	case s.SPReg:
   755  		s.f.Fatalf("no SP register found")
   756  	case s.SBReg:
   757  		s.f.Fatalf("no SB register found")
   758  	case s.GReg:
   759  		if f.Config.hasGReg {
   760  			s.f.Fatalf("no g register found")
   761  		}
   762  	}
   763  
   764  	// Figure out which registers we're allowed to use.
   765  	s.allocatable = s.f.Config.gpRegMask.union(s.f.Config.fpRegMask).union(s.f.Config.specialRegMask)
   766  	s.allocatable = s.allocatable.removeReg(s.SPReg)
   767  	s.allocatable = s.allocatable.removeReg(s.SBReg)
   768  	if s.f.Config.hasGReg {
   769  		s.allocatable = s.allocatable.removeReg(s.GReg)
   770  	}
   771  	if s.ZeroIntReg != noRegister {
   772  		s.allocatable = s.allocatable.removeReg(s.ZeroIntReg)
   773  	}
   774  	if buildcfg.FramePointerEnabled && s.f.Config.FPReg >= 0 {
   775  		s.allocatable = s.allocatable.removeReg(register(s.f.Config.FPReg))
   776  	}
   777  	if s.f.Config.LinkReg != -1 {
   778  		if isLeaf(f) {
   779  			// Leaf functions don't save/restore the link register.
   780  			s.allocatable = s.allocatable.removeReg(register(s.f.Config.LinkReg))
   781  		}
   782  	}
   783  	if s.f.Config.ctxt.Flag_dynlink {
   784  		switch s.f.Config.arch {
   785  		case "386":
   786  			// nothing to do.
   787  			// Note that for Flag_shared (position independent code)
   788  			// we do need to be careful, but that carefulness is hidden
   789  			// in the rewrite rules so we always have a free register
   790  			// available for global load/stores. See _gen/386.rules (search for Flag_shared).
   791  		case "amd64":
   792  			s.allocatable = s.allocatable.removeReg(15) // R15
   793  		case "arm":
   794  			s.allocatable = s.allocatable.removeReg(9) // R9
   795  		case "arm64":
   796  			// nothing to do
   797  		case "loong64": // R2 (aka TP) already reserved.
   798  			// nothing to do
   799  		case "ppc64", "ppc64le": // R2 already reserved.
   800  			// nothing to do
   801  		case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved.
   802  			// nothing to do
   803  		case "s390x":
   804  			s.allocatable = s.allocatable.removeReg(11) // R11
   805  		default:
   806  			s.f.fe.Fatalf(src.NoXPos, "arch %s not implemented", s.f.Config.arch)
   807  		}
   808  	}
   809  
   810  	// Linear scan register allocation can be influenced by the order in which blocks appear.
   811  	// Decouple the register allocation order from the generated block order.
   812  	// This also creates an opportunity for experiments to find a better order.
   813  	s.visitOrder = layoutRegallocOrder(f)
   814  
   815  	// Compute block order. This array allows us to distinguish forward edges
   816  	// from backward edges and compute how far they go.
   817  	s.blockOrder = make([]int32, f.NumBlocks())
   818  	for i, b := range s.visitOrder {
   819  		s.blockOrder[b.ID] = int32(i)
   820  	}
   821  
   822  	s.regs = make([]regState, s.numRegs)
   823  	nv := f.NumValues()
   824  	if cap(s.f.Cache.regallocValues) >= nv {
   825  		s.f.Cache.regallocValues = s.f.Cache.regallocValues[:nv]
   826  	} else {
   827  		s.f.Cache.regallocValues = make([]valState, nv)
   828  	}
   829  	s.values = s.f.Cache.regallocValues
   830  	s.orig = s.f.Cache.allocValueSlice(nv)
   831  	s.copies = make(map[*Value]bool)
   832  	for _, b := range s.visitOrder {
   833  		for _, v := range b.Values {
   834  			if v.needRegister() {
   835  				s.values[v.ID].needReg = true
   836  				s.values[v.ID].rematerializeable = v.rematerializeable()
   837  				s.orig[v.ID] = v
   838  			}
   839  			// Note: needReg is false for values returning Tuple types.
   840  			// Instead, we mark the corresponding Selects as needReg.
   841  		}
   842  	}
   843  	s.computeLive()
   844  
   845  	s.endRegs = make([][]endReg, f.NumBlocks())
   846  	s.startRegs = make([][]startReg, f.NumBlocks())
   847  	s.spillLive = make([][]ID, f.NumBlocks())
   848  	s.sdom = f.Sdom()
   849  
   850  	// wasm: Mark instructions that can be optimized to have their values only on the WebAssembly stack.
   851  	if f.Config.ctxt.Arch.Arch == sys.ArchWasm {
   852  		canLiveOnStack := f.newSparseSet(f.NumValues())
   853  		defer f.retSparseSet(canLiveOnStack)
   854  		for _, b := range f.Blocks {
   855  			// New block. Clear candidate set.
   856  			canLiveOnStack.clear()
   857  			for _, c := range b.ControlValues() {
   858  				if c.Uses == 1 && !opcodeTable[c.Op].generic {
   859  					canLiveOnStack.add(c.ID)
   860  				}
   861  			}
   862  			// Walking backwards.
   863  			for i := len(b.Values) - 1; i >= 0; i-- {
   864  				v := b.Values[i]
   865  				if canLiveOnStack.contains(v.ID) {
   866  					v.OnWasmStack = true
   867  				} else {
   868  					// Value can not live on stack. Values are not allowed to be reordered, so clear candidate set.
   869  					canLiveOnStack.clear()
   870  				}
   871  				for _, arg := range v.Args {
   872  					// Value can live on the stack if:
   873  					// - it is only used once
   874  					// - it is used in the same basic block
   875  					// - it is not a "mem" value
   876  					// - it is a WebAssembly op
   877  					if arg.Uses == 1 && arg.Block == v.Block && !arg.Type.IsMemory() && !opcodeTable[arg.Op].generic {
   878  						canLiveOnStack.add(arg.ID)
   879  					}
   880  				}
   881  			}
   882  		}
   883  	}
   884  
   885  	// The clobberdeadreg experiment inserts code to clobber dead registers
   886  	// at call sites.
   887  	// Ignore huge functions to avoid doing too much work.
   888  	if base.Flag.ClobberDeadReg && len(s.f.Blocks) <= 10000 {
   889  		// TODO: honor GOCLOBBERDEADHASH, or maybe GOSSAHASH.
   890  		s.doClobber = true
   891  	}
   892  }
   893  
   894  func (s *regAllocState) close() {
   895  	s.f.Cache.freeValueSlice(s.orig)
   896  }
   897  
   898  // Adds a use record for id at distance dist from the start of the block.
   899  // All calls to addUse must happen with nonincreasing dist.
   900  func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) {
   901  	r := s.freeUseRecords
   902  	if r != nil {
   903  		s.freeUseRecords = r.next
   904  	} else {
   905  		r = &use{}
   906  	}
   907  	r.dist = dist
   908  	r.pos = pos
   909  	r.next = s.values[id].uses
   910  	s.values[id].uses = r
   911  	if r.next != nil && dist > r.next.dist {
   912  		s.f.Fatalf("uses added in wrong order")
   913  	}
   914  }
   915  
   916  // advanceUses advances the uses of v's args from the state before v to the state after v.
   917  // Any values which have no more uses are deallocated from registers.
   918  func (s *regAllocState) advanceUses(v *Value) {
   919  	for _, a := range v.Args {
   920  		if !s.values[a.ID].needReg {
   921  			continue
   922  		}
   923  		ai := &s.values[a.ID]
   924  		r := ai.uses
   925  		ai.uses = r.next
   926  		if r.next == nil || (!opcodeTable[a.Op].fixedReg && r.next.dist > s.nextCall[s.curIdx]) {
   927  			// Value is dead (or is not used again until after a call), free all registers that hold it.
   928  			s.freeRegs(ai.regs)
   929  		}
   930  		r.next = s.freeUseRecords
   931  		s.freeUseRecords = r
   932  	}
   933  	s.dropIfUnused(v)
   934  }
   935  
   936  // Drop v from registers if it isn't used again, or its only uses are after
   937  // a call instruction.
   938  func (s *regAllocState) dropIfUnused(v *Value) {
   939  	if !s.values[v.ID].needReg {
   940  		return
   941  	}
   942  	vi := &s.values[v.ID]
   943  	r := vi.uses
   944  	nextCall := s.nextCall[s.curIdx]
   945  	if opcodeTable[v.Op].call {
   946  		if s.curIdx == len(s.nextCall)-1 {
   947  			nextCall = math.MaxInt32
   948  		} else {
   949  			nextCall = s.nextCall[s.curIdx+1]
   950  		}
   951  	}
   952  	if r == nil || (!opcodeTable[v.Op].fixedReg && r.dist > nextCall) {
   953  		s.freeRegs(vi.regs)
   954  	}
   955  }
   956  
   957  // liveAfterCurrentInstruction reports whether v is live after
   958  // the current instruction is completed.  v must be used by the
   959  // current instruction.
   960  func (s *regAllocState) liveAfterCurrentInstruction(v *Value) bool {
   961  	u := s.values[v.ID].uses
   962  	if u == nil {
   963  		panic(fmt.Errorf("u is nil, v = %s, s.values[v.ID] = %v", v.LongString(), s.values[v.ID]))
   964  	}
   965  	d := u.dist
   966  	for u != nil && u.dist == d {
   967  		u = u.next
   968  	}
   969  	return u != nil && u.dist > d
   970  }
   971  
   972  // Sets the state of the registers to that encoded in regs.
   973  func (s *regAllocState) setState(regs []endReg) {
   974  	s.freeRegs(s.used)
   975  	for _, x := range regs {
   976  		s.assignReg(x.r, x.v, x.c)
   977  	}
   978  }
   979  
   980  // compatRegs returns the set of registers which can store a type t.
   981  func (s *regAllocState) compatRegs(t *types.Type) regMask {
   982  	var m regMask
   983  	if t.IsTuple() || t.IsFlags() {
   984  		return regMask{}
   985  	}
   986  	if t.IsSIMD() {
   987  		if t.Size() > 8 {
   988  			return s.f.Config.fpRegMask.intersect(s.allocatable)
   989  		} else {
   990  			// K mask
   991  			return s.f.Config.gpRegMask.intersect(s.allocatable)
   992  		}
   993  	}
   994  	if t.IsFloat() || t == types.TypeInt128 {
   995  		if t.Kind() == types.TFLOAT32 && !s.f.Config.fp32RegMask.empty() {
   996  			m = s.f.Config.fp32RegMask
   997  		} else if t.Kind() == types.TFLOAT64 && !s.f.Config.fp64RegMask.empty() {
   998  			m = s.f.Config.fp64RegMask
   999  		} else {
  1000  			m = s.f.Config.fpRegMask
  1001  		}
  1002  	} else {
  1003  		m = s.f.Config.gpRegMask
  1004  	}
  1005  	return m.intersect(s.allocatable)
  1006  }
  1007  
  1008  // regspec returns the regInfo for operation op.
  1009  func (s *regAllocState) regspec(v *Value) regInfo {
  1010  	op := v.Op
  1011  	if op == OpConvert {
  1012  		// OpConvert is a generic op, so it doesn't have a
  1013  		// register set in the static table. It can use any
  1014  		// allocatable integer register.
  1015  		m := s.allocatable.intersect(s.f.Config.gpRegMask)
  1016  		return regInfo{inputs: []inputInfo{{regs: m}}, outputs: []outputInfo{{regs: m}}}
  1017  	}
  1018  	if op == OpArgIntReg {
  1019  		reg := v.Block.Func.Config.intParamRegs[v.AuxInt8()]
  1020  		return regInfo{outputs: []outputInfo{{regs: regMaskAt(register(reg))}}}
  1021  	}
  1022  	if op == OpArgFloatReg {
  1023  		reg := v.Block.Func.Config.floatParamRegs[v.AuxInt8()]
  1024  		return regInfo{outputs: []outputInfo{{regs: regMaskAt(register(reg))}}}
  1025  	}
  1026  	if op.IsCall() {
  1027  		if ac, ok := v.Aux.(*AuxCall); ok && ac.reg != nil {
  1028  			return *ac.Reg(&opcodeTable[op].reg, s.f.Config)
  1029  		}
  1030  	}
  1031  	if op == OpMakeResult && s.f.OwnAux.reg != nil {
  1032  		return *s.f.OwnAux.ResultReg(s.f.Config)
  1033  	}
  1034  	return opcodeTable[op].reg
  1035  }
  1036  
  1037  func (s *regAllocState) isGReg(r register) bool {
  1038  	return s.f.Config.hasGReg && s.GReg == r
  1039  }
  1040  
// tmpVal is a dummy value used to represent the value being held
// in a temporary register.
var tmpVal Value
  1043  
  1044  func (s *regAllocState) regalloc(f *Func) {
  1045  	regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
  1046  	defer f.retSparseSet(regValLiveSet)
  1047  	var oldSched []*Value
  1048  	var phis []*Value
  1049  	var phiRegs []register
  1050  	var args []*Value
  1051  
  1052  	// Data structure used for computing desired registers.
  1053  	var desired desiredState
  1054  	desiredSecondReg := map[ID][4]register{} // desired register allocation for 2nd part of a tuple
  1055  
  1056  	// Desired registers for inputs & outputs for each instruction in the block.
  1057  	type dentry struct {
  1058  		out [4]register    // desired output registers
  1059  		in  [3][4]register // desired input registers (for inputs 0,1, and 2)
  1060  	}
  1061  	var dinfo []dentry
  1062  
  1063  	if f.Entry != f.Blocks[0] {
  1064  		f.Fatalf("entry block must be first")
  1065  	}
  1066  
  1067  	for _, b := range s.visitOrder {
  1068  		if s.f.pass.debug > regDebug {
  1069  			fmt.Printf("Begin processing block %v\n", b)
  1070  		}
  1071  		s.curBlock = b
  1072  		s.startRegsMask = regMask{}
  1073  		s.usedSinceBlockStart = regMask{}
  1074  		clear(desiredSecondReg)
  1075  
  1076  		// Initialize regValLiveSet and uses fields for this block.
  1077  		// Walk backwards through the block doing liveness analysis.
  1078  		regValLiveSet.clear()
  1079  		if s.live != nil {
  1080  			for _, e := range s.live[b.ID] {
  1081  				s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
  1082  				regValLiveSet.add(e.ID)
  1083  			}
  1084  		}
  1085  		for _, v := range b.ControlValues() {
  1086  			if s.values[v.ID].needReg {
  1087  				s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control values
  1088  				regValLiveSet.add(v.ID)
  1089  			}
  1090  		}
  1091  		if cap(s.nextCall) < len(b.Values) {
  1092  			c := cap(s.nextCall)
  1093  			s.nextCall = append(s.nextCall[:c], make([]int32, len(b.Values)-c)...)
  1094  		} else {
  1095  			s.nextCall = s.nextCall[:len(b.Values)]
  1096  		}
  1097  		var nextCall int32 = math.MaxInt32
  1098  		for i := len(b.Values) - 1; i >= 0; i-- {
  1099  			v := b.Values[i]
  1100  			regValLiveSet.remove(v.ID)
  1101  			if v.Op == OpPhi {
  1102  				// Remove v from the live set, but don't add
  1103  				// any inputs. This is the state the len(b.Preds)>1
  1104  				// case below desires; it wants to process phis specially.
  1105  				s.nextCall[i] = nextCall
  1106  				continue
  1107  			}
  1108  			if opcodeTable[v.Op].call {
  1109  				// Function call clobbers all the registers but SP and SB.
  1110  				regValLiveSet.clear()
  1111  				if s.sp != 0 && s.values[s.sp].uses != nil {
  1112  					regValLiveSet.add(s.sp)
  1113  				}
  1114  				if s.sb != 0 && s.values[s.sb].uses != nil {
  1115  					regValLiveSet.add(s.sb)
  1116  				}
  1117  				nextCall = int32(i)
  1118  			}
  1119  			for _, a := range v.Args {
  1120  				if !s.values[a.ID].needReg {
  1121  					continue
  1122  				}
  1123  				s.addUse(a.ID, int32(i), v.Pos)
  1124  				regValLiveSet.add(a.ID)
  1125  			}
  1126  			s.nextCall[i] = nextCall
  1127  		}
  1128  		if s.f.pass.debug > regDebug {
  1129  			fmt.Printf("use distances for %s\n", b)
  1130  			for i := range s.values {
  1131  				vi := &s.values[i]
  1132  				u := vi.uses
  1133  				if u == nil {
  1134  					continue
  1135  				}
  1136  				fmt.Printf("  v%d:", i)
  1137  				for u != nil {
  1138  					fmt.Printf(" %d", u.dist)
  1139  					u = u.next
  1140  				}
  1141  				fmt.Println()
  1142  			}
  1143  		}
  1144  
  1145  		// Make a copy of the block schedule so we can generate a new one in place.
  1146  		// We make a separate copy for phis and regular values.
  1147  		nphi := 0
  1148  		for _, v := range b.Values {
  1149  			if v.Op != OpPhi {
  1150  				break
  1151  			}
  1152  			nphi++
  1153  		}
  1154  		phis = append(phis[:0], b.Values[:nphi]...)
  1155  		oldSched = append(oldSched[:0], b.Values[nphi:]...)
  1156  		b.Values = b.Values[:0]
  1157  
  1158  		// Initialize start state of block.
  1159  		if b == f.Entry {
  1160  			// Regalloc state is empty to start.
  1161  			if nphi > 0 {
  1162  				f.Fatalf("phis in entry block")
  1163  			}
  1164  		} else if len(b.Preds) == 1 {
  1165  			// Start regalloc state with the end state of the previous block.
  1166  			s.setState(s.endRegs[b.Preds[0].b.ID])
  1167  			if nphi > 0 {
  1168  				f.Fatalf("phis in single-predecessor block")
  1169  			}
  1170  			// Drop any values which are no longer live.
  1171  			// This may happen because at the end of p, a value may be
  1172  			// live but only used by some other successor of p.
  1173  			for r := register(0); r < s.numRegs; r++ {
  1174  				v := s.regs[r].v
  1175  				if v != nil && !regValLiveSet.contains(v.ID) {
  1176  					s.freeReg(r)
  1177  				}
  1178  			}
  1179  		} else {
  1180  			// This is the complicated case. We have more than one predecessor,
  1181  			// which means we may have Phi ops.
  1182  
  1183  			// Start with the final register state of the predecessor with least spill values.
  1184  			// This is based on the following points:
  1185  			// 1, The less spill value indicates that the register pressure of this path is smaller,
  1186  			//    so the values of this block are more likely to be allocated to registers.
  1187  			// 2, Avoid the predecessor that contains the function call, because the predecessor that
  1188  			//    contains the function call usually generates a lot of spills and lose the previous
  1189  			//    allocation state.
  1190  			// TODO: Improve this part. At least the size of endRegs of the predecessor also has
  1191  			// an impact on the code size and compiler speed. But it is not easy to find a simple
  1192  			// and efficient method that combines multiple factors.
  1193  			idx := -1
  1194  			for i, p := range b.Preds {
				// If the predecessor has not been visited yet, skip it because its end state
				// (endRegs and spillLive) has not been computed yet.
  1197  				pb := p.b
  1198  				if s.blockOrder[pb.ID] >= s.blockOrder[b.ID] {
  1199  					continue
  1200  				}
  1201  				if idx == -1 {
  1202  					idx = i
  1203  					continue
  1204  				}
  1205  				pSel := b.Preds[idx].b
  1206  				if len(s.spillLive[pb.ID]) < len(s.spillLive[pSel.ID]) {
  1207  					idx = i
  1208  				} else if len(s.spillLive[pb.ID]) == len(s.spillLive[pSel.ID]) {
  1209  					// Use a bit of likely information. After critical pass, pb and pSel must
  1210  					// be plain blocks, so check edge pb->pb.Preds instead of edge pb->b.
  1211  					// TODO: improve the prediction of the likely predecessor. The following
  1212  					// method is only suitable for the simplest cases. For complex cases,
  1213  					// the prediction may be inaccurate, but this does not affect the
  1214  					// correctness of the program.
  1215  					// According to the layout algorithm, the predecessor with the
  1216  					// smaller blockOrder is the true branch, and the test results show
  1217  					// that it is better to choose the predecessor with a smaller
  1218  					// blockOrder than no choice.
  1219  					if pb.likelyBranch() && !pSel.likelyBranch() || s.blockOrder[pb.ID] < s.blockOrder[pSel.ID] {
  1220  						idx = i
  1221  					}
  1222  				}
  1223  			}
  1224  			if idx < 0 {
  1225  				f.Fatalf("bad visitOrder, no predecessor of %s has been visited before it", b)
  1226  			}
  1227  			p := b.Preds[idx].b
  1228  			s.setState(s.endRegs[p.ID])
  1229  
  1230  			if s.f.pass.debug > regDebug {
  1231  				fmt.Printf("starting merge block %s with end state of %s:\n", b, p)
  1232  				for _, x := range s.endRegs[p.ID] {
  1233  					fmt.Printf("  %s: orig:%s cache:%s\n", &s.registers[x.r], x.v, x.c)
  1234  				}
  1235  			}
  1236  
  1237  			// Decide on registers for phi ops. Use the registers determined
  1238  			// by the primary predecessor if we can.
  1239  			// TODO: pick best of (already processed) predecessors?
  1240  			// Majority vote? Deepest nesting level?
  1241  			phiRegs = phiRegs[:0]
  1242  			var phiUsed regMask
  1243  
  1244  			for _, v := range phis {
  1245  				if !s.values[v.ID].needReg {
  1246  					phiRegs = append(phiRegs, noRegister)
  1247  					continue
  1248  				}
  1249  				a := v.Args[idx]
  1250  				// Some instructions target not-allocatable registers.
  1251  				// They're not suitable for further (phi-function) allocation.
  1252  				m := s.values[a.ID].regs.minus(phiUsed).intersect(s.allocatable)
  1253  				if !m.empty() {
  1254  					r := pickReg(m)
  1255  					phiUsed = phiUsed.addReg(r)
  1256  					phiRegs = append(phiRegs, r)
  1257  				} else {
  1258  					phiRegs = append(phiRegs, noRegister)
  1259  				}
  1260  			}
  1261  
  1262  			// Second pass - deallocate all in-register phi inputs.
  1263  			for i, v := range phis {
  1264  				if !s.values[v.ID].needReg {
  1265  					continue
  1266  				}
  1267  				a := v.Args[idx]
  1268  				r := phiRegs[i]
  1269  				if r == noRegister {
  1270  					continue
  1271  				}
  1272  				if regValLiveSet.contains(a.ID) {
  1273  					// Input value is still live (it is used by something other than Phi).
  1274  					// Try to move it around before kicking out, if there is a free register.
  1275  					// We generate a Copy in the predecessor block and record it. It will be
  1276  					// deleted later if never used.
  1277  					//
  1278  					// Pick a free register. At this point some registers used in the predecessor
  1279  					// block may have been deallocated. Those are the ones used for Phis. Exclude
  1280  					// them (and they are not going to be helpful anyway).
  1281  					m := s.compatRegs(a.Type).minus(s.used).minus(phiUsed)
  1282  					if !m.empty() && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 {
  1283  						r2 := pickReg(m)
  1284  						c := p.NewValue1(a.Pos, OpCopy, a.Type, s.regs[r].c)
  1285  						s.copies[c] = false
  1286  						if s.f.pass.debug > regDebug {
  1287  							fmt.Printf("copy %s to %s : %s\n", a, c, &s.registers[r2])
  1288  						}
  1289  						s.setOrig(c, a)
  1290  						s.assignReg(r2, a, c)
  1291  						s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c})
  1292  					}
  1293  				}
  1294  				s.freeReg(r)
  1295  			}
  1296  
  1297  			// Copy phi ops into new schedule.
  1298  			b.Values = append(b.Values, phis...)
  1299  
  1300  			// Third pass - pick registers for phis whose input
  1301  			// was not in a register in the primary predecessor.
  1302  			for i, v := range phis {
  1303  				if !s.values[v.ID].needReg {
  1304  					continue
  1305  				}
  1306  				if phiRegs[i] != noRegister {
  1307  					continue
  1308  				}
  1309  				m := s.compatRegs(v.Type).minus(phiUsed).minus(s.used)
  1310  				// If one of the other inputs of v is in a register, and the register is available,
  1311  				// select this register, which can save some unnecessary copies.
  1312  				for i, pe := range b.Preds {
  1313  					if i == idx {
  1314  						continue
  1315  					}
  1316  					ri := noRegister
  1317  					for _, er := range s.endRegs[pe.b.ID] {
  1318  						if er.v == s.orig[v.Args[i].ID] {
  1319  							ri = er.r
  1320  							break
  1321  						}
  1322  					}
  1323  					if ri != noRegister && m.hasReg(ri) {
  1324  						m = regMaskAt(ri)
  1325  						break
  1326  					}
  1327  				}
  1328  				if !m.empty() {
  1329  					r := pickReg(m)
  1330  					phiRegs[i] = r
  1331  					phiUsed = phiUsed.addReg(r)
  1332  				}
  1333  			}
  1334  
  1335  			// Set registers for phis. Add phi spill code.
  1336  			for i, v := range phis {
  1337  				if !s.values[v.ID].needReg {
  1338  					continue
  1339  				}
  1340  				r := phiRegs[i]
  1341  				if r == noRegister {
  1342  					// stack-based phi
  1343  					// Spills will be inserted in all the predecessors below.
  1344  					s.values[v.ID].spill = v // v starts life spilled
  1345  					continue
  1346  				}
  1347  				// register-based phi
  1348  				s.assignReg(r, v, v)
  1349  			}
  1350  
  1351  			// Deallocate any values which are no longer live. Phis are excluded.
  1352  			for r := register(0); r < s.numRegs; r++ {
  1353  				if phiUsed.hasReg(r) {
  1354  					continue
  1355  				}
  1356  				v := s.regs[r].v
  1357  				if v != nil && !regValLiveSet.contains(v.ID) {
  1358  					s.freeReg(r)
  1359  				}
  1360  			}
  1361  
  1362  			// Save the starting state for use by merge edges.
  1363  			// We append to a stack allocated variable that we'll
  1364  			// later copy into s.startRegs in one fell swoop, to save
  1365  			// on allocations.
  1366  			regList := make([]startReg, 0, 32)
  1367  			for r := register(0); r < s.numRegs; r++ {
  1368  				v := s.regs[r].v
  1369  				if v == nil {
  1370  					continue
  1371  				}
  1372  				if phiUsed.hasReg(r) {
  1373  					// Skip registers that phis used, we'll handle those
  1374  					// specially during merge edge processing.
  1375  					continue
  1376  				}
  1377  				regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
  1378  				s.startRegsMask = s.startRegsMask.addReg(r)
  1379  			}
  1380  			s.startRegs[b.ID] = make([]startReg, len(regList))
  1381  			copy(s.startRegs[b.ID], regList)
  1382  
  1383  			if s.f.pass.debug > regDebug {
  1384  				fmt.Printf("after phis\n")
  1385  				for _, x := range s.startRegs[b.ID] {
  1386  					fmt.Printf("  %s: v%d\n", &s.registers[x.r], x.v.ID)
  1387  				}
  1388  			}
  1389  		}
  1390  
  1391  		// Drop phis from registers if they immediately go dead.
  1392  		for i, v := range phis {
  1393  			s.curIdx = i
  1394  			s.dropIfUnused(v)
  1395  		}
  1396  
  1397  		// Allocate space to record the desired registers for each value.
  1398  		if l := len(oldSched); cap(dinfo) < l {
  1399  			dinfo = make([]dentry, l)
  1400  		} else {
  1401  			dinfo = dinfo[:l]
  1402  			clear(dinfo)
  1403  		}
  1404  
  1405  		// Load static desired register info at the end of the block.
  1406  		if s.desired != nil {
  1407  			desired.copy(&s.desired[b.ID])
  1408  		}
  1409  
  1410  		// Check actual assigned registers at the start of the next block(s).
  1411  		// Dynamically assigned registers will trump the static
  1412  		// desired registers computed during liveness analysis.
  1413  		// Note that we do this phase after startRegs is set above, so that
  1414  		// we get the right behavior for a block which branches to itself.
  1415  		for _, e := range b.Succs {
  1416  			succ := e.b
  1417  			// TODO: prioritize likely successor?
  1418  			for _, x := range s.startRegs[succ.ID] {
  1419  				desired.add(x.v.ID, x.r)
  1420  			}
  1421  			// Process phi ops in succ.
  1422  			pidx := e.i
  1423  			for _, v := range succ.Values {
  1424  				if v.Op != OpPhi {
  1425  					break
  1426  				}
  1427  				if !s.values[v.ID].needReg {
  1428  					continue
  1429  				}
  1430  				rp, ok := s.f.getHome(v.ID).(*Register)
  1431  				if !ok {
  1432  					// If v is not assigned a register, pick a register assigned to one of v's inputs.
  1433  					// Hopefully v will get assigned that register later.
  1434  					// If the inputs have allocated register information, add it to desired,
  1435  					// which may reduce spill or copy operations when the register is available.
  1436  					for _, a := range v.Args {
  1437  						rp, ok = s.f.getHome(a.ID).(*Register)
  1438  						if ok {
  1439  							break
  1440  						}
  1441  					}
  1442  					if !ok {
  1443  						continue
  1444  					}
  1445  				}
  1446  				desired.add(v.Args[pidx].ID, register(rp.num))
  1447  			}
  1448  		}
  1449  		// Walk values backwards computing desired register info.
  1450  		// See computeDesired for more comments.
  1451  		for i := len(oldSched) - 1; i >= 0; i-- {
  1452  			v := oldSched[i]
  1453  			prefs := desired.remove(v.ID)
  1454  			regspec := s.regspec(v)
  1455  			desired.clobber(regspec.clobbers)
  1456  			for _, j := range regspec.inputs {
  1457  				if countRegs(j.regs) != 1 {
  1458  					continue
  1459  				}
  1460  				desired.clobber(j.regs)
  1461  				desired.add(v.Args[j.idx].ID, pickReg(j.regs))
  1462  			}
  1463  			if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
  1464  				if opcodeTable[v.Op].commutative {
  1465  					desired.addList(v.Args[1].ID, prefs)
  1466  				}
  1467  				desired.addList(v.Args[0].ID, prefs)
  1468  			}
  1469  			// Save desired registers for this value.
  1470  			dinfo[i].out = prefs
  1471  			for j, a := range v.Args {
  1472  				if j >= len(dinfo[i].in) {
  1473  					break
  1474  				}
  1475  				dinfo[i].in[j] = desired.get(a.ID)
  1476  			}
  1477  			if v.Op == OpSelect1 && prefs[0] != noRegister {
  1478  				// Save desired registers of select1 for
  1479  				// use by the tuple generating instruction.
  1480  				desiredSecondReg[v.Args[0].ID] = prefs
  1481  			}
  1482  		}
  1483  
  1484  		// Process all the non-phi values.
  1485  		for idx, v := range oldSched {
  1486  			s.curIdx = nphi + idx
  1487  			tmpReg := noRegister
  1488  			if s.f.pass.debug > regDebug {
  1489  				fmt.Printf("  processing %s\n", v.LongString())
  1490  			}
  1491  			regspec := s.regspec(v)
  1492  			if v.Op == OpPhi {
  1493  				f.Fatalf("phi %s not at start of block", v)
  1494  			}
  1495  			if opcodeTable[v.Op].fixedReg {
  1496  				switch v.Op {
  1497  				case OpSP:
  1498  					s.assignReg(s.SPReg, v, v)
  1499  					s.sp = v.ID
  1500  				case OpSB:
  1501  					s.assignReg(s.SBReg, v, v)
  1502  					s.sb = v.ID
  1503  				case OpARM64ZERO, OpLOONG64ZERO, OpMIPS64ZERO:
  1504  					s.assignReg(s.ZeroIntReg, v, v)
  1505  				case OpAMD64Zero128, OpAMD64Zero256, OpAMD64Zero512:
  1506  					regspec := s.regspec(v)
  1507  					m := regspec.outputs[0].regs
  1508  					if countRegs(m) != 1 {
  1509  						f.Fatalf("bad fixed-register op %s", v)
  1510  					}
  1511  					s.assignReg(pickReg(m), v, v)
  1512  				default:
  1513  					f.Fatalf("unknown fixed-register op %s", v)
  1514  				}
  1515  				b.Values = append(b.Values, v)
  1516  				s.advanceUses(v)
  1517  				continue
  1518  			}
  1519  			if v.Op == OpSelect0 || v.Op == OpSelect1 || v.Op == OpSelectN {
  1520  				if s.values[v.ID].needReg {
  1521  					if v.Op == OpSelectN {
  1522  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocResults)[int(v.AuxInt)].(*Register).num), v, v)
  1523  					} else {
  1524  						var i = 0
  1525  						if v.Op == OpSelect1 {
  1526  							i = 1
  1527  						}
  1528  						s.assignReg(register(s.f.getHome(v.Args[0].ID).(LocPair)[i].(*Register).num), v, v)
  1529  					}
  1530  				}
  1531  				b.Values = append(b.Values, v)
  1532  				s.advanceUses(v)
  1533  				continue
  1534  			}
  1535  			if v.Op == OpGetG && s.f.Config.hasGReg {
  1536  				// use hardware g register
  1537  				if s.regs[s.GReg].v != nil {
  1538  					s.freeReg(s.GReg) // kick out the old value
  1539  				}
  1540  				s.assignReg(s.GReg, v, v)
  1541  				b.Values = append(b.Values, v)
  1542  				s.advanceUses(v)
  1543  				continue
  1544  			}
  1545  			if v.Op == OpArg {
  1546  				// Args are "pre-spilled" values. We don't allocate
  1547  				// any register here. We just set up the spill pointer to
  1548  				// point at itself and any later user will restore it to use it.
  1549  				s.values[v.ID].spill = v
  1550  				b.Values = append(b.Values, v)
  1551  				s.advanceUses(v)
  1552  				continue
  1553  			}
  1554  			if v.Op == OpKeepAlive {
  1555  				// Make sure the argument to v is still live here.
  1556  				s.advanceUses(v)
  1557  				a := v.Args[0]
  1558  				vi := &s.values[a.ID]
  1559  				if vi.regs.empty() && !vi.rematerializeable {
  1560  					// Use the spill location.
  1561  					// This forces later liveness analysis to make the
  1562  					// value live at this point.
  1563  					v.SetArg(0, s.makeSpill(a, b))
  1564  				} else if _, ok := a.Aux.(*ir.Name); ok && vi.rematerializeable {
  1565  					// Rematerializeable value with a *ir.Name. This is the address of
  1566  					// a stack object (e.g. an LEAQ). Keep the object live.
  1567  					// Change it to VarLive, which is what plive expects for locals.
  1568  					v.Op = OpVarLive
  1569  					v.SetArgs1(v.Args[1])
  1570  					v.Aux = a.Aux
  1571  				} else {
  1572  					// In-register and rematerializeable values are already live.
  1573  					// These are typically rematerializeable constants like nil,
  1574  					// or values of a variable that were modified since the last call.
  1575  					v.Op = OpCopy
  1576  					v.SetArgs1(v.Args[1])
  1577  				}
  1578  				b.Values = append(b.Values, v)
  1579  				continue
  1580  			}
  1581  			if len(regspec.inputs) == 0 && len(regspec.outputs) == 0 {
  1582  				// No register allocation required (or none specified yet)
  1583  				if s.doClobber && v.Op.IsCall() {
  1584  					s.clobberRegs(regspec.clobbers)
  1585  				}
  1586  				s.freeRegs(regspec.clobbers)
  1587  				b.Values = append(b.Values, v)
  1588  				s.advanceUses(v)
  1589  				continue
  1590  			}
  1591  
  1592  			if s.values[v.ID].rematerializeable {
  1593  				// Value is rematerializeable, don't issue it here.
  1594  				// It will get issued just before each use (see
  1595  				// allocValueToReg).
  1596  				for _, a := range v.Args {
  1597  					a.Uses--
  1598  				}
  1599  				s.advanceUses(v)
  1600  				continue
  1601  			}
  1602  
  1603  			if s.f.pass.debug > regDebug {
  1604  				fmt.Printf("value %s\n", v.LongString())
  1605  				fmt.Printf("  out:")
  1606  				for _, r := range dinfo[idx].out {
  1607  					if r != noRegister {
  1608  						fmt.Printf(" %s", &s.registers[r])
  1609  					}
  1610  				}
  1611  				fmt.Println()
  1612  				for i := 0; i < len(v.Args) && i < 3; i++ {
  1613  					fmt.Printf("  in%d:", i)
  1614  					for _, r := range dinfo[idx].in[i] {
  1615  						if r != noRegister {
  1616  							fmt.Printf(" %s", &s.registers[r])
  1617  						}
  1618  					}
  1619  					fmt.Println()
  1620  				}
  1621  			}
  1622  
  1623  			// Move arguments to registers.
  1624  			// First, if an arg must be in a specific register and it is already
  1625  			// in place, keep it.
  1626  			args = append(args[:0], make([]*Value, len(v.Args))...)
  1627  			for i, a := range v.Args {
  1628  				if !s.values[a.ID].needReg {
  1629  					args[i] = a
  1630  				}
  1631  			}
  1632  			for _, i := range regspec.inputs {
  1633  				mask := i.regs
  1634  				if countRegs(mask) == 1 && !mask.intersect(s.values[v.Args[i.idx].ID].regs).empty() {
  1635  					args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1636  				}
  1637  			}
  1638  			// Then, if an arg must be in a specific register and that
  1639  			// register is free, allocate that one. Otherwise when processing
  1640  			// another input we may kick a value into the free register, which
  1641  			// then will be kicked out again.
  1642  			// This is a common case for passing-in-register arguments for
  1643  			// function calls.
  1644  			for {
  1645  				freed := false
  1646  				for _, i := range regspec.inputs {
  1647  					if args[i.idx] != nil {
  1648  						continue // already allocated
  1649  					}
  1650  					mask := i.regs
  1651  					if countRegs(mask) == 1 && !mask.minus(s.used).empty() {
  1652  						args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1653  						// If the input is in other registers that will be clobbered by v,
  1654  						// or the input is dead, free the registers. This may make room
  1655  						// for other inputs.
  1656  						oldregs := s.values[v.Args[i.idx].ID].regs
  1657  						if oldregs.minus(regspec.clobbers).empty() || !s.liveAfterCurrentInstruction(v.Args[i.idx]) {
  1658  							s.freeRegs(oldregs.minus(mask).minus(s.nospill))
  1659  							freed = true
  1660  						}
  1661  					}
  1662  				}
  1663  				if !freed {
  1664  					break
  1665  				}
  1666  			}
  1667  			// Last, allocate remaining ones, in an ordering defined
  1668  			// by the register specification (most constrained first).
  1669  			for _, i := range regspec.inputs {
  1670  				if args[i.idx] != nil {
  1671  					continue // already allocated
  1672  				}
  1673  				mask := i.regs
  1674  				if mask.intersect(s.values[v.Args[i.idx].ID].regs).empty() {
  1675  					// Need a new register for the input.
  1676  					mask = mask.intersect(s.allocatable)
  1677  					mask = mask.minus(s.nospill)
					// Use desired register if available.
  1679  					if i.idx < 3 {
  1680  						for _, r := range dinfo[idx].in[i.idx] {
  1681  							if r != noRegister && mask.minus(s.used).hasReg(r) {
  1682  								// Desired register is allowed and unused.
  1683  								mask = regMaskAt(r)
  1684  								break
  1685  							}
  1686  						}
  1687  					}
  1688  					// Avoid registers we're saving for other values.
  1689  					if !mask.minus(desired.avoid).empty() {
  1690  						mask = mask.minus(desired.avoid)
  1691  					}
  1692  				}
  1693  				if mask.intersect(s.values[v.Args[i.idx].ID].regs).hasReg(s.SPReg) {
  1694  					// Prefer SP register. This ensures that local variables
  1695  					// use SP as their base register (instead of a copy of the
  1696  					// stack pointer living in another register). See issue 74836.
  1697  					mask = regMaskAt(s.SPReg)
  1698  				}
  1699  				args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
  1700  			}
  1701  
  1702  			// If the output clobbers the input register, make sure we have
  1703  			// at least two copies of the input register so we don't
  1704  			// have to reload the value from the spill location.
  1705  			if opcodeTable[v.Op].resultInArg0 {
  1706  				var m regMask
  1707  				if !s.liveAfterCurrentInstruction(v.Args[0]) {
  1708  					// arg0 is dead.  We can clobber its register.
  1709  					goto ok
  1710  				}
  1711  				if opcodeTable[v.Op].commutative && !s.liveAfterCurrentInstruction(v.Args[1]) {
  1712  					args[0], args[1] = args[1], args[0]
  1713  					goto ok
  1714  				}
  1715  				if s.values[v.Args[0].ID].rematerializeable {
  1716  					// We can rematerialize the input, don't worry about clobbering it.
  1717  					goto ok
  1718  				}
  1719  				if opcodeTable[v.Op].commutative && s.values[v.Args[1].ID].rematerializeable {
  1720  					args[0], args[1] = args[1], args[0]
  1721  					goto ok
  1722  				}
  1723  				if countRegs(s.values[v.Args[0].ID].regs) >= 2 {
  1724  					// we have at least 2 copies of arg0.  We can afford to clobber one.
  1725  					goto ok
  1726  				}
  1727  				if opcodeTable[v.Op].commutative && countRegs(s.values[v.Args[1].ID].regs) >= 2 {
  1728  					args[0], args[1] = args[1], args[0]
  1729  					goto ok
  1730  				}
  1731  
  1732  				// We can't overwrite arg0 (or arg1, if commutative).  So we
  1733  				// need to make a copy of an input so we have a register we can modify.
  1734  
  1735  				// Possible new registers to copy into.
  1736  				m = s.compatRegs(v.Args[0].Type).minus(s.used)
  1737  				if m.empty() {
  1738  					// No free registers.  In this case we'll just clobber
  1739  					// an input and future uses of that input must use a restore.
  1740  					// TODO(khr): We should really do this like allocReg does it,
  1741  					// spilling the value with the most distant next use.
  1742  					goto ok
  1743  				}
  1744  
  1745  				// Try to move an input to the desired output, if allowed.
  1746  				for _, r := range dinfo[idx].out {
  1747  					if r != noRegister && m.intersect(regspec.outputs[0].regs).hasReg(r) {
  1748  						m = regMaskAt(r)
  1749  						args[0] = s.allocValToReg(v.Args[0], m, true, v.Pos)
  1750  						// Note: we update args[0] so the instruction will
  1751  						// use the register copy we just made.
  1752  						goto ok
  1753  					}
  1754  				}
  1755  				// Try to copy input to its desired location & use its old
  1756  				// location as the result register.
  1757  				for _, r := range dinfo[idx].in[0] {
  1758  					if r != noRegister && m.hasReg(r) {
  1759  						m = regMaskAt(r)
  1760  						c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1761  						s.copies[c] = false
  1762  						// Note: no update to args[0] so the instruction will
  1763  						// use the original copy.
  1764  						goto ok
  1765  					}
  1766  				}
  1767  				if opcodeTable[v.Op].commutative {
  1768  					for _, r := range dinfo[idx].in[1] {
  1769  						if r != noRegister && m.hasReg(r) {
  1770  							m = regMaskAt(r)
  1771  							c := s.allocValToReg(v.Args[1], m, true, v.Pos)
  1772  							s.copies[c] = false
  1773  							args[0], args[1] = args[1], args[0]
  1774  							goto ok
  1775  						}
  1776  					}
  1777  				}
  1778  
  1779  				// Avoid future fixed uses if we can.
  1780  				if !m.minus(desired.avoid).empty() {
  1781  					m = m.minus(desired.avoid)
  1782  				}
  1783  				// Save input 0 to a new register so we can clobber it.
  1784  				c := s.allocValToReg(v.Args[0], m, true, v.Pos)
  1785  				s.copies[c] = false
  1786  
  1787  				// Normally we use the register of the old copy of input 0 as the target.
  1788  				// However, if input 0 is already in its desired register then we use
  1789  				// the register of the new copy instead.
  1790  				if regspec.outputs[0].regs.hasReg(register(s.f.getHome(c.ID).(*Register).num)) {
  1791  					if rp, ok := s.f.getHome(args[0].ID).(*Register); ok {
  1792  						r := register(rp.num)
  1793  						for _, r2 := range dinfo[idx].in[0] {
  1794  							if r == r2 {
  1795  								args[0] = c
  1796  								break
  1797  							}
  1798  						}
  1799  					}
  1800  				}
  1801  			}
  1802  		ok:
  1803  			for i := 0; i < 2; i++ {
  1804  				if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
  1805  					continue
  1806  				}
  1807  				if !s.liveAfterCurrentInstruction(v.Args[i]) {
  1808  					// arg is dead.  We can clobber its register.
  1809  					continue
  1810  				}
  1811  				if s.values[v.Args[i].ID].rematerializeable {
  1812  					// We can rematerialize the input, don't worry about clobbering it.
  1813  					continue
  1814  				}
  1815  				if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
  1816  					// We have at least 2 copies of arg.  We can afford to clobber one.
  1817  					continue
  1818  				}
  1819  				// Possible new registers to copy into.
  1820  				m := s.compatRegs(v.Args[i].Type).minus(s.used)
  1821  				if m.empty() {
  1822  					// No free registers.  In this case we'll just clobber the
  1823  					// input and future uses of that input must use a restore.
  1824  					// TODO(khr): We should really do this like allocReg does it,
  1825  					// spilling the value with the most distant next use.
  1826  					continue
  1827  				}
  1828  				// Copy input to a different register that won't be clobbered.
  1829  				c := s.allocValToReg(v.Args[i], m, true, v.Pos)
  1830  				s.copies[c] = false
  1831  			}
  1832  
  1833  			// Pick a temporary register if needed.
  1834  			// It should be distinct from all the input registers, so we
  1835  			// allocate it after all the input registers, but before
  1836  			// the input registers are freed via advanceUses below.
  1837  			// (Not all instructions need that distinct part, but it is conservative.)
  1838  			// We also ensure it is not any of the single-choice output registers.
  1839  			if opcodeTable[v.Op].needIntTemp {
  1840  				m := s.allocatable.intersect(s.f.Config.gpRegMask)
  1841  				for _, out := range regspec.outputs {
  1842  					if countRegs(out.regs) == 1 {
  1843  						m = m.minus(out.regs)
  1844  					}
  1845  				}
  1846  				if !m.minus(desired.avoid).minus(s.nospill).empty() {
  1847  					m = m.minus(desired.avoid)
  1848  				}
  1849  				tmpReg = s.allocReg(m, &tmpVal)
  1850  				s.nospill = s.nospill.addReg(tmpReg)
  1851  				s.tmpused = s.tmpused.addReg(tmpReg)
  1852  			}
  1853  
  1854  			if regspec.clobbersArg0 {
  1855  				s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
  1856  			}
  1857  			if regspec.clobbersArg1 && !(regspec.clobbersArg0 && s.f.getHome(args[0].ID) == s.f.getHome(args[1].ID)) {
  1858  				s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
  1859  			}
  1860  
  1861  			// Now that all args are in regs, we're ready to issue the value itself.
  1862  			// Before we pick a register for the output value, allow input registers
  1863  			// to be deallocated. We do this here so that the output can use the
  1864  			// same register as a dying input.
  1865  			if !opcodeTable[v.Op].resultNotInArgs {
  1866  				s.tmpused = s.nospill
  1867  				s.nospill = regMask{}
  1868  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1869  			}
  1870  
  1871  			// Dump any registers which will be clobbered
  1872  			if s.doClobber && v.Op.IsCall() {
  1873  				// clobber registers that are marked as clobber in regmask, but
  1874  				// don't clobber inputs.
  1875  				s.clobberRegs(regspec.clobbers.minus(s.tmpused).minus(s.nospill))
  1876  			}
  1877  			s.freeRegs(regspec.clobbers)
  1878  			s.tmpused = s.tmpused.union(regspec.clobbers)
  1879  
  1880  			// Pick registers for outputs.
  1881  			{
  1882  				outRegs := noRegisters // TODO if this is costly, hoist and clear incrementally below.
  1883  				maxOutIdx := -1
  1884  				var used regMask
  1885  				if tmpReg != noRegister {
  1886  					// Ensure output registers are distinct from the temporary register.
  1887  					// (Not all instructions need that distinct part, but it is conservative.)
  1888  					used = used.addReg(tmpReg)
  1889  				}
  1890  				for _, out := range regspec.outputs {
  1891  					if out.regs.empty() {
  1892  						continue
  1893  					}
  1894  					mask := out.regs.intersect(s.allocatable).minus(used)
  1895  					if mask.empty() {
  1896  						s.f.Fatalf("can't find any output register %s", v.LongString())
  1897  					}
  1898  					if opcodeTable[v.Op].resultInArg0 && out.idx == 0 {
  1899  						if !opcodeTable[v.Op].commutative {
  1900  							// Output must use the same register as input 0.
  1901  							r := register(s.f.getHome(args[0].ID).(*Register).num)
  1902  							if !mask.hasReg(r) {
  1903  								s.f.Fatalf("resultInArg0 value's input %v cannot be an output of %s", s.f.getHome(args[0].ID).(*Register), v.LongString())
  1904  							}
  1905  							mask = regMaskAt(r)
  1906  						} else {
  1907  							// Output must use the same register as input 0 or 1.
  1908  							r0 := register(s.f.getHome(args[0].ID).(*Register).num)
  1909  							r1 := register(s.f.getHome(args[1].ID).(*Register).num)
  1910  							// Check r0 and r1 for desired output register.
  1911  							found := false
  1912  							for _, r := range dinfo[idx].out {
  1913  								if (r == r0 || r == r1) && mask.minus(s.used).hasReg(r) {
  1914  									mask = regMaskAt(r)
  1915  									found = true
  1916  									if r == r1 {
  1917  										args[0], args[1] = args[1], args[0]
  1918  									}
  1919  									break
  1920  								}
  1921  							}
  1922  							if !found {
  1923  								// Neither are desired, pick r0.
  1924  								mask = regMaskAt(r0)
  1925  							}
  1926  						}
  1927  					}
  1928  					if out.idx == 0 { // desired registers only apply to the first element of a tuple result
  1929  						for _, r := range dinfo[idx].out {
  1930  							if r != noRegister && mask.minus(s.used).hasReg(r) {
  1931  								// Desired register is allowed and unused.
  1932  								mask = regMaskAt(r)
  1933  								break
  1934  							}
  1935  						}
  1936  					}
  1937  					if out.idx == 1 {
  1938  						if prefs, ok := desiredSecondReg[v.ID]; ok {
  1939  							for _, r := range prefs {
  1940  								if r != noRegister && mask.minus(s.used).hasReg(r) {
  1941  									// Desired register is allowed and unused.
  1942  									mask = regMaskAt(r)
  1943  									break
  1944  								}
  1945  							}
  1946  						}
  1947  					}
  1948  					// Avoid registers we're saving for other values.
  1949  					if !mask.minus(desired.avoid).minus(s.nospill).minus(s.used).empty() {
  1950  						mask = mask.minus(desired.avoid)
  1951  					}
  1952  					r := s.allocReg(mask, v)
  1953  					if out.idx > maxOutIdx {
  1954  						maxOutIdx = out.idx
  1955  					}
  1956  					outRegs[out.idx] = r
  1957  					used = used.addReg(r)
  1958  					s.tmpused = s.tmpused.addReg(r)
  1959  				}
  1960  				// Record register choices
  1961  				if v.Type.IsTuple() {
  1962  					var outLocs LocPair
  1963  					if r := outRegs[0]; r != noRegister {
  1964  						outLocs[0] = &s.registers[r]
  1965  					}
  1966  					if r := outRegs[1]; r != noRegister {
  1967  						outLocs[1] = &s.registers[r]
  1968  					}
  1969  					s.f.setHome(v, outLocs)
  1970  					// Note that subsequent SelectX instructions will do the assignReg calls.
  1971  				} else if v.Type.IsResults() {
  1972  					// preallocate outLocs to the right size, which is maxOutIdx+1
  1973  					outLocs := make(LocResults, maxOutIdx+1, maxOutIdx+1)
  1974  					for i := 0; i <= maxOutIdx; i++ {
  1975  						if r := outRegs[i]; r != noRegister {
  1976  							outLocs[i] = &s.registers[r]
  1977  						}
  1978  					}
  1979  					s.f.setHome(v, outLocs)
  1980  				} else {
  1981  					if r := outRegs[0]; r != noRegister {
  1982  						s.assignReg(r, v, v)
  1983  					}
  1984  				}
  1985  				if tmpReg != noRegister {
  1986  					// Remember the temp register allocation, if any.
  1987  					if s.f.tempRegs == nil {
  1988  						s.f.tempRegs = map[ID]*Register{}
  1989  					}
  1990  					s.f.tempRegs[v.ID] = &s.registers[tmpReg]
  1991  				}
  1992  			}
  1993  
  1994  			// deallocate dead args, if we have not done so
  1995  			if opcodeTable[v.Op].resultNotInArgs {
  1996  				s.nospill = regMask{}
  1997  				s.advanceUses(v) // frees any registers holding args that are no longer live
  1998  			}
  1999  			s.tmpused = regMask{}
  2000  
  2001  			// Issue the Value itself.
  2002  			for i, a := range args {
  2003  				v.SetArg(i, a) // use register version of arguments
  2004  			}
  2005  			b.Values = append(b.Values, v)
  2006  			s.dropIfUnused(v)
  2007  		}
  2008  
  2009  		// Copy the control values - we need this so we can reduce the
  2010  		// uses property of these values later.
  2011  		controls := append(make([]*Value, 0, 2), b.ControlValues()...)
  2012  
  2013  		// Load control values into registers.
  2014  		for i, v := range b.ControlValues() {
  2015  			if !s.values[v.ID].needReg {
  2016  				continue
  2017  			}
  2018  			if s.f.pass.debug > regDebug {
  2019  				fmt.Printf("  processing control %s\n", v.LongString())
  2020  			}
  2021  			// We assume that a control input can be passed in any
  2022  			// type-compatible register. If this turns out not to be true,
  2023  			// we'll need to introduce a regspec for a block's control value.
  2024  			b.ReplaceControl(i, s.allocValToReg(v, s.compatRegs(v.Type), false, b.Pos))
  2025  		}
  2026  
  2027  		// Reduce the uses of the control values once registers have been loaded.
  2028  		// This loop is equivalent to the advanceUses method.
  2029  		for _, v := range controls {
  2030  			vi := &s.values[v.ID]
  2031  			if !vi.needReg {
  2032  				continue
  2033  			}
  2034  			// Remove this use from the uses list.
  2035  			u := vi.uses
  2036  			vi.uses = u.next
  2037  			if u.next == nil {
  2038  				s.freeRegs(vi.regs) // value is dead
  2039  			}
  2040  			u.next = s.freeUseRecords
  2041  			s.freeUseRecords = u
  2042  		}
  2043  
  2044  		// If we are approaching a merge point and we are the primary
  2045  		// predecessor of it, find live values that we use soon after
  2046  		// the merge point and promote them to registers now.
  2047  		if len(b.Succs) == 1 {
  2048  			if s.f.Config.hasGReg && s.regs[s.GReg].v != nil {
  2049  				s.freeReg(s.GReg) // Spill value in G register before any merge.
  2050  			}
  2051  			if s.blockOrder[b.ID] > s.blockOrder[b.Succs[0].b.ID] {
  2052  				// No point if we've already regalloc'd the destination.
  2053  				goto badloop
  2054  			}
  2055  			// For this to be worthwhile, the loop must have no calls in it.
  2056  			top := b.Succs[0].b
  2057  			loop := s.loopnest.b2l[top.ID]
  2058  			if loop == nil || loop.header != top || loop.containsUnavoidableCall {
  2059  				goto badloop
  2060  			}
  2061  
  2062  			// Look into target block, find Phi arguments that come from b.
  2063  			phiArgs := regValLiveSet // reuse this space
  2064  			phiArgs.clear()
  2065  			for _, v := range b.Succs[0].b.Values {
  2066  				if v.Op == OpPhi {
  2067  					phiArgs.add(v.Args[b.Succs[0].i].ID)
  2068  				}
  2069  			}
  2070  
  2071  			// Get mask of all registers that might be used soon in the destination.
  2072  			// We don't want to kick values out of these registers, but we will
  2073  			// kick out an unlikely-to-be-used value for a likely-to-be-used one.
  2074  			var likelyUsedRegs regMask
  2075  			for _, live := range s.live[b.ID] {
  2076  				if live.dist < unlikelyDistance {
  2077  					likelyUsedRegs = likelyUsedRegs.union(s.values[live.ID].regs)
  2078  				}
  2079  			}
  2080  			// Promote values we're going to use soon in the destination to registers.
  2081  			// Note that this iterates nearest-use first, as we sorted
  2082  			// live lists by distance in computeLive.
  2083  			for _, live := range s.live[b.ID] {
  2084  				if live.dist >= unlikelyDistance {
  2085  					// Don't preload anything live after the loop.
  2086  					continue
  2087  				}
  2088  				vid := live.ID
  2089  				vi := &s.values[vid]
  2090  				v := s.orig[vid]
  2091  				if phiArgs.contains(vid) {
  2092  					// A phi argument needs its value in a regular register,
  2093  					// as returned by compatRegs. Being in a fixed register
  2094  					// (e.g. the zero register) or being easily
  2095  					// rematerializeable isn't enough.
  2096  					if !vi.regs.intersect(s.compatRegs(v.Type)).empty() {
  2097  						continue
  2098  					}
  2099  				} else {
  2100  					if !vi.regs.empty() {
  2101  						continue
  2102  					}
  2103  					if vi.rematerializeable {
  2104  						// TODO: maybe we should not skip rematerializeable
  2105  						// values here. One rematerialization outside the loop
  2106  						// is better than N in the loop. But rematerializations
  2107  						// are cheap, and spilling another value may not be.
  2108  						// And we don't want to materialize the zero register
  2109  						// into a different register when it is just the
  2110  						// argument to a store.
  2111  						continue
  2112  					}
  2113  				}
  2114  				if vi.rematerializeable && s.f.Config.ctxt.Arch.Arch == sys.ArchWasm {
  2115  					continue
  2116  				}
  2117  				// Registers we could load v into.
  2118  				// Don't kick out other likely-used values.
  2119  				m := s.compatRegs(v.Type).minus(likelyUsedRegs)
  2120  				if m.empty() {
					// Too many likely-used values to give them all a register.
  2122  					continue
  2123  				}
  2124  
				// Use desired register if available.
  2126  			outerloop:
  2127  				for _, e := range desired.entries {
  2128  					if e.ID != v.ID {
  2129  						continue
  2130  					}
  2131  					for _, r := range e.regs {
  2132  						if r != noRegister && m.hasReg(r) {
  2133  							m = regMaskAt(r)
  2134  							break outerloop
  2135  						}
  2136  					}
  2137  				}
  2138  				if !m.minus(desired.avoid).empty() {
  2139  					m = m.minus(desired.avoid)
  2140  				}
  2141  				s.allocValToReg(v, m, false, b.Pos)
  2142  				likelyUsedRegs = likelyUsedRegs.union(s.values[v.ID].regs)
  2143  			}
  2144  		}
  2145  	badloop:
  2146  		;
  2147  
  2148  		// Save end-of-block register state.
  2149  		// First count how many, this cuts allocations in half.
  2150  		k := 0
  2151  		for r := register(0); r < s.numRegs; r++ {
  2152  			v := s.regs[r].v
  2153  			if v == nil {
  2154  				continue
  2155  			}
  2156  			k++
  2157  		}
  2158  		regList := make([]endReg, 0, k)
  2159  		for r := register(0); r < s.numRegs; r++ {
  2160  			v := s.regs[r].v
  2161  			if v == nil {
  2162  				continue
  2163  			}
  2164  			regList = append(regList, endReg{r, v, s.regs[r].c})
  2165  		}
  2166  		s.endRegs[b.ID] = regList
  2167  
  2168  		if checkEnabled {
  2169  			regValLiveSet.clear()
  2170  			if s.live != nil {
  2171  				for _, x := range s.live[b.ID] {
  2172  					regValLiveSet.add(x.ID)
  2173  				}
  2174  			}
  2175  			for r := register(0); r < s.numRegs; r++ {
  2176  				v := s.regs[r].v
  2177  				if v == nil {
  2178  					continue
  2179  				}
  2180  				if !regValLiveSet.contains(v.ID) {
  2181  					s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
  2182  				}
  2183  			}
  2184  		}
  2185  
  2186  		// If a value is live at the end of the block and
  2187  		// isn't in a register, generate a use for the spill location.
  2188  		// We need to remember this information so that
  2189  		// the liveness analysis in stackalloc is correct.
  2190  		if s.live != nil {
  2191  			for _, e := range s.live[b.ID] {
  2192  				vi := &s.values[e.ID]
  2193  				if !vi.regs.empty() {
  2194  					// in a register, we'll use that source for the merge.
  2195  					continue
  2196  				}
  2197  				if vi.rematerializeable {
  2198  					// we'll rematerialize during the merge.
  2199  					continue
  2200  				}
  2201  				if s.f.pass.debug > regDebug {
  2202  					fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
  2203  				}
  2204  				spill := s.makeSpill(s.orig[e.ID], b)
  2205  				s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
  2206  			}
  2207  
  2208  			// Clear any final uses.
  2209  			// All that is left should be the pseudo-uses added for values which
  2210  			// are live at the end of b.
  2211  			for _, e := range s.live[b.ID] {
  2212  				u := s.values[e.ID].uses
  2213  				if u == nil {
  2214  					f.Fatalf("live at end, no uses v%d", e.ID)
  2215  				}
  2216  				if u.next != nil {
  2217  					f.Fatalf("live at end, too many uses v%d", e.ID)
  2218  				}
  2219  				s.values[e.ID].uses = nil
  2220  				u.next = s.freeUseRecords
  2221  				s.freeUseRecords = u
  2222  			}
  2223  		}
  2224  
  2225  		// allocReg may have dropped registers from startRegsMask that
  2226  		// aren't actually needed in startRegs. Synchronize back to
  2227  		// startRegs.
  2228  		//
  2229  		// This must be done before placing spills, which will look at
  2230  		// startRegs to decide if a block is a valid block for a spill.
  2231  		if c := countRegs(s.startRegsMask); c != len(s.startRegs[b.ID]) {
  2232  			regs := make([]startReg, 0, c)
  2233  			for _, sr := range s.startRegs[b.ID] {
  2234  				if !s.startRegsMask.hasReg(sr.r) {
  2235  					continue
  2236  				}
  2237  				regs = append(regs, sr)
  2238  			}
  2239  			s.startRegs[b.ID] = regs
  2240  		}
  2241  	}
  2242  
  2243  	// Decide where the spills we generated will go.
  2244  	s.placeSpills()
  2245  
  2246  	// Anything that didn't get a register gets a stack location here.
  2247  	// (StoreReg, stack-based phis, inputs, ...)
  2248  	stacklive := stackalloc(s.f, s.spillLive)
  2249  
  2250  	// Fix up all merge edges.
  2251  	s.shuffle(stacklive)
  2252  
  2253  	// Erase any copies we never used.
  2254  	// Also, an unused copy might be the only use of another copy,
  2255  	// so continue erasing until we reach a fixed point.
  2256  	for {
  2257  		progress := false
  2258  		for c, used := range s.copies {
  2259  			if !used && c.Uses == 0 {
  2260  				if s.f.pass.debug > regDebug {
  2261  					fmt.Printf("delete copied value %s\n", c.LongString())
  2262  				}
  2263  				c.resetArgs()
  2264  				f.freeValue(c)
  2265  				delete(s.copies, c)
  2266  				progress = true
  2267  			}
  2268  		}
  2269  		if !progress {
  2270  			break
  2271  		}
  2272  	}
  2273  
  2274  	for _, b := range s.visitOrder {
  2275  		i := 0
  2276  		for _, v := range b.Values {
  2277  			if v.Op == OpInvalid {
  2278  				continue
  2279  			}
  2280  			b.Values[i] = v
  2281  			i++
  2282  		}
  2283  		b.Values = b.Values[:i]
  2284  	}
  2285  }
  2286  
// placeSpills decides which block each pending spill goes in and inserts
// the spills into the block schedules. Each spill is pushed down the
// dominator tree (but never into a deeper loop) toward its restores, so
// it can be placed where the value is known to be in a register at the
// start of the chosen block.
func (s *regAllocState) placeSpills() {
	// mustBeFirst reports whether op must be scheduled at the very
	// start of its block (before any spill we might insert there).
	mustBeFirst := func(op Op) bool {
		return op.isLoweredGetClosurePtr() || op == OpPhi || op == OpArgIntReg || op == OpArgFloatReg
	}

	// Start maps block IDs to the list of spills
	// that go at the start of the block (but after any phis).
	start := map[ID][]*Value{}
	// After maps value IDs to the list of spills
	// that go immediately after that value ID.
	after := map[ID][]*Value{}

	for i := range s.values {
		vi := s.values[i]
		spill := vi.spill
		if spill == nil {
			// Value was never spilled.
			continue
		}
		if spill.Block != nil {
			// Some spills are already fully set up,
			// like OpArgs and stack-based phis.
			continue
		}
		v := s.orig[i]

		// Walk down the dominator tree looking for a good place to
		// put the spill of v.  At the start "best" is the best place
		// we have found so far.
		// TODO: find a way to make this O(1) without arbitrary cutoffs.
		if v == nil {
			panic(fmt.Errorf("nil v, s.orig[%d], vi = %v, spill = %s", i, vi, spill.LongString()))
		}
		best := v.Block
		bestArg := v
		var bestDepth int16
		if s.loopnest != nil && s.loopnest.b2l[best.ID] != nil {
			bestDepth = s.loopnest.b2l[best.ID].depth
		}
		b := best
		const maxSpillSearch = 100
		for i := 0; i < maxSpillSearch; i++ {
			// Find the child of b in the dominator tree which
			// dominates all restores.
			p := b
			b = nil
			// Note: i is also advanced in this inner loop, so the total
			// number of dominator-tree nodes examined (children included)
			// is bounded by maxSpillSearch.
			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
					// c also dominates all restores.  Walk down into c.
					b = c
					break
				}
			}
			if b == nil {
				// Ran out of blocks which dominate all restores.
				break
			}

			var depth int16
			if s.loopnest != nil && s.loopnest.b2l[b.ID] != nil {
				depth = s.loopnest.b2l[b.ID].depth
			}
			if depth > bestDepth {
				// Don't push the spill into a deeper loop.
				continue
			}

			// If v is in a register at the start of b, we can
			// place the spill here (after the phis).
			if len(b.Preds) == 1 {
				// Single predecessor: b's starting registers are the
				// predecessor's ending registers.
				for _, e := range s.endRegs[b.Preds[0].b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			} else {
				for _, e := range s.startRegs[b.ID] {
					if e.v == v {
						// Found a better spot for the spill.
						best = b
						bestArg = e.c
						bestDepth = depth
						break
					}
				}
			}
		}

		// Put the spill in the best block we found.
		spill.Block = best
		spill.AddArg(bestArg)
		if best == v.Block && !mustBeFirst(v.Op) {
			// Place immediately after v.
			after[v.ID] = append(after[v.ID], spill)
		} else {
			// Place at the start of best block.
			start[best.ID] = append(start[best.ID], spill)
		}
	}

	// Insert spill instructions into the block schedules.
	var oldSched []*Value
	for _, b := range s.visitOrder {
		// Spills placed "at the start" actually go after the leading
		// run of must-be-first values (phis, closure ptr, reg args).
		nfirst := 0
		for _, v := range b.Values {
			if !mustBeFirst(v.Op) {
				break
			}
			nfirst++
		}
		oldSched = append(oldSched[:0], b.Values[nfirst:]...)
		b.Values = b.Values[:nfirst]
		b.Values = append(b.Values, start[b.ID]...)
		for _, v := range oldSched {
			b.Values = append(b.Values, v)
			b.Values = append(b.Values, after[v.ID]...)
		}
	}
}
  2409  
  2410  // shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
  2411  func (s *regAllocState) shuffle(stacklive [][]ID) {
  2412  	var e edgeState
  2413  	e.s = s
  2414  	e.cache = map[ID][]*Value{}
  2415  	e.contents = map[Location]contentRecord{}
  2416  	if s.f.pass.debug > regDebug {
  2417  		fmt.Printf("shuffle %s\n", s.f.Name)
  2418  		fmt.Println(s.f.String())
  2419  	}
  2420  
  2421  	for _, b := range s.visitOrder {
  2422  		if len(b.Preds) <= 1 {
  2423  			continue
  2424  		}
  2425  		e.b = b
  2426  		for i, edge := range b.Preds {
  2427  			p := edge.b
  2428  			e.p = p
  2429  			e.setup(i, s.endRegs[p.ID], s.startRegs[b.ID], stacklive[p.ID])
  2430  			e.process()
  2431  		}
  2432  	}
  2433  
  2434  	if s.f.pass.debug > regDebug {
  2435  		fmt.Printf("post shuffle %s\n", s.f.Name)
  2436  		fmt.Println(s.f.String())
  2437  	}
  2438  }
  2439  
// edgeState tracks the state of the shuffle algorithm while it fixes up a
// single merge edge p->b: which values are currently cached in which
// locations, and which locations still need to receive a value.
type edgeState struct {
	s    *regAllocState
	p, b *Block // edge goes from p->b.

	// for each pre-regalloc value, a list of equivalent cached values
	cache      map[ID][]*Value
	cachedVals []ID // (superset of) keys of the above map, for deterministic iteration

	// map from location to the value it contains
	contents map[Location]contentRecord

	// desired destination locations
	destinations []dstRecord
	extra        []dstRecord // destinations deferred by erase; drained back into the queue by process

	usedRegs              regMask // registers currently holding something
	uniqueRegs            regMask // registers holding the only copy of a value
	finalRegs             regMask // registers holding final target
	rematerializeableRegs regMask // registers that hold rematerializeable values
}
  2460  
// contentRecord describes what a location (register or stack slot)
// currently holds during shuffle.
type contentRecord struct {
	vid   ID       // pre-regalloc value
	c     *Value   // cached value
	final bool     // this is a satisfied destination
	pos   src.XPos // source position of use of the value
}
  2467  
// dstRecord describes a location that must end up holding a particular
// pre-regalloc value by the time the merge edge is done.
type dstRecord struct {
	loc    Location // register or stack slot
	vid    ID       // pre-regalloc value it should contain
	splice **Value  // place to store reference to the generating instruction
	pos    src.XPos // source position of use of this location
}
  2474  
// setup initializes the edge state for shuffling along the p->b edge.
// idx is the index of this edge in b.Preds (used to pick phi arguments).
// srcReg describes the register contents at the end of p, dstReg the
// register contents expected at the start of b, and stacklive the IDs of
// the spill slots live at the end of p.
func (e *edgeState) setup(idx int, srcReg []endReg, dstReg []startReg, stacklive []ID) {
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("edge %s->%s\n", e.p, e.b)
	}

	// Clear state.
	clear(e.cache)
	e.cachedVals = e.cachedVals[:0]
	clear(e.contents)
	e.usedRegs = regMask{}
	e.uniqueRegs = regMask{}
	e.finalRegs = regMask{}
	e.rematerializeableRegs = regMask{}

	// Live registers can be sources.
	for _, x := range srcReg {
		e.set(&e.s.registers[x.r], x.v.ID, x.c, false, src.NoXPos) // don't care the position of the source
	}
	// So can all of the spill locations.
	for _, spillID := range stacklive {
		v := e.s.orig[spillID]
		spill := e.s.values[v.ID].spill
		if !e.s.sdom.IsAncestorEq(spill.Block, e.p) {
			// Spills were placed that only dominate the uses found
			// during the first regalloc pass. The edge fixup code
			// can't use a spill location if the spill doesn't dominate
			// the edge.
			// We are guaranteed that if the spill doesn't dominate this edge,
			// then the value is available in a register (because we called
			// makeSpill for every value not in a register at the start
			// of an edge).
			continue
		}
		e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care the position of the source
	}

	// Figure out all the destinations we need.
	dsts := e.destinations[:0]
	for _, x := range dstReg {
		dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
	}
	// Phis need their args to end up in a specific location.
	for _, v := range e.b.Values {
		if v.Op != OpPhi {
			break
		}
		loc := e.s.f.getHome(v.ID)
		if loc == nil {
			// The phi was not assigned a home; nothing to shuffle for it.
			continue
		}
		dsts = append(dsts, dstRecord{loc, v.Args[idx].ID, &v.Args[idx], v.Pos})
	}
	e.destinations = dsts

	if e.s.f.pass.debug > regDebug {
		for _, vid := range e.cachedVals {
			a := e.cache[vid]
			for _, c := range a {
				fmt.Printf("src %s: v%d cache=%s\n", e.s.f.getHome(c.ID), vid, c)
			}
		}
		for _, d := range e.destinations {
			fmt.Printf("dst %s: v%d\n", d.loc, d.vid)
		}
	}
}
  2542  
// process generates code to move all the values to the right destination locations.
// It repeatedly sweeps the destination list; destinations that can't be
// satisfied yet (their location holds the last copy of another value) are
// retried on the next sweep. When no sweep makes progress, the remaining
// moves form cycles, which are broken by copying one entry to a temp register.
func (e *edgeState) process() {
	dsts := e.destinations

	// Process the destinations until they are all satisfied.
	for len(dsts) > 0 {
		i := 0
		for _, d := range dsts {
			if !e.processDest(d.loc, d.vid, d.splice, d.pos) {
				// Failed - save for next iteration.
				dsts[i] = d
				i++
			}
		}
		if i < len(dsts) {
			// Made some progress. Go around again.
			dsts = dsts[:i]

			// Append any extra destinations we generated.
			dsts = append(dsts, e.extra...)
			e.extra = e.extra[:0]
			continue
		}

		// We made no progress. That means that any
		// remaining unsatisfied moves are in simple cycles.
		// For example, A -> B -> C -> D -> A.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C

		// To break the cycle, we pick an unused register, say R,
		// and put a copy of B there.
		//   A ----> B
		//   ^       |
		//   |       |
		//   |       v
		//   D <---- C <---- R=copyofB
		// When we resume the outer loop, the A->B move can now proceed,
		// and eventually the whole cycle completes.

		// Copy any cycle location to a temp register. This duplicates
		// one of the cycle entries, allowing the just duplicated value
		// to be overwritten and the cycle to proceed.
		d := dsts[0]
		loc := d.loc
		vid := e.contents[loc].vid
		c := e.contents[loc].c
		r := e.findRegFor(c.Type)
		if e.s.f.pass.debug > regDebug {
			fmt.Printf("breaking cycle with v%d in %s:%s\n", vid, loc, c)
		}
		e.erase(r)
		pos := d.pos.WithNotStmt()
		// Register sources are duplicated with a copy; stack sources with a load.
		if _, isReg := loc.(*Register); isReg {
			c = e.p.NewValue1(pos, OpCopy, c.Type, c)
		} else {
			c = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
		}
		e.set(r, vid, c, false, pos)
		if c.Op == OpLoadReg && e.s.isGReg(register(r.(*Register).num)) {
			e.s.f.Fatalf("process.OpLoadReg targeting g: " + c.LongString())
		}
	}
}
  2610  
// processDest generates code to put value vid into location loc. Returns true
// if progress was made. It fails (returns false) only when loc holds the last
// surviving copy of some other value and so cannot be clobbered yet.
// splice, if non-nil, is updated to point at the instruction that produces
// the value in loc (used to patch phi arguments).
func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XPos) bool {
	pos = pos.WithNotStmt()
	occupant := e.contents[loc]
	if occupant.vid == vid {
		// Value is already in the correct place.
		e.contents[loc] = contentRecord{vid, occupant.c, true, pos}
		if splice != nil {
			(*splice).Uses--
			*splice = occupant.c
			occupant.c.Uses++
		}
		// Note: if splice==nil then c will appear dead. This is
		// non-SSA formed code, so be careful after this pass not to run
		// deadcode elimination.
		if _, ok := e.s.copies[occupant.c]; ok {
			// The copy at occupant.c was used to avoid spill.
			e.s.copies[occupant.c] = true
		}
		return true
	}

	// Check if we're allowed to clobber the destination location.
	if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
		// We can't overwrite the last copy
		// of a value that needs to survive.
		return false
	}

	// Copy from a source of v, register preferred.
	v := e.s.orig[vid]
	var c *Value
	var src Location
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("moving v%d to %s\n", vid, loc)
		fmt.Printf("sources of v%d:", vid)
	}
	if opcodeTable[v.Op].fixedReg {
		// Fixed-register values are read directly from their home.
		c = v
		src = e.s.f.getHome(v.ID)
	} else {
		// Pick any cached copy, preferring one that lives in a register.
		for _, w := range e.cache[vid] {
			h := e.s.f.getHome(w.ID)
			if e.s.f.pass.debug > regDebug {
				fmt.Printf(" %s:%s", h, w)
			}
			_, isreg := h.(*Register)
			if src == nil || isreg {
				c = w
				src = h
			}
		}
	}
	if e.s.f.pass.debug > regDebug {
		if src != nil {
			fmt.Printf(" [use %s]\n", src)
		} else {
			fmt.Printf(" [no source]\n")
		}
	}
	_, dstReg := loc.(*Register)

	// Pre-clobber destination. This avoids the
	// following situation:
	//   - v is currently held in R0 and stacktmp0.
	//   - We want to copy stacktmp1 to stacktmp0.
	//   - We choose R0 as the temporary register.
	// During the copy, both R0 and stacktmp0 are
	// clobbered, losing both copies of v. Oops!
	// Erasing the destination early means R0 will not
	// be chosen as the temp register, as it will then
	// be the last copy of v.
	e.erase(loc)
	var x *Value
	if c == nil || e.s.values[vid].rematerializeable {
		if !e.s.values[vid].rematerializeable {
			e.s.f.Fatalf("can't find source for %s->%s: %s\n", e.p, e.b, v.LongString())
		}
		if dstReg {
			// We want to rematerialize v into a register that is incompatible with v's op's register mask.
			// Instead of setting the wrong register for the rematerialized v, we should find the right register
			// for it and emit an additional copy to move to the desired register.
			// For #70451.
			if !e.s.regspec(v).outputs[0].regs.hasReg(register(loc.(*Register).num)) {
				_, srcReg := src.(*Register)
				if srcReg {
					// It exists in a valid register already, so just copy it to the desired register
					// If src is a Register, c must have already been set.
					x = e.p.NewValue1(pos, OpCopy, c.Type, c)
				} else {
					// We need a tmp register
					x = v.copyInto(e.p)
					r := e.findRegFor(x.Type)
					e.erase(r)
					// Rematerialize to the tmp register
					e.set(r, vid, x, false, pos)
					// Copy from tmp to the desired register
					x = e.p.NewValue1(pos, OpCopy, x.Type, x)
				}
			} else {
				x = v.copyInto(e.p)
			}
		} else {
			// Rematerialize into stack slot. Need a free
			// register to accomplish this.
			r := e.findRegFor(v.Type)
			e.erase(r)
			x = v.copyIntoWithXPos(e.p, pos)
			e.set(r, vid, x, false, pos)
			// Make sure we spill with the size of the slot, not the
			// size of x (which might be wider due to our dropping
			// of narrowing conversions).
			x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, x)
		}
	} else {
		// Emit move from src to dst.
		_, srcReg := src.(*Register)
		if srcReg {
			if dstReg {
				x = e.p.NewValue1(pos, OpCopy, c.Type, c)
			} else {
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, c)
			}
		} else {
			if dstReg {
				x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
			} else {
				// mem->mem. Use temp register.
				r := e.findRegFor(c.Type)
				e.erase(r)
				t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
				e.set(r, vid, t, false, pos)
				x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
			}
		}
	}
	e.set(loc, vid, x, true, pos)
	if x.Op == OpLoadReg && e.s.isGReg(register(loc.(*Register).num)) {
		e.s.f.Fatalf("processDest.OpLoadReg targeting g: " + x.LongString())
	}
	if splice != nil {
		(*splice).Uses--
		*splice = x
		x.Uses++
	}
	return true
}
  2759  
// set changes the contents of location loc to hold the given value and its cached representative.
// final marks loc as a satisfied destination. The usedRegs/uniqueRegs/
// finalRegs/rematerializeableRegs masks are kept in sync with the cache.
func (e *edgeState) set(loc Location, vid ID, c *Value, final bool, pos src.XPos) {
	e.s.f.setHome(c, loc)
	e.contents[loc] = contentRecord{vid, c, final, pos}
	a := e.cache[vid]
	if len(a) == 0 {
		// First cached copy of vid; remember it for deterministic iteration.
		e.cachedVals = append(e.cachedVals, vid)
	}
	a = append(a, c)
	e.cache[vid] = a
	if r, ok := loc.(*Register); ok {
		if e.usedRegs.hasReg(register(r.num)) {
			e.s.f.Fatalf("%v is already set (v%d/%v)", r, vid, c)
		}
		e.usedRegs = e.usedRegs.addReg(register(r.num))
		if final {
			e.finalRegs = e.finalRegs.addReg(register(r.num))
		}
		if len(a) == 1 {
			// This register holds the only copy of vid.
			e.uniqueRegs = e.uniqueRegs.addReg(register(r.num))
		}
		if len(a) == 2 {
			// A second copy now exists, so the first copy's register
			// (if it is in one) no longer holds the unique copy.
			if t, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
				e.uniqueRegs = e.uniqueRegs.removeReg(register(t.num))
			}
		}
		if e.s.values[vid].rematerializeable {
			e.rematerializeableRegs = e.rematerializeableRegs.addReg(register(r.num))
		}
	}
	if e.s.f.pass.debug > regDebug {
		fmt.Printf("%s\n", c.LongString())
		fmt.Printf("v%d now available in %s:%s\n", vid, loc, c)
	}
}
  2795  
// erase removes any user of loc. If loc held a satisfied (final)
// destination, a new destination record is queued on e.extra so the value
// is eventually moved back into place. The register masks are kept in sync.
func (e *edgeState) erase(loc Location) {
	cr := e.contents[loc]
	if cr.c == nil {
		// Location is already empty.
		return
	}
	vid := cr.vid

	if cr.final {
		// Add a destination to move this value back into place.
		// Make sure it gets added to the tail of the destination queue
		// so we make progress on other moves first.
		e.extra = append(e.extra, dstRecord{loc, cr.vid, nil, cr.pos})
	}

	// Remove c from the list of cached values.
	a := e.cache[vid]
	for i, c := range a {
		if e.s.f.getHome(c.ID) == loc {
			if e.s.f.pass.debug > regDebug {
				fmt.Printf("v%d no longer available in %s:%s\n", vid, loc, c)
			}
			// Swap-remove; order of the cache list does not matter here.
			a[i], a = a[len(a)-1], a[:len(a)-1]
			break
		}
	}
	e.cache[vid] = a

	// Update register masks.
	if r, ok := loc.(*Register); ok {
		e.usedRegs = e.usedRegs.removeReg(register(r.num))
		if cr.final {
			e.finalRegs = e.finalRegs.removeReg(register(r.num))
		}
		e.rematerializeableRegs = e.rematerializeableRegs.removeReg(register(r.num))
	}
	if len(a) == 1 {
		// Exactly one copy of vid remains; if it lives in a register,
		// that register now holds the last copy and must survive.
		if r, ok := e.s.f.getHome(a[0].ID).(*Register); ok {
			e.uniqueRegs = e.uniqueRegs.addReg(register(r.num))
		}
	}
}
  2838  
  2839  // findRegFor finds a register we can use to make a temp copy of type typ.
  2840  func (e *edgeState) findRegFor(typ *types.Type) Location {
  2841  	// Which registers are possibilities.
  2842  	m := e.s.compatRegs(typ)
  2843  
  2844  	// Pick a register. In priority order:
  2845  	// 1) an unused register
  2846  	// 2) a non-unique register not holding a final value
  2847  	// 3) a non-unique register
  2848  	// 4) a register holding a rematerializeable value
  2849  	x := m.minus(e.usedRegs)
  2850  	if !x.empty() {
  2851  		return &e.s.registers[pickReg(x)]
  2852  	}
  2853  	x = m.minus(e.uniqueRegs).minus(e.finalRegs)
  2854  	if !x.empty() {
  2855  		return &e.s.registers[pickReg(x)]
  2856  	}
  2857  	x = m.minus(e.uniqueRegs)
  2858  	if !x.empty() {
  2859  		return &e.s.registers[pickReg(x)]
  2860  	}
  2861  	x = m.intersect(e.rematerializeableRegs)
  2862  	if !x.empty() {
  2863  		return &e.s.registers[pickReg(x)]
  2864  	}
  2865  
  2866  	// No register is available.
  2867  	// Pick a register to spill.
  2868  	for _, vid := range e.cachedVals {
  2869  		a := e.cache[vid]
  2870  		for _, c := range a {
  2871  			if r, ok := e.s.f.getHome(c.ID).(*Register); ok && m.hasReg(register(r.num)) {
  2872  				if !c.rematerializeable() {
  2873  					x := e.p.NewValue1(c.Pos, OpStoreReg, c.Type, c)
  2874  					// Allocate a temp location to spill a register to.
  2875  					t := LocalSlot{N: e.s.f.NewLocal(c.Pos, c.Type), Type: c.Type}
  2876  					// TODO: reuse these slots. They'll need to be erased first.
  2877  					e.set(t, vid, x, false, c.Pos)
  2878  					if e.s.f.pass.debug > regDebug {
  2879  						fmt.Printf("  SPILL %s->%s %s\n", r, t, x.LongString())
  2880  					}
  2881  				}
  2882  				// r will now be overwritten by the caller. At some point
  2883  				// later, the newly saved value will be moved back to its
  2884  				// final destination in processDest.
  2885  				return r
  2886  			}
  2887  		}
  2888  	}
  2889  
  2890  	fmt.Printf("m:%d unique:%d final:%d rematerializable:%d\n", m, e.uniqueRegs, e.finalRegs, e.rematerializeableRegs)
  2891  	for _, vid := range e.cachedVals {
  2892  		a := e.cache[vid]
  2893  		for _, c := range a {
  2894  			fmt.Printf("v%d: %s %s\n", vid, c, e.s.f.getHome(c.ID))
  2895  		}
  2896  	}
  2897  	e.s.f.Fatalf("can't find empty register on edge %s->%s", e.p, e.b)
  2898  	return nil
  2899  }
  2900  
  2901  // rematerializeable reports whether the register allocator should recompute
  2902  // a value instead of spilling/restoring it.
  2903  func (v *Value) rematerializeable() bool {
  2904  	if !opcodeTable[v.Op].rematerializeable {
  2905  		return false
  2906  	}
  2907  	for _, a := range v.Args {
  2908  		// Fixed-register allocations (SP, SB, etc.) are always available.
  2909  		// Any other argument of an opcode makes it not rematerializeable.
  2910  		if !opcodeTable[a.Op].fixedReg {
  2911  			return false
  2912  		}
  2913  	}
  2914  	return true
  2915  }
  2916  
// liveInfo records that a value is live at a block boundary, together with
// the approximate distance (in instructions) to its next use.
type liveInfo struct {
	ID   ID       // ID of value
	dist int32    // # of instructions before next use
	pos  src.XPos // source position of next use
}
  2922  
// computeLive computes a map from block ID to a list of value IDs live at the end
// of that block. Together with the value ID is a count of how many instructions
// to the next use of that value. The resulting map is stored in s.live.
// It also triggers the computation of desired registers (s.desired).
func (s *regAllocState) computeLive() {
	f := s.f
	// single block functions do not have variables that are live across
	// branches
	if len(f.Blocks) == 1 {
		return
	}
	po := f.postorder()
	s.live = make([][]liveInfo, f.NumBlocks())
	s.desired = make([]desiredState, f.NumBlocks())
	s.loopnest = f.loopnest()

	// Scratch buffer for collecting rematerializeable values live across calls.
	rematIDs := make([]ID, 0, 64)

	live := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(live)
	t := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(t)

	s.loopnest.computeUnavoidableCalls()

	// Liveness analysis.
	// This is an adapted version of the algorithm described in chapter 2.4.2
	// of Fabrice Rastello's On Sparse Intermediate Representations.
	//   https://web.archive.org/web/20240417212122if_/https://inria.hal.science/hal-00761555/file/habilitation.pdf#section.50
	//
	// For our implementation, we fall back to a traditional iterative algorithm when we encounter
	// Irreducible CFGs. They are very uncommon in Go code because they need to be constructed with
	// gotos and our current loopnest definition does not compute all the information that
	// we'd need to compute the loop ancestors for that step of the algorithm.
	//
	// Additionally, instead of only considering non-loop successors in the initial DFS phase,
	// we compute the liveout as the union of all successors. This larger liveout set is a subset
	// of the final liveout for the block and adding this information in the DFS phase means that
	// we get slightly more accurate distance information.
	var loopLiveIn map[*loop][]liveInfo
	var numCalls []int32
	// loopLiveIn/numCalls non-nil selects the single-pass (pre-pass) mode;
	// nil means the traditional iterate-to-fixpoint fallback.
	if len(s.loopnest.loops) > 0 && !s.loopnest.hasIrreducible {
		loopLiveIn = make(map[*loop][]liveInfo)
		numCalls = f.Cache.allocInt32Slice(f.NumBlocks())
		defer f.Cache.freeInt32Slice(numCalls)
	}

	for {
		changed := false

		for _, b := range po {
			// Start with known live values at the end of the block.
			live.clear()
			for _, e := range s.live[b.ID] {
				live.set(e.ID, e.dist, e.pos)
			}
			update := false
			// arguments to phi nodes are live at this blocks out
			for _, e := range b.Succs {
				succ := e.b
				delta := branchDistance(b, succ)
				for _, v := range succ.Values {
					if v.Op != OpPhi {
						break
					}
					arg := v.Args[e.i]
					if s.values[arg.ID].needReg && (!live.contains(arg.ID) || delta < live.get(arg.ID)) {
						live.set(arg.ID, delta, v.Pos)
						update = true
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}
			// Add len(b.Values) to adjust from end-of-block distance
			// to beginning-of-block distance.
			c := live.contents()
			for i := range c {
				c[i].val += int32(len(b.Values))
			}

			// Mark control values as live
			for _, c := range b.ControlValues() {
				if s.values[c.ID].needReg {
					live.set(c.ID, int32(len(b.Values)), b.Pos)
				}
			}

			// Walk the block backwards: remove values at their definition,
			// add (or shorten) liveness at each use.
			for i := len(b.Values) - 1; i >= 0; i-- {
				v := b.Values[i]
				live.remove(v.ID)
				if v.Op == OpPhi {
					continue
				}
				if opcodeTable[v.Op].call {
					if numCalls != nil {
						numCalls[b.ID]++
					}
					rematIDs = rematIDs[:0]
					c := live.contents()
					for i := range c {
						c[i].val += unlikelyDistance
						vid := c[i].key
						if s.values[vid].rematerializeable {
							rematIDs = append(rematIDs, vid)
						}
					}
					// We don't spill rematerializeable values, and assuming they
					// are live across a call would only force shuffle to add some
					// (dead) constant rematerialization. Remove them.
					for _, r := range rematIDs {
						live.remove(r)
					}
				}
				for _, a := range v.Args {
					if s.values[a.ID].needReg {
						live.set(a.ID, int32(i), v.Pos)
					}
				}
			}
			// This is a loop header, save our live-in so that
			// we can use it to fill in the loop bodies later
			if loopLiveIn != nil {
				loop := s.loopnest.b2l[b.ID]
				if loop != nil && loop.header.ID == b.ID {
					loopLiveIn[loop] = updateLive(live, nil)
				}
			}
			// For each predecessor of b, expand its list of live-at-end values.
			// invariant: live contains the values live at the start of b
			for _, e := range b.Preds {
				p := e.b
				delta := branchDistance(p, b)

				// Start t off with the previously known live values at the end of p.
				t.clear()
				for _, e := range s.live[p.ID] {
					t.set(e.ID, e.dist, e.pos)
				}
				update := false

				// Add new live values from scanning this block.
				for _, e := range live.contents() {
					d := e.val + delta
					if !t.contains(e.key) || d < t.get(e.key) {
						update = true
						t.set(e.key, d, e.pos)
					}
				}

				if !update {
					continue
				}
				s.live[p.ID] = updateLive(t, s.live[p.ID])
				changed = true
			}
		}

		// Doing a traditional iterative algorithm and have run
		// out of changes
		if !changed {
			break
		}

		// Doing a pre-pass and will fill in the liveness information
		// later
		if loopLiveIn != nil {
			break
		}
		// For loopless code, we have full liveness info after a single
		// iteration
		if len(s.loopnest.loops) == 0 {
			break
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after dfs walk", f, s.live, s.desired)
	}

	// irreducible CFGs and functions without loops are already
	// done, compute their desired registers and return
	if loopLiveIn == nil {
		s.computeDesired()
		return
	}

	// Walk the loopnest from outer to inner, adding
	// all live-in values from their parent. Instead of
	// a recursive algorithm, iterate in depth order.
	// TODO(dmo): can we permute the loopnest? can we avoid this copy?
	loops := slices.Clone(s.loopnest.loops)
	slices.SortFunc(loops, func(a, b *loop) int {
		return cmp.Compare(a.depth, b.depth)
	})

	loopset := f.newSparseMapPos(f.NumValues())
	defer f.retSparseMapPos(loopset)
	for _, loop := range loops {
		if loop.outer == nil {
			continue
		}
		livein := loopLiveIn[loop]
		loopset.clear()
		for _, l := range livein {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range loopLiveIn[loop.outer] {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, l.dist, l.pos)
				update = true
			}
		}
		if update {
			loopLiveIn[loop] = updateLive(loopset, livein)
		}
	}
	// unknownDistance is a sentinel value for when we know a variable
	// is live at any given block, but we do not yet know how far until it's next
	// use. The distance will be computed later.
	const unknownDistance = -1

	// add live-in values of the loop headers to their children.
	// This includes the loop headers themselves, since they can have values
	// that die in the middle of the block and aren't live-out
	for _, b := range po {
		loop := s.loopnest.b2l[b.ID]
		if loop == nil {
			continue
		}
		headerLive := loopLiveIn[loop]
		loopset.clear()
		for _, l := range s.live[b.ID] {
			loopset.set(l.ID, l.dist, l.pos)
		}
		update := false
		for _, l := range headerLive {
			if !loopset.contains(l.ID) {
				loopset.set(l.ID, unknownDistance, src.NoXPos)
				update = true
			}
		}
		if update {
			s.live[b.ID] = updateLive(loopset, s.live[b.ID])
		}
	}
	if f.pass.debug > regDebug {
		s.debugPrintLive("after live loop prop", f, s.live, s.desired)
	}
	// Filling in liveness from loops leaves some blocks with no distance information
	// Run over them and fill in the information from their successors.
	// To stabilize faster, we quit when no block has missing values and we only
	// look at blocks that still have missing values in subsequent iterations
	unfinishedBlocks := f.Cache.allocBlockSlice(len(po))
	defer f.Cache.freeBlockSlice(unfinishedBlocks)
	copy(unfinishedBlocks, po)

	for len(unfinishedBlocks) > 0 {
		n := 0
		for _, b := range unfinishedBlocks {
			live.clear()
			unfinishedValues := 0
			for _, l := range s.live[b.ID] {
				if l.dist == unknownDistance {
					unfinishedValues++
				}
				live.set(l.ID, l.dist, l.pos)
			}
			update := false
			for _, e := range b.Succs {
				succ := e.b
				for _, l := range s.live[succ.ID] {
					// Skip values not live here, and successor entries whose
					// own distance is still unresolved.
					if !live.contains(l.ID) || l.dist == unknownDistance {
						continue
					}
					dist := int32(len(succ.Values)) + l.dist + branchDistance(b, succ)
					dist += numCalls[succ.ID] * unlikelyDistance
					val := live.get(l.ID)
					switch {
					case val == unknownDistance:
						// First concrete distance for this value; the
						// fallthrough records it unconditionally.
						unfinishedValues--
						fallthrough
					case dist < val:
						update = true
						live.set(l.ID, dist, l.pos)
					}
				}
			}
			if update {
				s.live[b.ID] = updateLive(live, s.live[b.ID])
			}
			if unfinishedValues > 0 {
				unfinishedBlocks[n] = b
				n++
			}
		}
		unfinishedBlocks = unfinishedBlocks[:n]
	}

	// Sort live values in order of their nearest next use.
	// Useful for promoting values to registers, nearest use first.
	for _, b := range f.Blocks {
		slices.SortFunc(s.live[b.ID], func(a, b liveInfo) int {
			if a.dist != b.dist {
				return cmp.Compare(a.dist, b.dist)
			}
			return cmp.Compare(a.ID, b.ID) // for deterministic sorting
		})
	}

	s.computeDesired()

	if f.pass.debug > regDebug {
		s.debugPrintLive("final", f, s.live, s.desired)
	}
}
  3239  
// computeDesired computes the desired register information at the end of each block.
// It is essentially a liveness analysis on machine registers instead of SSA values.
// The desired register information is stored in s.desired.
func (s *regAllocState) computeDesired() {

	// TODO: Can we speed this up using the liveness information we have already
	// from computeLive?
	var desired desiredState
	f := s.f
	po := f.postorder()
	maxPreds := 0
	for _, b := range f.Blocks {
		maxPreds = max(maxPreds, len(b.Preds))
	}
	// phiPrefs[i] collects desired registers for phi inputs coming from b.Preds[i].
	phiPrefs := make([]desiredState, maxPreds)
	// Iterate to a fixed point: each pass walks blocks in postorder and
	// propagates desires backwards from each block to its predecessors.
	// When the function has no loops (and no irreducible control flow),
	// a single backward pass suffices; see the termination check below.
	for {
		changed := false
		for _, b := range po {
			// Start from what b's successors have already merged into s.desired[b.ID].
			desired.copy(&s.desired[b.ID])
			for i := range b.Preds {
				phiPrefs[i].reset()
			}
			var headerLoop *loop // loop whose header is b, if any
			if l := s.loopnest.b2l[b.ID]; l != nil && l.header == b {
				headerLoop = l
			}
			// Process non-phis, then phis.
			i := len(b.Values) - 1
			for ; i >= 0; i-- {
				v := b.Values[i]
				if v.Op == OpPhi {
					break
				}
				prefs := desired.remove(v.ID)
				regspec := s.regspec(v)
				// Cancel desired registers if they get clobbered.
				desired.clobber(regspec.clobbers)
				// Update desired registers if there are any fixed register inputs.
				for _, j := range regspec.inputs {
					if countRegs(j.regs) != 1 {
						continue
					}
					desired.clobber(j.regs)
					desired.add(v.Args[j.idx].ID, pickReg(j.regs))
				}
				// Set desired register of input 0 if this is a 2-operand instruction.
				if opcodeTable[v.Op].resultInArg0 || v.Op == OpAMD64ADDQconst || v.Op == OpAMD64ADDLconst || v.Op == OpSelect0 {
					// ADDQconst is added here because we want to treat it as resultInArg0 for
					// the purposes of desired registers, even though it is not an absolute requirement.
					// This is because we'd rather implement it as ADDQ instead of LEAQ.
					// Same for ADDLconst
					// Select0 is added here to propagate the desired register to the tuple-generating instruction.
					if opcodeTable[v.Op].commutative {
						desired.addList(v.Args[1].ID, prefs)
					}
					desired.addList(v.Args[0].ID, prefs)
				}
			}
			// The remaining values (indices 0..i) are phis. Route each phi's
			// desires to the per-predecessor phiPrefs sets, since a phi's
			// argument k only matters along the edge from b.Preds[k].
			for ; i >= 0; i-- {
				v := b.Values[i]
				prefs := desired.remove(v.ID)
				if prefs[0] == noRegister {
					continue
				}
				// Phi desires go to phiPrefs (per-pred), so drop them from desired.avoid.
				// The merge below re-adds any bits other entries still need.
				for _, r := range prefs {
					if r != noRegister {
						desired.avoid = desired.avoid.minus(regMaskAt(r))
					}
				}
				// Propagate v's desired registers back to its args.
				for pidx, a := range v.Args {
					if headerLoop != nil && s.loopnest.b2l[b.Preds[pidx].b.ID] == headerLoop {
						// Skip direct back-edges to avoid pessimizing the loop body to skip a single reg-reg move.
						// We check only the immediate loop; it is simple and empirically sufficient.
						continue
					}
					phiPrefs[pidx].addList(a.ID, prefs)
				}
			}
			// Merge b's state (plus per-edge phi desires) into each predecessor.
			for pidx, e := range b.Preds {
				p := e.b
				changed = s.desired[p.ID].merge(&desired) || changed
				changed = s.desired[p.ID].merge(&phiPrefs[pidx]) || changed
			}
		}
		if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
			break
		}
	}
}
  3333  
  3334  // updateLive updates a given liveInfo slice with the contents of t
  3335  func updateLive(t *sparseMapPos, live []liveInfo) []liveInfo {
  3336  	live = live[:0]
  3337  	if cap(live) < t.size() {
  3338  		live = make([]liveInfo, 0, t.size())
  3339  	}
  3340  	for _, e := range t.contents() {
  3341  		live = append(live, liveInfo{e.key, e.val, e.pos})
  3342  	}
  3343  	return live
  3344  }
  3345  
  3346  // branchDistance calculates the distance between a block and a
  3347  // successor in pseudo-instructions. This is used to indicate
  3348  // likeliness
  3349  func branchDistance(b *Block, s *Block) int32 {
  3350  	if len(b.Succs) == 2 {
  3351  		if b.Succs[0].b == s && b.Likely == BranchLikely ||
  3352  			b.Succs[1].b == s && b.Likely == BranchUnlikely {
  3353  			return likelyDistance
  3354  		}
  3355  		if b.Succs[0].b == s && b.Likely == BranchUnlikely ||
  3356  			b.Succs[1].b == s && b.Likely == BranchLikely {
  3357  			return unlikelyDistance
  3358  		}
  3359  	}
  3360  	// Note: the branch distance must be at least 1 to distinguish the control
  3361  	// value use from the first user in a successor block.
  3362  	return normalDistance
  3363  }
  3364  
  3365  func (s *regAllocState) debugPrintLive(stage string, f *Func, live [][]liveInfo, desired []desiredState) {
  3366  	fmt.Printf("%s: live values at end of each block: %s\n", stage, f.Name)
  3367  	for _, b := range f.Blocks {
  3368  		s.debugPrintLiveBlock(b, live[b.ID], &desired[b.ID])
  3369  	}
  3370  }
  3371  
  3372  func (s *regAllocState) debugPrintLiveBlock(b *Block, live []liveInfo, desired *desiredState) {
  3373  	fmt.Printf("  %s:", b)
  3374  	slices.SortFunc(live, func(a, b liveInfo) int {
  3375  		return cmp.Compare(a.ID, b.ID)
  3376  	})
  3377  	for _, x := range live {
  3378  		fmt.Printf(" v%d(%d)", x.ID, x.dist)
  3379  		for _, e := range desired.entries {
  3380  			if e.ID != x.ID {
  3381  				continue
  3382  			}
  3383  			fmt.Printf("[")
  3384  			first := true
  3385  			for _, r := range e.regs {
  3386  				if r == noRegister {
  3387  					continue
  3388  				}
  3389  				if !first {
  3390  					fmt.Printf(",")
  3391  				}
  3392  				fmt.Print(&s.registers[r])
  3393  				first = false
  3394  			}
  3395  			fmt.Printf("]")
  3396  		}
  3397  	}
  3398  	if avoid := desired.avoid; !avoid.empty() {
  3399  		fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
  3400  	}
  3401  	fmt.Println()
  3402  }
  3403  
// A desiredState represents desired register assignments.
type desiredState struct {
	// Desired assignments will be small, so we just use a list
	// of valueID+registers entries.
	entries []desiredStateEntry
	// Registers that other values want to be in.  This value will
	// contain at least the union of the regs fields of entries, but
	// may contain additional entries for values that were once in
	// this data structure but are no longer.
	avoid regMask
}

// A desiredStateEntry records which registers a single pre-regalloc
// value would like to be assigned to.
type desiredStateEntry struct {
	// (pre-regalloc) value
	ID ID
	// Registers it would like to be in, in priority order.
	// Unused slots are filled with noRegister.
	// For opcodes that return tuples, we track desired registers only
	// for the first element of the tuple (see desiredSecondReg for
	// tracking the desired register for second part of a tuple).
	regs [4]register
}
  3425  
  3426  // get returns a list of desired registers for value vid.
  3427  func (d *desiredState) get(vid ID) [4]register {
  3428  	for _, e := range d.entries {
  3429  		if e.ID == vid {
  3430  			return e.regs
  3431  		}
  3432  	}
  3433  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  3434  }
  3435  
  3436  // add records that we'd like value vid to be in register r.
  3437  func (d *desiredState) add(vid ID, r register) {
  3438  	d.avoid = d.avoid.addReg(r)
  3439  	for i := range d.entries {
  3440  		e := &d.entries[i]
  3441  		if e.ID != vid {
  3442  			continue
  3443  		}
  3444  		if e.regs[0] == r {
  3445  			// Already known and highest priority
  3446  			return
  3447  		}
  3448  		for j := 1; j < len(e.regs); j++ {
  3449  			if e.regs[j] == r {
  3450  				// Move from lower priority to top priority
  3451  				copy(e.regs[1:], e.regs[:j])
  3452  				e.regs[0] = r
  3453  				return
  3454  			}
  3455  		}
  3456  		copy(e.regs[1:], e.regs[:])
  3457  		e.regs[0] = r
  3458  		return
  3459  	}
  3460  	d.entries = append(d.entries, desiredStateEntry{vid, [4]register{r, noRegister, noRegister, noRegister}})
  3461  }
  3462  
  3463  func (d *desiredState) addList(vid ID, regs [4]register) {
  3464  	// regs is in priority order, so iterate in reverse order.
  3465  	for i := len(regs) - 1; i >= 0; i-- {
  3466  		r := regs[i]
  3467  		if r != noRegister {
  3468  			d.add(vid, r)
  3469  		}
  3470  	}
  3471  }
  3472  
  3473  // clobber erases any desired registers in the set m.
  3474  func (d *desiredState) clobber(m regMask) {
  3475  	for i := 0; i < len(d.entries); {
  3476  		e := &d.entries[i]
  3477  		j := 0
  3478  		for _, r := range e.regs {
  3479  			if r != noRegister && !m.hasReg(r) {
  3480  				e.regs[j] = r
  3481  				j++
  3482  			}
  3483  		}
  3484  		if j == 0 {
  3485  			// No more desired registers for this value.
  3486  			d.entries[i] = d.entries[len(d.entries)-1]
  3487  			d.entries = d.entries[:len(d.entries)-1]
  3488  			continue
  3489  		}
  3490  		for ; j < len(e.regs); j++ {
  3491  			e.regs[j] = noRegister
  3492  		}
  3493  		i++
  3494  	}
  3495  	d.avoid = d.avoid.minus(m)
  3496  }
  3497  
  3498  // reset prepares d for re-use.
  3499  func (d *desiredState) reset() {
  3500  	d.entries = d.entries[:0]
  3501  	d.avoid = regMask{}
  3502  }
  3503  
  3504  // copy copies a desired state from another desiredState x.
  3505  func (d *desiredState) copy(x *desiredState) {
  3506  	d.entries = append(d.entries[:0], x.entries...)
  3507  	d.avoid = x.avoid
  3508  }
  3509  
  3510  // remove removes the desired registers for vid and returns them.
  3511  func (d *desiredState) remove(vid ID) [4]register {
  3512  	for i := range d.entries {
  3513  		if d.entries[i].ID == vid {
  3514  			regs := d.entries[i].regs
  3515  			d.entries[i] = d.entries[len(d.entries)-1]
  3516  			d.entries = d.entries[:len(d.entries)-1]
  3517  			return regs
  3518  		}
  3519  	}
  3520  	return [4]register{noRegister, noRegister, noRegister, noRegister}
  3521  }
  3522  
  3523  // merge merges another desired state x into d. Returns whether the set has
  3524  // changed
  3525  func (d *desiredState) merge(x *desiredState) bool {
  3526  	oldAvoid := d.avoid
  3527  	d.avoid = d.avoid.union(x.avoid)
  3528  	// There should only be a few desired registers, so
  3529  	// linear insert is ok.
  3530  	for _, e := range x.entries {
  3531  		d.addList(e.ID, e.regs)
  3532  	}
  3533  	return oldAvoid != d.avoid
  3534  }
  3535  
// computeUnavoidableCalls computes the containsUnavoidableCall fields in the loop nest.
// A loop "contains an unavoidable call" when every path that starts at the loop
// header and either returns to the header or leaves the loop passes through a
// block containing a call.
func (loopnest *loopnest) computeUnavoidableCalls() {
	f := loopnest.f

	// hasCall[b.ID] records whether containsCall reports a call in block b
	// (BlockDefer blocks are treated as containing a call; see containsCall).
	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
	defer f.Cache.freeBoolSlice(hasCall)
	for _, b := range f.Blocks {
		if b.containsCall() {
			hasCall[b.ID] = true
		}
	}
	found := f.Cache.allocSparseSet(f.NumBlocks())
	defer f.Cache.freeSparseSet(found)
	// Run dfs to find path through the loop that avoids all calls.
	// Such path either escapes the loop or returns back to the header.
	// It isn't enough to have exit not dominated by any call, for example:
	// ... some loop
	// call1    call2
	//   \       /
	//     block
	// ...
	// block is not dominated by any single call, but we don't have call-free path to it.
loopLoop:
	for _, l := range loopnest.loops {
		found.clear()
		tovisit := make([]*Block, 0, 8)
		tovisit = append(tovisit, l.header)
		for len(tovisit) > 0 {
			// Pop the most recently pushed block (depth-first order).
			cur := tovisit[len(tovisit)-1]
			tovisit = tovisit[:len(tovisit)-1]
			if hasCall[cur.ID] {
				// Any path through cur hits a call; don't extend it.
				continue
			}
			for _, s := range cur.Succs {
				nb := s.Block()
				if nb == l.header {
					// Found a call-free path around the loop.
					continue loopLoop
				}
				if found.contains(nb.ID) {
					// Already found via another path.
					continue
				}
				nl := loopnest.b2l[nb.ID]
				if nl == nil || (nl.depth <= l.depth && nl != l) {
					// Left the loop (nb belongs to no loop, or to a loop that
					// is not l and not nested inside l).
					continue
				}
				tovisit = append(tovisit, nb)
				found.add(nb.ID)
			}
		}
		// No call-free path was found.
		l.containsUnavoidableCall = true
	}
}
  3592  
  3593  func (b *Block) containsCall() bool {
  3594  	if b.Kind == BlockDefer {
  3595  		return true
  3596  	}
  3597  	for _, v := range b.Values {
  3598  		if opcodeTable[v.Op].call {
  3599  			return true
  3600  		}
  3601  	}
  3602  	return false
  3603  }
  3604  

View as plain text