Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/logopt"
    11  	"cmd/compile/internal/reflectdata"
    12  	"cmd/compile/internal/rttype"
    13  	"cmd/compile/internal/typecheck"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/obj"
    16  	"cmd/internal/obj/s390x"
    17  	"cmd/internal/objabi"
    18  	"cmd/internal/src"
    19  	"encoding/binary"
    20  	"fmt"
    21  	"internal/buildcfg"
    22  	"io"
    23  	"math"
    24  	"math/bits"
    25  	"os"
    26  	"path/filepath"
    27  	"strings"
    28  )
    29  
    30  type deadValueChoice bool
    31  
    32  const (
    33  	leaveDeadValues  deadValueChoice = false
    34  	removeDeadValues                 = true
    35  
    36  	repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
    37  	repMoveThreshold = 1408 // size beyond which we use REP MOVS for copying
    38  )
    39  
     40  // applyRewrite repeatedly applies the block rewriter rb and the value rewriter rv to f.
         // The deadcode parameter indicates whether the rewriter should try to remove any values that become dead.
    41  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    42  	// repeat rewrites until we find no more rewrites
    43  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    44  	pendingLines.clear()
    45  	debug := f.pass.debug
    46  	if debug > 1 {
    47  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    48  	}
     49  	// If the number of rewrite iterations reaches itersLimit, we turn on
     50  	// cycle detection at that point. Instead of a fixed limit, size the
     51  	// limit according to the function's size to allow for cases such as
     52  	// the one in issue #66773.
    53  	itersLimit := f.NumBlocks()
    54  	if itersLimit < 20 {
    55  		itersLimit = 20
    56  	}
    57  	var iters int
    58  	var states map[string]bool
    59  	for {
    60  		if debug > 1 {
    61  			fmt.Printf("%s: iter %d\n", f.pass.name, iters)
    62  		}
    63  		change := false
    64  		deadChange := false
    65  		for _, b := range f.Blocks {
    66  			var b0 *Block
    67  			if debug > 1 {
    68  				fmt.Printf("%s: start block\n", f.pass.name)
    69  				b0 = new(Block)
    70  				*b0 = *b
    71  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    72  			}
    73  			for i, c := range b.ControlValues() {
    74  				for c.Op == OpCopy {
    75  					c = c.Args[0]
    76  					b.ReplaceControl(i, c)
    77  				}
    78  			}
    79  			if rb(b) {
    80  				change = true
    81  				if debug > 1 {
    82  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    83  				}
    84  			}
    85  			for j, v := range b.Values {
    86  				if debug > 1 {
    87  					fmt.Printf("%s: consider %v\n", f.pass.name, v.LongString())
    88  				}
    89  				var v0 *Value
    90  				if debug > 1 {
    91  					v0 = new(Value)
    92  					*v0 = *v
    93  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    94  				}
    95  				if v.Uses == 0 && v.removeable() {
    96  					if v.Op != OpInvalid && deadcode == removeDeadValues {
     97  						// Reset any value that is now unused, so that we decrement
     98  						// the use counts of all of its arguments.
    99  						// Not quite a deadcode pass, because it does not handle cycles.
   100  						// But it should help Uses==1 rules to fire.
   101  						v.reset(OpInvalid)
   102  						deadChange = true
   103  					}
   104  					// No point rewriting values which aren't used.
   105  					continue
   106  				}
   107  
   108  				vchange := phielimValue(v)
   109  				if vchange && debug > 1 {
   110  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   111  				}
   112  
   113  				// Eliminate copy inputs.
   114  				// If any copy input becomes unused, mark it
   115  				// as invalid and discard its argument. Repeat
   116  				// recursively on the discarded argument.
   117  				// This phase helps remove phantom "dead copy" uses
   118  				// of a value so that a x.Uses==1 rule condition
   119  				// fires reliably.
   120  				for i, a := range v.Args {
   121  					if a.Op != OpCopy {
   122  						continue
   123  					}
   124  					aa := copySource(a)
   125  					v.SetArg(i, aa)
   126  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   127  					// to hold it.  The first candidate is the value that will replace a (aa),
   128  					// if it shares the same block and line and is eligible.
   129  					// The second option is v, which has a as an input.  Because aa is earlier in
   130  					// the data flow, it is the better choice.
   131  					if a.Pos.IsStmt() == src.PosIsStmt {
   132  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   133  							aa.Pos = aa.Pos.WithIsStmt()
   134  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   135  							v.Pos = v.Pos.WithIsStmt()
   136  						} else {
   137  							// Record the lost line and look for a new home after all rewrites are complete.
   138  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   139  							// line to appear in more than one block, but only one block is stored, so if both end
   140  							// up here, then one will be lost.
   141  							pendingLines.set(a.Pos, int32(a.Block.ID))
   142  						}
   143  						a.Pos = a.Pos.WithNotStmt()
   144  					}
   145  					vchange = true
   146  					for a.Uses == 0 {
   147  						b := a.Args[0]
   148  						a.reset(OpInvalid)
   149  						a = b
   150  					}
   151  				}
   152  				if vchange && debug > 1 {
   153  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   154  				}
   155  
   156  				// apply rewrite function
   157  				if rv(v) {
   158  					vchange = true
   159  					// If value changed to a poor choice for a statement boundary, move the boundary
   160  					if v.Pos.IsStmt() == src.PosIsStmt {
   161  						if k := nextGoodStatementIndex(v, j, b); k != j {
   162  							v.Pos = v.Pos.WithNotStmt()
   163  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   164  						}
   165  					}
   166  				}
   167  
   168  				change = change || vchange
   169  				if vchange && debug > 1 {
   170  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   171  				}
   172  			}
   173  		}
   174  		if !change && !deadChange {
   175  			break
   176  		}
   177  		iters++
   178  		if (iters > itersLimit || debug >= 2) && change {
   179  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   180  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   181  			// and the maximum value encountered during make.bash is 12.
   182  			// Start checking for cycles. (This is too expensive to do routinely.)
   183  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   184  			if states == nil {
   185  				states = make(map[string]bool)
   186  			}
   187  			h := f.rewriteHash()
   188  			if _, ok := states[h]; ok {
   189  				// We've found a cycle.
   190  				// To diagnose it, set debug to 2 and start again,
   191  				// so that we'll print all rules applied until we complete another cycle.
   192  				// If debug is already >= 2, we've already done that, so it's time to crash.
   193  				if debug < 2 {
   194  					debug = 2
   195  					states = make(map[string]bool)
   196  				} else {
   197  					f.Fatalf("rewrite cycle detected")
   198  				}
   199  			}
   200  			states[h] = true
   201  		}
   202  	}
   203  	// remove clobbered values
   204  	for _, b := range f.Blocks {
   205  		j := 0
   206  		for i, v := range b.Values {
   207  			vl := v.Pos
   208  			if v.Op == OpInvalid {
   209  				if v.Pos.IsStmt() == src.PosIsStmt {
   210  					pendingLines.set(vl, int32(b.ID))
   211  				}
   212  				f.freeValue(v)
   213  				continue
   214  			}
   215  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
   216  				if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
   217  					pendingLines.remove(vl)
   218  					v.Pos = v.Pos.WithIsStmt()
   219  				}
   220  			}
   221  			if i != j {
   222  				b.Values[j] = v
   223  			}
   224  			j++
   225  		}
   226  		if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
   227  			b.Pos = b.Pos.WithIsStmt()
   228  			pendingLines.remove(b.Pos)
   229  		}
   230  		b.truncateValues(j)
   231  	}
   232  }
   233  
   234  // Common functions called from rewriting rules
   235  
   236  func is64BitFloat(t *types.Type) bool {
   237  	return t.Size() == 8 && t.IsFloat()
   238  }
   239  
   240  func is32BitFloat(t *types.Type) bool {
   241  	return t.Size() == 4 && t.IsFloat()
   242  }
   243  
   244  func is64BitInt(t *types.Type) bool {
   245  	return t.Size() == 8 && t.IsInteger()
   246  }
   247  
   248  func is32BitInt(t *types.Type) bool {
   249  	return t.Size() == 4 && t.IsInteger()
   250  }
   251  
   252  func is16BitInt(t *types.Type) bool {
   253  	return t.Size() == 2 && t.IsInteger()
   254  }
   255  
   256  func is8BitInt(t *types.Type) bool {
   257  	return t.Size() == 1 && t.IsInteger()
   258  }
   259  
   260  func isPtr(t *types.Type) bool {
   261  	return t.IsPtrShaped()
   262  }
   263  
   264  func copyCompatibleType(t1, t2 *types.Type) bool {
   265  	if t1.Size() != t2.Size() {
   266  		return false
   267  	}
   268  	if t1.IsInteger() {
   269  		return t2.IsInteger()
   270  	}
   271  	if isPtr(t1) {
   272  		return isPtr(t2)
   273  	}
   274  	return t1.Compare(t2) == types.CMPeq
   275  }
   276  
   277  // mergeSym merges two symbolic offsets. There is no real merging of
   278  // offsets, we just pick the non-nil one.
   279  func mergeSym(x, y Sym) Sym {
   280  	if x == nil {
   281  		return y
   282  	}
   283  	if y == nil {
   284  		return x
   285  	}
   286  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   287  }
   288  
   289  func canMergeSym(x, y Sym) bool {
   290  	return x == nil || y == nil
   291  }
   292  
   293  // canMergeLoadClobber reports whether the load can be merged into target without
   294  // invalidating the schedule.
   295  // It also checks that the other non-load argument x is something we
   296  // are ok with clobbering.
   297  func canMergeLoadClobber(target, load, x *Value) bool {
   298  	// The register containing x is going to get clobbered.
   299  	// Don't merge if we still need the value of x.
   300  	// We don't have liveness information here, but we can
   301  	// approximate x dying with:
   302  	//  1) target is x's only use.
   303  	//  2) target is not in a deeper loop than x.
   304  	switch {
   305  	case x.Uses == 2 && x.Op == OpPhi && len(x.Args) == 2 && (x.Args[0] == target || x.Args[1] == target) && target.Uses == 1:
   306  		// This is a simple detector to determine that x is probably
   307  		// not live after target. (It does not need to be perfect,
   308  		// regalloc will issue a reg-reg move to save it if we are wrong.)
   309  		// We have:
   310  		//   x = Phi(?, target)
   311  		//   target = Op(load, x)
   312  		// Because target has only one use as a Phi argument, we can schedule it
   313  		// very late. Hopefully, later than the other use of x. (The other use died
   314  		// between x and target, or exists on another branch entirely).
   315  	case x.Uses > 1:
   316  		return false
   317  	}
   318  	loopnest := x.Block.Func.loopnest()
   319  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   320  		return false
   321  	}
   322  	return canMergeLoad(target, load)
   323  }
   324  
   325  // canMergeLoad reports whether the load can be merged into target without
   326  // invalidating the schedule.
   327  func canMergeLoad(target, load *Value) bool {
   328  	if target.Block.ID != load.Block.ID {
   329  		// If the load is in a different block do not merge it.
   330  		return false
   331  	}
   332  
   333  	// We can't merge the load into the target if the load
   334  	// has more than one use.
   335  	if load.Uses != 1 {
   336  		return false
   337  	}
   338  
   339  	mem := load.MemoryArg()
   340  
   341  	// We need the load's memory arg to still be alive at target. That
   342  	// can't be the case if one of target's args depends on a memory
   343  	// state that is a successor of load's memory arg.
   344  	//
   345  	// For example, it would be invalid to merge load into target in
   346  	// the following situation because newmem has killed oldmem
   347  	// before target is reached:
   348  	//     load = read ... oldmem
   349  	//   newmem = write ... oldmem
   350  	//     arg0 = read ... newmem
   351  	//   target = add arg0 load
   352  	//
   353  	// If the argument comes from a different block then we can exclude
   354  	// it immediately because it must dominate load (which is in the
   355  	// same block as target).
   356  	var args []*Value
   357  	for _, a := range target.Args {
   358  		if a != load && a.Block.ID == target.Block.ID {
   359  			args = append(args, a)
   360  		}
   361  	}
   362  
   363  	// memPreds contains memory states known to be predecessors of load's
   364  	// memory state. It is lazily initialized.
   365  	var memPreds map[*Value]bool
   366  	for i := 0; len(args) > 0; i++ {
   367  		const limit = 100
   368  		if i >= limit {
   369  			// Give up if we have done a lot of iterations.
   370  			return false
   371  		}
   372  		v := args[len(args)-1]
   373  		args = args[:len(args)-1]
   374  		if target.Block.ID != v.Block.ID {
   375  			// Since target and load are in the same block
   376  			// we can stop searching when we leave the block.
   377  			continue
   378  		}
   379  		if v.Op == OpPhi {
   380  			// A Phi implies we have reached the top of the block.
   381  			// The memory phi, if it exists, is always
   382  			// the first logical store in the block.
   383  			continue
   384  		}
   385  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   386  			// We could handle this situation however it is likely
   387  			// to be very rare.
   388  			return false
   389  		}
   390  		if v.Op.SymEffect()&SymAddr != 0 {
   391  			// This case prevents an operation that calculates the
   392  			// address of a local variable from being forced to schedule
   393  			// before its corresponding VarDef.
   394  			// See issue 28445.
   395  			//   v1 = LOAD ...
   396  			//   v2 = VARDEF
   397  			//   v3 = LEAQ
   398  			//   v4 = CMPQ v1 v3
   399  			// We don't want to combine the CMPQ with the load, because
   400  			// that would force the CMPQ to schedule before the VARDEF, which
   401  			// in turn requires the LEAQ to schedule before the VARDEF.
   402  			return false
   403  		}
   404  		if v.Type.IsMemory() {
   405  			if memPreds == nil {
   406  				// Initialise a map containing memory states
   407  				// known to be predecessors of load's memory
   408  				// state.
   409  				memPreds = make(map[*Value]bool)
   410  				m := mem
   411  				const limit = 50
   412  				for i := 0; i < limit; i++ {
   413  					if m.Op == OpPhi {
   414  						// The memory phi, if it exists, is always
   415  						// the first logical store in the block.
   416  						break
   417  					}
   418  					if m.Block.ID != target.Block.ID {
   419  						break
   420  					}
   421  					if !m.Type.IsMemory() {
   422  						break
   423  					}
   424  					memPreds[m] = true
   425  					if len(m.Args) == 0 {
   426  						break
   427  					}
   428  					m = m.MemoryArg()
   429  				}
   430  			}
   431  
   432  			// We can merge if v is a predecessor of mem.
   433  			//
   434  			// For example, we can merge load into target in the
   435  			// following scenario:
   436  			//      x = read ... v
   437  			//    mem = write ... v
   438  			//   load = read ... mem
   439  			// target = add x load
   440  			if memPreds[v] {
   441  				continue
   442  			}
   443  			return false
   444  		}
   445  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   446  			// If v takes mem as an input then we know mem
   447  			// is valid at this point.
   448  			continue
   449  		}
   450  		for _, a := range v.Args {
   451  			if target.Block.ID == a.Block.ID {
   452  				args = append(args, a)
   453  			}
   454  		}
   455  	}
   456  
   457  	return true
   458  }
   459  
    460  // isSameCall reports whether aux describes a call to the function with the given name.
   461  func isSameCall(aux Aux, name string) bool {
   462  	fn := aux.(*AuxCall).Fn
   463  	return fn != nil && fn.String() == name
   464  }
   465  
   466  func isMalloc(aux Aux) bool {
   467  	return isNewObject(aux) || isSpecializedMalloc(aux)
   468  }
   469  
   470  func isNewObject(aux Aux) bool {
   471  	fn := aux.(*AuxCall).Fn
   472  	return fn != nil && fn.String() == "runtime.newobject"
   473  }
   474  
   475  func isSpecializedMalloc(aux Aux) bool {
   476  	fn := aux.(*AuxCall).Fn
   477  	if fn == nil {
   478  		return false
   479  	}
   480  	name := fn.String()
   481  	return strings.HasPrefix(name, "runtime.mallocgcSmallNoScanSC") ||
   482  		strings.HasPrefix(name, "runtime.mallocgcSmallScanNoHeaderSC") ||
   483  		strings.HasPrefix(name, "runtime.mallocgcTinySize")
   484  }
   485  
    486  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   487  func canLoadUnaligned(c *Config) bool {
   488  	return c.ctxt.Arch.Alignment == 1
   489  }
   490  
   491  // nlzX returns the number of leading zeros.
   492  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   493  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   494  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   495  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   496  
   497  // ntzX returns the number of trailing zeros.
   498  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   499  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   500  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   501  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   502  
   503  // oneBit reports whether x contains exactly one set bit.
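         //
         // A few illustrative cases:
         //
         //	oneBit(int64(8))   == true  // exactly one bit set
         //	oneBit(int8(-128)) == true  // 0x80: only the sign bit is set
         //	oneBit(int64(6))   == false // two bits set
         //	oneBit(int64(0))   == false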
   504  func oneBit[T int8 | int16 | int32 | int64](x T) bool {
   505  	return x&(x-1) == 0 && x != 0
   506  }
   507  
   508  // nto returns the number of trailing ones.
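         // For example, nto(0b0111) == 3, nto(0) == 0, and nto(-1) == 64
         // (all 64 bits of -1 are ones).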
   509  func nto(x int64) int64 {
   510  	return int64(ntz64(^x))
   511  }
   512  
    513  // logX returns the base-2 logarithm of n.
    514  // n must be a positive power of 2 (isPowerOfTwo reports true).
   515  func log8(n int8) int64   { return log8u(uint8(n)) }
   516  func log16(n int16) int64 { return log16u(uint16(n)) }
   517  func log32(n int32) int64 { return log32u(uint32(n)) }
   518  func log64(n int64) int64 { return log64u(uint64(n)) }
   519  
    520  // logXu returns the base-2 logarithm of n.
    521  // n must be a power of 2 (isPowerOfTwo reports true).
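         // For example, log8u(1) == 0, log16u(4) == 2, and log64u(1<<32) == 32.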
   522  func log8u(n uint8) int64   { return int64(bits.Len8(n)) - 1 }
   523  func log16u(n uint16) int64 { return int64(bits.Len16(n)) - 1 }
   524  func log32u(n uint32) int64 { return int64(bits.Len32(n)) - 1 }
   525  func log64u(n uint64) int64 { return int64(bits.Len64(n)) - 1 }
   526  
    527  // isPowerOfTwo reports whether n is a power of 2.
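         //
         // For example, isPowerOfTwo(int64(1)) and isPowerOfTwo(uint8(128)) are true,
         // while isPowerOfTwo(int64(0)), isPowerOfTwo(int64(6)), and isPowerOfTwo(int8(-128))
         // are false (n must be strictly positive).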
   528  func isPowerOfTwo[T int8 | int16 | int32 | int64 | uint8 | uint16 | uint32 | uint64](n T) bool {
   529  	return n > 0 && n&(n-1) == 0
   530  }
   531  
   532  // is32Bit reports whether n can be represented as a signed 32 bit integer.
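         // For example, is32Bit(1<<31-1) and is32Bit(-1<<31) are true (the int32 bounds),
         // while is32Bit(1<<31) is false.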
   533  func is32Bit(n int64) bool {
   534  	return n == int64(int32(n))
   535  }
   536  
   537  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   538  func is16Bit(n int64) bool {
   539  	return n == int64(int16(n))
   540  }
   541  
   542  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   543  func is8Bit(n int64) bool {
   544  	return n == int64(int8(n))
   545  }
   546  
   547  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   548  func isU8Bit(n int64) bool {
   549  	return n == int64(uint8(n))
   550  }
   551  
   552  // is12Bit reports whether n can be represented as a signed 12 bit integer.
   553  func is12Bit(n int64) bool {
   554  	return -(1<<11) <= n && n < (1<<11)
   555  }
   556  
   557  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   558  func isU12Bit(n int64) bool {
   559  	return 0 <= n && n < (1<<12)
   560  }
   561  
   562  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   563  func isU16Bit(n int64) bool {
   564  	return n == int64(uint16(n))
   565  }
   566  
   567  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   568  func isU32Bit(n int64) bool {
   569  	return n == int64(uint32(n))
   570  }
   571  
   572  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   573  func is20Bit(n int64) bool {
   574  	return -(1<<19) <= n && n < (1<<19)
   575  }
   576  
   577  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   578  func b2i(b bool) int64 {
   579  	if b {
   580  		return 1
   581  	}
   582  	return 0
   583  }
   584  
   585  // b2i32 translates a boolean value to 0 or 1.
   586  func b2i32(b bool) int32 {
   587  	if b {
   588  		return 1
   589  	}
   590  	return 0
   591  }
   592  
   593  func canMulStrengthReduce(config *Config, x int64) bool {
   594  	_, ok := config.mulRecipes[x]
   595  	return ok
   596  }
   597  func canMulStrengthReduce32(config *Config, x int32) bool {
   598  	_, ok := config.mulRecipes[int64(x)]
   599  	return ok
   600  }
   601  
   602  // mulStrengthReduce returns v*x evaluated at the location
   603  // (block and source position) of m.
   604  // canMulStrengthReduce must have returned true.
   605  func mulStrengthReduce(m *Value, v *Value, x int64) *Value {
   606  	return v.Block.Func.Config.mulRecipes[x].build(m, v)
   607  }
   608  
   609  // mulStrengthReduce32 returns v*x evaluated at the location
   610  // (block and source position) of m.
   611  // canMulStrengthReduce32 must have returned true.
   612  // The upper 32 bits of m might be set to junk.
   613  func mulStrengthReduce32(m *Value, v *Value, x int32) *Value {
   614  	return v.Block.Func.Config.mulRecipes[int64(x)].build(m, v)
   615  }
   616  
   617  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   618  // A shift is bounded if it is shifting by less than the width of the shifted value.
   619  func shiftIsBounded(v *Value) bool {
   620  	return v.AuxInt != 0
   621  }
   622  
    623  // canonLessThan reports whether x is "ordered" less than y, for purposes of normalizing
   624  // generated code as much as possible.
   625  func canonLessThan(x, y *Value) bool {
   626  	if x.Op != y.Op {
   627  		return x.Op < y.Op
   628  	}
   629  	if !x.Pos.SameFileAndLine(y.Pos) {
   630  		return x.Pos.Before(y.Pos)
   631  	}
   632  	return x.ID < y.ID
   633  }
   634  
   635  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   636  // of the mantissa. It will panic if the truncation results in lost information.
   637  func truncate64Fto32F(f float64) float32 {
   638  	if !isExactFloat32(f) {
   639  		panic("truncate64Fto32F: truncation is not exact")
   640  	}
   641  	if !math.IsNaN(f) {
   642  		return float32(f)
   643  	}
   644  	// NaN bit patterns aren't necessarily preserved across conversion
   645  	// instructions so we need to do the conversion manually.
   646  	b := math.Float64bits(f)
   647  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   648  	//          | sign                  | exponent   | mantissa       |
   649  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   650  	return math.Float32frombits(r)
   651  }
   652  
   653  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   654  func DivisionNeedsFixUp(v *Value) bool {
   655  	return v.AuxInt == 0
   656  }
   657  
   658  // auxTo32F decodes a float32 from the AuxInt value provided.
   659  func auxTo32F(i int64) float32 {
   660  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   661  }
   662  
   663  func auxIntToBool(i int64) bool {
   664  	if i == 0 {
   665  		return false
   666  	}
   667  	return true
   668  }
   669  func auxIntToInt8(i int64) int8 {
   670  	return int8(i)
   671  }
   672  func auxIntToInt16(i int64) int16 {
   673  	return int16(i)
   674  }
   675  func auxIntToInt32(i int64) int32 {
   676  	return int32(i)
   677  }
   678  func auxIntToInt64(i int64) int64 {
   679  	return i
   680  }
   681  func auxIntToUint8(i int64) uint8 {
   682  	return uint8(i)
   683  }
   684  func auxIntToFloat32(i int64) float32 {
   685  	return float32(math.Float64frombits(uint64(i)))
   686  }
   687  func auxIntToFloat64(i int64) float64 {
   688  	return math.Float64frombits(uint64(i))
   689  }
   690  func auxIntToValAndOff(i int64) ValAndOff {
   691  	return ValAndOff(i)
   692  }
   693  func auxIntToArm64BitField(i int64) arm64BitField {
   694  	return arm64BitField(i)
   695  }
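         // auxIntToArm64ConditionalParams unpacks the layout produced by
         // arm64ConditionalParamsToAuxInt below: cond in bits 0-15, nzcv in bits 16-19,
         // constValue in bits 20-24, and the ind flag in bit 25.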
   696  func auxIntToArm64ConditionalParams(i int64) arm64ConditionalParams {
   697  	var params arm64ConditionalParams
   698  	params.cond = Op(i & 0xffff)
   699  	i >>= 16
   700  	params.nzcv = uint8(i & 0x0f)
   701  	i >>= 4
   702  	params.constValue = uint8(i & 0x1f)
   703  	i >>= 5
   704  	params.ind = i == 1
   705  	return params
   706  }
   707  func auxIntToFlagConstant(x int64) flagConstant {
   708  	return flagConstant(x)
   709  }
   710  
   711  func auxIntToOp(cc int64) Op {
   712  	return Op(cc)
   713  }
   714  
   715  func boolToAuxInt(b bool) int64 {
   716  	if b {
   717  		return 1
   718  	}
   719  	return 0
   720  }
   721  func int8ToAuxInt(i int8) int64 {
   722  	return int64(i)
   723  }
   724  func int16ToAuxInt(i int16) int64 {
   725  	return int64(i)
   726  }
   727  func int32ToAuxInt(i int32) int64 {
   728  	return int64(i)
   729  }
   730  func int64ToAuxInt(i int64) int64 {
   731  	return i
   732  }
   733  func uint8ToAuxInt(i uint8) int64 {
   734  	return int64(int8(i))
   735  }
   736  func float32ToAuxInt(f float32) int64 {
   737  	return int64(math.Float64bits(float64(f)))
   738  }
   739  func float64ToAuxInt(f float64) int64 {
   740  	return int64(math.Float64bits(f))
   741  }
   742  func valAndOffToAuxInt(v ValAndOff) int64 {
   743  	return int64(v)
   744  }
   745  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   746  	return int64(v)
   747  }
   748  func arm64ConditionalParamsToAuxInt(v arm64ConditionalParams) int64 {
   749  	if v.cond&^0xffff != 0 {
   750  		panic("condition value exceeds 16 bits")
   751  	}
   752  
   753  	var i int64
   754  	if v.ind {
   755  		i = 1 << 25
   756  	}
   757  	i |= int64(v.constValue) << 20
   758  	i |= int64(v.nzcv) << 16
   759  	i |= int64(v.cond)
   760  	return i
   761  }
   762  
   763  func flagConstantToAuxInt(x flagConstant) int64 {
   764  	return int64(x)
   765  }
   766  
   767  func opToAuxInt(o Op) int64 {
   768  	return int64(o)
   769  }
   770  
   771  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   772  type Aux interface {
   773  	CanBeAnSSAAux()
   774  }
   775  
   776  // for now only used to mark moves that need to avoid clobbering flags
   777  type auxMark bool
   778  
   779  func (auxMark) CanBeAnSSAAux() {}
   780  
   781  var AuxMark auxMark
   782  
   783  // stringAux wraps string values for use in Aux.
   784  type stringAux string
   785  
   786  func (stringAux) CanBeAnSSAAux() {}
   787  
   788  func auxToString(i Aux) string {
   789  	return string(i.(stringAux))
   790  }
   791  func auxToSym(i Aux) Sym {
   792  	// TODO: kind of a hack - allows nil interface through
   793  	s, _ := i.(Sym)
   794  	return s
   795  }
   796  func auxToType(i Aux) *types.Type {
   797  	return i.(*types.Type)
   798  }
   799  func auxToCall(i Aux) *AuxCall {
   800  	return i.(*AuxCall)
   801  }
   802  func auxToS390xCCMask(i Aux) s390x.CCMask {
   803  	return i.(s390x.CCMask)
   804  }
   805  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   806  	return i.(s390x.RotateParams)
   807  }
   808  
   809  func StringToAux(s string) Aux {
   810  	return stringAux(s)
   811  }
   812  func symToAux(s Sym) Aux {
   813  	return s
   814  }
   815  func callToAux(s *AuxCall) Aux {
   816  	return s
   817  }
   818  func typeToAux(t *types.Type) Aux {
   819  	return t
   820  }
   821  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   822  	return c
   823  }
   824  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   825  	return r
   826  }
   827  
   828  // uaddOvf reports whether unsigned a+b would overflow.
   829  func uaddOvf(a, b int64) bool {
   830  	return uint64(a)+uint64(b) < uint64(a)
   831  }
   832  
   833  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   834  	v.Op = OpStaticLECall
   835  	auxcall := v.Aux.(*AuxCall)
   836  	auxcall.Fn = sym
   837  	// Remove first arg
   838  	v.Args[0].Uses--
   839  	copy(v.Args[0:], v.Args[1:])
   840  	v.Args[len(v.Args)-1] = nil // aid GC
   841  	v.Args = v.Args[:len(v.Args)-1]
   842  	if f := v.Block.Func; f.pass.debug > 0 {
   843  		f.Warnl(v.Pos, "de-virtualizing call")
   844  	}
   845  	return v
   846  }
   847  
   848  // isSamePtr reports whether p1 and p2 point to the same address.
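         // The check is conservative: a false result does not mean the addresses differ.
         //
         // For example, two (OffPtr [8] p) values with the same p are reported as the
         // same address, as are two (Addr {sym}) values with the same symbol.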
   849  func isSamePtr(p1, p2 *Value) bool {
   850  	if p1 == p2 {
   851  		return true
   852  	}
   853  	if p1.Op != p2.Op {
   854  		for p1.Op == OpOffPtr && p1.AuxInt == 0 {
   855  			p1 = p1.Args[0]
   856  		}
   857  		for p2.Op == OpOffPtr && p2.AuxInt == 0 {
   858  			p2 = p2.Args[0]
   859  		}
   860  		if p1 == p2 {
   861  			return true
   862  		}
   863  		if p1.Op != p2.Op {
   864  			return false
   865  		}
   866  	}
   867  	switch p1.Op {
   868  	case OpOffPtr:
   869  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   870  	case OpAddr, OpLocalAddr:
   871  		return p1.Aux == p2.Aux
   872  	case OpAddPtr:
   873  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   874  	}
   875  	return false
   876  }
   877  
   878  func isStackPtr(v *Value) bool {
   879  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   880  		v = v.Args[0]
   881  	}
   882  	return v.Op == OpSP || v.Op == OpLocalAddr
   883  }
   884  
   885  // disjoint reports whether the memory region specified by [p1:p1+n1)
   886  // does not overlap with [p2:p2+n2).
   887  // A return value of false does not imply the regions overlap.
   888  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   889  	if n1 == 0 || n2 == 0 {
   890  		return true
   891  	}
   892  	if p1 == p2 {
   893  		return false
   894  	}
   895  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   896  		base, offset = ptr, 0
   897  		for base.Op == OpOffPtr {
   898  			offset += base.AuxInt
   899  			base = base.Args[0]
   900  		}
   901  		if opcodeTable[base.Op].nilCheck {
   902  			base = base.Args[0]
   903  		}
   904  		return base, offset
   905  	}
   906  
    907  	// Run type-based analysis.
   908  	if disjointTypes(p1.Type, p2.Type) {
   909  		return true
   910  	}
   911  
   912  	p1, off1 := baseAndOffset(p1)
   913  	p2, off2 := baseAndOffset(p2)
   914  	if isSamePtr(p1, p2) {
   915  		return !overlap(off1, n1, off2, n2)
   916  	}
   917  	// p1 and p2 are not the same, so if they are both OpAddrs then
   918  	// they point to different variables.
   919  	// If one pointer is on the stack and the other is an argument
   920  	// then they can't overlap.
   921  	switch p1.Op {
   922  	case OpAddr, OpLocalAddr:
   923  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   924  			return true
   925  		}
   926  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   927  	case OpArg, OpArgIntReg:
   928  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   929  			return true
   930  		}
   931  	case OpSP:
   932  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   933  	}
   934  	return false
   935  }
   936  
   937  // disjointTypes reports whether a memory region pointed to by a pointer of type
   938  // t1 does not overlap with a memory region pointed to by a pointer of type t2 --
   939  // based on type aliasing rules.
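         //
         // For example, a region accessed through a **int (its element is a pointer)
         // cannot overlap a region accessed through a *float64 (its element has no
         // pointers), so the two are reported as disjoint.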
   940  func disjointTypes(t1 *types.Type, t2 *types.Type) bool {
   941  	// Unsafe pointer can alias with anything.
   942  	if t1.IsUnsafePtr() || t2.IsUnsafePtr() {
   943  		return false
   944  	}
   945  
   946  	if !t1.IsPtr() || !t2.IsPtr() {
   947  		// Treat non-pointer types (such as TFUNC, TMAP, uintptr) conservatively.
   948  		return false
   949  	}
   950  
   951  	t1 = t1.Elem()
   952  	t2 = t2.Elem()
   953  
    954  	// Not-in-heap types are not supported -- they are rare and unimportant; also,
    955  	// the type.HasPointers check doesn't work correctly for them.
   956  	if t1.NotInHeap() || t2.NotInHeap() {
   957  		return false
   958  	}
   959  
   960  	isPtrShaped := func(t *types.Type) bool { return int(t.Size()) == types.PtrSize && t.HasPointers() }
   961  
   962  	// Pointers and non-pointers are disjoint (https://pkg.go.dev/unsafe#Pointer).
   963  	if (isPtrShaped(t1) && !t2.HasPointers()) ||
   964  		(isPtrShaped(t2) && !t1.HasPointers()) {
   965  		return true
   966  	}
   967  
   968  	return false
   969  }
   970  
   971  // moveSize returns the number of bytes an aligned MOV instruction moves.
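         //
         // For example, with c.PtrSize == 8:
         //
         //	moveSize(8, c) == 8
         //	moveSize(6, c) == 2
         //	moveSize(3, c) == 1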
   972  func moveSize(align int64, c *Config) int64 {
   973  	switch {
   974  	case align%8 == 0 && c.PtrSize == 8:
   975  		return 8
   976  	case align%4 == 0:
   977  		return 4
   978  	case align%2 == 0:
   979  		return 2
   980  	}
   981  	return 1
   982  }
   983  
   984  // mergePoint finds a block among a's blocks which dominates b and is itself
   985  // dominated by all of a's blocks. Returns nil if it can't find one.
   986  // Might return nil even if one does exist.
   987  func mergePoint(b *Block, a ...*Value) *Block {
   988  	// Walk backward from b looking for one of the a's blocks.
   989  
   990  	// Max distance
   991  	d := 100
   992  
   993  	for d > 0 {
   994  		for _, x := range a {
   995  			if b == x.Block {
   996  				goto found
   997  			}
   998  		}
   999  		if len(b.Preds) > 1 {
  1000  			// Don't know which way to go back. Abort.
  1001  			return nil
  1002  		}
  1003  		b = b.Preds[0].b
  1004  		d--
  1005  	}
  1006  	return nil // too far away
  1007  found:
   1008  	// At this point, b is the block of the first value in a that we found by walking backwards.
   1009  	// If we return anything, it will be this block, saved as r.
  1010  	r := b
  1011  
  1012  	// Keep going, counting the other a's that we find. They must all dominate r.
  1013  	na := 0
  1014  	for d > 0 {
  1015  		for _, x := range a {
  1016  			if b == x.Block {
  1017  				na++
  1018  			}
  1019  		}
  1020  		if na == len(a) {
  1021  			// Found all of a in a backwards walk. We can return r.
  1022  			return r
  1023  		}
  1024  		if len(b.Preds) > 1 {
  1025  			return nil
  1026  		}
  1027  		b = b.Preds[0].b
  1028  		d--
  1029  
  1030  	}
  1031  	return nil // too far away
  1032  }
  1033  
  1034  // clobber invalidates values. Returns true.
  1035  // clobber is used by rewrite rules to:
  1036  //
  1037  //	A) make sure the values are really dead and never used again.
  1038  //	B) decrement use counts of the values' args.
  1039  func clobber(vv ...*Value) bool {
  1040  	for _, v := range vv {
  1041  		v.reset(OpInvalid)
  1042  		// Note: leave v.Block intact.  The Block field is used after clobber.
  1043  	}
  1044  	return true
  1045  }
  1046  
  1047  // resetCopy resets v to be a copy of arg.
  1048  // Always returns true.
  1049  func resetCopy(v *Value, arg *Value) bool {
  1050  	v.reset(OpCopy)
  1051  	v.AddArg(arg)
  1052  	return true
  1053  }
  1054  
  1055  // clobberIfDead resets v when use count is 1. Returns true.
  1056  // clobberIfDead is used by rewrite rules to decrement
  1057  // use counts of v's args when v is dead and never used.
  1058  func clobberIfDead(v *Value) bool {
  1059  	if v.Uses == 1 {
  1060  		v.reset(OpInvalid)
  1061  	}
  1062  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
  1063  	return true
  1064  }
  1065  
  1066  // noteRule is an easy way to track if a rule is matched when writing
  1067  // new ones.  Make the rule of interest also conditional on
  1068  //
  1069  //	noteRule("note to self: rule of interest matched")
  1070  //
  1071  // and that message will print when the rule matches.
  1072  func noteRule(s string) bool {
  1073  	fmt.Println(s)
  1074  	return true
  1075  }
  1076  
  1077  // countRule increments Func.ruleMatches[key].
  1078  // If Func.ruleMatches is non-nil at the end
  1079  // of compilation, it will be printed to stdout.
   1080  // This is intended to make it easier to find which functions
   1081  // contain lots of rule matches when developing new rules.
  1082  func countRule(v *Value, key string) bool {
  1083  	f := v.Block.Func
  1084  	if f.ruleMatches == nil {
  1085  		f.ruleMatches = make(map[string]int)
  1086  	}
  1087  	f.ruleMatches[key]++
  1088  	return true
  1089  }
  1090  
  1091  // warnRule generates compiler debug output with string s when
  1092  // v is not in autogenerated code, cond is true and the rule has fired.
  1093  func warnRule(cond bool, v *Value, s string) bool {
  1094  	if pos := v.Pos; pos.Line() > 1 && cond {
  1095  		v.Block.Func.Warnl(pos, s)
  1096  	}
  1097  	return true
  1098  }
  1099  
  1100  // for a pseudo-op like (LessThan x), extract x.
  1101  func flagArg(v *Value) *Value {
  1102  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1103  		return nil
  1104  	}
  1105  	return v.Args[0]
  1106  }
  1107  
  1108  // arm64Negate finds the complement to an ARM64 condition code,
  1109  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1110  //
   1111  // For floating point, it's more subtle because NaN is unordered. We do
   1112  // !LessThanF -> NotLessThanF; the latter takes care of NaNs.
  1113  func arm64Negate(op Op) Op {
  1114  	switch op {
  1115  	case OpARM64LessThan:
  1116  		return OpARM64GreaterEqual
  1117  	case OpARM64LessThanU:
  1118  		return OpARM64GreaterEqualU
  1119  	case OpARM64GreaterThan:
  1120  		return OpARM64LessEqual
  1121  	case OpARM64GreaterThanU:
  1122  		return OpARM64LessEqualU
  1123  	case OpARM64LessEqual:
  1124  		return OpARM64GreaterThan
  1125  	case OpARM64LessEqualU:
  1126  		return OpARM64GreaterThanU
  1127  	case OpARM64GreaterEqual:
  1128  		return OpARM64LessThan
  1129  	case OpARM64GreaterEqualU:
  1130  		return OpARM64LessThanU
  1131  	case OpARM64Equal:
  1132  		return OpARM64NotEqual
  1133  	case OpARM64NotEqual:
  1134  		return OpARM64Equal
  1135  	case OpARM64LessThanF:
  1136  		return OpARM64NotLessThanF
  1137  	case OpARM64NotLessThanF:
  1138  		return OpARM64LessThanF
  1139  	case OpARM64LessEqualF:
  1140  		return OpARM64NotLessEqualF
  1141  	case OpARM64NotLessEqualF:
  1142  		return OpARM64LessEqualF
  1143  	case OpARM64GreaterThanF:
  1144  		return OpARM64NotGreaterThanF
  1145  	case OpARM64NotGreaterThanF:
  1146  		return OpARM64GreaterThanF
  1147  	case OpARM64GreaterEqualF:
  1148  		return OpARM64NotGreaterEqualF
  1149  	case OpARM64NotGreaterEqualF:
  1150  		return OpARM64GreaterEqualF
  1151  	default:
  1152  		panic("unreachable")
  1153  	}
  1154  }
  1155  
  1156  // arm64Invert evaluates (InvertFlags op), which
  1157  // is the same as altering the condition codes such
  1158  // that the same result would be produced if the arguments
  1159  // to the flag-generating instruction were reversed, e.g.
  1160  // (InvertFlags (CMP x y)) -> (CMP y x)
  1161  func arm64Invert(op Op) Op {
  1162  	switch op {
  1163  	case OpARM64LessThan:
  1164  		return OpARM64GreaterThan
  1165  	case OpARM64LessThanU:
  1166  		return OpARM64GreaterThanU
  1167  	case OpARM64GreaterThan:
  1168  		return OpARM64LessThan
  1169  	case OpARM64GreaterThanU:
  1170  		return OpARM64LessThanU
  1171  	case OpARM64LessEqual:
  1172  		return OpARM64GreaterEqual
  1173  	case OpARM64LessEqualU:
  1174  		return OpARM64GreaterEqualU
  1175  	case OpARM64GreaterEqual:
  1176  		return OpARM64LessEqual
  1177  	case OpARM64GreaterEqualU:
  1178  		return OpARM64LessEqualU
  1179  	case OpARM64Equal, OpARM64NotEqual:
  1180  		return op
  1181  	case OpARM64LessThanF:
  1182  		return OpARM64GreaterThanF
  1183  	case OpARM64GreaterThanF:
  1184  		return OpARM64LessThanF
  1185  	case OpARM64LessEqualF:
  1186  		return OpARM64GreaterEqualF
  1187  	case OpARM64GreaterEqualF:
  1188  		return OpARM64LessEqualF
  1189  	case OpARM64NotLessThanF:
  1190  		return OpARM64NotGreaterThanF
  1191  	case OpARM64NotGreaterThanF:
  1192  		return OpARM64NotLessThanF
  1193  	case OpARM64NotLessEqualF:
  1194  		return OpARM64NotGreaterEqualF
  1195  	case OpARM64NotGreaterEqualF:
  1196  		return OpARM64NotLessEqualF
  1197  	default:
  1198  		panic("unreachable")
  1199  	}
  1200  }
  1201  
   1202  // ccARM64Eval evaluates an ARM64 op against a flags value
   1203  // that is potentially constant; it returns 1 for true,
   1204  // -1 for false, and 0 for not constant.
  1205  func ccARM64Eval(op Op, flags *Value) int {
  1206  	fop := flags.Op
  1207  	if fop == OpARM64InvertFlags {
  1208  		return -ccARM64Eval(op, flags.Args[0])
  1209  	}
  1210  	if fop != OpARM64FlagConstant {
  1211  		return 0
  1212  	}
  1213  	fc := flagConstant(flags.AuxInt)
  1214  	b2i := func(b bool) int {
  1215  		if b {
  1216  			return 1
  1217  		}
  1218  		return -1
  1219  	}
  1220  	switch op {
  1221  	case OpARM64Equal:
  1222  		return b2i(fc.eq())
  1223  	case OpARM64NotEqual:
  1224  		return b2i(fc.ne())
  1225  	case OpARM64LessThan:
  1226  		return b2i(fc.lt())
  1227  	case OpARM64LessThanU:
  1228  		return b2i(fc.ult())
  1229  	case OpARM64GreaterThan:
  1230  		return b2i(fc.gt())
  1231  	case OpARM64GreaterThanU:
  1232  		return b2i(fc.ugt())
  1233  	case OpARM64LessEqual:
  1234  		return b2i(fc.le())
  1235  	case OpARM64LessEqualU:
  1236  		return b2i(fc.ule())
  1237  	case OpARM64GreaterEqual:
  1238  		return b2i(fc.ge())
  1239  	case OpARM64GreaterEqualU:
  1240  		return b2i(fc.uge())
  1241  	}
  1242  	return 0
  1243  }
  1244  
  1245  // logRule logs the use of the rule s. This will only be enabled if
  1246  // rewrite rules were generated with the -log option, see _gen/rulegen.go.
  1247  func logRule(s string) {
  1248  	if ruleFile == nil {
  1249  		// Open a log file to write log to. We open in append
  1250  		// mode because all.bash runs the compiler lots of times,
  1251  		// and we want the concatenation of all of those logs.
  1252  		// This means, of course, that users need to rm the old log
  1253  		// to get fresh data.
  1254  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1255  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1256  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1257  		if err != nil {
  1258  			panic(err)
  1259  		}
  1260  		ruleFile = w
  1261  	}
  1262  	// Ignore errors in case of multiple processes fighting over the file.
  1263  	fmt.Fprintln(ruleFile, s)
  1264  }
  1265  
  1266  var ruleFile io.Writer
  1267  
  1268  func isConstZero(v *Value) bool {
  1269  	switch v.Op {
  1270  	case OpConstNil:
  1271  		return true
  1272  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1273  		return v.AuxInt == 0
  1274  	case OpStringMake, OpIMake, OpComplexMake:
  1275  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1276  	case OpSliceMake:
  1277  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1278  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1279  		return isConstZero(v.Args[0])
  1280  	}
  1281  	return false
  1282  }
  1283  
  1284  // reciprocalExact64 reports whether 1/c is exactly representable.
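         // For example, reciprocalExact64(8) is true (1/8 == 0.125 exactly), while
         // reciprocalExact64(3) and reciprocalExact64(0) are false.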
  1285  func reciprocalExact64(c float64) bool {
  1286  	b := math.Float64bits(c)
  1287  	man := b & (1<<52 - 1)
  1288  	if man != 0 {
  1289  		return false // not a power of 2, denormal, or NaN
  1290  	}
  1291  	exp := b >> 52 & (1<<11 - 1)
  1292  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1293  	// changes the exponent to 0x7fe-exp.
  1294  	switch exp {
  1295  	case 0:
  1296  		return false // ±0
  1297  	case 0x7ff:
  1298  		return false // ±inf
  1299  	case 0x7fe:
  1300  		return false // exponent is not representable
  1301  	default:
  1302  		return true
  1303  	}
  1304  }
  1305  
  1306  // reciprocalExact32 reports whether 1/c is exactly representable.
  1307  func reciprocalExact32(c float32) bool {
  1308  	b := math.Float32bits(c)
  1309  	man := b & (1<<23 - 1)
  1310  	if man != 0 {
  1311  		return false // not a power of 2, denormal, or NaN
  1312  	}
  1313  	exp := b >> 23 & (1<<8 - 1)
  1314  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1315  	// changes the exponent to 0xfe-exp.
  1316  	switch exp {
  1317  	case 0:
  1318  		return false // ±0
  1319  	case 0xff:
  1320  		return false // ±inf
  1321  	case 0xfe:
  1322  		return false // exponent is not representable
  1323  	default:
  1324  		return true
  1325  	}
  1326  }
  1327  
   1328  // isARMImmRot reports whether an immediate can be directly encoded into an ARM instruction.
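         //
         // For example, 0xff000000 (0xff rotated right by 8) is encodable, while 0x101
         // is not: its set bits span 9 positions, which cannot fit in an 8-bit value
         // under any even rotation.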
  1329  func isARMImmRot(v uint32) bool {
  1330  	for i := 0; i < 16; i++ {
  1331  		if v&^0xff == 0 {
  1332  			return true
  1333  		}
  1334  		v = v<<2 | v>>30
  1335  	}
  1336  
  1337  	return false
  1338  }
  1339  
   1340  // overlap reports whether the ranges specified by the given offset and
   1341  // size pairs overlap.
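         //
         // For example:
         //
         //	overlap(0, 8, 4, 8) == true  // [0,8) and [4,12) share [4,8)
         //	overlap(0, 4, 4, 4) == false // [0,4) and [4,8) are merely adjacent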
  1342  func overlap(offset1, size1, offset2, size2 int64) bool {
  1343  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1344  		return true
  1345  	}
  1346  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1347  		return true
  1348  	}
  1349  	return false
  1350  }
  1351  
   1352  // ZeroUpper32Bits reports whether the value zeroes out the upper 32 bits of a 64-bit register.
   1353  // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
   1354  // because it catches the same number of cases as 4.
  1355  func ZeroUpper32Bits(x *Value, depth int) bool {
  1356  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1357  		// If the value is signed, it might get re-sign-extended
  1358  		// during spill and restore. See issue 68227.
  1359  		return false
  1360  	}
  1361  	switch x.Op {
  1362  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1363  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1364  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1365  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1366  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1367  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1368  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1369  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1370  		OpAMD64SHLL, OpAMD64SHLLconst:
  1371  		return true
  1372  	case OpAMD64MOVQconst:
  1373  		return uint64(uint32(x.AuxInt)) == uint64(x.AuxInt)
  1374  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1375  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1376  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1377  		return true
  1378  	case OpArg: // note: but not ArgIntReg
  1379  		// amd64 always loads args from the stack unsigned.
  1380  		// most other architectures load them sign/zero extended based on the type.
  1381  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1382  	case OpPhi, OpSelect0, OpSelect1:
   1383  		// Phis can use each other as arguments; instead of tracking visited values,
   1384  		// just limit the recursion depth.
  1385  		if depth <= 0 {
  1386  			return false
  1387  		}
  1388  		for i := range x.Args {
  1389  			if !ZeroUpper32Bits(x.Args[i], depth-1) {
  1390  				return false
  1391  			}
  1392  		}
  1393  		return true
  1394  
  1395  	}
  1396  	return false
  1397  }
  1398  
  1399  // ZeroUpper48Bits is similar to ZeroUpper32Bits, but for upper 48 bits.
  1400  func ZeroUpper48Bits(x *Value, depth int) bool {
  1401  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1402  		return false
  1403  	}
  1404  	switch x.Op {
  1405  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1406  		return true
  1407  	case OpAMD64MOVQconst, OpAMD64MOVLconst:
  1408  		return uint64(uint16(x.AuxInt)) == uint64(x.AuxInt)
  1409  	case OpArg: // note: but not ArgIntReg
  1410  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1411  	case OpPhi, OpSelect0, OpSelect1:
   1412  		// Phis can use each other as arguments; instead of tracking visited values,
   1413  		// just limit the recursion depth.
  1414  		if depth <= 0 {
  1415  			return false
  1416  		}
  1417  		for i := range x.Args {
  1418  			if !ZeroUpper48Bits(x.Args[i], depth-1) {
  1419  				return false
  1420  			}
  1421  		}
  1422  		return true
  1423  
  1424  	}
  1425  	return false
  1426  }
  1427  
  1428  // ZeroUpper56Bits is similar to ZeroUpper32Bits, but for upper 56 bits.
  1429  func ZeroUpper56Bits(x *Value, depth int) bool {
  1430  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1431  		return false
  1432  	}
  1433  	switch x.Op {
  1434  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1435  		return true
  1436  	case OpAMD64MOVQconst, OpAMD64MOVLconst:
  1437  		return uint64(uint8(x.AuxInt)) == uint64(x.AuxInt)
  1438  	case OpArg: // note: but not ArgIntReg
  1439  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1440  	case OpPhi, OpSelect0, OpSelect1:
   1441  		// Phis can use each other as arguments; instead of tracking visited values,
   1442  		// just limit the recursion depth.
  1443  		if depth <= 0 {
  1444  			return false
  1445  		}
  1446  		for i := range x.Args {
  1447  			if !ZeroUpper56Bits(x.Args[i], depth-1) {
  1448  				return false
  1449  			}
  1450  		}
  1451  		return true
  1452  
  1453  	}
  1454  	return false
  1455  }
  1456  
  1457  func isInlinableMemclr(c *Config, sz int64) bool {
  1458  	if sz < 0 {
  1459  		return false
  1460  	}
  1461  	// TODO: expand this check to allow other architectures
  1462  	// see CL 454255 and issue 56997
  1463  	switch c.arch {
  1464  	case "amd64", "arm64":
  1465  		return true
  1466  	case "ppc64le", "ppc64", "loong64":
  1467  		return sz < 512
  1468  	}
  1469  	return false
  1470  }
  1471  
  1472  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1473  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1474  // safe, either because Move will do all of its loads before any of its stores, or
  1475  // because the arguments are known to be disjoint.
  1476  // This is used as a check for replacing memmove with Move ops.
  1477  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1478  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1479  	// Move ops may or may not be faster for large sizes depending on how the platform
  1480  	// lowers them, so we only perform this optimization on platforms that we know to
  1481  	// have fast Move ops.
  1482  	switch c.arch {
  1483  	case "amd64":
  1484  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1485  	case "arm64":
  1486  		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
  1487  	case "386":
  1488  		return sz <= 8
  1489  	case "s390x", "ppc64", "ppc64le":
  1490  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1491  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1492  		return sz <= 4
  1493  	}
  1494  	return false
  1495  }
  1496  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1497  	return isInlinableMemmove(dst, src, sz, c)
  1498  }
  1499  
  1500  // logLargeCopy logs the occurrence of a large copy.
  1501  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1502  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1503  func logLargeCopy(v *Value, s int64) bool {
  1504  	if s < 128 {
  1505  		return true
  1506  	}
  1507  	if logopt.Enabled() {
  1508  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1509  	}
  1510  	return true
  1511  }
  1512  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1513  	if s < 128 {
  1514  		return
  1515  	}
  1516  	if logopt.Enabled() {
  1517  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1518  	}
  1519  }
  1520  
  1521  // hasSmallRotate reports whether the architecture has rotate instructions
  1522  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1523  func hasSmallRotate(c *Config) bool {
  1524  	switch c.arch {
  1525  	case "amd64", "386":
  1526  		return true
  1527  	default:
  1528  		return false
  1529  	}
  1530  }
  1531  
  1532  func supportsPPC64PCRel() bool {
   1533  	// PCRel is currently supported for >= power10, linux only.
   1534  	// Internal and external linking support this on ppc64le; internal linking supports it on ppc64.
  1535  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1536  }
  1537  
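         // newPPC64ShiftAuxInt packs a shift amount and mask bounds into a single
         // auxint of the form sh<<16 | mb<<8 | me, which GetPPC64Shiftsh and
         // GetPPC64Shiftmb below unpack. For example (illustrative values):
         //
         //	newPPC64ShiftAuxInt(3, 4, 28, 32) == 3<<16 | 4<<8 | 28
         //	GetPPC64Shiftsh(3<<16 | 4<<8 | 28) == 3
         //	GetPPC64Shiftmb(3<<16 | 4<<8 | 28) == 4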
  1538  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1539  	if sh < 0 || sh >= sz {
  1540  		panic("PPC64 shift arg sh out of range")
  1541  	}
  1542  	if mb < 0 || mb >= sz {
  1543  		panic("PPC64 shift arg mb out of range")
  1544  	}
  1545  	if me < 0 || me >= sz {
  1546  		panic("PPC64 shift arg me out of range")
  1547  	}
  1548  	return int32(sh<<16 | mb<<8 | me)
  1549  }
  1550  
  1551  func GetPPC64Shiftsh(auxint int64) int64 {
  1552  	return int64(int8(auxint >> 16))
  1553  }
  1554  
  1555  func GetPPC64Shiftmb(auxint int64) int64 {
  1556  	return int64(int8(auxint >> 8))
  1557  }
  1558  
   1559  // isPPC64WordRotateMask reports whether this value can be encoded as a mask
   1560  // for an rlwinm-like operation. Masks can also extend from the msb and wrap
   1561  // to the lsb. That is, the valid masks are 32 bit strings
   1562  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1563  //
  1564  // Note: This ignores the upper 32 bits of the input. When a
  1565  // zero extended result is desired (e.g a 64 bit result), the
  1566  // user must verify the upper 32 bits are 0 and the mask is
  1567  // contiguous (that is, non-wrapping).
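         //
         // For example:
         //
         //	isPPC64WordRotateMask(0x000000F0) == true  // 0..01..10..0
         //	isPPC64WordRotateMask(0xF000000F) == true  // wraps from msb to lsb
         //	isPPC64WordRotateMask(0x00000505) == false // not contiguous
         //	isPPC64WordRotateMask(0)          == false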
  1568  func isPPC64WordRotateMask(v64 int64) bool {
  1569  	// Isolate rightmost 1 (if none 0) and add.
  1570  	v := uint32(v64)
  1571  	vp := (v & -v) + v
  1572  	// Likewise, for the wrapping case.
  1573  	vn := ^v
  1574  	vpn := (vn & -vn) + vn
  1575  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1576  }
  1577  
   1578  // isPPC64WordRotateMaskNonWrapping reports whether this mask is a valid,
   1579  // contiguous bitmask that can be represented by an RLWNM mask and also
   1580  // clears the upper 32 bits of the register.
  1581  func isPPC64WordRotateMaskNonWrapping(v64 int64) bool {
  1582  	// Isolate rightmost 1 (if none 0) and add.
  1583  	v := uint32(v64)
  1584  	vp := (v & -v) + v
  1585  	return (v&vp == 0) && v != 0 && uint64(uint32(v64)) == uint64(v64)
  1586  }
  1587  
  1588  // Compress mask and shift into single value of the form
  1589  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1590  // be used to regenerate the input mask.
  1591  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1592  	var mb, me, mbn, men int
  1593  
  1594  	// Determine boundaries and then decode them
  1595  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1596  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1597  	} else if nbits == 32 {
  1598  		mb = bits.LeadingZeros32(uint32(mask))
  1599  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1600  		mbn = bits.LeadingZeros32(^uint32(mask))
  1601  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1602  	} else {
  1603  		mb = bits.LeadingZeros64(uint64(mask))
  1604  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1605  		mbn = bits.LeadingZeros64(^uint64(mask))
  1606  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1607  	}
  1608  	// Check for a wrapping mask (e.g bits at 0 and 63)
  1609  	if mb == 0 && me == int(nbits) {
  1610  		// swap the inverted values
  1611  		mb, me = men, mbn
  1612  	}
  1613  
  1614  	return int64(me) | int64(mb<<8) | rotate<<16 | nbits<<24
  1615  }
  1616  
  1617  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1618  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1619  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1620  // operations can be combined. This function assumes the two opcodes can
  1621  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1622  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1623  	mb := s
  1624  	r := 64 - s
  1625  	// A larger mb is a smaller mask.
  1626  	if (encoded>>8)&0xFF < mb {
  1627  		encoded = (encoded &^ 0xFF00) | mb<<8
  1628  	}
  1629  	// The rotate is expected to be 0.
  1630  	if (encoded & 0xFF0000) != 0 {
  1631  		panic("non-zero rotate")
  1632  	}
  1633  	return encoded | r<<16
  1634  }
  1635  
  1636  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1637  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1638  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1639  	auxint := uint64(sauxint)
  1640  	rotate = int64((auxint >> 16) & 0xFF)
  1641  	mb = int64((auxint >> 8) & 0xFF)
  1642  	me = int64((auxint >> 0) & 0xFF)
  1643  	nbits := int64((auxint >> 24) & 0xFF)
  1644  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1645  	if mb > me {
  1646  		mask = ^mask
  1647  	}
  1648  	if nbits == 32 {
  1649  		mask = uint64(uint32(mask))
  1650  	}
  1651  
  1652  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1653  	// is inclusive.
  1654  	me = (me - 1) & (nbits - 1)
  1655  	return
  1656  }
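
        // For example (illustrative only; the values follow from the code above):
        //
        //	enc := encodePPC64RotateMask(3, 0x00FF0000, 32) // 16 | 8<<8 | 3<<16 | 32<<24
        //	r, mb, me, mask := DecodePPC64RotateMask(enc)
        //	// r == 3, mb == 8, me == 15 (inclusive), mask == 0x00FF0000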
  1657  
  1658  // This verifies that the mask is a set of
  1659  // consecutive bits including the least
  1660  // significant bit.
  1661  func isPPC64ValidShiftMask(v int64) bool {
  1662  	if (v != 0) && ((v+1)&v) == 0 {
  1663  		return true
  1664  	}
  1665  	return false
  1666  }
  1667  
  1668  func getPPC64ShiftMaskLength(v int64) int64 {
  1669  	return int64(bits.Len64(uint64(v)))
  1670  }
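
        // As an illustration:
        //
        //	isPPC64ValidShiftMask(0x00FF)   // true: 0x00FF+1 is a power of two
        //	isPPC64ValidShiftMask(0x0FF0)   // false: does not include the least significant bit
        //	getPPC64ShiftMaskLength(0x00FF) // 8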
  1671  
  1672  // Decompose a shift right into an equivalent rotate/mask,
  1673  // and return mask & m.
  1674  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1675  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1676  	return m & int64(smask)
  1677  }
  1678  
  1679  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1680  func mergePPC64AndSrwi(m, s int64) int64 {
  1681  	mask := mergePPC64RShiftMask(m, s, 32)
  1682  	if !isPPC64WordRotateMask(mask) {
  1683  		return 0
  1684  	}
  1685  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1686  }
  1687  
  1688  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1689  func mergePPC64AndSrdi(m, s int64) int64 {
  1690  	mask := mergePPC64RShiftMask(m, s, 64)
  1691  
  1692  	// Verify the rotate and mask result only uses the lower 32 bits.
  1693  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1694  	if rv&uint64(mask) != 0 {
  1695  		return 0
  1696  	}
  1697  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1698  		return 0
  1699  	}
  1700  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1701  }
  1702  
  1703  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1704  func mergePPC64AndSldi(m, s int64) int64 {
  1705  	mask := -1 << s & m
  1706  
  1707  	// Verify the rotate and mask result only uses the lower 32 bits.
  1708  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1709  	if rv&uint64(mask) != 0 {
  1710  		return 0
  1711  	}
  1712  	if !isPPC64WordRotateMaskNonWrapping(mask) {
  1713  		return 0
  1714  	}
  1715  	return encodePPC64RotateMask(s&31, mask, 32)
  1716  }
  1717  
  1718  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1719  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1720  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1721  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1722  	// For CLRLSLDI, it's more convenient to think of it as masking off the left bits, then rotating left.
  1723  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1724  
  1725  	// Rewrite mask to apply after the final left shift.
  1726  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1727  
  1728  	r_1 := 32 - srw
  1729  	r_2 := GetPPC64Shiftsh(sld)
  1730  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1731  
  1732  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1733  		return 0
  1734  	}
  1735  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1736  }
  1737  
  1738  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1739  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1740  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1741  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1742  	// For CLRLSLDI, it's more convenient to think of it as masking off the left bits, then rotating left.
  1743  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
  1744  
  1745  	// Rewrite mask to apply after the final left shift.
  1746  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1747  
  1748  	r_1 := 64 - srd
  1749  	r_2 := GetPPC64Shiftsh(sld)
  1750  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1751  
  1752  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1753  		return 0
  1754  	}
  1755  	// This combine only works when selecting and shifting the lower 32 bits.
  1756  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1757  	if v1&mask_3 != 0 {
  1758  		return 0
  1759  	}
  1760  	return encodePPC64RotateMask(r_3&31, int64(mask_3), 32)
  1761  }
  1762  
  1763  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1764  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1765  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1766  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1767  	// For CLRLSLDI, it's more convenient to think of it as masking off the left bits, then rotating left.
  1768  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1769  
  1770  	// combine the masks, and adjust for the final left shift.
  1771  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1772  	r_2 := GetPPC64Shiftsh(int64(sld))
  1773  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1774  
  1775  	// Verify the result is still a valid bitmask of <= 32 bits.
  1776  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1777  		return 0
  1778  	}
  1779  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1780  }
  1781  
  1782  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1783  // or 0 if they cannot be merged.
  1784  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1785  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1786  	mask_out := (mask_rlw & uint64(mask))
  1787  
  1788  	// Verify the result is still a valid bitmask of <= 32 bits.
  1789  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1790  		return 0
  1791  	}
  1792  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1793  }
  1794  
  1795  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1796  // result. Return rlw if it does, 0 otherwise.
  1797  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1798  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1799  	if mb > me {
  1800  		return 0
  1801  	}
  1802  	return rlw
  1803  }
  1804  
  1805  // Test if an ANDconst feeding into a RLWINM can be merged. Return the encoded RLWINM constant,
  1806  // or 0 if they cannot be merged.
  1807  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1808  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1809  
  1810  	// Rotate the input mask, combine with the rlwnm mask, and test if it is still a valid rlwinm mask.
  1811  	r_mask := bits.RotateLeft32(mask, int(r))
  1812  
  1813  	mask_out := (mask_rlw & uint64(r_mask))
  1814  
  1815  	// Verify the result is still a valid bitmask of <= 32 bits.
  1816  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1817  		return 0
  1818  	}
  1819  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1820  }
  1821  
  1822  // Test if RLWINM feeding into SLDconst can be merged. Return the encoded RLWINM constant,
  1823  // or 0 if they cannot be merged.
  1824  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1825  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1826  	if mb > me || mb < sldi {
  1827  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1828  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1829  		return 0
  1830  	}
  1831  	// combine the masks, and adjust for the final left shift.
  1832  	mask_3 := mask_1 << sldi
  1833  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1834  
  1835  	// Verify the result is still a valid bitmask of <= 32 bits.
  1836  	if uint64(uint32(mask_3)) != mask_3 {
  1837  		return 0
  1838  	}
  1839  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1840  }
  1841  
  1842  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1843  // or return 0 if they cannot be combined.
  1844  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1845  	if sld > srw || srw >= 32 {
  1846  		return 0
  1847  	}
  1848  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1849  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1850  	mask := (mask_r & mask_l) << uint(sld)
  1851  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1852  }
  1853  
  1854  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1855  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1856  // of op.
  1857  //
  1858  // E.g. consider the case:
  1859  // a = (ADD x y)
  1860  // b = (CMPconst [0] a)
  1861  // c = (OR a z)
  1862  //
  1863  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1864  // would produce:
  1865  // a  = (ADD x y)
  1866  // a' = (ADDCC x y)
  1867  // a” = (Select0 a')
  1868  // b  = (CMPconst [0] a”)
  1869  // c  = (OR a z)
  1870  //
  1871  // which makes it impossible to rewrite the second user. Instead the result
  1872  // of this conversion is:
  1873  // a' = (ADDCC x y)
  1874  // a  = (Select0 a')
  1875  // b  = (CMPconst [0] a)
  1876  // c  = (OR a z)
  1877  //
  1878  // Which makes it trivial to rewrite b using a lowering rule.
  1879  func convertPPC64OpToOpCC(op *Value) *Value {
  1880  	ccOpMap := map[Op]Op{
  1881  		OpPPC64ADD:      OpPPC64ADDCC,
  1882  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1883  		OpPPC64AND:      OpPPC64ANDCC,
  1884  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1885  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1886  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1887  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1888  		OpPPC64NEG:      OpPPC64NEGCC,
  1889  		OpPPC64NOR:      OpPPC64NORCC,
  1890  		OpPPC64OR:       OpPPC64ORCC,
  1891  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1892  		OpPPC64SUB:      OpPPC64SUBCC,
  1893  		OpPPC64XOR:      OpPPC64XORCC,
  1894  	}
  1895  	b := op.Block
  1896  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1897  	opCC.AddArgs(op.Args...)
  1898  	op.reset(OpSelect0)
  1899  	op.AddArgs(opCC)
  1900  	return op
  1901  }
  1902  
  1903  // Try converting a RLDICL to ANDCC. If successful, return the mask otherwise 0.
  1904  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1905  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1906  	if r != 0 || mask&0xFFFF != mask {
  1907  		return 0
  1908  	}
  1909  	return int64(mask)
  1910  }
  1911  
  1912  // Convenience function to rotate a 32 bit constant value by another constant.
  1913  func rotateLeft32(v, rotate int64) int64 {
  1914  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1915  }
  1916  
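        // rotateRight64 rotates the 64 bit constant value v right by rotate bits.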
  1917  func rotateRight64(v, rotate int64) int64 {
  1918  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1919  }
  1920  
  1921  // encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1922  func armBFAuxInt(lsb, width int64) arm64BitField {
  1923  	if lsb < 0 || lsb > 63 {
  1924  		panic("ARM(64) bit field lsb constant out of range")
  1925  	}
  1926  	if width < 1 || lsb+width > 64 {
  1927  		panic("ARM(64) bit field width constant out of range")
  1928  	}
  1929  	return arm64BitField(width | lsb<<8)
  1930  }
  1931  
  1932  // returns the lsb part of the auxInt field of arm64 bitfield ops.
  1933  func (bfc arm64BitField) lsb() int64 {
  1934  	return int64(uint64(bfc) >> 8)
  1935  }
  1936  
  1937  // returns the width part of the auxInt field of arm64 bitfield ops.
  1938  func (bfc arm64BitField) width() int64 {
  1939  	return int64(bfc) & 0xff
  1940  }
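
        // For illustration, a round trip through the bitfield encoding:
        //
        //	bfc := armBFAuxInt(8, 16) // 16 | 8<<8
        //	bfc.lsb()                 // 8
        //	bfc.width()               // 16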
  1941  
  1942  // checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1943  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1944  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1945  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1946  }
  1947  
  1948  // returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1949  func arm64BFWidth(mask, rshift int64) int64 {
  1950  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1951  	if shiftedMask == 0 {
  1952  		panic("ARM64 BF mask is zero")
  1953  	}
  1954  	return nto(shiftedMask)
  1955  }
  1956  
  1957  // encodes condition code and NZCV flags into result.
  1958  func arm64ConditionalParamsAuxInt(cond Op, nzcv uint8) arm64ConditionalParams {
  1959  	if cond < OpARM64Equal || cond > OpARM64GreaterEqualU {
  1960  		panic("Wrong conditional operation")
  1961  	}
  1962  	if nzcv&0x0f != nzcv {
  1963  		panic("Wrong value of NZCV flag")
  1964  	}
  1965  	return arm64ConditionalParams{cond, nzcv, 0, false}
  1966  }
  1967  
  1968  // encodes condition code, NZCV flags and constant value into auxint.
  1969  func arm64ConditionalParamsAuxIntWithValue(cond Op, nzcv uint8, value uint8) arm64ConditionalParams {
  1970  	if value&0x1f != value {
  1971  		panic("Wrong value of constant")
  1972  	}
  1973  	params := arm64ConditionalParamsAuxInt(cond, nzcv)
  1974  	params.constValue = value
  1975  	params.ind = true
  1976  	return params
  1977  }
  1978  
  1979  // extracts condition code from auxint.
  1980  func (condParams arm64ConditionalParams) Cond() Op {
  1981  	return condParams.cond
  1982  }
  1983  
  1984  // extracts NZCV flags from auxint.
  1985  func (condParams arm64ConditionalParams) Nzcv() int64 {
  1986  	return int64(condParams.nzcv)
  1987  }
  1988  
  1989  // extracts constant value from auxint if present.
  1990  func (condParams arm64ConditionalParams) ConstValue() (int64, bool) {
  1991  	return int64(condParams.constValue), condParams.ind
  1992  }
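
        // For example (illustrative only):
        //
        //	p := arm64ConditionalParamsAuxIntWithValue(OpARM64Equal, 0x4, 5)
        //	p.Cond()       // OpARM64Equal
        //	p.Nzcv()       // 4
        //	p.ConstValue() // 5, true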
  1993  
  1994  // registerizable reports whether typ is a primitive type that fits in
  1995  // a register. It assumes float64 values will always fit into registers
  1996  // even if that isn't strictly true.
  1997  func registerizable(b *Block, typ *types.Type) bool {
  1998  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1999  		return true
  2000  	}
  2001  	if typ.IsInteger() {
  2002  		return typ.Size() <= b.Func.Config.RegSize
  2003  	}
  2004  	return false
  2005  }
  2006  
  2007  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  2008  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  2009  	f := v.Block.Func
  2010  	if !f.Config.Race {
  2011  		return false
  2012  	}
  2013  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  2014  		return false
  2015  	}
  2016  	for _, b := range f.Blocks {
  2017  		for _, v := range b.Values {
  2018  			switch v.Op {
  2019  			case OpStaticCall, OpStaticLECall:
  2020  				// Check for racefuncenter will encounter racefuncexit and vice versa.
  2021  				// Allow calls to panic*
  2022  				s := v.Aux.(*AuxCall).Fn.String()
  2023  				switch s {
  2024  				case "runtime.racefuncenter", "runtime.racefuncexit",
  2025  					"runtime.panicdivide", "runtime.panicwrap",
  2026  					"runtime.panicshift":
  2027  					continue
  2028  				}
  2029  				// If we encountered any call, we need to keep racefunc*,
  2030  				// for accurate stacktraces.
  2031  				return false
  2032  			case OpPanicBounds, OpPanicExtend:
  2033  				// Note: these are panic generators that are ok (like the static calls above).
  2034  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  2035  				// We must keep the race functions if there are any other call types.
  2036  				return false
  2037  			}
  2038  		}
  2039  	}
  2040  	if isSameCall(sym, "runtime.racefuncenter") {
  2041  		// TODO REGISTER ABI this needs to be cleaned up.
  2042  		// If we're removing racefuncenter, remove its argument as well.
  2043  		if v.Args[0].Op != OpStore {
  2044  			if v.Op == OpStaticLECall {
  2045  				// there is no store, yet.
  2046  				return true
  2047  			}
  2048  			return false
  2049  		}
  2050  		mem := v.Args[0].Args[2]
  2051  		v.Args[0].reset(OpCopy)
  2052  		v.Args[0].AddArg(mem)
  2053  	}
  2054  	return true
  2055  }
  2056  
  2057  // symIsRO reports whether sym is a read-only global.
  2058  func symIsRO(sym Sym) bool {
  2059  	lsym := sym.(*obj.LSym)
  2060  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  2061  }
  2062  
  2063  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  2064  func symIsROZero(sym Sym) bool {
  2065  	lsym := sym.(*obj.LSym)
  2066  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  2067  		return false
  2068  	}
  2069  	for _, b := range lsym.P {
  2070  		if b != 0 {
  2071  			return false
  2072  		}
  2073  	}
  2074  	return true
  2075  }
  2076  
  2077  // isFixedLoad reports whether the load can be resolved to a fixed address or constant,
  2078  // and can be rewritten by rewriteFixedLoad.
  2079  func isFixedLoad(v *Value, sym Sym, off int64) bool {
  2080  	lsym := sym.(*obj.LSym)
  2081  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2082  		for _, r := range lsym.R {
  2083  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2084  				return true
  2085  			}
  2086  		}
  2087  		return false
  2088  	}
  2089  
  2090  	if ti := lsym.TypeInfo(); ti != nil {
  2091  		// Type symbols do not contain information about their fields, unlike the cases above.
  2092  		// Hand-implement field accesses.
  2093  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2094  
  2095  		t := ti.Type.(*types.Type)
  2096  
  2097  		for _, f := range rttype.Type.Fields() {
  2098  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2099  				switch f.Sym.Name {
  2100  				case "Size_", "PtrBytes", "Hash", "Kind_", "GCData":
  2101  					return true
  2102  				default:
  2103  					// fmt.Println("unknown field", f.Sym.Name)
  2104  					return false
  2105  				}
  2106  			}
  2107  		}
  2108  
  2109  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2110  			return true
  2111  		}
  2112  
  2113  		return false
  2114  	}
  2115  
  2116  	return false
  2117  }
  2118  
  2119  // rewriteFixedLoad rewrites a load to a fixed address or constant, if isFixedLoad returns true.
  2120  func rewriteFixedLoad(v *Value, sym Sym, sb *Value, off int64) *Value {
  2121  	b := v.Block
  2122  	f := b.Func
  2123  
  2124  	lsym := sym.(*obj.LSym)
  2125  	if (v.Type.IsPtrShaped() || v.Type.IsUintptr()) && lsym.Type == objabi.SRODATA {
  2126  		for _, r := range lsym.R {
  2127  			if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  2128  				if strings.HasPrefix(r.Sym.Name, "type:") {
  2129  					// In case we're loading a type out of a dictionary, we need to record
  2130  					// that the containing function might put that type in an interface.
  2131  					// That information is currently recorded in relocations in the dictionary,
  2132  					// but if we perform this load at compile time then the dictionary
  2133  					// might be dead.
  2134  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2135  				} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  2136  					// Same, but if we're using an itab we need to record that the
  2137  					// itab._type might be put in an interface.
  2138  					reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  2139  				}
  2140  				v.reset(OpAddr)
  2141  				v.Aux = symToAux(r.Sym)
  2142  				v.AddArg(sb)
  2143  				return v
  2144  			}
  2145  		}
  2146  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2147  	}
  2148  
  2149  	if ti := lsym.TypeInfo(); ti != nil {
  2150  		// Type symbols do not contain information about their fields, unlike the cases above.
  2151  		// Hand-implement field accesses.
  2152  		// TODO: can this be replaced with reflectdata.writeType and just use the code above?
  2153  
  2154  		t := ti.Type.(*types.Type)
  2155  
  2156  		ptrSizedOpConst := OpConst64
  2157  		if f.Config.PtrSize == 4 {
  2158  			ptrSizedOpConst = OpConst32
  2159  		}
  2160  
  2161  		for _, f := range rttype.Type.Fields() {
  2162  			if f.Offset == off && copyCompatibleType(v.Type, f.Type) {
  2163  				switch f.Sym.Name {
  2164  				case "Size_":
  2165  					v.reset(ptrSizedOpConst)
  2166  					v.AuxInt = t.Size()
  2167  					return v
  2168  				case "PtrBytes":
  2169  					v.reset(ptrSizedOpConst)
  2170  					v.AuxInt = types.PtrDataSize(t)
  2171  					return v
  2172  				case "Hash":
  2173  					v.reset(OpConst32)
  2174  					v.AuxInt = int64(types.TypeHash(t))
  2175  					return v
  2176  				case "Kind_":
  2177  					v.reset(OpConst8)
  2178  					v.AuxInt = int64(reflectdata.ABIKindOfType(t))
  2179  					return v
  2180  				case "GCData":
  2181  					gcdata, _ := reflectdata.GCSym(t, true)
  2182  					v.reset(OpAddr)
  2183  					v.Aux = symToAux(gcdata)
  2184  					v.AddArg(sb)
  2185  					return v
  2186  				default:
  2187  					base.Fatalf("unknown field %s for fixedLoad of %s at offset %d", f.Sym.Name, lsym.Name, off)
  2188  				}
  2189  			}
  2190  		}
  2191  
  2192  		if t.IsPtr() && off == rttype.PtrType.OffsetOf("Elem") {
  2193  			elemSym := reflectdata.TypeLinksym(t.Elem())
  2194  			reflectdata.MarkTypeSymUsedInInterface(elemSym, f.fe.Func().Linksym())
  2195  			v.reset(OpAddr)
  2196  			v.Aux = symToAux(elemSym)
  2197  			v.AddArg(sb)
  2198  			return v
  2199  		}
  2200  
  2201  		base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2202  	}
  2203  
  2204  	base.Fatalf("fixedLoad data not known for %s:%d", sym, off)
  2205  	return nil
  2206  }
  2207  
  2208  // read8 reads one byte from the read-only global sym at offset off.
  2209  func read8(sym Sym, off int64) uint8 {
  2210  	lsym := sym.(*obj.LSym)
  2211  	if off >= int64(len(lsym.P)) || off < 0 {
  2212  		// Invalid index into the global sym.
  2213  		// This can happen in dead code, so we don't want to panic.
  2214  		// Just return any value, it will eventually get ignored.
  2215  		// See issue 29215.
  2216  		return 0
  2217  	}
  2218  	return lsym.P[off]
  2219  }
  2220  
  2221  // read16 reads two bytes from the read-only global sym at offset off.
  2222  func read16(sym Sym, off int64, byteorder binary.ByteOrder) uint16 {
  2223  	lsym := sym.(*obj.LSym)
  2224  	// lsym.P is written lazily.
  2225  	// Bytes requested after the end of lsym.P are 0.
  2226  	var src []byte
  2227  	if 0 <= off && off < int64(len(lsym.P)) {
  2228  		src = lsym.P[off:]
  2229  	}
  2230  	buf := make([]byte, 2)
  2231  	copy(buf, src)
  2232  	return byteorder.Uint16(buf)
  2233  }
  2234  
  2235  // read32 reads four bytes from the read-only global sym at offset off.
  2236  func read32(sym Sym, off int64, byteorder binary.ByteOrder) uint32 {
  2237  	lsym := sym.(*obj.LSym)
  2238  	var src []byte
  2239  	if 0 <= off && off < int64(len(lsym.P)) {
  2240  		src = lsym.P[off:]
  2241  	}
  2242  	buf := make([]byte, 4)
  2243  	copy(buf, src)
  2244  	return byteorder.Uint32(buf)
  2245  }
  2246  
  2247  // read64 reads eight bytes from the read-only global sym at offset off.
  2248  func read64(sym Sym, off int64, byteorder binary.ByteOrder) uint64 {
  2249  	lsym := sym.(*obj.LSym)
  2250  	var src []byte
  2251  	if 0 <= off && off < int64(len(lsym.P)) {
  2252  		src = lsym.P[off:]
  2253  	}
  2254  	buf := make([]byte, 8)
  2255  	copy(buf, src)
  2256  	return byteorder.Uint64(buf)
  2257  }
  2258  
  2259  // sequentialAddresses reports true if it can prove that x + n == y
  2260  func sequentialAddresses(x, y *Value, n int64) bool {
  2261  	if x == y && n == 0 {
  2262  		return true
  2263  	}
  2264  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2265  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2266  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2267  		return true
  2268  	}
  2269  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2270  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2271  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2272  		return true
  2273  	}
  2274  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2275  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2276  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2277  		return true
  2278  	}
  2279  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2280  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2281  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2282  		return true
  2283  	}
  2284  	return false
  2285  }
  2286  
  2287  // flagConstant represents the result of a compile-time comparison.
  2288  // The sense of these flags does not necessarily represent the hardware's notion
  2289  // of a flags register - these are just a compile-time construct.
  2290  // We happen to match the semantics to those of arm/arm64.
  2291  // Note that these semantics differ from x86: the carry flag has the opposite
  2292  // sense on a subtraction!
  2293  //
  2294  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2295  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2296  //	 (because it does x + ^y + C).
  2297  //
  2298  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2299  type flagConstant uint8
  2300  
  2301  // N reports whether the result of an operation is negative (high bit set).
  2302  func (fc flagConstant) N() bool {
  2303  	return fc&1 != 0
  2304  }
  2305  
  2306  // Z reports whether the result of an operation is 0.
  2307  func (fc flagConstant) Z() bool {
  2308  	return fc&2 != 0
  2309  }
  2310  
  2311  // C reports whether an unsigned add overflowed (carry), or an
  2312  // unsigned subtract did not underflow (borrow).
  2313  func (fc flagConstant) C() bool {
  2314  	return fc&4 != 0
  2315  }
  2316  
  2317  // V reports whether a signed operation overflowed or underflowed.
  2318  func (fc flagConstant) V() bool {
  2319  	return fc&8 != 0
  2320  }
  2321  
  2322  func (fc flagConstant) eq() bool {
  2323  	return fc.Z()
  2324  }
  2325  func (fc flagConstant) ne() bool {
  2326  	return !fc.Z()
  2327  }
  2328  func (fc flagConstant) lt() bool {
  2329  	return fc.N() != fc.V()
  2330  }
  2331  func (fc flagConstant) le() bool {
  2332  	return fc.Z() || fc.lt()
  2333  }
  2334  func (fc flagConstant) gt() bool {
  2335  	return !fc.Z() && fc.ge()
  2336  }
  2337  func (fc flagConstant) ge() bool {
  2338  	return fc.N() == fc.V()
  2339  }
  2340  func (fc flagConstant) ult() bool {
  2341  	return !fc.C()
  2342  }
  2343  func (fc flagConstant) ule() bool {
  2344  	return fc.Z() || fc.ult()
  2345  }
  2346  func (fc flagConstant) ugt() bool {
  2347  	return !fc.Z() && fc.uge()
  2348  }
  2349  func (fc flagConstant) uge() bool {
  2350  	return fc.C()
  2351  }
  2352  
  2353  func (fc flagConstant) ltNoov() bool {
  2354  	return fc.lt() && !fc.V()
  2355  }
  2356  func (fc flagConstant) leNoov() bool {
  2357  	return fc.le() && !fc.V()
  2358  }
  2359  func (fc flagConstant) gtNoov() bool {
  2360  	return fc.gt() && !fc.V()
  2361  }
  2362  func (fc flagConstant) geNoov() bool {
  2363  	return fc.ge() && !fc.V()
  2364  }
  2365  
  2366  func (fc flagConstant) String() string {
  2367  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2368  }
  2369  
  2370  type flagConstantBuilder struct {
  2371  	N bool
  2372  	Z bool
  2373  	C bool
  2374  	V bool
  2375  }
  2376  
  2377  func (fcs flagConstantBuilder) encode() flagConstant {
  2378  	var fc flagConstant
  2379  	if fcs.N {
  2380  		fc |= 1
  2381  	}
  2382  	if fcs.Z {
  2383  		fc |= 2
  2384  	}
  2385  	if fcs.C {
  2386  		fc |= 4
  2387  	}
  2388  	if fcs.V {
  2389  		fc |= 8
  2390  	}
  2391  	return fc
  2392  }
  2393  
  2394  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2395  //  - the results of the C flag are different
  2396  //  - the results of the V flag when y==minint are different
  2397  
  2398  // addFlags64 returns the flags that would be set from computing x+y.
  2399  func addFlags64(x, y int64) flagConstant {
  2400  	var fcb flagConstantBuilder
  2401  	fcb.Z = x+y == 0
  2402  	fcb.N = x+y < 0
  2403  	fcb.C = uint64(x+y) < uint64(x)
  2404  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2405  	return fcb.encode()
  2406  }
  2407  
  2408  // subFlags64 returns the flags that would be set from computing x-y.
  2409  func subFlags64(x, y int64) flagConstant {
  2410  	var fcb flagConstantBuilder
  2411  	fcb.Z = x-y == 0
  2412  	fcb.N = x-y < 0
  2413  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2414  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2415  	return fcb.encode()
  2416  }
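
        // As an illustration: subFlags64(1, 2) computes 1-2 = -1, so N is set, Z is clear,
        // and C is clear (a borrow, in the arm sense described on flagConstant):
        //
        //	subFlags64(1, 2).lt()  // true
        //	subFlags64(1, 2).ult() // true
        //	subFlags64(1, 2).uge() // false (the subtraction borrowed)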
  2417  
  2418  // addFlags32 returns the flags that would be set from computing x+y.
  2419  func addFlags32(x, y int32) flagConstant {
  2420  	var fcb flagConstantBuilder
  2421  	fcb.Z = x+y == 0
  2422  	fcb.N = x+y < 0
  2423  	fcb.C = uint32(x+y) < uint32(x)
  2424  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2425  	return fcb.encode()
  2426  }
  2427  
  2428  // subFlags32 returns the flags that would be set from computing x-y.
  2429  func subFlags32(x, y int32) flagConstant {
  2430  	var fcb flagConstantBuilder
  2431  	fcb.Z = x-y == 0
  2432  	fcb.N = x-y < 0
  2433  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2434  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2435  	return fcb.encode()
  2436  }
  2437  
  2438  // logicFlags64 returns flags set to the sign/zeroness of x.
  2439  // C and V are set to false.
  2440  func logicFlags64(x int64) flagConstant {
  2441  	var fcb flagConstantBuilder
  2442  	fcb.Z = x == 0
  2443  	fcb.N = x < 0
  2444  	return fcb.encode()
  2445  }
  2446  
  2447  // logicFlags32 returns flags set to the sign/zeroness of x.
  2448  // C and V are set to false.
  2449  func logicFlags32(x int32) flagConstant {
  2450  	var fcb flagConstantBuilder
  2451  	fcb.Z = x == 0
  2452  	fcb.N = x < 0
  2453  	return fcb.encode()
  2454  }
  2455  
  2456  func makeJumpTableSym(b *Block) *obj.LSym {
  2457  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2458  	// The jump table symbol is accessed only from the function symbol.
  2459  	s.Set(obj.AttrStatic, true)
  2460  	return s
  2461  }
  2462  
  2463  // canRotate reports whether the architecture supports
  2464  // rotates of integer registers with the given number of bits.
  2465  func canRotate(c *Config, bits int64) bool {
  2466  	if bits > c.PtrSize*8 {
  2467  		// Don't rewrite to rotates bigger than the machine word.
  2468  		return false
  2469  	}
  2470  	switch c.arch {
  2471  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2472  		return true
  2473  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2474  		return bits >= 32
  2475  	default:
  2476  		return false
  2477  	}
  2478  }
  2479  
  2480  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2481  func isARM64bitcon(x uint64) bool {
  2482  	if x == 1<<64-1 || x == 0 {
  2483  		return false
  2484  	}
  2485  	// determine the period and sign-extend a unit to 64 bits
  2486  	switch {
  2487  	case x != x>>32|x<<32:
  2488  		// period is 64
  2489  		// nothing to do
  2490  	case x != x>>16|x<<48:
  2491  		// period is 32
  2492  		x = uint64(int64(int32(x)))
  2493  	case x != x>>8|x<<56:
  2494  		// period is 16
  2495  		x = uint64(int64(int16(x)))
  2496  	case x != x>>4|x<<60:
  2497  		// period is 8
  2498  		x = uint64(int64(int8(x)))
  2499  	default:
  2500  		// period is 4 or 2, always true
  2501  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2502  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2503  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2504  		// 0101, 1010             -- 01   rotate, repeat
  2505  		return true
  2506  	}
  2507  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2508  }
  2509  
  2510  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2511  func sequenceOfOnes(x uint64) bool {
  2512  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2513  	y += x
  2514  	return (y-1)&y == 0
  2515  }
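
        // For example (illustrative only):
        //
        //	sequenceOfOnes(0x3ffc00)          // true: a single contiguous run of ones
        //	isARM64bitcon(0x00ff00ff00ff00ff) // true: period 16, and the unit 0x00ff is a run of ones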
  2516  
  2517  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2518  func isARM64addcon(v int64) bool {
  2519  	/* uimm12 or uimm24? */
  2520  	if v < 0 {
  2521  		return false
  2522  	}
  2523  	if (v & 0xFFF) == 0 {
  2524  		v >>= 12
  2525  	}
  2526  	return v <= 0xFFF
  2527  }
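
        // For illustration:
        //
        //	isARM64addcon(0xfff)    // true: fits in uimm12
        //	isARM64addcon(0x123000) // true: uimm12 shifted left by 12
        //	isARM64addcon(0x1001)   // false: needs more than 12 significant bits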
  2528  
  2529  // setPos sets the position of v to pos, then returns true.
  2530  // Useful for setting the result of a rewrite's position to
  2531  // something other than the default.
  2532  func setPos(v *Value, pos src.XPos) bool {
  2533  	v.Pos = pos
  2534  	return true
  2535  }
  2536  
  2537  // isNonNegative reports whether v is known to be greater or equal to zero.
  2538  // Note that this is pretty simplistic. The prove pass generates more detailed
  2539  // nonnegative information about values.
  2540  func isNonNegative(v *Value) bool {
  2541  	if !v.Type.IsInteger() {
  2542  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2543  	}
  2544  	// TODO: return true if !v.Type.IsSigned()
  2545  	// SSA isn't type-safe enough to do that now (issue 37753).
  2546  	// The checks below depend only on the pattern of bits.
  2547  
  2548  	switch v.Op {
  2549  	case OpConst64:
  2550  		return v.AuxInt >= 0
  2551  
  2552  	case OpConst32:
  2553  		return int32(v.AuxInt) >= 0
  2554  
  2555  	case OpConst16:
  2556  		return int16(v.AuxInt) >= 0
  2557  
  2558  	case OpConst8:
  2559  		return int8(v.AuxInt) >= 0
  2560  
  2561  	case OpStringLen, OpSliceLen, OpSliceCap,
  2562  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2563  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2564  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2565  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2566  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2567  		return true
  2568  
  2569  	case OpRsh64Ux64, OpRsh32Ux64:
  2570  		by := v.Args[1]
  2571  		return by.Op == OpConst64 && by.AuxInt > 0
  2572  
  2573  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2574  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2575  		return isNonNegative(v.Args[0])
  2576  
  2577  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2578  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2579  
  2580  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2581  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2582  		OpOr64, OpOr32, OpOr16, OpOr8,
  2583  		OpXor64, OpXor32, OpXor16, OpXor8:
  2584  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2585  
  2586  		// We could handle OpPhi here, but the improvements from doing
  2587  		// so are very minor, and it is neither simple nor cheap.
  2588  	}
  2589  	return false
  2590  }
  2591  
  2592  func rewriteStructLoad(v *Value) *Value {
  2593  	b := v.Block
  2594  	ptr := v.Args[0]
  2595  	mem := v.Args[1]
  2596  
  2597  	t := v.Type
  2598  	args := make([]*Value, t.NumFields())
  2599  	for i := range args {
  2600  		ft := t.FieldType(i)
  2601  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2602  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2603  	}
  2604  
  2605  	v.reset(OpStructMake)
  2606  	v.AddArgs(args...)
  2607  	return v
  2608  }
  2609  
  2610  func rewriteStructStore(v *Value) *Value {
  2611  	b := v.Block
  2612  	dst := v.Args[0]
  2613  	x := v.Args[1]
  2614  	if x.Op != OpStructMake {
  2615  		base.Fatalf("invalid struct store: %v", x)
  2616  	}
  2617  	mem := v.Args[2]
  2618  
  2619  	t := x.Type
  2620  	for i, arg := range x.Args {
  2621  		ft := t.FieldType(i)
  2622  
  2623  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2624  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2625  	}
  2626  
  2627  	return mem
  2628  }
  2629  
  2630  // isDirectAndComparableType reports whether v represents a type
  2631  // (a *runtime._type) whose value is stored directly in an
  2632  // interface (i.e., is pointer or pointer-like) and is comparable.
  2633  func isDirectAndComparableType(v *Value) bool {
  2634  	return isDirectAndComparableType1(v)
  2635  }
  2636  
  2637  // v is a type
  2638  func isDirectAndComparableType1(v *Value) bool {
  2639  	switch v.Op {
  2640  	case OpITab:
  2641  		return isDirectAndComparableType2(v.Args[0])
  2642  	case OpAddr:
  2643  		lsym := v.Aux.(*obj.LSym)
  2644  		if ti := lsym.TypeInfo(); ti != nil {
  2645  			t := ti.Type.(*types.Type)
  2646  			return types.IsDirectIface(t) && types.IsComparable(t)
  2647  		}
  2648  	}
  2649  	return false
  2650  }
  2651  
  2652  // v is an empty interface
  2653  func isDirectAndComparableType2(v *Value) bool {
  2654  	switch v.Op {
  2655  	case OpIMake:
  2656  		return isDirectAndComparableType1(v.Args[0])
  2657  	}
  2658  	return false
  2659  }
  2660  
  2661  // isDirectAndComparableIface reports whether v represents an itab
  2662  // (a *runtime._itab) for a type whose value is stored directly
  2663  // in an interface (i.e., is pointer or pointer-like) and is comparable.
  2664  func isDirectAndComparableIface(v *Value) bool {
  2665  	return isDirectAndComparableIface1(v, 9)
  2666  }
  2667  
  2668  // v is an itab
  2669  func isDirectAndComparableIface1(v *Value, depth int) bool {
  2670  	if depth == 0 {
  2671  		return false
  2672  	}
  2673  	switch v.Op {
  2674  	case OpITab:
  2675  		return isDirectAndComparableIface2(v.Args[0], depth-1)
  2676  	case OpAddr:
  2677  		lsym := v.Aux.(*obj.LSym)
  2678  		if ii := lsym.ItabInfo(); ii != nil {
  2679  			t := ii.Type.(*types.Type)
  2680  			return types.IsDirectIface(t) && types.IsComparable(t)
  2681  		}
  2682  	case OpConstNil:
  2683  		// We can treat this as direct, because if the itab is
  2684  		// nil, the data field must be nil also.
  2685  		return true
  2686  	}
  2687  	return false
  2688  }
  2689  
  2690  // v is an interface
  2691  func isDirectAndComparableIface2(v *Value, depth int) bool {
  2692  	if depth == 0 {
  2693  		return false
  2694  	}
  2695  	switch v.Op {
  2696  	case OpIMake:
  2697  		return isDirectAndComparableIface1(v.Args[0], depth-1)
  2698  	case OpPhi:
  2699  		for _, a := range v.Args {
  2700  			if !isDirectAndComparableIface2(a, depth-1) {
  2701  				return false
  2702  			}
  2703  		}
  2704  		return true
  2705  	}
  2706  	return false
  2707  }
  2708  
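        // bitsAdd64 returns the sum and carry-out of x+y+carry, with the operands
        // treated as unsigned 64-bit values. carry must be 0 or 1.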
  2709  func bitsAdd64(x, y, carry int64) (r struct{ sum, carry int64 }) {
  2710  	s, c := bits.Add64(uint64(x), uint64(y), uint64(carry))
  2711  	r.sum, r.carry = int64(s), int64(c)
  2712  	return
  2713  }
  2714  
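        // bitsMulU64 returns the high and low halves of the full 128-bit product
        // of x and y, treated as unsigned 64-bit values.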
  2715  func bitsMulU64(x, y int64) (r struct{ hi, lo int64 }) {
  2716  	hi, lo := bits.Mul64(uint64(x), uint64(y))
  2717  	r.hi, r.lo = int64(hi), int64(lo)
  2718  	return
  2719  }
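        // bitsMulU32 returns the high and low halves of the full 64-bit product
        // of x and y, treated as unsigned 32-bit values.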
  2720  func bitsMulU32(x, y int32) (r struct{ hi, lo int32 }) {
  2721  	hi, lo := bits.Mul32(uint32(x), uint32(y))
  2722  	r.hi, r.lo = int32(hi), int32(lo)
  2723  	return
  2724  }
  2725  
  2726  // flagify rewrites v which is (X ...) to (Select0 (Xflags ...)).
  2727  func flagify(v *Value) bool {
  2728  	var flagVersion Op
  2729  	switch v.Op {
  2730  	case OpAMD64ADDQconst:
  2731  		flagVersion = OpAMD64ADDQconstflags
  2732  	case OpAMD64ADDLconst:
  2733  		flagVersion = OpAMD64ADDLconstflags
  2734  	default:
  2735  		base.Fatalf("can't flagify op %s", v.Op)
  2736  	}
  2737  	inner := v.copyInto(v.Block)
  2738  	inner.Op = flagVersion
  2739  	inner.Type = types.NewTuple(v.Type, types.TypeFlags)
  2740  	v.reset(OpSelect0)
  2741  	v.AddArg(inner)
  2742  	return true
  2743  }
  2744  
  2745  // PanicBoundsC contains a constant for a bounds failure.
  2746  type PanicBoundsC struct {
  2747  	C int64
  2748  }
  2749  
  2750  // PanicBoundsCC contains 2 constants for a bounds failure.
  2751  type PanicBoundsCC struct {
  2752  	Cx int64
  2753  	Cy int64
  2754  }
  2755  
  2756  func (p PanicBoundsC) CanBeAnSSAAux() {
  2757  }
  2758  func (p PanicBoundsCC) CanBeAnSSAAux() {
  2759  }
  2760  
  2761  func auxToPanicBoundsC(i Aux) PanicBoundsC {
  2762  	return i.(PanicBoundsC)
  2763  }
  2764  func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
  2765  	return i.(PanicBoundsCC)
  2766  }
  2767  func panicBoundsCToAux(p PanicBoundsC) Aux {
  2768  	return p
  2769  }
  2770  func panicBoundsCCToAux(p PanicBoundsCC) Aux {
  2771  	return p
  2772  }
  2773  
  2774  func isDictArgSym(sym Sym) bool {
  2775  	return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName
  2776  }
  2777  
  2778  // When v is (IMake typ (StructMake ...)), convert to
  2779  // (IMake typ arg) where arg is the pointer-y argument to
  2780  // the StructMake (there must be exactly one).
  2781  func imakeOfStructMake(v *Value) *Value {
  2782  	var arg *Value
  2783  	for _, a := range v.Args[1].Args {
  2784  		if a.Type.Size() > 0 {
  2785  			arg = a
  2786  			break
  2787  		}
  2788  	}
  2789  	return v.Block.NewValue2(v.Pos, OpIMake, v.Type, v.Args[0], arg)
  2790  }
  2791  
  2792  // bool2int converts bool to int: true to 1, false to 0
  2793  func bool2int(x bool) int {
  2794  	var b int
  2795  	if x {
  2796  		b = 1
  2797  	}
  2798  	return b
  2799  }
  2800  
