Source file src/cmd/compile/internal/ssa/rewrite.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/logopt"
    10  	"cmd/compile/internal/reflectdata"
    11  	"cmd/compile/internal/types"
    12  	"cmd/internal/obj"
    13  	"cmd/internal/obj/s390x"
    14  	"cmd/internal/objabi"
    15  	"cmd/internal/src"
    16  	"encoding/binary"
    17  	"fmt"
    18  	"internal/buildcfg"
    19  	"io"
    20  	"math"
    21  	"math/bits"
    22  	"os"
    23  	"path/filepath"
    24  	"strings"
    25  )
    26  
    27  type deadValueChoice bool
    28  
    29  const (
    30  	leaveDeadValues  deadValueChoice = false
    31  	removeDeadValues                 = true
    32  )
    33  
    34  // deadcode indicates whether applyRewrite should try to remove any values that become dead.
    35  func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValueChoice) {
    36  	// repeat rewrites until we find no more rewrites
    37  	pendingLines := f.cachedLineStarts // Holds statement boundaries that need to be moved to a new value/block
    38  	pendingLines.clear()
    39  	debug := f.pass.debug
    40  	if debug > 1 {
    41  		fmt.Printf("%s: rewriting for %s\n", f.pass.name, f.Name)
    42  	}
    43  	// if the number of rewrite iterations reaches itersLimit we will
    44  	// at that point turn on cycle detection. Instead of a fixed limit,
    45  	// size the limit according to func size to allow for cases such
    46  	// as the one in issue #66773.
    47  	itersLimit := f.NumBlocks()
    48  	if itersLimit < 20 {
    49  		itersLimit = 20
    50  	}
    51  	var iters int
    52  	var states map[string]bool
    53  	for {
    54  		change := false
    55  		deadChange := false
    56  		for _, b := range f.Blocks {
    57  			var b0 *Block
    58  			if debug > 1 {
    59  				b0 = new(Block)
    60  				*b0 = *b
    61  				b0.Succs = append([]Edge{}, b.Succs...) // make a new copy, not aliasing
    62  			}
    63  			for i, c := range b.ControlValues() {
    64  				for c.Op == OpCopy {
    65  					c = c.Args[0]
    66  					b.ReplaceControl(i, c)
    67  				}
    68  			}
    69  			if rb(b) {
    70  				change = true
    71  				if debug > 1 {
    72  					fmt.Printf("rewriting %s  ->  %s\n", b0.LongString(), b.LongString())
    73  				}
    74  			}
    75  			for j, v := range b.Values {
    76  				var v0 *Value
    77  				if debug > 1 {
    78  					v0 = new(Value)
    79  					*v0 = *v
    80  					v0.Args = append([]*Value{}, v.Args...) // make a new copy, not aliasing
    81  				}
    82  				if v.Uses == 0 && v.removeable() {
    83  					if v.Op != OpInvalid && deadcode == removeDeadValues {
    84  						// Reset any values that are now unused, so that we decrement
    85  // the use count of all of their arguments.
    86  						// Not quite a deadcode pass, because it does not handle cycles.
    87  						// But it should help Uses==1 rules to fire.
    88  						v.reset(OpInvalid)
    89  						deadChange = true
    90  					}
    91  					// No point rewriting values which aren't used.
    92  					continue
    93  				}
    94  
    95  				vchange := phielimValue(v)
    96  				if vchange && debug > 1 {
    97  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
    98  				}
    99  
   100  				// Eliminate copy inputs.
   101  				// If any copy input becomes unused, mark it
   102  				// as invalid and discard its argument. Repeat
   103  				// recursively on the discarded argument.
   104  				// This phase helps remove phantom "dead copy" uses
   105  				// of a value so that a x.Uses==1 rule condition
   106  				// fires reliably.
   107  				for i, a := range v.Args {
   108  					if a.Op != OpCopy {
   109  						continue
   110  					}
   111  					aa := copySource(a)
   112  					v.SetArg(i, aa)
   113  					// If a, a copy, has a line boundary indicator, attempt to find a new value
   114  					// to hold it.  The first candidate is the value that will replace a (aa),
   115  					// if it shares the same block and line and is eligible.
   116  					// The second option is v, which has a as an input.  Because aa is earlier in
   117  					// the data flow, it is the better choice.
   118  					if a.Pos.IsStmt() == src.PosIsStmt {
   119  						if aa.Block == a.Block && aa.Pos.Line() == a.Pos.Line() && aa.Pos.IsStmt() != src.PosNotStmt {
   120  							aa.Pos = aa.Pos.WithIsStmt()
   121  						} else if v.Block == a.Block && v.Pos.Line() == a.Pos.Line() && v.Pos.IsStmt() != src.PosNotStmt {
   122  							v.Pos = v.Pos.WithIsStmt()
   123  						} else {
   124  							// Record the lost line and look for a new home after all rewrites are complete.
   125  							// TODO: it's possible (in FOR loops, in particular) for statement boundaries for the same
   126  							// line to appear in more than one block, but only one block is stored, so if both end
   127  							// up here, then one will be lost.
   128  							pendingLines.set(a.Pos, int32(a.Block.ID))
   129  						}
   130  						a.Pos = a.Pos.WithNotStmt()
   131  					}
   132  					vchange = true
   133  					for a.Uses == 0 {
   134  						b := a.Args[0]
   135  						a.reset(OpInvalid)
   136  						a = b
   137  					}
   138  				}
   139  				if vchange && debug > 1 {
   140  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   141  				}
   142  
   143  				// apply rewrite function
   144  				if rv(v) {
   145  					vchange = true
   146  					// If value changed to a poor choice for a statement boundary, move the boundary
   147  					if v.Pos.IsStmt() == src.PosIsStmt {
   148  						if k := nextGoodStatementIndex(v, j, b); k != j {
   149  							v.Pos = v.Pos.WithNotStmt()
   150  							b.Values[k].Pos = b.Values[k].Pos.WithIsStmt()
   151  						}
   152  					}
   153  				}
   154  
   155  				change = change || vchange
   156  				if vchange && debug > 1 {
   157  					fmt.Printf("rewriting %s  ->  %s\n", v0.LongString(), v.LongString())
   158  				}
   159  			}
   160  		}
   161  		if !change && !deadChange {
   162  			break
   163  		}
   164  		iters++
   165  		if (iters > itersLimit || debug >= 2) && change {
   166  			// We've done a suspiciously large number of rewrites (or we're in debug mode).
   167  			// As of Sep 2021, 90% of rewrites complete in 4 iterations or fewer
   168  			// and the maximum value encountered during make.bash is 12.
   169  			// Start checking for cycles. (This is too expensive to do routinely.)
   170  			// Note: we avoid this path for deadChange-only iterations, to fix #51639.
   171  			if states == nil {
   172  				states = make(map[string]bool)
   173  			}
   174  			h := f.rewriteHash()
   175  			if _, ok := states[h]; ok {
   176  				// We've found a cycle.
   177  				// To diagnose it, set debug to 2 and start again,
   178  				// so that we'll print all rules applied until we complete another cycle.
   179  				// If debug is already >= 2, we've already done that, so it's time to crash.
   180  				if debug < 2 {
   181  					debug = 2
   182  					states = make(map[string]bool)
   183  				} else {
   184  					f.Fatalf("rewrite cycle detected")
   185  				}
   186  			}
   187  			states[h] = true
   188  		}
   189  	}
   190  	// remove clobbered values
   191  	for _, b := range f.Blocks {
   192  		j := 0
   193  		for i, v := range b.Values {
   194  			vl := v.Pos
   195  			if v.Op == OpInvalid {
   196  				if v.Pos.IsStmt() == src.PosIsStmt {
   197  					pendingLines.set(vl, int32(b.ID))
   198  				}
   199  				f.freeValue(v)
   200  				continue
   201  			}
   202  			if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
   203  				pendingLines.remove(vl)
   204  				v.Pos = v.Pos.WithIsStmt()
   205  			}
   206  			if i != j {
   207  				b.Values[j] = v
   208  			}
   209  			j++
   210  		}
   211  		if pendingLines.get(b.Pos) == int32(b.ID) {
   212  			b.Pos = b.Pos.WithIsStmt()
   213  			pendingLines.remove(b.Pos)
   214  		}
   215  		b.truncateValues(j)
   216  	}
   217  }
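
        // Typical call sites (a sketch; the actual call sites live in the compiler's
        // opt and lower passes) pair this helper with the generated rewrite functions:
        //
        //      applyRewrite(f, rewriteBlockgeneric, rewriteValuegeneric, removeDeadValues)
        //
        // where the rewriteBlock*/rewriteValue* functions are produced by _gen/rulegen.go.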
   218  
   219  // Common functions called from rewriting rules
   220  
   221  func is64BitFloat(t *types.Type) bool {
   222  	return t.Size() == 8 && t.IsFloat()
   223  }
   224  
   225  func is32BitFloat(t *types.Type) bool {
   226  	return t.Size() == 4 && t.IsFloat()
   227  }
   228  
   229  func is64BitInt(t *types.Type) bool {
   230  	return t.Size() == 8 && t.IsInteger()
   231  }
   232  
   233  func is32BitInt(t *types.Type) bool {
   234  	return t.Size() == 4 && t.IsInteger()
   235  }
   236  
   237  func is16BitInt(t *types.Type) bool {
   238  	return t.Size() == 2 && t.IsInteger()
   239  }
   240  
   241  func is8BitInt(t *types.Type) bool {
   242  	return t.Size() == 1 && t.IsInteger()
   243  }
   244  
   245  func isPtr(t *types.Type) bool {
   246  	return t.IsPtrShaped()
   247  }
   248  
   249  // mergeSym merges two symbolic offsets. There is no real merging of
   250  // offsets; we just pick the non-nil one.
   251  func mergeSym(x, y Sym) Sym {
   252  	if x == nil {
   253  		return y
   254  	}
   255  	if y == nil {
   256  		return x
   257  	}
   258  	panic(fmt.Sprintf("mergeSym with two non-nil syms %v %v", x, y))
   259  }
   260  
   261  func canMergeSym(x, y Sym) bool {
   262  	return x == nil || y == nil
   263  }
   264  
   265  // canMergeLoadClobber reports whether the load can be merged into target without
   266  // invalidating the schedule.
   267  // It also checks that the other non-load argument x is something we
   268  // are ok with clobbering.
   269  func canMergeLoadClobber(target, load, x *Value) bool {
   270  	// The register containing x is going to get clobbered.
   271  	// Don't merge if we still need the value of x.
   272  	// We don't have liveness information here, but we can
   273  	// approximate x dying with:
   274  	//  1) target is x's only use.
   275  	//  2) target is not in a deeper loop than x.
   276  	if x.Uses != 1 {
   277  		return false
   278  	}
   279  	loopnest := x.Block.Func.loopnest()
   280  	loopnest.calculateDepths()
   281  	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
   282  		return false
   283  	}
   284  	return canMergeLoad(target, load)
   285  }
   286  
   287  // canMergeLoad reports whether the load can be merged into target without
   288  // invalidating the schedule.
   289  func canMergeLoad(target, load *Value) bool {
   290  	if target.Block.ID != load.Block.ID {
   291  		// If the load is in a different block do not merge it.
   292  		return false
   293  	}
   294  
   295  	// We can't merge the load into the target if the load
   296  	// has more than one use.
   297  	if load.Uses != 1 {
   298  		return false
   299  	}
   300  
   301  	mem := load.MemoryArg()
   302  
   303  	// We need the load's memory arg to still be alive at target. That
   304  	// can't be the case if one of target's args depends on a memory
   305  	// state that is a successor of load's memory arg.
   306  	//
   307  	// For example, it would be invalid to merge load into target in
   308  	// the following situation because newmem has killed oldmem
   309  	// before target is reached:
   310  	//     load = read ... oldmem
   311  	//   newmem = write ... oldmem
   312  	//     arg0 = read ... newmem
   313  	//   target = add arg0 load
   314  	//
   315  	// If the argument comes from a different block then we can exclude
   316  	// it immediately because it must dominate load (which is in the
   317  	// same block as target).
   318  	var args []*Value
   319  	for _, a := range target.Args {
   320  		if a != load && a.Block.ID == target.Block.ID {
   321  			args = append(args, a)
   322  		}
   323  	}
   324  
   325  	// memPreds contains memory states known to be predecessors of load's
   326  	// memory state. It is lazily initialized.
   327  	var memPreds map[*Value]bool
   328  	for i := 0; len(args) > 0; i++ {
   329  		const limit = 100
   330  		if i >= limit {
   331  			// Give up if we have done a lot of iterations.
   332  			return false
   333  		}
   334  		v := args[len(args)-1]
   335  		args = args[:len(args)-1]
   336  		if target.Block.ID != v.Block.ID {
   337  			// Since target and load are in the same block
   338  			// we can stop searching when we leave the block.
   339  			continue
   340  		}
   341  		if v.Op == OpPhi {
   342  			// A Phi implies we have reached the top of the block.
   343  			// The memory phi, if it exists, is always
   344  			// the first logical store in the block.
   345  			continue
   346  		}
   347  		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
   348  			// We could handle this situation however it is likely
   349  			// to be very rare.
   350  			return false
   351  		}
   352  		if v.Op.SymEffect()&SymAddr != 0 {
   353  			// This case prevents an operation that calculates the
   354  			// address of a local variable from being forced to schedule
   355  			// before its corresponding VarDef.
   356  			// See issue 28445.
   357  			//   v1 = LOAD ...
   358  			//   v2 = VARDEF
   359  			//   v3 = LEAQ
   360  			//   v4 = CMPQ v1 v3
   361  			// We don't want to combine the CMPQ with the load, because
   362  			// that would force the CMPQ to schedule before the VARDEF, which
   363  			// in turn requires the LEAQ to schedule before the VARDEF.
   364  			return false
   365  		}
   366  		if v.Type.IsMemory() {
   367  			if memPreds == nil {
   368  				// Initialise a map containing memory states
   369  				// known to be predecessors of load's memory
   370  				// state.
   371  				memPreds = make(map[*Value]bool)
   372  				m := mem
   373  				const limit = 50
   374  				for i := 0; i < limit; i++ {
   375  					if m.Op == OpPhi {
   376  						// The memory phi, if it exists, is always
   377  						// the first logical store in the block.
   378  						break
   379  					}
   380  					if m.Block.ID != target.Block.ID {
   381  						break
   382  					}
   383  					if !m.Type.IsMemory() {
   384  						break
   385  					}
   386  					memPreds[m] = true
   387  					if len(m.Args) == 0 {
   388  						break
   389  					}
   390  					m = m.MemoryArg()
   391  				}
   392  			}
   393  
   394  			// We can merge if v is a predecessor of mem.
   395  			//
   396  			// For example, we can merge load into target in the
   397  			// following scenario:
   398  			//      x = read ... v
   399  			//    mem = write ... v
   400  			//   load = read ... mem
   401  			// target = add x load
   402  			if memPreds[v] {
   403  				continue
   404  			}
   405  			return false
   406  		}
   407  		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
   408  			// If v takes mem as an input then we know mem
   409  			// is valid at this point.
   410  			continue
   411  		}
   412  		for _, a := range v.Args {
   413  			if target.Block.ID == a.Block.ID {
   414  				args = append(args, a)
   415  			}
   416  		}
   417  	}
   418  
   419  	return true
   420  }
   421  
   422  // isSameCall reports whether sym is the same as the given named symbol.
   423  func isSameCall(sym interface{}, name string) bool {
   424  	fn := sym.(*AuxCall).Fn
   425  	return fn != nil && fn.String() == name
   426  }
   427  
   428  // canLoadUnaligned reports whether the architecture supports unaligned load operations.
   429  func canLoadUnaligned(c *Config) bool {
   430  	return c.ctxt.Arch.Alignment == 1
   431  }
   432  
   433  // nlzX returns the number of leading zeros.
   434  func nlz64(x int64) int { return bits.LeadingZeros64(uint64(x)) }
   435  func nlz32(x int32) int { return bits.LeadingZeros32(uint32(x)) }
   436  func nlz16(x int16) int { return bits.LeadingZeros16(uint16(x)) }
   437  func nlz8(x int8) int   { return bits.LeadingZeros8(uint8(x)) }
   438  
   439  // ntzX returns the number of trailing zeros.
   440  func ntz64(x int64) int { return bits.TrailingZeros64(uint64(x)) }
   441  func ntz32(x int32) int { return bits.TrailingZeros32(uint32(x)) }
   442  func ntz16(x int16) int { return bits.TrailingZeros16(uint16(x)) }
   443  func ntz8(x int8) int   { return bits.TrailingZeros8(uint8(x)) }
   444  
   445  func oneBit(x int64) bool   { return x&(x-1) == 0 && x != 0 }
   446  func oneBit8(x int8) bool   { return x&(x-1) == 0 && x != 0 }
   447  func oneBit16(x int16) bool { return x&(x-1) == 0 && x != 0 }
   448  func oneBit32(x int32) bool { return x&(x-1) == 0 && x != 0 }
   449  func oneBit64(x int64) bool { return x&(x-1) == 0 && x != 0 }
   450  
   451  // nto returns the number of trailing ones.
   452  func nto(x int64) int64 {
   453  	return int64(ntz64(^x))
   454  }
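
        // Illustrative examples (not used by any rule): nto(0b0111) == 3,
        // nto(0) == 0, and nto(-1) == 64 (all 64 bits are ones).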
   455  
   456  // logX returns the base-2 logarithm of n.
   457  // n must be a positive power of 2 (isPowerOfTwo returns true).
   458  func log8(n int8) int64 {
   459  	return int64(bits.Len8(uint8(n))) - 1
   460  }
   461  func log16(n int16) int64 {
   462  	return int64(bits.Len16(uint16(n))) - 1
   463  }
   464  func log32(n int32) int64 {
   465  	return int64(bits.Len32(uint32(n))) - 1
   466  }
   467  func log64(n int64) int64 {
   468  	return int64(bits.Len64(uint64(n))) - 1
   469  }
   470  
   471  // log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
   472  // Rounds down.
   473  func log2uint32(n int64) int64 {
   474  	return int64(bits.Len32(uint32(n))) - 1
   475  }
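
        // Illustrative examples (not used by any rule): log32(8) == 3,
        // log64(1) == 0, and log2uint32(0) == -1.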
   476  
   477  // isPowerOfTwo reports whether n is a power of 2.
   478  func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
   479  	return n > 0 && n&(n-1) == 0
   480  }
   481  
   482  // isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
   483  func isUint64PowerOfTwo(in int64) bool {
   484  	n := uint64(in)
   485  	return n > 0 && n&(n-1) == 0
   486  }
   487  
   488  // isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
   489  func isUint32PowerOfTwo(in int64) bool {
   490  	n := uint64(uint32(in))
   491  	return n > 0 && n&(n-1) == 0
   492  }
   493  
   494  // is32Bit reports whether n can be represented as a signed 32 bit integer.
   495  func is32Bit(n int64) bool {
   496  	return n == int64(int32(n))
   497  }
   498  
   499  // is16Bit reports whether n can be represented as a signed 16 bit integer.
   500  func is16Bit(n int64) bool {
   501  	return n == int64(int16(n))
   502  }
   503  
   504  // is8Bit reports whether n can be represented as a signed 8 bit integer.
   505  func is8Bit(n int64) bool {
   506  	return n == int64(int8(n))
   507  }
   508  
   509  // isU8Bit reports whether n can be represented as an unsigned 8 bit integer.
   510  func isU8Bit(n int64) bool {
   511  	return n == int64(uint8(n))
   512  }
   513  
   514  // isU12Bit reports whether n can be represented as an unsigned 12 bit integer.
   515  func isU12Bit(n int64) bool {
   516  	return 0 <= n && n < (1<<12)
   517  }
   518  
   519  // isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
   520  func isU16Bit(n int64) bool {
   521  	return n == int64(uint16(n))
   522  }
   523  
   524  // isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
   525  func isU32Bit(n int64) bool {
   526  	return n == int64(uint32(n))
   527  }
   528  
   529  // is20Bit reports whether n can be represented as a signed 20 bit integer.
   530  func is20Bit(n int64) bool {
   531  	return -(1<<19) <= n && n < (1<<19)
   532  }
   533  
   534  // b2i translates a boolean value to 0 or 1 for assigning to auxInt.
   535  func b2i(b bool) int64 {
   536  	if b {
   537  		return 1
   538  	}
   539  	return 0
   540  }
   541  
   542  // b2i32 translates a boolean value to 0 or 1.
   543  func b2i32(b bool) int32 {
   544  	if b {
   545  		return 1
   546  	}
   547  	return 0
   548  }
   549  
   550  // shiftIsBounded reports whether (left/right) shift Value v is known to be bounded.
   551  // A shift is bounded if it is shifting by less than the width of the shifted value.
   552  func shiftIsBounded(v *Value) bool {
   553  	return v.AuxInt != 0
   554  }
   555  
   556  // canonLessThan returns whether x is "ordered" less than y, for purposes of normalizing
   557  // generated code as much as possible.
   558  func canonLessThan(x, y *Value) bool {
   559  	if x.Op != y.Op {
   560  		return x.Op < y.Op
   561  	}
   562  	if !x.Pos.SameFileAndLine(y.Pos) {
   563  		return x.Pos.Before(y.Pos)
   564  	}
   565  	return x.ID < y.ID
   566  }
   567  
   568  // truncate64Fto32F converts a float64 value to a float32 preserving the bit pattern
   569  // of the mantissa. It will panic if the truncation results in lost information.
   570  func truncate64Fto32F(f float64) float32 {
   571  	if !isExactFloat32(f) {
   572  		panic("truncate64Fto32F: truncation is not exact")
   573  	}
   574  	if !math.IsNaN(f) {
   575  		return float32(f)
   576  	}
   577  	// NaN bit patterns aren't necessarily preserved across conversion
   578  	// instructions so we need to do the conversion manually.
   579  	b := math.Float64bits(f)
   580  	m := b & ((1 << 52) - 1) // mantissa (a.k.a. significand)
   581  	//          | sign                  | exponent   | mantissa       |
   582  	r := uint32(((b >> 32) & (1 << 31)) | 0x7f800000 | (m >> (52 - 23)))
   583  	return math.Float32frombits(r)
   584  }
   585  
   586  // extend32Fto64F converts a float32 value to a float64 value preserving the bit
   587  // pattern of the mantissa.
   588  func extend32Fto64F(f float32) float64 {
   589  	if !math.IsNaN(float64(f)) {
   590  		return float64(f)
   591  	}
   592  	// NaN bit patterns aren't necessarily preserved across conversion
   593  	// instructions so we need to do the conversion manually.
   594  	b := uint64(math.Float32bits(f))
   595  	//   | sign                  | exponent      | mantissa                    |
   596  	r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
   597  	return math.Float64frombits(r)
   598  }
   599  
   600  // DivisionNeedsFixUp reports whether the division needs fix-up code.
   601  func DivisionNeedsFixUp(v *Value) bool {
   602  	return v.AuxInt == 0
   603  }
   604  
   605  // auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
   606  func auxFrom64F(f float64) int64 {
   607  	if f != f {
   608  		panic("can't encode a NaN in AuxInt field")
   609  	}
   610  	return int64(math.Float64bits(f))
   611  }
   612  
   613  // auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
   614  func auxFrom32F(f float32) int64 {
   615  	if f != f {
   616  		panic("can't encode a NaN in AuxInt field")
   617  	}
   618  	return int64(math.Float64bits(extend32Fto64F(f)))
   619  }
   620  
   621  // auxTo32F decodes a float32 from the AuxInt value provided.
   622  func auxTo32F(i int64) float32 {
   623  	return truncate64Fto32F(math.Float64frombits(uint64(i)))
   624  }
   625  
   626  // auxTo64F decodes a float64 from the AuxInt value provided.
   627  func auxTo64F(i int64) float64 {
   628  	return math.Float64frombits(uint64(i))
   629  }
   630  
   631  func auxIntToBool(i int64) bool {
   632  	if i == 0 {
   633  		return false
   634  	}
   635  	return true
   636  }
   637  func auxIntToInt8(i int64) int8 {
   638  	return int8(i)
   639  }
   640  func auxIntToInt16(i int64) int16 {
   641  	return int16(i)
   642  }
   643  func auxIntToInt32(i int64) int32 {
   644  	return int32(i)
   645  }
   646  func auxIntToInt64(i int64) int64 {
   647  	return i
   648  }
   649  func auxIntToUint8(i int64) uint8 {
   650  	return uint8(i)
   651  }
   652  func auxIntToFloat32(i int64) float32 {
   653  	return float32(math.Float64frombits(uint64(i)))
   654  }
   655  func auxIntToFloat64(i int64) float64 {
   656  	return math.Float64frombits(uint64(i))
   657  }
   658  func auxIntToValAndOff(i int64) ValAndOff {
   659  	return ValAndOff(i)
   660  }
   661  func auxIntToArm64BitField(i int64) arm64BitField {
   662  	return arm64BitField(i)
   663  }
   664  func auxIntToInt128(x int64) int128 {
   665  	if x != 0 {
   666  		panic("nonzero int128 not allowed")
   667  	}
   668  	return 0
   669  }
   670  func auxIntToFlagConstant(x int64) flagConstant {
   671  	return flagConstant(x)
   672  }
   673  
   674  func auxIntToOp(cc int64) Op {
   675  	return Op(cc)
   676  }
   677  
   678  func boolToAuxInt(b bool) int64 {
   679  	if b {
   680  		return 1
   681  	}
   682  	return 0
   683  }
   684  func int8ToAuxInt(i int8) int64 {
   685  	return int64(i)
   686  }
   687  func int16ToAuxInt(i int16) int64 {
   688  	return int64(i)
   689  }
   690  func int32ToAuxInt(i int32) int64 {
   691  	return int64(i)
   692  }
   693  func int64ToAuxInt(i int64) int64 {
   694  	return int64(i)
   695  }
   696  func uint8ToAuxInt(i uint8) int64 {
   697  	return int64(int8(i))
   698  }
   699  func float32ToAuxInt(f float32) int64 {
   700  	return int64(math.Float64bits(float64(f)))
   701  }
   702  func float64ToAuxInt(f float64) int64 {
   703  	return int64(math.Float64bits(f))
   704  }
   705  func valAndOffToAuxInt(v ValAndOff) int64 {
   706  	return int64(v)
   707  }
   708  func arm64BitFieldToAuxInt(v arm64BitField) int64 {
   709  	return int64(v)
   710  }
   711  func int128ToAuxInt(x int128) int64 {
   712  	if x != 0 {
   713  		panic("nonzero int128 not allowed")
   714  	}
   715  	return 0
   716  }
   717  func flagConstantToAuxInt(x flagConstant) int64 {
   718  	return int64(x)
   719  }
   720  
   721  func opToAuxInt(o Op) int64 {
   722  	return int64(o)
   723  }
   724  
   725  // Aux is an interface to hold miscellaneous data in Blocks and Values.
   726  type Aux interface {
   727  	CanBeAnSSAAux()
   728  }
   729  
   730  // auxMark is for now only used to mark moves that need to avoid clobbering flags.
   731  type auxMark bool
   732  
   733  func (auxMark) CanBeAnSSAAux() {}
   734  
   735  var AuxMark auxMark
   736  
   737  // stringAux wraps string values for use in Aux.
   738  type stringAux string
   739  
   740  func (stringAux) CanBeAnSSAAux() {}
   741  
   742  func auxToString(i Aux) string {
   743  	return string(i.(stringAux))
   744  }
   745  func auxToSym(i Aux) Sym {
   746  	// TODO: kind of a hack - allows nil interface through
   747  	s, _ := i.(Sym)
   748  	return s
   749  }
   750  func auxToType(i Aux) *types.Type {
   751  	return i.(*types.Type)
   752  }
   753  func auxToCall(i Aux) *AuxCall {
   754  	return i.(*AuxCall)
   755  }
   756  func auxToS390xCCMask(i Aux) s390x.CCMask {
   757  	return i.(s390x.CCMask)
   758  }
   759  func auxToS390xRotateParams(i Aux) s390x.RotateParams {
   760  	return i.(s390x.RotateParams)
   761  }
   762  
   763  func StringToAux(s string) Aux {
   764  	return stringAux(s)
   765  }
   766  func symToAux(s Sym) Aux {
   767  	return s
   768  }
   769  func callToAux(s *AuxCall) Aux {
   770  	return s
   771  }
   772  func typeToAux(t *types.Type) Aux {
   773  	return t
   774  }
   775  func s390xCCMaskToAux(c s390x.CCMask) Aux {
   776  	return c
   777  }
   778  func s390xRotateParamsToAux(r s390x.RotateParams) Aux {
   779  	return r
   780  }
   781  
   782  // uaddOvf reports whether unsigned a+b would overflow.
   783  func uaddOvf(a, b int64) bool {
   784  	return uint64(a)+uint64(b) < uint64(a)
   785  }
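
        // For example, uaddOvf(-1, 1) is true (the unsigned sum wraps to 0),
        // while uaddOvf(1, 2) is false.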
   786  
   787  // loadLSymOffset simulates reading a word at an offset into a
   788  // read-only symbol's runtime memory. If it would read a pointer to
   789  // another symbol, that symbol is returned. Otherwise, it returns nil.
   790  func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
   791  	if lsym.Type != objabi.SRODATA {
   792  		return nil
   793  	}
   794  
   795  	for _, r := range lsym.R {
   796  		if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
   797  			return r.Sym
   798  		}
   799  	}
   800  
   801  	return nil
   802  }
   803  
   804  func devirtLECall(v *Value, sym *obj.LSym) *Value {
   805  	v.Op = OpStaticLECall
   806  	auxcall := v.Aux.(*AuxCall)
   807  	auxcall.Fn = sym
   808  	// Remove first arg
   809  	v.Args[0].Uses--
   810  	copy(v.Args[0:], v.Args[1:])
   811  	v.Args[len(v.Args)-1] = nil // aid GC
   812  	v.Args = v.Args[:len(v.Args)-1]
   813  	if f := v.Block.Func; f.pass.debug > 0 {
   814  		f.Warnl(v.Pos, "de-virtualizing call")
   815  	}
   816  	return v
   817  }
   818  
   819  // isSamePtr reports whether p1 and p2 point to the same address.
   820  func isSamePtr(p1, p2 *Value) bool {
   821  	if p1 == p2 {
   822  		return true
   823  	}
   824  	if p1.Op != p2.Op {
   825  		return false
   826  	}
   827  	switch p1.Op {
   828  	case OpOffPtr:
   829  		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
   830  	case OpAddr, OpLocalAddr:
   831  		return p1.Aux == p2.Aux
   832  	case OpAddPtr:
   833  		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
   834  	}
   835  	return false
   836  }
   837  
   838  func isStackPtr(v *Value) bool {
   839  	for v.Op == OpOffPtr || v.Op == OpAddPtr {
   840  		v = v.Args[0]
   841  	}
   842  	return v.Op == OpSP || v.Op == OpLocalAddr
   843  }
   844  
   845  // disjoint reports whether the memory region specified by [p1:p1+n1)
   846  // does not overlap with [p2:p2+n2).
   847  // A return value of false does not imply the regions overlap.
   848  func disjoint(p1 *Value, n1 int64, p2 *Value, n2 int64) bool {
   849  	if n1 == 0 || n2 == 0 {
   850  		return true
   851  	}
   852  	if p1 == p2 {
   853  		return false
   854  	}
   855  	baseAndOffset := func(ptr *Value) (base *Value, offset int64) {
   856  		base, offset = ptr, 0
   857  		for base.Op == OpOffPtr {
   858  			offset += base.AuxInt
   859  			base = base.Args[0]
   860  		}
   861  		if opcodeTable[base.Op].nilCheck {
   862  			base = base.Args[0]
   863  		}
   864  		return base, offset
   865  	}
   866  	p1, off1 := baseAndOffset(p1)
   867  	p2, off2 := baseAndOffset(p2)
   868  	if isSamePtr(p1, p2) {
   869  		return !overlap(off1, n1, off2, n2)
   870  	}
   871  	// p1 and p2 are not the same, so if they are both OpAddrs then
   872  	// they point to different variables.
   873  	// If one pointer is on the stack and the other is an argument
   874  	// then they can't overlap.
   875  	switch p1.Op {
   876  	case OpAddr, OpLocalAddr:
   877  		if p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpSP {
   878  			return true
   879  		}
   880  		return (p2.Op == OpArg || p2.Op == OpArgIntReg) && p1.Args[0].Op == OpSP
   881  	case OpArg, OpArgIntReg:
   882  		if p2.Op == OpSP || p2.Op == OpLocalAddr {
   883  			return true
   884  		}
   885  	case OpSP:
   886  		return p2.Op == OpAddr || p2.Op == OpLocalAddr || p2.Op == OpArg || p2.Op == OpArgIntReg || p2.Op == OpSP
   887  	}
   888  	return false
   889  }
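
        // As an illustration, two OffPtrs off the same base with offsets 0 and 8
        // and sizes 8 are disjoint, while offsets 0 and 4 with sizes 8 are not
        // (the regions [0,8) and [4,12) overlap).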
   890  
   891  // moveSize returns the number of bytes an aligned MOV instruction moves.
   892  func moveSize(align int64, c *Config) int64 {
   893  	switch {
   894  	case align%8 == 0 && c.PtrSize == 8:
   895  		return 8
   896  	case align%4 == 0:
   897  		return 4
   898  	case align%2 == 0:
   899  		return 2
   900  	}
   901  	return 1
   902  }
   903  
   904  // mergePoint finds a block among a's blocks which dominates b and is itself
   905  // dominated by all of a's blocks. Returns nil if it can't find one.
   906  // Might return nil even if one does exist.
   907  func mergePoint(b *Block, a ...*Value) *Block {
   908  	// Walk backward from b looking for one of the a's blocks.
   909  
   910  	// Max distance
   911  	d := 100
   912  
   913  	for d > 0 {
   914  		for _, x := range a {
   915  			if b == x.Block {
   916  				goto found
   917  			}
   918  		}
   919  		if len(b.Preds) > 1 {
   920  			// Don't know which way to go back. Abort.
   921  			return nil
   922  		}
   923  		b = b.Preds[0].b
   924  		d--
   925  	}
   926  	return nil // too far away
   927  found:
   928  // At this point, b is the block of the first value in a that we found by walking backwards.
   929  // If we return anything, it will be this block, saved as r.
   930  	r := b
   931  
   932  	// Keep going, counting the other a's that we find. They must all dominate r.
   933  	na := 0
   934  	for d > 0 {
   935  		for _, x := range a {
   936  			if b == x.Block {
   937  				na++
   938  			}
   939  		}
   940  		if na == len(a) {
   941  			// Found all of a in a backwards walk. We can return r.
   942  			return r
   943  		}
   944  		if len(b.Preds) > 1 {
   945  			return nil
   946  		}
   947  		b = b.Preds[0].b
   948  		d--
   949  
   950  	}
   951  	return nil // too far away
   952  }
   953  
   954  // clobber invalidates values. Returns true.
   955  // clobber is used by rewrite rules to:
   956  //
   957  //	A) make sure the values are really dead and never used again.
   958  //	B) decrement use counts of the values' args.
   959  func clobber(vv ...*Value) bool {
   960  	for _, v := range vv {
   961  		v.reset(OpInvalid)
   962  		// Note: leave v.Block intact.  The Block field is used after clobber.
   963  	}
   964  	return true
   965  }
   966  
   967  // clobberIfDead resets v when use count is 1. Returns true.
   968  // clobberIfDead is used by rewrite rules to decrement
   969  // use counts of v's args when v is dead and never used.
   970  func clobberIfDead(v *Value) bool {
   971  	if v.Uses == 1 {
   972  		v.reset(OpInvalid)
   973  	}
   974  	// Note: leave v.Block intact.  The Block field is used after clobberIfDead.
   975  	return true
   976  }
   977  
   978  // noteRule is an easy way to track if a rule is matched when writing
   979  // new ones.  Make the rule of interest also conditional on
   980  //
   981  //	noteRule("note to self: rule of interest matched")
   982  //
   983  // and that message will print when the rule matches.
   984  func noteRule(s string) bool {
   985  	fmt.Println(s)
   986  	return true
   987  }
   988  
   989  // countRule increments Func.ruleMatches[key].
   990  // If Func.ruleMatches is non-nil at the end
   991  // of compilation, it will be printed to stdout.
   992  // This is intended to make it easier to find which functions
   993  // contain lots of rule matches when developing new rules.
   994  func countRule(v *Value, key string) bool {
   995  	f := v.Block.Func
   996  	if f.ruleMatches == nil {
   997  		f.ruleMatches = make(map[string]int)
   998  	}
   999  	f.ruleMatches[key]++
  1000  	return true
  1001  }
  1002  
  1003  // warnRule generates compiler debug output with string s when
  1004  // v is not in autogenerated code, cond is true and the rule has fired.
  1005  func warnRule(cond bool, v *Value, s string) bool {
  1006  	if pos := v.Pos; pos.Line() > 1 && cond {
  1007  		v.Block.Func.Warnl(pos, s)
  1008  	}
  1009  	return true
  1010  }
  1011  
  1012  // flagArg extracts x from a pseudo-op like (LessThan x); it returns nil if v does not have that shape.
  1013  func flagArg(v *Value) *Value {
  1014  	if len(v.Args) != 1 || !v.Args[0].Type.IsFlags() {
  1015  		return nil
  1016  	}
  1017  	return v.Args[0]
  1018  }
  1019  
  1020  // arm64Negate finds the complement to an ARM64 condition code,
  1021  // for example !Equal -> NotEqual or !LessThan -> GreaterEqual
  1022  //
  1023  // For floating point, it's more subtle because NaN is unordered. We do
  1024  // !LessThanF -> NotLessThanF, the latter takes care of NaNs.
  1025  func arm64Negate(op Op) Op {
  1026  	switch op {
  1027  	case OpARM64LessThan:
  1028  		return OpARM64GreaterEqual
  1029  	case OpARM64LessThanU:
  1030  		return OpARM64GreaterEqualU
  1031  	case OpARM64GreaterThan:
  1032  		return OpARM64LessEqual
  1033  	case OpARM64GreaterThanU:
  1034  		return OpARM64LessEqualU
  1035  	case OpARM64LessEqual:
  1036  		return OpARM64GreaterThan
  1037  	case OpARM64LessEqualU:
  1038  		return OpARM64GreaterThanU
  1039  	case OpARM64GreaterEqual:
  1040  		return OpARM64LessThan
  1041  	case OpARM64GreaterEqualU:
  1042  		return OpARM64LessThanU
  1043  	case OpARM64Equal:
  1044  		return OpARM64NotEqual
  1045  	case OpARM64NotEqual:
  1046  		return OpARM64Equal
  1047  	case OpARM64LessThanF:
  1048  		return OpARM64NotLessThanF
  1049  	case OpARM64NotLessThanF:
  1050  		return OpARM64LessThanF
  1051  	case OpARM64LessEqualF:
  1052  		return OpARM64NotLessEqualF
  1053  	case OpARM64NotLessEqualF:
  1054  		return OpARM64LessEqualF
  1055  	case OpARM64GreaterThanF:
  1056  		return OpARM64NotGreaterThanF
  1057  	case OpARM64NotGreaterThanF:
  1058  		return OpARM64GreaterThanF
  1059  	case OpARM64GreaterEqualF:
  1060  		return OpARM64NotGreaterEqualF
  1061  	case OpARM64NotGreaterEqualF:
  1062  		return OpARM64GreaterEqualF
  1063  	default:
  1064  		panic("unreachable")
  1065  	}
  1066  }
  1067  
  1068  // arm64Invert evaluates (InvertFlags op), which
  1069  // is the same as altering the condition codes such
  1070  // that the same result would be produced if the arguments
  1071  // to the flag-generating instruction were reversed, e.g.
  1072  // (InvertFlags (CMP x y)) -> (CMP y x)
  1073  func arm64Invert(op Op) Op {
  1074  	switch op {
  1075  	case OpARM64LessThan:
  1076  		return OpARM64GreaterThan
  1077  	case OpARM64LessThanU:
  1078  		return OpARM64GreaterThanU
  1079  	case OpARM64GreaterThan:
  1080  		return OpARM64LessThan
  1081  	case OpARM64GreaterThanU:
  1082  		return OpARM64LessThanU
  1083  	case OpARM64LessEqual:
  1084  		return OpARM64GreaterEqual
  1085  	case OpARM64LessEqualU:
  1086  		return OpARM64GreaterEqualU
  1087  	case OpARM64GreaterEqual:
  1088  		return OpARM64LessEqual
  1089  	case OpARM64GreaterEqualU:
  1090  		return OpARM64LessEqualU
  1091  	case OpARM64Equal, OpARM64NotEqual:
  1092  		return op
  1093  	case OpARM64LessThanF:
  1094  		return OpARM64GreaterThanF
  1095  	case OpARM64GreaterThanF:
  1096  		return OpARM64LessThanF
  1097  	case OpARM64LessEqualF:
  1098  		return OpARM64GreaterEqualF
  1099  	case OpARM64GreaterEqualF:
  1100  		return OpARM64LessEqualF
  1101  	case OpARM64NotLessThanF:
  1102  		return OpARM64NotGreaterThanF
  1103  	case OpARM64NotGreaterThanF:
  1104  		return OpARM64NotLessThanF
  1105  	case OpARM64NotLessEqualF:
  1106  		return OpARM64NotGreaterEqualF
  1107  	case OpARM64NotGreaterEqualF:
  1108  		return OpARM64NotLessEqualF
  1109  	default:
  1110  		panic("unreachable")
  1111  	}
  1112  }
  1113  
  1114  // ccARM64Eval evaluates an ARM64 op against a flags value
  1115  // that is potentially constant; it returns 1 for true,
  1116  // -1 for false, and 0 for not constant.
  1117  func ccARM64Eval(op Op, flags *Value) int {
  1118  	fop := flags.Op
  1119  	if fop == OpARM64InvertFlags {
  1120  		return -ccARM64Eval(op, flags.Args[0])
  1121  	}
  1122  	if fop != OpARM64FlagConstant {
  1123  		return 0
  1124  	}
  1125  	fc := flagConstant(flags.AuxInt)
  1126  	b2i := func(b bool) int {
  1127  		if b {
  1128  			return 1
  1129  		}
  1130  		return -1
  1131  	}
  1132  	switch op {
  1133  	case OpARM64Equal:
  1134  		return b2i(fc.eq())
  1135  	case OpARM64NotEqual:
  1136  		return b2i(fc.ne())
  1137  	case OpARM64LessThan:
  1138  		return b2i(fc.lt())
  1139  	case OpARM64LessThanU:
  1140  		return b2i(fc.ult())
  1141  	case OpARM64GreaterThan:
  1142  		return b2i(fc.gt())
  1143  	case OpARM64GreaterThanU:
  1144  		return b2i(fc.ugt())
  1145  	case OpARM64LessEqual:
  1146  		return b2i(fc.le())
  1147  	case OpARM64LessEqualU:
  1148  		return b2i(fc.ule())
  1149  	case OpARM64GreaterEqual:
  1150  		return b2i(fc.ge())
  1151  	case OpARM64GreaterEqualU:
  1152  		return b2i(fc.uge())
  1153  	}
  1154  	return 0
  1155  }
  1156  
  1157  // logRule logs the use of the rule s. This will only be enabled if
  1158  // rewrite rules were generated with the -log option; see _gen/rulegen.go.
  1159  func logRule(s string) {
  1160  	if ruleFile == nil {
  1161  		// Open a log file to write log to. We open in append
  1162  		// mode because all.bash runs the compiler lots of times,
  1163  		// and we want the concatenation of all of those logs.
  1164  		// This means, of course, that users need to rm the old log
  1165  		// to get fresh data.
  1166  		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
  1167  		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
  1168  			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
  1169  		if err != nil {
  1170  			panic(err)
  1171  		}
  1172  		ruleFile = w
  1173  	}
  1174  	_, err := fmt.Fprintln(ruleFile, s)
  1175  	if err != nil {
  1176  		panic(err)
  1177  	}
  1178  }
  1179  
  1180  var ruleFile io.Writer
  1181  
  1182  func isConstZero(v *Value) bool {
  1183  	switch v.Op {
  1184  	case OpConstNil:
  1185  		return true
  1186  	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
  1187  		return v.AuxInt == 0
  1188  	case OpStringMake, OpIMake, OpComplexMake:
  1189  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1])
  1190  	case OpSliceMake:
  1191  		return isConstZero(v.Args[0]) && isConstZero(v.Args[1]) && isConstZero(v.Args[2])
  1192  	case OpStringPtr, OpStringLen, OpSlicePtr, OpSliceLen, OpSliceCap, OpITab, OpIData, OpComplexReal, OpComplexImag:
  1193  		return isConstZero(v.Args[0])
  1194  	}
  1195  	return false
  1196  }
  1197  
  1198  // reciprocalExact64 reports whether 1/c is exactly representable.
  1199  func reciprocalExact64(c float64) bool {
  1200  	b := math.Float64bits(c)
  1201  	man := b & (1<<52 - 1)
  1202  	if man != 0 {
  1203  		return false // not a power of 2, denormal, or NaN
  1204  	}
  1205  	exp := b >> 52 & (1<<11 - 1)
  1206  	// exponent bias is 0x3ff.  So taking the reciprocal of a number
  1207  	// changes the exponent to 0x7fe-exp.
  1208  	switch exp {
  1209  	case 0:
  1210  		return false // ±0
  1211  	case 0x7ff:
  1212  		return false // ±inf
  1213  	case 0x7fe:
  1214  		return false // exponent is not representable
  1215  	default:
  1216  		return true
  1217  	}
  1218  }
  1219  
  1220  // reciprocalExact32 reports whether 1/c is exactly representable.
  1221  func reciprocalExact32(c float32) bool {
  1222  	b := math.Float32bits(c)
  1223  	man := b & (1<<23 - 1)
  1224  	if man != 0 {
  1225  		return false // not a power of 2, denormal, or NaN
  1226  	}
  1227  	exp := b >> 23 & (1<<8 - 1)
  1228  	// exponent bias is 0x7f.  So taking the reciprocal of a number
  1229  	// changes the exponent to 0xfe-exp.
  1230  	switch exp {
  1231  	case 0:
  1232  		return false // ±0
  1233  	case 0xff:
  1234  		return false // ±inf
  1235  	case 0xfe:
  1236  		return false // exponent is not representable
  1237  	default:
  1238  		return true
  1239  	}
  1240  }
  1241  
  1242  // isARMImmRot reports whether the immediate v can be directly encoded in an ARM instruction.
  1243  func isARMImmRot(v uint32) bool {
  1244  	for i := 0; i < 16; i++ {
  1245  		if v&^0xff == 0 {
  1246  			return true
  1247  		}
  1248  		v = v<<2 | v>>30
  1249  	}
  1250  
  1251  	return false
  1252  }
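
        // For example, 0xff, 0xff0, and 0xc000003f (0xff rotated right by 2) are
        // encodable, while 0x101 is not: its set bits never fit in one byte under
        // any even rotation.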
  1253  
  1254  // overlap reports whether the ranges given by the given offset and
  1255  // size pairs overlap.
  1256  func overlap(offset1, size1, offset2, size2 int64) bool {
  1257  	if offset1 >= offset2 && offset2+size2 > offset1 {
  1258  		return true
  1259  	}
  1260  	if offset2 >= offset1 && offset1+size1 > offset2 {
  1261  		return true
  1262  	}
  1263  	return false
  1264  }
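
        // For example, overlap(0, 8, 4, 8) is true (the ranges share [4,8)),
        // while overlap(0, 4, 4, 4) is false (the ranges only touch at 4).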
  1265  
  1266  // zeroUpper32Bits reports whether x zeroes out the upper 32 bits of a 64-bit register.
  1267  // depth limits the recursion depth. In AMD64.rules 3 is used as the limit,
  1268  // because it catches the same number of cases as 4.
  1269  func zeroUpper32Bits(x *Value, depth int) bool {
  1270  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1271  		// If the value is signed, it might get re-sign-extended
  1272  		// during spill and restore. See issue 68227.
  1273  		return false
  1274  	}
  1275  	switch x.Op {
  1276  	case OpAMD64MOVLconst, OpAMD64MOVLload, OpAMD64MOVLQZX, OpAMD64MOVLloadidx1,
  1277  		OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVBload, OpAMD64MOVBloadidx1,
  1278  		OpAMD64MOVLloadidx4, OpAMD64ADDLload, OpAMD64SUBLload, OpAMD64ANDLload,
  1279  		OpAMD64ORLload, OpAMD64XORLload, OpAMD64CVTTSD2SL,
  1280  		OpAMD64ADDL, OpAMD64ADDLconst, OpAMD64SUBL, OpAMD64SUBLconst,
  1281  		OpAMD64ANDL, OpAMD64ANDLconst, OpAMD64ORL, OpAMD64ORLconst,
  1282  		OpAMD64XORL, OpAMD64XORLconst, OpAMD64NEGL, OpAMD64NOTL,
  1283  		OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
  1284  		OpAMD64SHLL, OpAMD64SHLLconst:
  1285  		return true
  1286  	case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
  1287  		OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
  1288  		OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
  1289  		return true
  1290  	case OpArg: // note: but not ArgIntReg
  1291  		// amd64 always loads args from the stack unsigned.
  1292  		// most other architectures load them sign/zero extended based on the type.
  1293  		return x.Type.Size() == 4 && x.Block.Func.Config.arch == "amd64"
  1294  	case OpPhi, OpSelect0, OpSelect1:
  1295  // Phis can use each other as arguments; instead of tracking visited values,
  1296  // just limit the recursion depth.
  1297  		if depth <= 0 {
  1298  			return false
  1299  		}
  1300  		for i := range x.Args {
  1301  			if !zeroUpper32Bits(x.Args[i], depth-1) {
  1302  				return false
  1303  			}
  1304  		}
  1305  		return true
  1306  
  1307  	}
  1308  	return false
  1309  }
  1310  
  1311  // zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
  1312  func zeroUpper48Bits(x *Value, depth int) bool {
  1313  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1314  		return false
  1315  	}
  1316  	switch x.Op {
  1317  	case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
  1318  		return true
  1319  	case OpArg: // note: but not ArgIntReg
  1320  		return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
  1321  	case OpPhi, OpSelect0, OpSelect1:
  1322  // Phis can use each other as arguments; instead of tracking visited values,
  1323  // just limit the recursion depth.
  1324  		if depth <= 0 {
  1325  			return false
  1326  		}
  1327  		for i := range x.Args {
  1328  			if !zeroUpper48Bits(x.Args[i], depth-1) {
  1329  				return false
  1330  			}
  1331  		}
  1332  		return true
  1333  
  1334  	}
  1335  	return false
  1336  }
  1337  
  1338  // zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
  1339  func zeroUpper56Bits(x *Value, depth int) bool {
  1340  	if x.Type.IsSigned() && x.Type.Size() < 8 {
  1341  		return false
  1342  	}
  1343  	switch x.Op {
  1344  	case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
  1345  		return true
  1346  	case OpArg: // note: but not ArgIntReg
  1347  		return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
  1348  	case OpPhi, OpSelect0, OpSelect1:
  1349  // Phis can use each other as arguments; instead of tracking visited values,
  1350  // just limit the recursion depth.
  1351  		if depth <= 0 {
  1352  			return false
  1353  		}
  1354  		for i := range x.Args {
  1355  			if !zeroUpper56Bits(x.Args[i], depth-1) {
  1356  				return false
  1357  			}
  1358  		}
  1359  		return true
  1360  
  1361  	}
  1362  	return false
  1363  }
  1364  
  1365  func isInlinableMemclr(c *Config, sz int64) bool {
  1366  	if sz < 0 {
  1367  		return false
  1368  	}
  1369  	// TODO: expand this check to allow other architectures
  1370  	// see CL 454255 and issue 56997
  1371  	switch c.arch {
  1372  	case "amd64", "arm64":
  1373  		return true
  1374  	case "ppc64le", "ppc64", "loong64":
  1375  		return sz < 512
  1376  	}
  1377  	return false
  1378  }
  1379  
  1380  // isInlinableMemmove reports whether the given arch performs a Move of the given size
  1381  // faster than memmove. It will only return true if replacing the memmove with a Move is
  1382  // safe, either because Move will do all of its loads before any of its stores, or
  1383  // because the arguments are known to be disjoint.
  1384  // This is used as a check for replacing memmove with Move ops.
  1385  func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1386  	// It is always safe to convert memmove into Move when its arguments are disjoint.
  1387  	// Move ops may or may not be faster for large sizes depending on how the platform
  1388  	// lowers them, so we only perform this optimization on platforms that we know to
  1389  	// have fast Move ops.
  1390  	switch c.arch {
  1391  	case "amd64":
  1392  		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
  1393  	case "386", "arm64":
  1394  		return sz <= 8
  1395  	case "s390x", "ppc64", "ppc64le":
  1396  		return sz <= 8 || disjoint(dst, sz, src, sz)
  1397  	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
  1398  		return sz <= 4
  1399  	}
  1400  	return false
  1401  }
  1402  func IsInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
  1403  	return isInlinableMemmove(dst, src, sz, c)
  1404  }
  1405  
  1406  // logLargeCopy logs the occurrence of a large copy.
  1407  // The best place to do this is in the rewrite rules where the size of the move is easy to find.
  1408  // "Large" is arbitrarily chosen to be 128 bytes; this may change.
  1409  func logLargeCopy(v *Value, s int64) bool {
  1410  	if s < 128 {
  1411  		return true
  1412  	}
  1413  	if logopt.Enabled() {
  1414  		logopt.LogOpt(v.Pos, "copy", "lower", v.Block.Func.Name, fmt.Sprintf("%d bytes", s))
  1415  	}
  1416  	return true
  1417  }
  1418  func LogLargeCopy(funcName string, pos src.XPos, s int64) {
  1419  	if s < 128 {
  1420  		return
  1421  	}
  1422  	if logopt.Enabled() {
  1423  		logopt.LogOpt(pos, "copy", "lower", funcName, fmt.Sprintf("%d bytes", s))
  1424  	}
  1425  }
  1426  
  1427  // hasSmallRotate reports whether the architecture has rotate instructions
  1428  // for sizes < 32-bit.  This is used to decide whether to promote some rotations.
  1429  func hasSmallRotate(c *Config) bool {
  1430  	switch c.arch {
  1431  	case "amd64", "386":
  1432  		return true
  1433  	default:
  1434  		return false
  1435  	}
  1436  }
  1437  
  1438  func supportsPPC64PCRel() bool {
  1439  // PCRel is currently supported only for >= power10 targets on linux.
  1440  // Internal and external linking support this on ppc64le; only internal linking does on ppc64.
  1441  	return buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux"
  1442  }
  1443  
  1444  func newPPC64ShiftAuxInt(sh, mb, me, sz int64) int32 {
  1445  	if sh < 0 || sh >= sz {
  1446  		panic("PPC64 shift arg sh out of range")
  1447  	}
  1448  	if mb < 0 || mb >= sz {
  1449  		panic("PPC64 shift arg mb out of range")
  1450  	}
  1451  	if me < 0 || me >= sz {
  1452  		panic("PPC64 shift arg me out of range")
  1453  	}
  1454  	return int32(sh<<16 | mb<<8 | me)
  1455  }
  1456  
  1457  func GetPPC64Shiftsh(auxint int64) int64 {
  1458  	return int64(int8(auxint >> 16))
  1459  }
  1460  
  1461  func GetPPC64Shiftmb(auxint int64) int64 {
  1462  	return int64(int8(auxint >> 8))
  1463  }
  1464  
  1465  func GetPPC64Shiftme(auxint int64) int64 {
  1466  	return int64(int8(auxint))
  1467  }
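
        // For example, newPPC64ShiftAuxInt(3, 0, 28, 32) packs its arguments as
        // 0x03001c, and GetPPC64Shiftsh, GetPPC64Shiftmb, and GetPPC64Shiftme
        // recover 3, 0, and 28 from that value.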
  1468  
  1469  // isPPC64WordRotateMask reports whether v64 can be encoded as a mask for an rlwinm-like
  1470  // operation.  Masks can also extend from the msb and wrap to
  1471  // the lsb.  That is, the valid masks are 32-bit strings
  1472  // of the form: 0..01..10..0 or 1..10..01..1 or 1...1
  1473  func isPPC64WordRotateMask(v64 int64) bool {
  1474  	// Isolate rightmost 1 (if none 0) and add.
  1475  	v := uint32(v64)
  1476  	vp := (v & -v) + v
  1477  	// Likewise, for the wrapping case.
  1478  	vn := ^v
  1479  	vpn := (vn & -vn) + vn
  1480  	return (v&vp == 0 || vn&vpn == 0) && v != 0
  1481  }
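
        // For example, 0x000ff000 and 0xf000000f (a wrapping mask) are accepted,
        // while 0x0ff00ff0 (two separate runs of ones) and 0 are rejected.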
  1482  
  1483  // Compress mask and shift into single value of the form
  1484  // me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
  1485  // be used to regenerate the input mask.
  1486  func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
  1487  	var mb, me, mbn, men int
  1488  
  1489  	// Determine boundaries and then decode them
  1490  	if mask == 0 || ^mask == 0 || rotate >= nbits {
  1491  		panic(fmt.Sprintf("invalid PPC64 rotate mask: %x %d %d", uint64(mask), rotate, nbits))
  1492  	} else if nbits == 32 {
  1493  		mb = bits.LeadingZeros32(uint32(mask))
  1494  		me = 32 - bits.TrailingZeros32(uint32(mask))
  1495  		mbn = bits.LeadingZeros32(^uint32(mask))
  1496  		men = 32 - bits.TrailingZeros32(^uint32(mask))
  1497  	} else {
  1498  		mb = bits.LeadingZeros64(uint64(mask))
  1499  		me = 64 - bits.TrailingZeros64(uint64(mask))
  1500  		mbn = bits.LeadingZeros64(^uint64(mask))
  1501  		men = 64 - bits.TrailingZeros64(^uint64(mask))
  1502  	}
  1503  // Check for a wrapping mask (e.g. bits at 0 and 63).
  1504  	if mb == 0 && me == int(nbits) {
  1505  		// swap the inverted values
  1506  		mb, me = men, mbn
  1507  	}
  1508  
  1509  	return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
  1510  }
  1511  
  1512  // Merge (RLDICL [encoded] (SRDconst [s] x)) into (RLDICL [new_encoded] x)
  1513  // SRDconst on PPC64 is an extended mnemonic of RLDICL. If the input to an
  1514  // RLDICL is an SRDconst, and the RLDICL does not rotate its value, the two
  1515  // operations can be combined. This function assumes the two opcodes can
  1516  // be merged, and returns an encoded rotate+mask value of the combined RLDICL.
  1517  func mergePPC64RLDICLandSRDconst(encoded, s int64) int64 {
  1518  	mb := s
  1519  	r := 64 - s
  1520  	// A larger mb is a smaller mask.
  1521  	if (encoded>>8)&0xFF < mb {
  1522  		encoded = (encoded &^ 0xFF00) | mb<<8
  1523  	}
  1524  	// The rotate is expected to be 0.
  1525  	if (encoded & 0xFF0000) != 0 {
  1526  		panic("non-zero rotate")
  1527  	}
  1528  	return encoded | r<<16
  1529  }
  1530  
  1531  // DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.  The values returned as
  1532  // mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
  1533  func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
  1534  	auxint := uint64(sauxint)
  1535  	rotate = int64((auxint >> 16) & 0xFF)
  1536  	mb = int64((auxint >> 8) & 0xFF)
  1537  	me = int64((auxint >> 0) & 0xFF)
  1538  	nbits := int64((auxint >> 24) & 0xFF)
  1539  	mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
  1540  	if mb > me {
  1541  		mask = ^mask
  1542  	}
  1543  	if nbits == 32 {
  1544  		mask = uint64(uint32(mask))
  1545  	}
  1546  
  1547  	// Fixup ME to match ISA definition.  The second argument to MASK(..,me)
  1548  	// is inclusive.
  1549  	me = (me - 1) & (nbits - 1)
  1550  	return
  1551  }
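
        // Worked example (illustrative only): encodePPC64RotateMask(0, 0xff00, 32)
        // yields 0x20001018, and DecodePPC64RotateMask(0x20001018) returns
        // rotate=0, mb=16, me=23, mask=0xff00, matching the ISA's MASK(16,23)
        // over a 32-bit word.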
  1552  
  1553  // isPPC64ValidShiftMask reports whether the mask is a set of
  1554  // consecutive bits including the least
  1555  // significant bit.
  1556  func isPPC64ValidShiftMask(v int64) bool {
  1557  	if (v != 0) && ((v+1)&v) == 0 {
  1558  		return true
  1559  	}
  1560  	return false
  1561  }
  1562  
  1563  func getPPC64ShiftMaskLength(v int64) int64 {
  1564  	return int64(bits.Len64(uint64(v)))
  1565  }
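
        // For example, isPPC64ValidShiftMask(0x1f) is true and
        // getPPC64ShiftMaskLength(0x1f) is 5, while isPPC64ValidShiftMask(0x1e)
        // is false because bit 0 is clear.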
  1566  
  1567  // Decompose a shift right into an equivalent rotate/mask,
  1568  // and return mask & m.
  1569  func mergePPC64RShiftMask(m, s, nbits int64) int64 {
  1570  	smask := uint64((1<<uint(nbits))-1) >> uint(s)
  1571  	return m & int64(smask)
  1572  }
  1573  
  1574  // Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
  1575  func mergePPC64AndSrwi(m, s int64) int64 {
  1576  	mask := mergePPC64RShiftMask(m, s, 32)
  1577  	if !isPPC64WordRotateMask(mask) {
  1578  		return 0
  1579  	}
  1580  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1581  }
  1582  
  1583  // Combine (ANDconst [m] (SRDconst [s])) into (RLWINM [y]) or return 0
  1584  func mergePPC64AndSrdi(m, s int64) int64 {
  1585  	mask := mergePPC64RShiftMask(m, s, 64)
  1586  
  1587  	// Verify the rotate and mask result only uses the lower 32 bits.
  1588  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, -int(s))
  1589  	if rv&uint64(mask) != 0 {
  1590  		return 0
  1591  	}
  1592  	if !isPPC64WordRotateMask(mask) {
  1593  		return 0
  1594  	}
  1595  	return encodePPC64RotateMask((32-s)&31, mask, 32)
  1596  }
  1597  
  1598  // Combine (ANDconst [m] (SLDconst [s])) into (RLWINM [y]) or return 0
  1599  func mergePPC64AndSldi(m, s int64) int64 {
  1600  	mask := -1 << s & m
  1601  
  1602  	// Verify the rotate and mask result only uses the lower 32 bits.
  1603  	rv := bits.RotateLeft64(0xFFFFFFFF00000000, int(s))
  1604  	if rv&uint64(mask) != 0 {
  1605  		return 0
  1606  	}
  1607  	if !isPPC64WordRotateMask(mask) {
  1608  		return 0
  1609  	}
  1610  	return encodePPC64RotateMask(s&31, mask, 32)
  1611  }
  1612  
  1613  // Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1614  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1615  func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
  1616  	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
  1617  	// For CLRLSLDI, it is more convenient to think of it as masking the leftmost bits, then rotating left.
  1618  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1619  
  1620  	// Rewrite mask to apply after the final left shift.
  1621  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1622  
  1623  	r_1 := 32 - srw
  1624  	r_2 := GetPPC64Shiftsh(sld)
  1625  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1626  
  1627  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1628  		return 0
  1629  	}
  1630  	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
  1631  }
  1632  
  1633  // Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
  1634  // Return the encoded RLWINM constant, or 0 if they cannot be merged.
  1635  func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
  1636  	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
  1637  	// For CLRLSLDI, it is more convenient to think of it as masking the leftmost bits, then rotating left.
  1638  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1639  
  1640  	// Rewrite mask to apply after the final left shift.
  1641  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
  1642  
  1643  	r_1 := 64 - srd
  1644  	r_2 := GetPPC64Shiftsh(sld)
  1645  	r_3 := (r_1 + r_2) & 63 // This can wrap.
  1646  
  1647  	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
  1648  		return 0
  1649  	}
  1650  	// This combine only works when selecting and shifting the lower 32 bits.
  1651  	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
  1652  	if v1&mask_3 != 0 {
  1653  		return 0
  1654  	}
  1655  	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32)
  1656  }
  1657  
  1658  // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM.  Return
  1659  // the encoded RLWINM constant, or 0 if they cannot be merged.
  1660  func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
  1661  	r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
  1662  	// For CLRLSLDI, it is more convenient to think of it as masking the leftmost bits, then rotating left.
  1663  	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
  1664  
  1665  	// combine the masks, and adjust for the final left shift.
  1666  	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
  1667  	r_2 := GetPPC64Shiftsh(int64(sld))
  1668  	r_3 := (r_1 + r_2) & 31 // This can wrap.
  1669  
  1670  	// Verify the result is still a valid bitmask of <= 32 bits.
  1671  	if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
  1672  		return 0
  1673  	}
  1674  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1675  }
  1676  
  1677  // Test if RLWINM feeding into an ANDconst can be merged. Return the encoded RLWINM constant,
  1678  // or 0 if they cannot be merged.
  1679  func mergePPC64AndRlwinm(mask uint32, rlw int64) int64 {
  1680  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1681  	mask_out := (mask_rlw & uint64(mask))
  1682  
  1683  	// Verify the result is still a valid bitmask of <= 32 bits.
  1684  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1685  		return 0
  1686  	}
  1687  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1688  }
  1689  
  1690  // Test if RLWINM opcode rlw clears the upper 32 bits of the
  1691  // result. Return rlw if it does, 0 otherwise.
  1692  func mergePPC64MovwzregRlwinm(rlw int64) int64 {
  1693  	_, mb, me, _ := DecodePPC64RotateMask(rlw)
  1694  	if mb > me {
  1695  		return 0
  1696  	}
  1697  	return rlw
  1698  }
  1699  
  1700  // Test if an ANDconst feeding into a RLWINM can be merged. Return the encoded RLWINM constant,
  1701  // or 0 if they cannot be merged.
  1702  func mergePPC64RlwinmAnd(rlw int64, mask uint32) int64 {
  1703  	r, _, _, mask_rlw := DecodePPC64RotateMask(rlw)
  1704  
  1705  	// Rotate the input mask, combine it with the rlwinm mask, and test if the result is still a valid rlwinm mask.
  1706  	r_mask := bits.RotateLeft32(mask, int(r))
  1707  
  1708  	mask_out := (mask_rlw & uint64(r_mask))
  1709  
  1710  	// Verify the result is still a valid bitmask of <= 32 bits.
  1711  	if !isPPC64WordRotateMask(int64(mask_out)) {
  1712  		return 0
  1713  	}
  1714  	return encodePPC64RotateMask(r, int64(mask_out), 32)
  1715  }
  1716  
  1717  // Test if a RLWINM feeding into a SLDconst can be merged. Return the encoded RLWINM constant,
  1718  // or 0 if they cannot be merged.
  1719  func mergePPC64SldiRlwinm(sldi, rlw int64) int64 {
  1720  	r_1, mb, me, mask_1 := DecodePPC64RotateMask(rlw)
  1721  	if mb > me || mb < sldi {
  1722  		// Wrapping masks cannot be merged as the upper 32 bits are effectively undefined in this case.
  1723  		// Likewise, if mb is less than the shift amount, it cannot be merged.
  1724  		return 0
  1725  	}
  1726  	// combine the masks, and adjust for the final left shift.
  1727  	mask_3 := mask_1 << sldi
  1728  	r_3 := (r_1 + sldi) & 31 // This can wrap.
  1729  
  1730  	// Verify the result is still a valid bitmask of <= 32 bits.
  1731  	if uint64(uint32(mask_3)) != mask_3 {
  1732  		return 0
  1733  	}
  1734  	return encodePPC64RotateMask(r_3, int64(mask_3), 32)
  1735  }
  1736  
  1737  // Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
  1738  // or return 0 if they cannot be combined.
  1739  func mergePPC64SldiSrw(sld, srw int64) int64 {
  1740  	if sld > srw || srw >= 32 {
  1741  		return 0
  1742  	}
  1743  	mask_r := uint32(0xFFFFFFFF) >> uint(srw)
  1744  	mask_l := uint32(0xFFFFFFFF) >> uint(sld)
  1745  	mask := (mask_r & mask_l) << uint(sld)
  1746  	return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
  1747  }
  1748  
  1749  // Convert a PPC64 opcode from the Op to OpCC form. This converts (op x y)
  1750  // to (Select0 (opCC x y)) without having to explicitly fixup every user
  1751  // of op.
  1752  //
  1753  // E.g. consider the case:
  1754  // a = (ADD x y)
  1755  // b = (CMPconst [0] a)
  1756  // c = (OR a z)
  1757  //
  1758  // A rule like (CMPconst [0] (ADD x y)) => (CMPconst [0] (Select0 (ADDCC x y)))
  1759  // would produce:
  1760  // a  = (ADD x y)
  1761  // a' = (ADDCC x y)
  1762  // a” = (Select0 a')
  1763  // b  = (CMPconst [0] a”)
  1764  // c  = (OR a z)
  1765  //
  1766  // which makes it impossible to rewrite the second user. Instead the result
  1767  // of this conversion is:
  1768  // a' = (ADDCC x y)
  1769  // a  = (Select0 a')
  1770  // b  = (CMPconst [0] a)
  1771  // c  = (OR a z)
  1772  //
  1773  // This makes it trivial to rewrite b using a lowering rule.
  1774  func convertPPC64OpToOpCC(op *Value) *Value {
  1775  	ccOpMap := map[Op]Op{
  1776  		OpPPC64ADD:      OpPPC64ADDCC,
  1777  		OpPPC64ADDconst: OpPPC64ADDCCconst,
  1778  		OpPPC64AND:      OpPPC64ANDCC,
  1779  		OpPPC64ANDN:     OpPPC64ANDNCC,
  1780  		OpPPC64ANDconst: OpPPC64ANDCCconst,
  1781  		OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
  1782  		OpPPC64MULHDU:   OpPPC64MULHDUCC,
  1783  		OpPPC64NEG:      OpPPC64NEGCC,
  1784  		OpPPC64NOR:      OpPPC64NORCC,
  1785  		OpPPC64OR:       OpPPC64ORCC,
  1786  		OpPPC64RLDICL:   OpPPC64RLDICLCC,
  1787  		OpPPC64SUB:      OpPPC64SUBCC,
  1788  		OpPPC64XOR:      OpPPC64XORCC,
  1789  	}
  1790  	b := op.Block
  1791  	opCC := b.NewValue0I(op.Pos, ccOpMap[op.Op], types.NewTuple(op.Type, types.TypeFlags), op.AuxInt)
  1792  	opCC.AddArgs(op.Args...)
  1793  	op.reset(OpSelect0)
  1794  	op.AddArgs(opCC)
  1795  	return op
  1796  }
  1797  
  1798  // Try converting a RLDICL to ANDCC. If successful, return the mask; otherwise, return 0.
  1799  func convertPPC64RldiclAndccconst(sauxint int64) int64 {
  1800  	r, _, _, mask := DecodePPC64RotateMask(sauxint)
  1801  	if r != 0 || mask&0xFFFF != mask {
  1802  		return 0
  1803  	}
  1804  	return int64(mask)
  1805  }
  1806  
  1807  // Convenience function to rotate a 32 bit constant value by another constant.
  1808  func rotateLeft32(v, rotate int64) int64 {
  1809  	return int64(bits.RotateLeft32(uint32(v), int(rotate)))
  1810  }
  1811  
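// rotateRight64 rotates the 64-bit constant value v right by rotate bits.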
  1812  func rotateRight64(v, rotate int64) int64 {
  1813  	return int64(bits.RotateLeft64(uint64(v), int(-rotate)))
  1814  }
  1815  
  1816  // armBFAuxInt encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
  1817  func armBFAuxInt(lsb, width int64) arm64BitField {
  1818  	if lsb < 0 || lsb > 63 {
  1819  		panic("ARM(64) bit field lsb constant out of range")
  1820  	}
  1821  	if width < 1 || lsb+width > 64 {
  1822  		panic("ARM(64) bit field width constant out of range")
  1823  	}
  1824  	return arm64BitField(width | lsb<<8)
  1825  }
  1826  
  1827  // lsb returns the lsb part of the auxInt field of arm64 bitfield ops.
  1828  func (bfc arm64BitField) lsb() int64 {
  1829  	return int64(uint64(bfc) >> 8)
  1830  }
  1831  
  1832  // width returns the width part of the auxInt field of arm64 bitfield ops.
  1833  func (bfc arm64BitField) width() int64 {
  1834  	return int64(bfc) & 0xff
  1835  }
  1836  
  1837  // isARM64BFMask reports whether mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
  1838  func isARM64BFMask(lsb, mask, rshift int64) bool {
  1839  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1840  	return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
  1841  }
  1842  
  1843  // arm64BFWidth returns the bitfield width of mask >> rshift for arm64 bitfield ops.
  1844  func arm64BFWidth(mask, rshift int64) int64 {
  1845  	shiftedMask := int64(uint64(mask) >> uint64(rshift))
  1846  	if shiftedMask == 0 {
  1847  		panic("ARM64 BF mask is zero")
  1848  	}
  1849  	return nto(shiftedMask)
  1850  }
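
// Illustrative sketch, not used by any rewrite rule: pack an lsb/width pair
// with armBFAuxInt and read both fields back with the accessors above. The
// function name is arbitrary.
func exampleARM64BitFieldRoundTrip() {
	bf := armBFAuxInt(8, 16)                              // selects bits [8, 24) of the register
	fmt.Printf("lsb=%d width=%d\n", bf.lsb(), bf.width()) // lsb=8 width=16
}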
  1851  
  1852  // registerizable reports whether t is a primitive type that fits in
  1853  // a register. It assumes float64 values will always fit into registers
  1854  // even if that isn't strictly true.
  1855  func registerizable(b *Block, typ *types.Type) bool {
  1856  	if typ.IsPtrShaped() || typ.IsFloat() || typ.IsBoolean() {
  1857  		return true
  1858  	}
  1859  	if typ.IsInteger() {
  1860  		return typ.Size() <= b.Func.Config.RegSize
  1861  	}
  1862  	return false
  1863  }
  1864  
  1865  // needRaceCleanup reports whether this call to racefuncenter/exit isn't needed.
  1866  func needRaceCleanup(sym *AuxCall, v *Value) bool {
  1867  	f := v.Block.Func
  1868  	if !f.Config.Race {
  1869  		return false
  1870  	}
  1871  	if !isSameCall(sym, "runtime.racefuncenter") && !isSameCall(sym, "runtime.racefuncexit") {
  1872  		return false
  1873  	}
  1874  	for _, b := range f.Blocks {
  1875  		for _, v := range b.Values {
  1876  			switch v.Op {
  1877  			case OpStaticCall, OpStaticLECall:
  1878  				// A call to racefuncenter is always paired with a call to racefuncexit,
  1879  				// and vice versa, so skip them here. Also allow calls to the panic* helpers.
  1880  				s := v.Aux.(*AuxCall).Fn.String()
  1881  				switch s {
  1882  				case "runtime.racefuncenter", "runtime.racefuncexit",
  1883  					"runtime.panicdivide", "runtime.panicwrap",
  1884  					"runtime.panicshift":
  1885  					continue
  1886  				}
  1887  				// If we encountered any call, we need to keep racefunc*,
  1888  				// for accurate stacktraces.
  1889  				return false
  1890  			case OpPanicBounds, OpPanicExtend:
  1891  				// Note: these are panic generators that are ok (like the static calls above).
  1892  			case OpClosureCall, OpInterCall, OpClosureLECall, OpInterLECall:
  1893  				// We must keep the race functions if there are any other call types.
  1894  				return false
  1895  			}
  1896  		}
  1897  	}
  1898  	if isSameCall(sym, "runtime.racefuncenter") {
  1899  		// TODO REGISTER ABI this needs to be cleaned up.
  1900  		// If we're removing racefuncenter, remove its argument as well.
  1901  		if v.Args[0].Op != OpStore {
  1902  			if v.Op == OpStaticLECall {
  1903  				// there is no store, yet.
  1904  				return true
  1905  			}
  1906  			return false
  1907  		}
  1908  		mem := v.Args[0].Args[2]
  1909  		v.Args[0].reset(OpCopy)
  1910  		v.Args[0].AddArg(mem)
  1911  	}
  1912  	return true
  1913  }
  1914  
  1915  // symIsRO reports whether sym is a read-only global.
  1916  func symIsRO(sym interface{}) bool {
  1917  	lsym := sym.(*obj.LSym)
  1918  	return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
  1919  }
  1920  
  1921  // symIsROZero reports whether sym is a read-only global whose data contains all zeros.
  1922  func symIsROZero(sym Sym) bool {
  1923  	lsym := sym.(*obj.LSym)
  1924  	if lsym.Type != objabi.SRODATA || len(lsym.R) != 0 {
  1925  		return false
  1926  	}
  1927  	for _, b := range lsym.P {
  1928  		if b != 0 {
  1929  			return false
  1930  		}
  1931  	}
  1932  	return true
  1933  }
  1934  
  1935  // isFixed32 returns true if the int32 at offset off in symbol sym
  1936  // is known and constant.
  1937  func isFixed32(c *Config, sym Sym, off int64) bool {
  1938  	return isFixed(c, sym, off, 4)
  1939  }
  1940  
  1941  // isFixed returns true if the range [off,off+size] of the symbol sym
  1942  // is known and constant.
  1943  func isFixed(c *Config, sym Sym, off, size int64) bool {
  1944  	lsym := sym.(*obj.LSym)
  1945  	if lsym.Extra == nil {
  1946  		return false
  1947  	}
  1948  	if _, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1949  		if off == 2*c.PtrSize && size == 4 {
  1950  			return true // type hash field
  1951  		}
  1952  	}
  1953  	return false
  1954  }
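
// fixed32 returns the known constant int32 at offset off in symbol sym.
// It must only be called when isFixed32 reports true for the same sym and off.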
  1955  func fixed32(c *Config, sym Sym, off int64) int32 {
  1956  	lsym := sym.(*obj.LSym)
  1957  	if ti, ok := (*lsym.Extra).(*obj.TypeInfo); ok {
  1958  		if off == 2*c.PtrSize {
  1959  			return int32(types.TypeHash(ti.Type.(*types.Type)))
  1960  		}
  1961  	}
  1962  	base.Fatalf("fixed32 data not known for %s:%d", sym, off)
  1963  	return 0
  1964  }
  1965  
  1966  // isFixedSym returns true if the value stored in sym at the given offset
  1967  // is known and is the constant address of another symbol.
  1968  func isFixedSym(sym Sym, off int64) bool {
  1969  	lsym := sym.(*obj.LSym)
  1970  	switch {
  1971  	case lsym.Type == objabi.SRODATA:
  1972  		// itabs, dictionaries
  1973  	default:
  1974  		return false
  1975  	}
  1976  	for _, r := range lsym.R {
  1977  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off && r.Add == 0 {
  1978  			return true
  1979  		}
  1980  	}
  1981  	return false
  1982  }
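
// fixedSym returns the symbol whose constant address is stored at offset off
// in sym. It must only be called when isFixedSym reports true.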
  1983  func fixedSym(f *Func, sym Sym, off int64) Sym {
  1984  	lsym := sym.(*obj.LSym)
  1985  	for _, r := range lsym.R {
  1986  		if (r.Type == objabi.R_ADDR || r.Type == objabi.R_WEAKADDR) && int64(r.Off) == off {
  1987  			if strings.HasPrefix(r.Sym.Name, "type:") {
  1988  				// In case we're loading a type out of a dictionary, we need to record
  1989  				// that the containing function might put that type in an interface.
  1990  				// That information is currently recorded in relocations in the dictionary,
  1991  				// but if we perform this load at compile time then the dictionary
  1992  				// might be dead.
  1993  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  1994  			} else if strings.HasPrefix(r.Sym.Name, "go:itab") {
  1995  				// Same, but if we're using an itab we need to record that the
  1996  				// itab._type might be put in an interface.
  1997  				reflectdata.MarkTypeSymUsedInInterface(r.Sym, f.fe.Func().Linksym())
  1998  			}
  1999  			return r.Sym
  2000  		}
  2001  	}
  2002  	base.Fatalf("fixedSym data not known for %s:%d", sym, off)
  2003  	return nil
  2004  }
  2005  
  2006  // read8 reads one byte from the read-only global sym at offset off.
  2007  func read8(sym interface{}, off int64) uint8 {
  2008  	lsym := sym.(*obj.LSym)
  2009  	if off >= int64(len(lsym.P)) || off < 0 {
  2010  		// Invalid index into the global sym.
  2011  		// This can happen in dead code, so we don't want to panic.
  2012  		// Just return any value, it will eventually get ignored.
  2013  		// See issue 29215.
  2014  		return 0
  2015  	}
  2016  	return lsym.P[off]
  2017  }
  2018  
  2019  // read16 reads two bytes from the read-only global sym at offset off.
  2020  func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
  2021  	lsym := sym.(*obj.LSym)
  2022  	// lsym.P is written lazily.
  2023  	// Bytes requested after the end of lsym.P are 0.
  2024  	var src []byte
  2025  	if 0 <= off && off < int64(len(lsym.P)) {
  2026  		src = lsym.P[off:]
  2027  	}
  2028  	buf := make([]byte, 2)
  2029  	copy(buf, src)
  2030  	return byteorder.Uint16(buf)
  2031  }
  2032  
  2033  // read32 reads four bytes from the read-only global sym at offset off.
  2034  func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
  2035  	lsym := sym.(*obj.LSym)
  2036  	var src []byte
  2037  	if 0 <= off && off < int64(len(lsym.P)) {
  2038  		src = lsym.P[off:]
  2039  	}
  2040  	buf := make([]byte, 4)
  2041  	copy(buf, src)
  2042  	return byteorder.Uint32(buf)
  2043  }
  2044  
  2045  // read64 reads eight bytes from the read-only global sym at offset off.
  2046  func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
  2047  	lsym := sym.(*obj.LSym)
  2048  	var src []byte
  2049  	if 0 <= off && off < int64(len(lsym.P)) {
  2050  		src = lsym.P[off:]
  2051  	}
  2052  	buf := make([]byte, 8)
  2053  	copy(buf, src)
  2054  	return byteorder.Uint64(buf)
  2055  }
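
// Illustrative sketch, not used by any rewrite rule: the read helpers above
// operate on the raw bytes of a read-only symbol and treat bytes past the end
// of lsym.P as zero. The symbol contents and function name are made up here.
func exampleReadROSym() {
	lsym := &obj.LSym{P: []byte{0x01, 0x02, 0x03, 0x04}}
	fmt.Printf("%#x %#x %#x\n",
		read8(lsym, 0),                       // 0x1
		read16(lsym, 2, binary.LittleEndian), // 0x403
		read32(lsym, 2, binary.LittleEndian)) // also 0x403: the two missing bytes read as 0
}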
  2056  
  2057  // sequentialAddresses reports whether it can prove that x + n == y.
  2058  func sequentialAddresses(x, y *Value, n int64) bool {
  2059  	if x == y && n == 0 {
  2060  		return true
  2061  	}
  2062  	if x.Op == Op386ADDL && y.Op == Op386LEAL1 && y.AuxInt == n && y.Aux == nil &&
  2063  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2064  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2065  		return true
  2066  	}
  2067  	if x.Op == Op386LEAL1 && y.Op == Op386LEAL1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2068  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2069  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2070  		return true
  2071  	}
  2072  	if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
  2073  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2074  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2075  		return true
  2076  	}
  2077  	if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
  2078  		(x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
  2079  			x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
  2080  		return true
  2081  	}
  2082  	return false
  2083  }
  2084  
  2085  // flagConstant represents the result of a compile-time comparison.
  2086  // The sense of these flags does not necessarily represent the hardware's notion
  2087  // of a flags register - these are just a compile-time construct.
  2088  // We happen to match the semantics to those of arm/arm64.
  2089  // Note that these semantics differ from x86: the carry flag has the opposite
  2090  // sense on a subtraction!
  2091  //
  2092  //	On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
  2093  //	On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
  2094  //	 (because it does x + ^y + C).
  2095  //
  2096  // See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
  2097  type flagConstant uint8
  2098  
  2099  // N reports whether the result of an operation is negative (high bit set).
  2100  func (fc flagConstant) N() bool {
  2101  	return fc&1 != 0
  2102  }
  2103  
  2104  // Z reports whether the result of an operation is 0.
  2105  func (fc flagConstant) Z() bool {
  2106  	return fc&2 != 0
  2107  }
  2108  
  2109  // C reports whether an unsigned add overflowed (carry), or an
  2110  // unsigned subtract did not underflow (borrow).
  2111  func (fc flagConstant) C() bool {
  2112  	return fc&4 != 0
  2113  }
  2114  
  2115  // V reports whether a signed operation overflowed or underflowed.
  2116  func (fc flagConstant) V() bool {
  2117  	return fc&8 != 0
  2118  }
  2119  
  2120  func (fc flagConstant) eq() bool {
  2121  	return fc.Z()
  2122  }
  2123  func (fc flagConstant) ne() bool {
  2124  	return !fc.Z()
  2125  }
  2126  func (fc flagConstant) lt() bool {
  2127  	return fc.N() != fc.V()
  2128  }
  2129  func (fc flagConstant) le() bool {
  2130  	return fc.Z() || fc.lt()
  2131  }
  2132  func (fc flagConstant) gt() bool {
  2133  	return !fc.Z() && fc.ge()
  2134  }
  2135  func (fc flagConstant) ge() bool {
  2136  	return fc.N() == fc.V()
  2137  }
  2138  func (fc flagConstant) ult() bool {
  2139  	return !fc.C()
  2140  }
  2141  func (fc flagConstant) ule() bool {
  2142  	return fc.Z() || fc.ult()
  2143  }
  2144  func (fc flagConstant) ugt() bool {
  2145  	return !fc.Z() && fc.uge()
  2146  }
  2147  func (fc flagConstant) uge() bool {
  2148  	return fc.C()
  2149  }
  2150  
  2151  func (fc flagConstant) ltNoov() bool {
  2152  	return fc.lt() && !fc.V()
  2153  }
  2154  func (fc flagConstant) leNoov() bool {
  2155  	return fc.le() && !fc.V()
  2156  }
  2157  func (fc flagConstant) gtNoov() bool {
  2158  	return fc.gt() && !fc.V()
  2159  }
  2160  func (fc flagConstant) geNoov() bool {
  2161  	return fc.ge() && !fc.V()
  2162  }
  2163  
  2164  func (fc flagConstant) String() string {
  2165  	return fmt.Sprintf("N=%v,Z=%v,C=%v,V=%v", fc.N(), fc.Z(), fc.C(), fc.V())
  2166  }
  2167  
  2168  type flagConstantBuilder struct {
  2169  	N bool
  2170  	Z bool
  2171  	C bool
  2172  	V bool
  2173  }
  2174  
  2175  func (fcs flagConstantBuilder) encode() flagConstant {
  2176  	var fc flagConstant
  2177  	if fcs.N {
  2178  		fc |= 1
  2179  	}
  2180  	if fcs.Z {
  2181  		fc |= 2
  2182  	}
  2183  	if fcs.C {
  2184  		fc |= 4
  2185  	}
  2186  	if fcs.V {
  2187  		fc |= 8
  2188  	}
  2189  	return fc
  2190  }
  2191  
  2192  // Note: addFlags(x,y) != subFlags(x,-y) in some situations:
  2193  //  - the results of the C flag are different
  2194  //  - the results of the V flag when y==minint are different
  2195  
  2196  // addFlags64 returns the flags that would be set from computing x+y.
  2197  func addFlags64(x, y int64) flagConstant {
  2198  	var fcb flagConstantBuilder
  2199  	fcb.Z = x+y == 0
  2200  	fcb.N = x+y < 0
  2201  	fcb.C = uint64(x+y) < uint64(x)
  2202  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2203  	return fcb.encode()
  2204  }
  2205  
  2206  // subFlags64 returns the flags that would be set from computing x-y.
  2207  func subFlags64(x, y int64) flagConstant {
  2208  	var fcb flagConstantBuilder
  2209  	fcb.Z = x-y == 0
  2210  	fcb.N = x-y < 0
  2211  	fcb.C = uint64(y) <= uint64(x) // This code follows the arm carry flag model.
  2212  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2213  	return fcb.encode()
  2214  }
  2215  
  2216  // addFlags32 returns the flags that would be set from computing x+y.
  2217  func addFlags32(x, y int32) flagConstant {
  2218  	var fcb flagConstantBuilder
  2219  	fcb.Z = x+y == 0
  2220  	fcb.N = x+y < 0
  2221  	fcb.C = uint32(x+y) < uint32(x)
  2222  	fcb.V = x >= 0 && y >= 0 && x+y < 0 || x < 0 && y < 0 && x+y >= 0
  2223  	return fcb.encode()
  2224  }
  2225  
  2226  // subFlags32 returns the flags that would be set from computing x-y.
  2227  func subFlags32(x, y int32) flagConstant {
  2228  	var fcb flagConstantBuilder
  2229  	fcb.Z = x-y == 0
  2230  	fcb.N = x-y < 0
  2231  	fcb.C = uint32(y) <= uint32(x) // This code follows the arm carry flag model.
  2232  	fcb.V = x >= 0 && y < 0 && x-y < 0 || x < 0 && y >= 0 && x-y >= 0
  2233  	return fcb.encode()
  2234  }
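
// Illustrative sketch, not used by any rewrite rule: with the arm-style carry
// convention described above, C is set by a subtraction that does not borrow.
// The function name is arbitrary.
func exampleSubFlagsCarry() {
	fmt.Println(subFlags64(1, 1).uge()) // true: 1-1 does not borrow, so C is set
	fmt.Println(subFlags64(0, 1).ult()) // true: 0-1 borrows, so C is clear
}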
  2235  
  2236  // logicFlags64 returns flags set to the sign/zeroness of x.
  2237  // C and V are set to false.
  2238  func logicFlags64(x int64) flagConstant {
  2239  	var fcb flagConstantBuilder
  2240  	fcb.Z = x == 0
  2241  	fcb.N = x < 0
  2242  	return fcb.encode()
  2243  }
  2244  
  2245  // logicFlags32 returns flags set to the sign/zeroness of x.
  2246  // C and V are set to false.
  2247  func logicFlags32(x int32) flagConstant {
  2248  	var fcb flagConstantBuilder
  2249  	fcb.Z = x == 0
  2250  	fcb.N = x < 0
  2251  	return fcb.encode()
  2252  }
  2253  
  2254  func makeJumpTableSym(b *Block) *obj.LSym {
  2255  	s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.Func().LSym.Name, b.ID))
  2256  	// The jump table symbol is accessed only from the function symbol.
  2257  	s.Set(obj.AttrStatic, true)
  2258  	return s
  2259  }
  2260  
  2261  // canRotate reports whether the architecture supports
  2262  // rotates of integer registers with the given number of bits.
  2263  func canRotate(c *Config, bits int64) bool {
  2264  	if bits > c.PtrSize*8 {
  2265  		// Don't rewrite to rotates bigger than the machine word.
  2266  		return false
  2267  	}
  2268  	switch c.arch {
  2269  	case "386", "amd64", "arm64", "loong64", "riscv64":
  2270  		return true
  2271  	case "arm", "s390x", "ppc64", "ppc64le", "wasm":
  2272  		return bits >= 32
  2273  	default:
  2274  		return false
  2275  	}
  2276  }
  2277  
  2278  // isARM64bitcon reports whether a constant can be encoded into a logical instruction.
  2279  func isARM64bitcon(x uint64) bool {
  2280  	if x == 1<<64-1 || x == 0 {
  2281  		return false
  2282  	}
  2283  	// determine the period and sign-extend a unit to 64 bits
  2284  	switch {
  2285  	case x != x>>32|x<<32:
  2286  		// period is 64
  2287  		// nothing to do
  2288  	case x != x>>16|x<<48:
  2289  		// period is 32
  2290  		x = uint64(int64(int32(x)))
  2291  	case x != x>>8|x<<56:
  2292  		// period is 16
  2293  		x = uint64(int64(int16(x)))
  2294  	case x != x>>4|x<<60:
  2295  		// period is 8
  2296  		x = uint64(int64(int8(x)))
  2297  	default:
  2298  		// period is 4 or 2, always true
  2299  		// 0001, 0010, 0100, 1000 -- 0001 rotate
  2300  		// 0011, 0110, 1100, 1001 -- 0011 rotate
  2301  		// 0111, 1011, 1101, 1110 -- 0111 rotate
  2302  		// 0101, 1010             -- 01   rotate, repeat
  2303  		return true
  2304  	}
  2305  	return sequenceOfOnes(x) || sequenceOfOnes(^x)
  2306  }
  2307  
  2308  // sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
  2309  func sequenceOfOnes(x uint64) bool {
  2310  	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
  2311  	y += x
  2312  	return (y-1)&y == 0
  2313  }
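
// Illustrative sketch, not used by any rewrite rule: a repeating pattern such
// as 0x00ff00ff00ff00ff (period 16, one run of ones per unit) is encodable,
// while 0x1234 is not. The function name is arbitrary.
func exampleARM64Bitcon() {
	fmt.Println(isARM64bitcon(0x00ff00ff00ff00ff)) // true
	fmt.Println(isARM64bitcon(0x1234))             // false
}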
  2314  
  2315  // isARM64addcon reports whether x can be encoded as the immediate value in an ADD or SUB instruction.
  2316  func isARM64addcon(v int64) bool {
  2317  	/* uimm12 or uimm24? */
  2318  	if v < 0 {
  2319  		return false
  2320  	}
  2321  	if (v & 0xFFF) == 0 {
  2322  		v >>= 12
  2323  	}
  2324  	return v <= 0xFFF
  2325  }
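
// Illustrative sketch, not used by any rewrite rule: ADD/SUB immediates are a
// 12-bit unsigned value, optionally shifted left by 12. The function name is
// arbitrary.
func exampleARM64Addcon() {
	fmt.Println(isARM64addcon(0xfff))    // true: plain uimm12
	fmt.Println(isARM64addcon(0x123000)) // true: uimm12 shifted left by 12
	fmt.Println(isARM64addcon(0xfff001)) // false: needs more than 12 significant bits
}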
  2326  
  2327  // setPos sets the position of v to pos, then returns true.
  2328  // Useful for setting the result of a rewrite's position to
  2329  // something other than the default.
  2330  func setPos(v *Value, pos src.XPos) bool {
  2331  	v.Pos = pos
  2332  	return true
  2333  }
  2334  
  2335  // isNonNegative reports whether v is known to be greater or equal to zero.
  2336  // Note that this is pretty simplistic. The prove pass generates more detailed
  2337  // nonnegative information about values.
  2338  func isNonNegative(v *Value) bool {
  2339  	if !v.Type.IsInteger() {
  2340  		v.Fatalf("isNonNegative bad type: %v", v.Type)
  2341  	}
  2342  	// TODO: return true if !v.Type.IsSigned()
  2343  	// SSA isn't type-safe enough to do that now (issue 37753).
  2344  	// The checks below depend only on the pattern of bits.
  2345  
  2346  	switch v.Op {
  2347  	case OpConst64:
  2348  		return v.AuxInt >= 0
  2349  
  2350  	case OpConst32:
  2351  		return int32(v.AuxInt) >= 0
  2352  
  2353  	case OpConst16:
  2354  		return int16(v.AuxInt) >= 0
  2355  
  2356  	case OpConst8:
  2357  		return int8(v.AuxInt) >= 0
  2358  
  2359  	case OpStringLen, OpSliceLen, OpSliceCap,
  2360  		OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64,
  2361  		OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16,
  2362  		OpCtz64, OpCtz32, OpCtz16, OpCtz8,
  2363  		OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero,
  2364  		OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8:
  2365  		return true
  2366  
  2367  	case OpRsh64Ux64, OpRsh32Ux64:
  2368  		by := v.Args[1]
  2369  		return by.Op == OpConst64 && by.AuxInt > 0
  2370  
  2371  	case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32,
  2372  		OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32:
  2373  		return isNonNegative(v.Args[0])
  2374  
  2375  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2376  		return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1])
  2377  
  2378  	case OpMod64, OpMod32, OpMod16, OpMod8,
  2379  		OpDiv64, OpDiv32, OpDiv16, OpDiv8,
  2380  		OpOr64, OpOr32, OpOr16, OpOr8,
  2381  		OpXor64, OpXor32, OpXor16, OpXor8:
  2382  		return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1])
  2383  
  2384  		// We could handle OpPhi here, but the improvements from doing
  2385  		// so are very minor, and it is neither simple nor cheap.
  2386  	}
  2387  	return false
  2388  }
  2389  
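// rewriteStructLoad replaces a load of a struct value with loads of each of
// its fields and rebuilds the result with OpStructMake.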
  2390  func rewriteStructLoad(v *Value) *Value {
  2391  	b := v.Block
  2392  	ptr := v.Args[0]
  2393  	mem := v.Args[1]
  2394  
  2395  	t := v.Type
  2396  	args := make([]*Value, t.NumFields())
  2397  	for i := range args {
  2398  		ft := t.FieldType(i)
  2399  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), ptr)
  2400  		args[i] = b.NewValue2(v.Pos, OpLoad, ft, addr, mem)
  2401  	}
  2402  
  2403  	v.reset(OpStructMake)
  2404  	v.AddArgs(args...)
  2405  	return v
  2406  }
  2407  
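// rewriteStructStore replaces a store of an OpStructMake value with a store
// of each of its fields and returns the final memory value.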
  2408  func rewriteStructStore(v *Value) *Value {
  2409  	b := v.Block
  2410  	dst := v.Args[0]
  2411  	x := v.Args[1]
  2412  	if x.Op != OpStructMake {
  2413  		base.Fatalf("invalid struct store: %v", x)
  2414  	}
  2415  	mem := v.Args[2]
  2416  
  2417  	t := x.Type
  2418  	for i, arg := range x.Args {
  2419  		ft := t.FieldType(i)
  2420  
  2421  		addr := b.NewValue1I(v.Pos, OpOffPtr, ft.PtrTo(), t.FieldOff(i), dst)
  2422  		mem = b.NewValue3A(v.Pos, OpStore, types.TypeMem, typeToAux(ft), addr, arg, mem)
  2423  	}
  2424  
  2425  	return mem
  2426  }
  2427  