Source file src/cmd/compile/internal/ssa/prove.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/src"
    10  	"cmp"
    11  	"fmt"
    12  	"math"
    13  	"math/bits"
    14  	"slices"
    15  	"strings"
    16  )
    17  
    18  type branch int
    19  
    20  const (
    21  	unknown branch = iota
    22  	positive
    23  	negative
    24  	// The outedges from a jump table are jumpTable0,
    25  	// jumpTable0+1, jumpTable0+2, etc. There could be an
    26  	// arbitrary number so we can't list them all here.
    27  	jumpTable0
    28  )
    29  
    30  func (b branch) String() string {
    31  	switch b {
    32  	case unknown:
    33  		return "unk"
    34  	case positive:
    35  		return "pos"
    36  	case negative:
    37  		return "neg"
    38  	default:
    39  		return fmt.Sprintf("jmp%d", b-jumpTable0)
    40  	}
    41  }
    42  
    43  // relation represents the set of possible relations between
    44  // pairs of variables (v, w). Without a priori knowledge the
    45  // mask is lt | eq | gt meaning v can be less than, equal to or
    46  // greater than w. When the execution path branches on the condition
    47  // `v op w` the set of relations is updated to exclude any
    48  // relation not possible due to `v op w` being true (or false).
    49  //
    50  // E.g.
    51  //
    52  //	r := relation(...)
    53  //
    54  //	if v < w {
    55  //	  newR := r & lt
    56  //	}
    57  //	if v >= w {
    58  //	  newR := r & (eq|gt)
    59  //	}
    60  //	if v != w {
    61  //	  newR := r & (lt|gt)
    62  //	}
    63  type relation uint
    64  
    65  const (
    66  	lt relation = 1 << iota
    67  	eq
    68  	gt
    69  )
    70  
    71  var relationStrings = [...]string{
    72  	0: "none", lt: "<", eq: "==", lt | eq: "<=",
    73  	gt: ">", gt | lt: "!=", gt | eq: ">=", gt | eq | lt: "any",
    74  }
    75  
    76  func (r relation) String() string {
    77  	if r < relation(len(relationStrings)) {
    78  		return relationStrings[r]
    79  	}
    80  	return fmt.Sprintf("relation(%d)", uint(r))
    81  }
    82  
    83  // domain represents the domain of a variable pair in which a set
    84  // of relations is known. For example, relations learned for unsigned
    85  // pairs cannot be transferred to signed pairs because the same bit
    86  // representation can mean something else.
    87  type domain uint
    88  
    89  const (
    90  	signed domain = 1 << iota
    91  	unsigned
    92  	pointer
    93  	boolean
    94  )
    95  
    96  var domainStrings = [...]string{
    97  	"signed", "unsigned", "pointer", "boolean",
    98  }
    99  
   100  func (d domain) String() string {
   101  	s := ""
   102  	for i, ds := range domainStrings {
   103  		if d&(1<<uint(i)) != 0 {
   104  			if len(s) != 0 {
   105  				s += "|"
   106  			}
   107  			s += ds
   108  			d &^= 1 << uint(i)
   109  		}
   110  	}
   111  	if d != 0 {
   112  		if len(s) != 0 {
   113  			s += "|"
   114  		}
   115  		s += fmt.Sprintf("0x%x", uint(d))
   116  	}
   117  	return s
   118  }
   119  
   120  // a limit records known upper and lower bounds for a value.
   121  //
   122  // If we have min>max or umin>umax, then this limit is
   123  // called "unsatisfiable". When we encounter such a limit, we
   124  // know that any code for which that limit applies is unreachable.
   125  // We don't particularly care how unsatisfiable limits propagate,
   126  // including becoming satisfiable, because any optimization
   127  // decisions based on those limits only apply to unreachable code.
   128  type limit struct {
   129  	min, max   int64  // min <= value <= max, signed
   130  	umin, umax uint64 // umin <= value <= umax, unsigned
   131  	// For booleans, we use 0==false, 1==true for both ranges
   132  	// For pointers, we use 0,0,0,0 for nil and minInt64,maxInt64,1,maxUint64 for nonnil
   133  }
   134  
   135  func (l limit) String() string {
   136  	return fmt.Sprintf("sm,SM=%d,%d um,UM=%d,%d", l.min, l.max, l.umin, l.umax)
   137  }
   138  
   139  func (l limit) intersect(l2 limit) limit {
   140  	l.min = max(l.min, l2.min)
   141  	l.umin = max(l.umin, l2.umin)
   142  	l.max = min(l.max, l2.max)
   143  	l.umax = min(l.umax, l2.umax)
   144  	return l
   145  }
   146  
   147  func (l limit) signedMin(m int64) limit {
   148  	l.min = max(l.min, m)
   149  	return l
   150  }
   151  
   152  func (l limit) signedMinMax(minimum, maximum int64) limit {
   153  	l.min = max(l.min, minimum)
   154  	l.max = min(l.max, maximum)
   155  	return l
   156  }
   157  
   158  func (l limit) unsignedMin(m uint64) limit {
   159  	l.umin = max(l.umin, m)
   160  	return l
   161  }
   162  func (l limit) unsignedMax(m uint64) limit {
   163  	l.umax = min(l.umax, m)
   164  	return l
   165  }
   166  func (l limit) unsignedMinMax(minimum, maximum uint64) limit {
   167  	l.umin = max(l.umin, minimum)
   168  	l.umax = min(l.umax, maximum)
   169  	return l
   170  }
   171  
   172  func (l limit) nonzero() bool {
   173  	return l.min > 0 || l.umin > 0 || l.max < 0
   174  }
   175  func (l limit) maybeZero() bool {
   176  	return !l.nonzero()
   177  }
   178  func (l limit) nonnegative() bool {
   179  	return l.min >= 0
   180  }
   181  func (l limit) unsat() bool {
   182  	return l.min > l.max || l.umin > l.umax
   183  }
   184  
   185  // If x and y can add without overflow or underflow
   186  // (using b bits), safeAdd returns x+y, true.
   187  // Otherwise, returns 0, false.
   188  func safeAdd(x, y int64, b uint) (int64, bool) {
   189  	s := x + y
   190  	if x >= 0 && y >= 0 && s < 0 {
   191  		return 0, false // 64-bit overflow
   192  	}
   193  	if x < 0 && y < 0 && s >= 0 {
   194  		return 0, false // 64-bit underflow
   195  	}
   196  	if !fitsInBits(s, b) {
   197  		return 0, false
   198  	}
   199  	return s, true
   200  }
   201  
   202  // same as safeAdd for unsigned arithmetic.
   203  func safeAddU(x, y uint64, b uint) (uint64, bool) {
   204  	s := x + y
   205  	if s < x || s < y {
   206  		return 0, false // 64-bit overflow
   207  	}
   208  	if !fitsInBitsU(s, b) {
   209  		return 0, false
   210  	}
   211  	return s, true
   212  }
   213  
   214  // same as safeAdd but for subtraction.
   215  func safeSub(x, y int64, b uint) (int64, bool) {
   216  	if y == math.MinInt64 {
   217  		if x == math.MaxInt64 {
   218  			return 0, false // 64-bit overflow
   219  		}
   220  		x++
   221  		y++
   222  	}
   223  	return safeAdd(x, -y, b)
   224  }
   225  
   226  // same as safeAddU but for subtraction.
   227  func safeSubU(x, y uint64, b uint) (uint64, bool) {
   228  	if x < y {
   229  		return 0, false // 64-bit underflow
   230  	}
   231  	s := x - y
   232  	if !fitsInBitsU(s, b) {
   233  		return 0, false
   234  	}
   235  	return s, true
   236  }
   237  
   238  // fitsInBits reports whether x fits in b bits (signed).
   239  func fitsInBits(x int64, b uint) bool {
   240  	if b == 64 {
   241  		return true
   242  	}
   243  	m := int64(-1) << (b - 1)
   244  	M := -m - 1
   245  	return x >= m && x <= M
   246  }
   247  
   248  // fitsInBitsU reports whether x fits in b bits (unsigned).
   249  func fitsInBitsU(x uint64, b uint) bool {
   250  	return x>>b == 0
   251  }
   252  
   253  func noLimitForBitsize(bitsize uint) limit {
   254  	return limit{min: -(1 << (bitsize - 1)), max: 1<<(bitsize-1) - 1, umin: 0, umax: 1<<bitsize - 1}
   255  }
   256  
   257  func convertIntWithBitsize[Target uint64 | int64, Source uint64 | int64](x Source, bitsize uint) Target {
   258  	switch bitsize {
   259  	case 64:
   260  		return Target(x)
   261  	case 32:
   262  		return Target(int32(x))
   263  	case 16:
   264  		return Target(int16(x))
   265  	case 8:
   266  		return Target(int8(x))
   267  	default:
   268  		panic("unreachable")
   269  	}
   270  }
   271  
   272  // add returns the limit obtained by adding a value with limit l
   273  // to a value with limit l2. The result must fit in b bits.
   274  func (l limit) add(l2 limit, b uint) limit {
   275  	var isLConst, isL2Const bool
   276  	var lConst, l2Const uint64
   277  	if l.min == l.max {
   278  		isLConst = true
   279  		lConst = convertIntWithBitsize[uint64](l.min, b)
   280  	} else if l.umin == l.umax {
   281  		isLConst = true
   282  		lConst = l.umin
   283  	}
   284  	if l2.min == l2.max {
   285  		isL2Const = true
   286  		l2Const = convertIntWithBitsize[uint64](l2.min, b)
   287  	} else if l2.umin == l2.umax {
   288  		isL2Const = true
   289  		l2Const = l2.umin
   290  	}
   291  	if isLConst && isL2Const {
   292  		r := lConst + l2Const
   293  		r &= (uint64(1) << b) - 1
   294  		int64r := convertIntWithBitsize[int64](r, b)
   295  		return limit{min: int64r, max: int64r, umin: r, umax: r}
   296  	}
   297  
   298  	r := noLimit
   299  	min, minOk := safeAdd(l.min, l2.min, b)
   300  	max, maxOk := safeAdd(l.max, l2.max, b)
   301  	if minOk && maxOk {
   302  		r.min = min
   303  		r.max = max
   304  	}
   305  	umin, uminOk := safeAddU(l.umin, l2.umin, b)
   306  	umax, umaxOk := safeAddU(l.umax, l2.umax, b)
   307  	if uminOk && umaxOk {
   308  		r.umin = umin
   309  		r.umax = umax
   310  	}
   311  	return r
   312  }
   313  
   314  // same as add but for subtraction.
   315  func (l limit) sub(l2 limit, b uint) limit {
   316  	r := noLimit
   317  	min, minOk := safeSub(l.min, l2.max, b)
   318  	max, maxOk := safeSub(l.max, l2.min, b)
   319  	if minOk && maxOk {
   320  		r.min = min
   321  		r.max = max
   322  	}
   323  	umin, uminOk := safeSubU(l.umin, l2.umax, b)
   324  	umax, umaxOk := safeSubU(l.umax, l2.umin, b)
   325  	if uminOk && umaxOk {
   326  		r.umin = umin
   327  		r.umax = umax
   328  	}
   329  	return r
   330  }
   331  
   332  // same as add but for multiplication.
   333  func (l limit) mul(l2 limit, b uint) limit {
   334  	r := noLimit
   335  	umaxhi, umaxlo := bits.Mul64(l.umax, l2.umax)
   336  	if umaxhi == 0 && fitsInBitsU(umaxlo, b) {
   337  		r.umax = umaxlo
   338  		r.umin = l.umin * l2.umin
   339  		// Note: if the code containing this multiply is
   340  		// unreachable, then we may have umin>umax, and this
   341  		// multiply may overflow.  But that's ok for
   342  		// unreachable code. If this code is reachable, we
   343  		// know umin<=umax, so this multiply will not overflow
   344  		// because the max multiply didn't.
   345  	}
   346  	// Signed is harder, so don't bother. The only useful
   347  	// case is when we know both multiplicands are nonnegative,
   348  	// but that case is handled above because we would have then
   349  	// previously propagated signed info to the unsigned domain,
   350  	// and will propagate it back after the multiply.
   351  	return r
   352  }
   353  
   354  // Similar to add, but compute 1 << l if it fits without overflow in b bits.
   355  func (l limit) exp2(b uint) limit {
   356  	r := noLimit
   357  	if l.umax < uint64(b) {
   358  		r.umin = 1 << l.umin
   359  		r.umax = 1 << l.umax
   360  		// Same as above in mul, signed<->unsigned propagation
   361  		// will handle the signed case for us.
   362  	}
   363  	return r
   364  }
   365  
   366  // Similar to add, but computes the complement of the limit for bitsize b.
   367  func (l limit) com(b uint) limit {
   368  	switch b {
   369  	case 64:
   370  		return limit{
   371  			min:  ^l.max,
   372  			max:  ^l.min,
   373  			umin: ^l.umax,
   374  			umax: ^l.umin,
   375  		}
   376  	case 32:
   377  		return limit{
   378  			min:  int64(^int32(l.max)),
   379  			max:  int64(^int32(l.min)),
   380  			umin: uint64(^uint32(l.umax)),
   381  			umax: uint64(^uint32(l.umin)),
   382  		}
   383  	case 16:
   384  		return limit{
   385  			min:  int64(^int16(l.max)),
   386  			max:  int64(^int16(l.min)),
   387  			umin: uint64(^uint16(l.umax)),
   388  			umax: uint64(^uint16(l.umin)),
   389  		}
   390  	case 8:
   391  		return limit{
   392  			min:  int64(^int8(l.max)),
   393  			max:  int64(^int8(l.min)),
   394  			umin: uint64(^uint8(l.umax)),
   395  			umax: uint64(^uint8(l.umin)),
   396  		}
   397  	default:
   398  		panic("unreachable")
   399  	}
   400  }
   401  
   402  // Similar to add, but computes the negation of the limit for bitsize b.
   403  func (l limit) neg(b uint) limit {
   404  	return l.com(b).add(limit{min: 1, max: 1, umin: 1, umax: 1}, b)
   405  }
   406  
   407  var noLimit = limit{math.MinInt64, math.MaxInt64, 0, math.MaxUint64}
   408  
   409  // a limitFact is a limit known for a particular value.
   410  type limitFact struct {
   411  	vid   ID
   412  	limit limit
   413  }
   414  
   415  // An ordering encodes facts like v < w.
   416  type ordering struct {
   417  	next *ordering // linked list of all known orderings for v.
   418  	// Note: v is implicit here, determined by which linked list it is in.
   419  	w *Value
   420  	d domain
   421  	r relation // one of ==,!=,<,<=,>,>=
   422  	// if d is boolean or pointer, r can only be ==, !=
   423  }
   424  
   425  // factsTable keeps track of relations between pairs of values.
   426  //
   427  // The fact table logic is sound, but incomplete. Outside of a few
   428  // special cases, it performs no deduction or arithmetic. While there
   429  // are known decision procedures for this, the ad hoc approach taken
   430  // by the facts table is effective for real code while remaining very
   431  // efficient.
   432  type factsTable struct {
   433  	// unsat is true if facts contains a contradiction.
   434  	//
   435  	// Note that the factsTable logic is incomplete, so if unsat
   436  	// is false, the assertions in factsTable could be satisfiable
   437  	// *or* unsatisfiable.
   438  	unsat      bool // true if facts contains a contradiction
   439  	unsatDepth int  // number of unsat checkpoints
   440  
   441  	// order* is a couple of partial order sets that record information
   442  	// about relations between SSA values in the signed and unsigned
   443  	// domain.
   444  	orderS *poset
   445  	orderU *poset
   446  
   447  	// orderings contains a list of known orderings between values.
   448  	// These lists are indexed by v.ID.
   449  	// We do not record transitive orderings. Only explicitly learned
   450  	// orderings are recorded. Transitive orderings can be obtained
   451  	// by walking along the individual orderings.
   452  	orderings map[ID]*ordering
   453  	// stack of IDs which have had an entry added in orderings.
   454  	// In addition, ID==0 are checkpoint markers.
   455  	orderingsStack []ID
   456  	orderingCache  *ordering // unused ordering records
   457  
   458  	// known lower and upper constant bounds on individual values.
   459  	limits       []limit     // indexed by value ID
   460  	limitStack   []limitFact // previous entries
   461  	recurseCheck []bool      // recursion detector for limit propagation
   462  
   463  	// For each slice s, a map from s to a len(s)/cap(s) value (if any)
   464  	// TODO: check if there are cases that matter where we have
   465  	// more than one len(s) for a slice. We could keep a list if necessary.
   466  	lens map[ID]*Value
   467  	caps map[ID]*Value
   468  
   469  	// reusedTopoSortScoresTable recycle allocations for topo-sort
   470  	reusedTopoSortScoresTable []uint
   471  }
   472  
   473  // checkpointBound is an invalid value used for checkpointing
   474  // and restoring factsTable.
   475  var checkpointBound = limitFact{}
   476  
   477  func newFactsTable(f *Func) *factsTable {
   478  	ft := &factsTable{}
   479  	ft.orderS = f.newPoset()
   480  	ft.orderU = f.newPoset()
   481  	ft.orderS.SetUnsigned(false)
   482  	ft.orderU.SetUnsigned(true)
   483  	ft.orderings = make(map[ID]*ordering)
   484  	ft.limits = f.Cache.allocLimitSlice(f.NumValues())
   485  	for _, b := range f.Blocks {
   486  		for _, v := range b.Values {
   487  			ft.limits[v.ID] = initLimit(v)
   488  		}
   489  	}
   490  	ft.limitStack = make([]limitFact, 4)
   491  	ft.recurseCheck = f.Cache.allocBoolSlice(f.NumValues())
   492  	return ft
   493  }
   494  
   495  // initLimitForNewValue initializes the limits for newly created values,
   496  // possibly needing to expand the limits slice. Currently used by
   497  // simplifyBlock when certain provably constant results are folded.
   498  func (ft *factsTable) initLimitForNewValue(v *Value) {
   499  	if int(v.ID) >= len(ft.limits) {
   500  		f := v.Block.Func
   501  		n := f.NumValues()
   502  		if cap(ft.limits) >= n {
   503  			ft.limits = ft.limits[:n]
   504  		} else {
   505  			old := ft.limits
   506  			ft.limits = f.Cache.allocLimitSlice(n)
   507  			copy(ft.limits, old)
   508  			f.Cache.freeLimitSlice(old)
   509  		}
   510  	}
   511  	ft.limits[v.ID] = initLimit(v)
   512  }
   513  
   514  // signedMin records the fact that we know v is at least
   515  // min in the signed domain.
   516  func (ft *factsTable) signedMin(v *Value, min int64) {
   517  	ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
   518  }
   519  
   520  // signedMax records the fact that we know v is at most
   521  // max in the signed domain.
   522  func (ft *factsTable) signedMax(v *Value, max int64) {
   523  	ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
   524  }
   525  func (ft *factsTable) signedMinMax(v *Value, min, max int64) {
   526  	ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
   527  }
   528  
   529  // setNonNegative records the fact that v is known to be non-negative.
   530  func (ft *factsTable) setNonNegative(v *Value) {
   531  	ft.signedMin(v, 0)
   532  }
   533  
   534  // unsignedMin records the fact that we know v is at least
   535  // min in the unsigned domain.
   536  func (ft *factsTable) unsignedMin(v *Value, min uint64) {
   537  	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
   538  }
   539  
   540  // unsignedMax records the fact that we know v is at most
   541  // max in the unsigned domain.
   542  func (ft *factsTable) unsignedMax(v *Value, max uint64) {
   543  	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
   544  }
   545  func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) {
   546  	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
   547  }
   548  
   549  func (ft *factsTable) booleanFalse(v *Value) {
   550  	ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   551  }
   552  func (ft *factsTable) booleanTrue(v *Value) {
   553  	ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
   554  }
   555  func (ft *factsTable) pointerNil(v *Value) {
   556  	ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   557  }
   558  func (ft *factsTable) pointerNonNil(v *Value) {
   559  	l := noLimit
   560  	l.umin = 1
   561  	ft.newLimit(v, l)
   562  }
   563  
   564  // newLimit adds new limiting information for v.
   565  func (ft *factsTable) newLimit(v *Value, newLim limit) {
   566  	oldLim := ft.limits[v.ID]
   567  
   568  	// Merge old and new information.
   569  	lim := oldLim.intersect(newLim)
   570  
   571  	// signed <-> unsigned propagation
   572  	if lim.min >= 0 {
   573  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
   574  	}
   575  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
   576  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
   577  	}
   578  
   579  	if lim == oldLim {
   580  		return // nothing new to record
   581  	}
   582  
   583  	if lim.unsat() {
   584  		ft.unsat = true
   585  		return
   586  	}
   587  
   588  	// Check for recursion. This normally happens because in unsatisfiable
   589  	// cases we have a < b < a, and every update to a's limits returns
   590  	// here again with the limit increased by 2.
   591  	// Normally this is caught early by the orderS/orderU posets, but in
   592  	// cases where the comparisons jump between signed and unsigned domains,
   593  	// the posets will not notice.
   594  	if ft.recurseCheck[v.ID] {
   595  		// This should only happen for unsatisfiable cases. TODO: check
   596  		return
   597  	}
   598  	ft.recurseCheck[v.ID] = true
   599  	defer func() {
   600  		ft.recurseCheck[v.ID] = false
   601  	}()
   602  
   603  	// Record undo information.
   604  	ft.limitStack = append(ft.limitStack, limitFact{v.ID, oldLim})
   605  	// Record new information.
   606  	ft.limits[v.ID] = lim
   607  	if v.Block.Func.pass.debug > 2 {
   608  		// TODO: pos is probably wrong. This is the position where v is defined,
   609  		// not the position where we learned the fact about it (which was
   610  		// probably some subsequent compare+branch).
   611  		v.Block.Func.Warnl(v.Pos, "new limit %s %s unsat=%v", v, lim.String(), ft.unsat)
   612  	}
   613  
   614  	// Propagate this new constant range to other values
   615  	// that we know are ordered with respect to this one.
   616  	// Note overflow/underflow in the arithmetic below is ok,
   617  	// it will just lead to imprecision (undetected unsatisfiability).
   618  	for o := ft.orderings[v.ID]; o != nil; o = o.next {
   619  		switch o.d {
   620  		case signed:
   621  			switch o.r {
   622  			case eq: // v == w
   623  				ft.signedMinMax(o.w, lim.min, lim.max)
   624  			case lt | eq: // v <= w
   625  				ft.signedMin(o.w, lim.min)
   626  			case lt: // v < w
   627  				ft.signedMin(o.w, lim.min+1)
   628  			case gt | eq: // v >= w
   629  				ft.signedMax(o.w, lim.max)
   630  			case gt: // v > w
   631  				ft.signedMax(o.w, lim.max-1)
   632  			case lt | gt: // v != w
   633  				if lim.min == lim.max { // v is a constant
   634  					c := lim.min
   635  					if ft.limits[o.w.ID].min == c {
   636  						ft.signedMin(o.w, c+1)
   637  					}
   638  					if ft.limits[o.w.ID].max == c {
   639  						ft.signedMax(o.w, c-1)
   640  					}
   641  				}
   642  			}
   643  		case unsigned:
   644  			switch o.r {
   645  			case eq: // v == w
   646  				ft.unsignedMinMax(o.w, lim.umin, lim.umax)
   647  			case lt | eq: // v <= w
   648  				ft.unsignedMin(o.w, lim.umin)
   649  			case lt: // v < w
   650  				ft.unsignedMin(o.w, lim.umin+1)
   651  			case gt | eq: // v >= w
   652  				ft.unsignedMax(o.w, lim.umax)
   653  			case gt: // v > w
   654  				ft.unsignedMax(o.w, lim.umax-1)
   655  			case lt | gt: // v != w
   656  				if lim.umin == lim.umax { // v is a constant
   657  					c := lim.umin
   658  					if ft.limits[o.w.ID].umin == c {
   659  						ft.unsignedMin(o.w, c+1)
   660  					}
   661  					if ft.limits[o.w.ID].umax == c {
   662  						ft.unsignedMax(o.w, c-1)
   663  					}
   664  				}
   665  			}
   666  		case boolean:
   667  			switch o.r {
   668  			case eq:
   669  				if lim.min == 0 && lim.max == 0 { // constant false
   670  					ft.booleanFalse(o.w)
   671  				}
   672  				if lim.min == 1 && lim.max == 1 { // constant true
   673  					ft.booleanTrue(o.w)
   674  				}
   675  			case lt | gt:
   676  				if lim.min == 0 && lim.max == 0 { // constant false
   677  					ft.booleanTrue(o.w)
   678  				}
   679  				if lim.min == 1 && lim.max == 1 { // constant true
   680  					ft.booleanFalse(o.w)
   681  				}
   682  			}
   683  		case pointer:
   684  			switch o.r {
   685  			case eq:
   686  				if lim.umax == 0 { // nil
   687  					ft.pointerNil(o.w)
   688  				}
   689  				if lim.umin > 0 { // non-nil
   690  					ft.pointerNonNil(o.w)
   691  				}
   692  			case lt | gt:
   693  				if lim.umax == 0 { // nil
   694  					ft.pointerNonNil(o.w)
   695  				}
   696  				// note: not equal to non-nil doesn't tell us anything.
   697  			}
   698  		}
   699  	}
   700  
   701  	// If this is new known constant for a boolean value,
   702  	// extract relation between its args. For example, if
   703  	// We learn v is false, and v is defined as a<b, then we learn a>=b.
   704  	if v.Type.IsBoolean() {
   705  		// If we reach here, it is because we have a more restrictive
   706  		// value for v than the default. The only two such values
   707  		// are constant true or constant false.
   708  		if lim.min != lim.max {
   709  			v.Block.Func.Fatalf("boolean not constant %v", v)
   710  		}
   711  		isTrue := lim.min == 1
   712  		if dr, ok := domainRelationTable[v.Op]; ok && v.Op != OpIsInBounds && v.Op != OpIsSliceInBounds {
   713  			d := dr.d
   714  			r := dr.r
   715  			if d == signed && ft.isNonNegative(v.Args[0]) && ft.isNonNegative(v.Args[1]) {
   716  				d |= unsigned
   717  			}
   718  			if !isTrue {
   719  				r ^= lt | gt | eq
   720  			}
   721  			// TODO: v.Block is wrong?
   722  			addRestrictions(v.Block, ft, d, v.Args[0], v.Args[1], r)
   723  		}
   724  		switch v.Op {
   725  		case OpIsNonNil:
   726  			if isTrue {
   727  				ft.pointerNonNil(v.Args[0])
   728  			} else {
   729  				ft.pointerNil(v.Args[0])
   730  			}
   731  		case OpIsInBounds, OpIsSliceInBounds:
   732  			// 0 <= a0 < a1 (or 0 <= a0 <= a1)
   733  			r := lt
   734  			if v.Op == OpIsSliceInBounds {
   735  				r |= eq
   736  			}
   737  			if isTrue {
   738  				// On the positive branch, we learn:
   739  				//   signed: 0 <= a0 < a1 (or 0 <= a0 <= a1)
   740  				//   unsigned:    a0 < a1 (or a0 <= a1)
   741  				ft.setNonNegative(v.Args[0])
   742  				ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   743  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   744  			} else {
   745  				// On the negative branch, we learn (0 > a0 ||
   746  				// a0 >= a1). In the unsigned domain, this is
   747  				// simply a0 >= a1 (which is the reverse of the
   748  				// positive branch, so nothing surprising).
   749  				// But in the signed domain, we can't express the ||
   750  				// condition, so check if a0 is non-negative instead,
   751  				// to be able to learn something.
   752  				r ^= lt | gt | eq // >= (index) or > (slice)
   753  				if ft.isNonNegative(v.Args[0]) {
   754  					ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   755  				}
   756  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   757  				// TODO: v.Block is wrong here
   758  			}
   759  		}
   760  	}
   761  }
   762  
   763  func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) {
   764  	o := ft.orderingCache
   765  	if o == nil {
   766  		o = &ordering{}
   767  	} else {
   768  		ft.orderingCache = o.next
   769  	}
   770  	o.w = w
   771  	o.d = d
   772  	o.r = r
   773  	o.next = ft.orderings[v.ID]
   774  	ft.orderings[v.ID] = o
   775  	ft.orderingsStack = append(ft.orderingsStack, v.ID)
   776  }
   777  
   778  // update updates the set of relations between v and w in domain d
   779  // restricting it to r.
   780  func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) {
   781  	if parent.Func.pass.debug > 2 {
   782  		parent.Func.Warnl(parent.Pos, "parent=%s, update %s %s %s", parent, v, w, r)
   783  	}
   784  	// No need to do anything else if we already found unsat.
   785  	if ft.unsat {
   786  		return
   787  	}
   788  
   789  	// Self-fact. It's wasteful to register it into the facts
   790  	// table, so just note whether it's satisfiable
   791  	if v == w {
   792  		if r&eq == 0 {
   793  			ft.unsat = true
   794  		}
   795  		return
   796  	}
   797  
   798  	if d == signed || d == unsigned {
   799  		var ok bool
   800  		order := ft.orderS
   801  		if d == unsigned {
   802  			order = ft.orderU
   803  		}
   804  		switch r {
   805  		case lt:
   806  			ok = order.SetOrder(v, w)
   807  		case gt:
   808  			ok = order.SetOrder(w, v)
   809  		case lt | eq:
   810  			ok = order.SetOrderOrEqual(v, w)
   811  		case gt | eq:
   812  			ok = order.SetOrderOrEqual(w, v)
   813  		case eq:
   814  			ok = order.SetEqual(v, w)
   815  		case lt | gt:
   816  			ok = order.SetNonEqual(v, w)
   817  		default:
   818  			panic("unknown relation")
   819  		}
   820  		ft.addOrdering(v, w, d, r)
   821  		ft.addOrdering(w, v, d, reverseBits[r])
   822  
   823  		if !ok {
   824  			if parent.Func.pass.debug > 2 {
   825  				parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r)
   826  			}
   827  			ft.unsat = true
   828  			return
   829  		}
   830  	}
   831  	if d == boolean || d == pointer {
   832  		for o := ft.orderings[v.ID]; o != nil; o = o.next {
   833  			if o.d == d && o.w == w {
   834  				// We already know a relationship between v and w.
   835  				// Either it is a duplicate, or it is a contradiction,
   836  				// as we only allow eq and lt|gt for these domains,
   837  				if o.r != r {
   838  					ft.unsat = true
   839  				}
   840  				return
   841  			}
   842  		}
   843  		// TODO: this does not do transitive equality.
   844  		// We could use a poset like above, but somewhat degenerate (==,!= only).
   845  		ft.addOrdering(v, w, d, r)
   846  		ft.addOrdering(w, v, d, r) // note: reverseBits unnecessary for eq and lt|gt.
   847  	}
   848  
   849  	// Extract new constant limits based on the comparison.
   850  	vLimit := ft.limits[v.ID]
   851  	wLimit := ft.limits[w.ID]
   852  	// Note: all the +1/-1 below could overflow/underflow. Either will
   853  	// still generate correct results, it will just lead to imprecision.
   854  	// In fact if there is overflow/underflow, the corresponding
   855  	// code is unreachable because the known range is outside the range
   856  	// of the value's type.
   857  	switch d {
   858  	case signed:
   859  		switch r {
   860  		case eq: // v == w
   861  			ft.signedMinMax(v, wLimit.min, wLimit.max)
   862  			ft.signedMinMax(w, vLimit.min, vLimit.max)
   863  		case lt: // v < w
   864  			ft.signedMax(v, wLimit.max-1)
   865  			ft.signedMin(w, vLimit.min+1)
   866  		case lt | eq: // v <= w
   867  			ft.signedMax(v, wLimit.max)
   868  			ft.signedMin(w, vLimit.min)
   869  		case gt: // v > w
   870  			ft.signedMin(v, wLimit.min+1)
   871  			ft.signedMax(w, vLimit.max-1)
   872  		case gt | eq: // v >= w
   873  			ft.signedMin(v, wLimit.min)
   874  			ft.signedMax(w, vLimit.max)
   875  		case lt | gt: // v != w
   876  			if vLimit.min == vLimit.max { // v is a constant
   877  				c := vLimit.min
   878  				if wLimit.min == c {
   879  					ft.signedMin(w, c+1)
   880  				}
   881  				if wLimit.max == c {
   882  					ft.signedMax(w, c-1)
   883  				}
   884  			}
   885  			if wLimit.min == wLimit.max { // w is a constant
   886  				c := wLimit.min
   887  				if vLimit.min == c {
   888  					ft.signedMin(v, c+1)
   889  				}
   890  				if vLimit.max == c {
   891  					ft.signedMax(v, c-1)
   892  				}
   893  			}
   894  		}
   895  	case unsigned:
   896  		switch r {
   897  		case eq: // v == w
   898  			ft.unsignedMinMax(v, wLimit.umin, wLimit.umax)
   899  			ft.unsignedMinMax(w, vLimit.umin, vLimit.umax)
   900  		case lt: // v < w
   901  			ft.unsignedMax(v, wLimit.umax-1)
   902  			ft.unsignedMin(w, vLimit.umin+1)
   903  		case lt | eq: // v <= w
   904  			ft.unsignedMax(v, wLimit.umax)
   905  			ft.unsignedMin(w, vLimit.umin)
   906  		case gt: // v > w
   907  			ft.unsignedMin(v, wLimit.umin+1)
   908  			ft.unsignedMax(w, vLimit.umax-1)
   909  		case gt | eq: // v >= w
   910  			ft.unsignedMin(v, wLimit.umin)
   911  			ft.unsignedMax(w, vLimit.umax)
   912  		case lt | gt: // v != w
   913  			if vLimit.umin == vLimit.umax { // v is a constant
   914  				c := vLimit.umin
   915  				if wLimit.umin == c {
   916  					ft.unsignedMin(w, c+1)
   917  				}
   918  				if wLimit.umax == c {
   919  					ft.unsignedMax(w, c-1)
   920  				}
   921  			}
   922  			if wLimit.umin == wLimit.umax { // w is a constant
   923  				c := wLimit.umin
   924  				if vLimit.umin == c {
   925  					ft.unsignedMin(v, c+1)
   926  				}
   927  				if vLimit.umax == c {
   928  					ft.unsignedMax(v, c-1)
   929  				}
   930  			}
   931  		}
   932  	case boolean:
   933  		switch r {
   934  		case eq: // v == w
   935  			if vLimit.min == 1 { // v is true
   936  				ft.booleanTrue(w)
   937  			}
   938  			if vLimit.max == 0 { // v is false
   939  				ft.booleanFalse(w)
   940  			}
   941  			if wLimit.min == 1 { // w is true
   942  				ft.booleanTrue(v)
   943  			}
   944  			if wLimit.max == 0 { // w is false
   945  				ft.booleanFalse(v)
   946  			}
   947  		case lt | gt: // v != w
   948  			if vLimit.min == 1 { // v is true
   949  				ft.booleanFalse(w)
   950  			}
   951  			if vLimit.max == 0 { // v is false
   952  				ft.booleanTrue(w)
   953  			}
   954  			if wLimit.min == 1 { // w is true
   955  				ft.booleanFalse(v)
   956  			}
   957  			if wLimit.max == 0 { // w is false
   958  				ft.booleanTrue(v)
   959  			}
   960  		}
   961  	case pointer:
   962  		switch r {
   963  		case eq: // v == w
   964  			if vLimit.umax == 0 { // v is nil
   965  				ft.pointerNil(w)
   966  			}
   967  			if vLimit.umin > 0 { // v is non-nil
   968  				ft.pointerNonNil(w)
   969  			}
   970  			if wLimit.umax == 0 { // w is nil
   971  				ft.pointerNil(v)
   972  			}
   973  			if wLimit.umin > 0 { // w is non-nil
   974  				ft.pointerNonNil(v)
   975  			}
   976  		case lt | gt: // v != w
   977  			if vLimit.umax == 0 { // v is nil
   978  				ft.pointerNonNil(w)
   979  			}
   980  			if wLimit.umax == 0 { // w is nil
   981  				ft.pointerNonNil(v)
   982  			}
   983  			// Note: the other direction doesn't work.
   984  			// Being not equal to a non-nil pointer doesn't
   985  			// make you (necessarily) a nil pointer.
   986  		}
   987  	}
   988  
   989  	// Derived facts below here are only about numbers.
   990  	if d != signed && d != unsigned {
   991  		return
   992  	}
   993  
   994  	// Additional facts we know given the relationship between len and cap.
   995  	//
   996  	// TODO: Since prove now derives transitive relations, it
   997  	// should be sufficient to learn that len(w) <= cap(w) at the
   998  	// beginning of prove where we look for all len/cap ops.
   999  	if v.Op == OpSliceLen && r&lt == 0 && ft.caps[v.Args[0].ID] != nil {
  1000  		// len(s) > w implies cap(s) > w
  1001  		// len(s) >= w implies cap(s) >= w
  1002  		// len(s) == w implies cap(s) >= w
  1003  		ft.update(parent, ft.caps[v.Args[0].ID], w, d, r|gt)
  1004  	}
  1005  	if w.Op == OpSliceLen && r&gt == 0 && ft.caps[w.Args[0].ID] != nil {
  1006  		// same, length on the RHS.
  1007  		ft.update(parent, v, ft.caps[w.Args[0].ID], d, r|lt)
  1008  	}
  1009  	if v.Op == OpSliceCap && r&gt == 0 && ft.lens[v.Args[0].ID] != nil {
  1010  		// cap(s) < w implies len(s) < w
  1011  		// cap(s) <= w implies len(s) <= w
  1012  		// cap(s) == w implies len(s) <= w
  1013  		ft.update(parent, ft.lens[v.Args[0].ID], w, d, r|lt)
  1014  	}
  1015  	if w.Op == OpSliceCap && r&lt == 0 && ft.lens[w.Args[0].ID] != nil {
  1016  		// same, capacity on the RHS.
  1017  		ft.update(parent, v, ft.lens[w.Args[0].ID], d, r|gt)
  1018  	}
  1019  
  1020  	// Process fence-post implications.
  1021  	//
  1022  	// First, make the condition > or >=.
  1023  	if r == lt || r == lt|eq {
  1024  		v, w = w, v
  1025  		r = reverseBits[r]
  1026  	}
  1027  	switch r {
  1028  	case gt:
  1029  		if x, delta := isConstDelta(v); x != nil && delta == 1 {
  1030  			// x+1 > w  ⇒  x >= w
  1031  			//
  1032  			// This is useful for eliminating the
  1033  			// growslice branch of append.
  1034  			ft.update(parent, x, w, d, gt|eq)
  1035  		} else if x, delta := isConstDelta(w); x != nil && delta == -1 {
  1036  			// v > x-1  ⇒  v >= x
  1037  			ft.update(parent, v, x, d, gt|eq)
  1038  		}
  1039  	case gt | eq:
  1040  		if x, delta := isConstDelta(v); x != nil && delta == -1 {
  1041  			// x-1 >= w && x > min  ⇒  x > w
  1042  			//
  1043  			// Useful for i > 0; s[i-1].
  1044  			lim := ft.limits[x.ID]
  1045  			if (d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0) {
  1046  				ft.update(parent, x, w, d, gt)
  1047  			}
  1048  		} else if x, delta := isConstDelta(w); x != nil && delta == 1 {
  1049  			// v >= x+1 && x < max  ⇒  v > x
  1050  			lim := ft.limits[x.ID]
  1051  			if (d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op]) {
  1052  				ft.update(parent, v, x, d, gt)
  1053  			}
  1054  		}
  1055  	}
  1056  
  1057  	// Process: x+delta > w (with delta constant)
  1058  	// Only signed domain for now (useful for accesses to slices in loops).
  1059  	if r == gt || r == gt|eq {
  1060  		if x, delta := isConstDelta(v); x != nil && d == signed {
  1061  			if parent.Func.pass.debug > 1 {
  1062  				parent.Func.Warnl(parent.Pos, "x+d %s w; x:%v %v delta:%v w:%v d:%v", r, x, parent.String(), delta, w.AuxInt, d)
  1063  			}
  1064  			underflow := true
  1065  			if delta < 0 {
  1066  				l := ft.limits[x.ID]
  1067  				if (x.Type.Size() == 8 && l.min >= math.MinInt64-delta) ||
  1068  					(x.Type.Size() == 4 && l.min >= math.MinInt32-delta) {
  1069  					underflow = false
  1070  				}
  1071  			}
  1072  			if delta < 0 && !underflow {
  1073  				// If delta < 0 and x+delta cannot underflow then x > x+delta (that is, x > v)
  1074  				ft.update(parent, x, v, signed, gt)
  1075  			}
  1076  			if !w.isGenericIntConst() {
  1077  				// If we know that x+delta > w but w is not constant, we can derive:
  1078  				//    if delta < 0 and x+delta cannot underflow, then x > w
  1079  				// This is useful for loops with bounds "len(slice)-K" (delta = -K)
  1080  				if delta < 0 && !underflow {
  1081  					ft.update(parent, x, w, signed, r)
  1082  				}
  1083  			} else {
  1084  				// With w,delta constants, we want to derive: x+delta > w  ⇒  x > w-delta
  1085  				//
  1086  				// We compute (using integers of the correct size):
  1087  				//    min = w - delta
  1088  				//    max = MaxInt - delta
  1089  				//
  1090  				// And we prove that:
  1091  				//    if min<max: min < x AND x <= max
  1092  				//    if min>max: min < x OR  x <= max
  1093  				//
  1094  				// This is always correct, even in case of overflow.
  1095  				//
  1096  				// If the initial fact is x+delta >= w instead, the derived conditions are:
  1097  				//    if min<max: min <= x AND x <= max
  1098  				//    if min>max: min <= x OR  x <= max
  1099  				//
  1100  				// Notice the conditions for max are still <=, as they handle overflows.
  1101  				var min, max int64
  1102  				switch x.Type.Size() {
  1103  				case 8:
  1104  					min = w.AuxInt - delta
  1105  					max = int64(^uint64(0)>>1) - delta
  1106  				case 4:
  1107  					min = int64(int32(w.AuxInt) - int32(delta))
  1108  					max = int64(int32(^uint32(0)>>1) - int32(delta))
  1109  				case 2:
  1110  					min = int64(int16(w.AuxInt) - int16(delta))
  1111  					max = int64(int16(^uint16(0)>>1) - int16(delta))
  1112  				case 1:
  1113  					min = int64(int8(w.AuxInt) - int8(delta))
  1114  					max = int64(int8(^uint8(0)>>1) - int8(delta))
  1115  				default:
  1116  					panic("unimplemented")
  1117  				}
  1118  
  1119  				if min < max {
  1120  					// Record that x > min and max >= x
  1121  					if r == gt {
  1122  						min++
  1123  					}
  1124  					ft.signedMinMax(x, min, max)
  1125  				} else {
  1126  					// We know that either x>min OR x<=max. factsTable cannot record OR conditions,
  1127  					// so let's see if we can already prove that one of them is false, in which case
  1128  					// the other must be true
  1129  					l := ft.limits[x.ID]
  1130  					if l.max <= min {
  1131  						if r&eq == 0 || l.max < min {
  1132  							// x>min (x>=min) is impossible, so it must be x<=max
  1133  							ft.signedMax(x, max)
  1134  						}
  1135  					} else if l.min > max {
  1136  						// x<=max is impossible, so it must be x>min
  1137  						if r == gt {
  1138  							min++
  1139  						}
  1140  						ft.signedMin(x, min)
  1141  					}
  1142  				}
  1143  			}
  1144  		}
  1145  	}
  1146  
  1147  	// Look through value-preserving extensions.
  1148  	// If the domain is appropriate for the pre-extension Type,
  1149  	// repeat the update with the pre-extension Value.
  1150  	if isCleanExt(v) {
  1151  		switch {
  1152  		case d == signed && v.Args[0].Type.IsSigned():
  1153  			fallthrough
  1154  		case d == unsigned && !v.Args[0].Type.IsSigned():
  1155  			ft.update(parent, v.Args[0], w, d, r)
  1156  		}
  1157  	}
  1158  	if isCleanExt(w) {
  1159  		switch {
  1160  		case d == signed && w.Args[0].Type.IsSigned():
  1161  			fallthrough
  1162  		case d == unsigned && !w.Args[0].Type.IsSigned():
  1163  			ft.update(parent, v, w.Args[0], d, r)
  1164  		}
  1165  	}
  1166  }
  1167  
  1168  var opMin = map[Op]int64{
  1169  	OpAdd64: math.MinInt64, OpSub64: math.MinInt64,
  1170  	OpAdd32: math.MinInt32, OpSub32: math.MinInt32,
  1171  }
  1172  
  1173  var opMax = map[Op]int64{
  1174  	OpAdd64: math.MaxInt64, OpSub64: math.MaxInt64,
  1175  	OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32,
  1176  }
  1177  
  1178  var opUMax = map[Op]uint64{
  1179  	OpAdd64: math.MaxUint64, OpSub64: math.MaxUint64,
  1180  	OpAdd32: math.MaxUint32, OpSub32: math.MaxUint32,
  1181  }
  1182  
  1183  // isNonNegative reports whether v is known to be non-negative.
  1184  func (ft *factsTable) isNonNegative(v *Value) bool {
  1185  	return ft.limits[v.ID].min >= 0
  1186  }
  1187  
  1188  // checkpoint saves the current state of known relations.
  1189  // Called when descending on a branch.
  1190  func (ft *factsTable) checkpoint() {
  1191  	if ft.unsat {
  1192  		ft.unsatDepth++
  1193  	}
  1194  	ft.limitStack = append(ft.limitStack, checkpointBound)
  1195  	ft.orderS.Checkpoint()
  1196  	ft.orderU.Checkpoint()
  1197  	ft.orderingsStack = append(ft.orderingsStack, 0)
  1198  }
  1199  
  1200  // restore restores known relation to the state just
  1201  // before the previous checkpoint.
  1202  // Called when backing up on a branch.
  1203  func (ft *factsTable) restore() {
  1204  	if ft.unsatDepth > 0 {
  1205  		ft.unsatDepth--
  1206  	} else {
  1207  		ft.unsat = false
  1208  	}
  1209  	for {
  1210  		old := ft.limitStack[len(ft.limitStack)-1]
  1211  		ft.limitStack = ft.limitStack[:len(ft.limitStack)-1]
  1212  		if old.vid == 0 { // checkpointBound
  1213  			break
  1214  		}
  1215  		ft.limits[old.vid] = old.limit
  1216  	}
  1217  	ft.orderS.Undo()
  1218  	ft.orderU.Undo()
  1219  	for {
  1220  		id := ft.orderingsStack[len(ft.orderingsStack)-1]
  1221  		ft.orderingsStack = ft.orderingsStack[:len(ft.orderingsStack)-1]
  1222  		if id == 0 { // checkpoint marker
  1223  			break
  1224  		}
  1225  		o := ft.orderings[id]
  1226  		ft.orderings[id] = o.next
  1227  		o.next = ft.orderingCache
  1228  		ft.orderingCache = o
  1229  	}
  1230  }
  1231  
  1232  var (
  1233  	reverseBits = [...]relation{0, 4, 2, 6, 1, 5, 3, 7}
  1234  
  1235  	// maps what we learn when the positive branch is taken.
  1236  	// For example:
  1237  	//      OpLess8:   {signed, lt},
  1238  	//	v1 = (OpLess8 v2 v3).
  1239  	// If we learn that v1 is true, then we can deduce that v2<v3
  1240  	// in the signed domain.
  1241  	domainRelationTable = map[Op]struct {
  1242  		d domain
  1243  		r relation
  1244  	}{
  1245  		OpEq8:   {signed | unsigned, eq},
  1246  		OpEq16:  {signed | unsigned, eq},
  1247  		OpEq32:  {signed | unsigned, eq},
  1248  		OpEq64:  {signed | unsigned, eq},
  1249  		OpEqPtr: {pointer, eq},
  1250  		OpEqB:   {boolean, eq},
  1251  
  1252  		OpNeq8:   {signed | unsigned, lt | gt},
  1253  		OpNeq16:  {signed | unsigned, lt | gt},
  1254  		OpNeq32:  {signed | unsigned, lt | gt},
  1255  		OpNeq64:  {signed | unsigned, lt | gt},
  1256  		OpNeqPtr: {pointer, lt | gt},
  1257  		OpNeqB:   {boolean, lt | gt},
  1258  
  1259  		OpLess8:   {signed, lt},
  1260  		OpLess8U:  {unsigned, lt},
  1261  		OpLess16:  {signed, lt},
  1262  		OpLess16U: {unsigned, lt},
  1263  		OpLess32:  {signed, lt},
  1264  		OpLess32U: {unsigned, lt},
  1265  		OpLess64:  {signed, lt},
  1266  		OpLess64U: {unsigned, lt},
  1267  
  1268  		OpLeq8:   {signed, lt | eq},
  1269  		OpLeq8U:  {unsigned, lt | eq},
  1270  		OpLeq16:  {signed, lt | eq},
  1271  		OpLeq16U: {unsigned, lt | eq},
  1272  		OpLeq32:  {signed, lt | eq},
  1273  		OpLeq32U: {unsigned, lt | eq},
  1274  		OpLeq64:  {signed, lt | eq},
  1275  		OpLeq64U: {unsigned, lt | eq},
  1276  	}
  1277  )
  1278  
  1279  // cleanup returns the posets to the free list
  1280  func (ft *factsTable) cleanup(f *Func) {
  1281  	for _, po := range []*poset{ft.orderS, ft.orderU} {
  1282  		// Make sure it's empty as it should be. A non-empty poset
  1283  		// might cause errors and miscompilations if reused.
  1284  		if checkEnabled {
  1285  			if err := po.CheckEmpty(); err != nil {
  1286  				f.Fatalf("poset not empty after function %s: %v", f.Name, err)
  1287  			}
  1288  		}
  1289  		f.retPoset(po)
  1290  	}
  1291  	f.Cache.freeLimitSlice(ft.limits)
  1292  	f.Cache.freeBoolSlice(ft.recurseCheck)
  1293  	if cap(ft.reusedTopoSortScoresTable) > 0 {
  1294  		f.Cache.freeUintSlice(ft.reusedTopoSortScoresTable)
  1295  	}
  1296  }
  1297  
  1298  // addSlicesOfSameLen finds the slices that are in the same block and whose Op
  1299  // is OpPhi and always have the same length, then add the equality relationship
  1300  // between them to ft. If two slices start out with the same length and decrease
  1301  // in length by the same amount on each round of the loop (or in the if block),
  1302  // then we think their lengths are always equal.
  1303  //
  1304  // See https://go.dev/issues/75144
  1305  //
  1306  // In fact, we are just propagating the equality
  1307  //
  1308  //	if len(a) == len(b) { // from here
  1309  //		for len(a) > 4 {
  1310  //			a = a[4:]
  1311  //			b = b[4:]
  1312  //		}
  1313  //		if len(a) == len(b) { // to here
  1314  //			return true
  1315  //		}
  1316  //	}
  1317  //
  1318  // or change the for to if:
  1319  //
  1320  //	if len(a) == len(b) { // from here
  1321  //		if len(a) > 4 {
  1322  //			a = a[4:]
  1323  //			b = b[4:]
  1324  //		}
  1325  //		if len(a) == len(b) { // to here
  1326  //			return true
  1327  //		}
  1328  //	}
  1329  func addSlicesOfSameLen(ft *factsTable, b *Block) {
  1330  	// Let w points to the first value we're interested in, and then we
  1331  	// only process those values ​​that appear to be the same length as w,
  1332  	// looping only once. This should be enough in most cases. And u is
  1333  	// similar to w, see comment for predIndex.
  1334  	var u, w *Value
  1335  	var i, j, k sliceInfo
  1336  	isInterested := func(v *Value) bool {
  1337  		j = getSliceInfo(v)
  1338  		return j.sliceWhere != sliceUnknown
  1339  	}
  1340  	for _, v := range b.Values {
  1341  		if v.Uses == 0 {
  1342  			continue
  1343  		}
  1344  		if v.Op == OpPhi && len(v.Args) == 2 && ft.lens[v.ID] != nil && isInterested(v) {
  1345  			if j.predIndex == 1 && ft.lens[v.Args[0].ID] != nil {
  1346  				// found v = (Phi x (SliceMake _ (Add64 (Const64 [n]) (SliceLen x)) _))) or
  1347  				// v = (Phi x (SliceMake _ (Add64 (Const64 [n]) (SliceLen v)) _)))
  1348  				if w == nil {
  1349  					k = j
  1350  					w = v
  1351  					continue
  1352  				}
  1353  				// propagate the equality
  1354  				if j == k && ft.orderS.Equal(ft.lens[v.Args[0].ID], ft.lens[w.Args[0].ID]) {
  1355  					ft.update(b, ft.lens[v.ID], ft.lens[w.ID], signed, eq)
  1356  				}
  1357  			} else if j.predIndex == 0 && ft.lens[v.Args[1].ID] != nil {
  1358  				// found v = (Phi (SliceMake _ (Add64 (Const64 [n]) (SliceLen x)) _)) x) or
  1359  				// v = (Phi (SliceMake _ (Add64 (Const64 [n]) (SliceLen v)) _)) x)
  1360  				if u == nil {
  1361  					i = j
  1362  					u = v
  1363  					continue
  1364  				}
  1365  				// propagate the equality
  1366  				if j == i && ft.orderS.Equal(ft.lens[v.Args[1].ID], ft.lens[u.Args[1].ID]) {
  1367  					ft.update(b, ft.lens[v.ID], ft.lens[u.ID], signed, eq)
  1368  				}
  1369  			}
  1370  		}
  1371  	}
  1372  }
  1373  
  1374  type sliceWhere int
  1375  
  1376  const (
  1377  	sliceUnknown sliceWhere = iota
  1378  	sliceInFor
  1379  	sliceInIf
  1380  )
  1381  
  1382  // predIndex is used to indicate the branch represented by the predecessor
  1383  // block in which the slicing operation occurs.
  1384  type predIndex int
  1385  
  1386  type sliceInfo struct {
  1387  	lengthDiff int64
  1388  	sliceWhere
  1389  	predIndex
  1390  }
  1391  
  1392  // getSliceInfo returns the negative increment of the slice length in a slice
  1393  // operation by examine the Phi node at the merge block. So, we only interest
  1394  // in the slice operation if it is inside a for block or an if block.
  1395  // Otherwise it returns sliceInfo{0, sliceUnknown, 0}.
  1396  //
  1397  // For the following for block:
  1398  //
  1399  //	for len(a) > 4 {
  1400  //	    a = a[4:]
  1401  //	}
  1402  //
  1403  // vp = (Phi v3 v9)
  1404  // v5 = (SliceLen vp)
  1405  // v7 = (Add64 (Const64 [-4]) v5)
  1406  // v9 = (SliceMake _ v7 _)
  1407  //
  1408  // returns sliceInfo{-4, sliceInFor, 1}
  1409  //
  1410  // For a subsequent merge block after an if block:
  1411  //
  1412  //	if len(a) > 4 {
  1413  //	    a = a[4:]
  1414  //	}
  1415  //	a // here
  1416  //
  1417  // vp = (Phi v3 v9)
  1418  // v5 = (SliceLen v3)
  1419  // v7 = (Add64 (Const64 [-4]) v5)
  1420  // v9 = (SliceMake _ v7 _)
  1421  //
  1422  // returns sliceInfo{-4, sliceInIf, 1}
  1423  //
  1424  // Returns sliceInfo{0, sliceUnknown, 0} if it is not the slice
  1425  // operation we are interested in.
  1426  func getSliceInfo(vp *Value) (inf sliceInfo) {
  1427  	if vp.Op != OpPhi || len(vp.Args) != 2 {
  1428  		return
  1429  	}
  1430  	var i predIndex
  1431  	var l *Value // length for OpSliceMake
  1432  	if vp.Args[0].Op != OpSliceMake && vp.Args[1].Op == OpSliceMake {
  1433  		l = vp.Args[1].Args[1]
  1434  		i = 1
  1435  	} else if vp.Args[0].Op == OpSliceMake && vp.Args[1].Op != OpSliceMake {
  1436  		l = vp.Args[0].Args[1]
  1437  		i = 0
  1438  	} else {
  1439  		return
  1440  	}
  1441  	var op Op
  1442  	switch l.Op {
  1443  	case OpAdd64:
  1444  		op = OpConst64
  1445  	case OpAdd32:
  1446  		op = OpConst32
  1447  	default:
  1448  		return
  1449  	}
  1450  	if l.Args[0].Op == op && l.Args[1].Op == OpSliceLen && l.Args[1].Args[0] == vp {
  1451  		return sliceInfo{l.Args[0].AuxInt, sliceInFor, i}
  1452  	}
  1453  	if l.Args[1].Op == op && l.Args[0].Op == OpSliceLen && l.Args[0].Args[0] == vp {
  1454  		return sliceInfo{l.Args[1].AuxInt, sliceInFor, i}
  1455  	}
  1456  	if l.Args[0].Op == op && l.Args[1].Op == OpSliceLen && l.Args[1].Args[0] == vp.Args[1-i] {
  1457  		return sliceInfo{l.Args[0].AuxInt, sliceInIf, i}
  1458  	}
  1459  	if l.Args[1].Op == op && l.Args[0].Op == OpSliceLen && l.Args[0].Args[0] == vp.Args[1-i] {
  1460  		return sliceInfo{l.Args[1].AuxInt, sliceInIf, i}
  1461  	}
  1462  	return
  1463  }
  1464  
  1465  // prove removes redundant BlockIf branches that can be inferred
  1466  // from previous dominating comparisons.
  1467  //
  1468  // By far, the most common redundant pair are generated by bounds checking.
  1469  // For example for the code:
  1470  //
  1471  //	a[i] = 4
  1472  //	foo(a[i])
  1473  //
  1474  // The compiler will generate the following code:
  1475  //
  1476  //	if i >= len(a) {
  1477  //	    panic("not in bounds")
  1478  //	}
  1479  //	a[i] = 4
  1480  //	if i >= len(a) {
  1481  //	    panic("not in bounds")
  1482  //	}
  1483  //	foo(a[i])
  1484  //
  1485  // The second comparison i >= len(a) is clearly redundant because if the
  1486  // else branch of the first comparison is executed, we already know that i < len(a).
  1487  // The code for the second panic can be removed.
  1488  //
  1489  // prove works by finding contradictions and trimming branches whose
  1490  // conditions are unsatisfiable given the branches leading up to them.
  1491  // It tracks a "fact table" of branch conditions. For each branching
  1492  // block, it asserts the branch conditions that uniquely dominate that
  1493  // block, and then separately asserts the block's branch condition and
  1494  // its negation. If either leads to a contradiction, it can trim that
  1495  // successor.
  1496  func prove(f *Func) {
  1497  	// Find induction variables. Currently, findIndVars
  1498  	// is limited to one induction variable per block.
  1499  	var indVars map[*Block]indVar
  1500  	for _, v := range findIndVar(f) {
  1501  		ind := v.ind
  1502  		if len(ind.Args) != 2 {
  1503  			// the rewrite code assumes there is only ever two parents to loops
  1504  			panic("unexpected induction with too many parents")
  1505  		}
  1506  
  1507  		nxt := v.nxt
  1508  		if !(ind.Uses == 2 && // 2 used by comparison and next
  1509  			nxt.Uses == 1) { // 1 used by induction
  1510  			// ind or nxt is used inside the loop, add it for the facts table
  1511  			if indVars == nil {
  1512  				indVars = make(map[*Block]indVar)
  1513  			}
  1514  			indVars[v.entry] = v
  1515  			continue
  1516  		} else {
  1517  			// Since this induction variable is not used for anything but counting the iterations,
  1518  			// no point in putting it into the facts table.
  1519  		}
  1520  
  1521  		// try to rewrite to a downward counting loop checking against start if the
  1522  		// loop body does not depend on ind or nxt and end is known before the loop.
  1523  		// This reduces pressure on the register allocator because this does not need
  1524  		// to use end on each iteration anymore. We compare against the start constant instead.
  1525  		// That means this code:
  1526  		//
  1527  		//	loop:
  1528  		//		ind = (Phi (Const [x]) nxt),
  1529  		//		if ind < end
  1530  		//		then goto enter_loop
  1531  		//		else goto exit_loop
  1532  		//
  1533  		//	enter_loop:
  1534  		//		do something without using ind nor nxt
  1535  		//		nxt = inc + ind
  1536  		//		goto loop
  1537  		//
  1538  		//	exit_loop:
  1539  		//
  1540  		// is rewritten to:
  1541  		//
  1542  		//	loop:
  1543  		//		ind = (Phi end nxt)
  1544  		//		if (Const [x]) < ind
  1545  		//		then goto enter_loop
  1546  		//		else goto exit_loop
  1547  		//
  1548  		//	enter_loop:
  1549  		//		do something without using ind nor nxt
  1550  		//		nxt = ind - inc
  1551  		//		goto loop
  1552  		//
  1553  		//	exit_loop:
  1554  		//
  1555  		// this is better because it only requires to keep ind then nxt alive while looping,
  1556  		// while the original form keeps ind then nxt and end alive
  1557  		start, end := v.min, v.max
  1558  		if v.flags&indVarCountDown != 0 {
  1559  			start, end = end, start
  1560  		}
  1561  
  1562  		if !start.isGenericIntConst() {
  1563  			// if start is not a constant we would be winning nothing from inverting the loop
  1564  			continue
  1565  		}
  1566  		if end.isGenericIntConst() {
  1567  			// TODO: if both start and end are constants we should rewrite such that the comparison
  1568  			// is against zero and nxt is ++ or -- operation
  1569  			// That means:
  1570  			//	for i := 2; i < 11; i += 2 {
  1571  			// should be rewritten to:
  1572  			//	for i := 5; 0 < i; i-- {
  1573  			continue
  1574  		}
  1575  
  1576  		if end.Block == ind.Block {
  1577  			// we can't rewrite loops where the condition depends on the loop body
  1578  			// this simple check is forced to work because if this is true a Phi in ind.Block must exist
  1579  			continue
  1580  		}
  1581  
  1582  		check := ind.Block.Controls[0]
  1583  		// invert the check
  1584  		check.Args[0], check.Args[1] = check.Args[1], check.Args[0]
  1585  
  1586  		// swap start and end in the loop
  1587  		for i, v := range check.Args {
  1588  			if v != end {
  1589  				continue
  1590  			}
  1591  
  1592  			check.SetArg(i, start)
  1593  			goto replacedEnd
  1594  		}
  1595  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1596  	replacedEnd:
  1597  
  1598  		for i, v := range ind.Args {
  1599  			if v != start {
  1600  				continue
  1601  			}
  1602  
  1603  			ind.SetArg(i, end)
  1604  			goto replacedStart
  1605  		}
  1606  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1607  	replacedStart:
  1608  
  1609  		if nxt.Args[0] != ind {
  1610  			// unlike additions subtractions are not commutative so be sure we get it right
  1611  			nxt.Args[0], nxt.Args[1] = nxt.Args[1], nxt.Args[0]
  1612  		}
  1613  
  1614  		switch nxt.Op {
  1615  		case OpAdd8:
  1616  			nxt.Op = OpSub8
  1617  		case OpAdd16:
  1618  			nxt.Op = OpSub16
  1619  		case OpAdd32:
  1620  			nxt.Op = OpSub32
  1621  		case OpAdd64:
  1622  			nxt.Op = OpSub64
  1623  		case OpSub8:
  1624  			nxt.Op = OpAdd8
  1625  		case OpSub16:
  1626  			nxt.Op = OpAdd16
  1627  		case OpSub32:
  1628  			nxt.Op = OpAdd32
  1629  		case OpSub64:
  1630  			nxt.Op = OpAdd64
  1631  		default:
  1632  			panic("unreachable")
  1633  		}
  1634  
  1635  		if f.pass.debug > 0 {
  1636  			f.Warnl(ind.Pos, "Inverted loop iteration")
  1637  		}
  1638  	}
  1639  
  1640  	ft := newFactsTable(f)
  1641  	ft.checkpoint()
  1642  
  1643  	// Find length and capacity ops.
  1644  	for _, b := range f.Blocks {
  1645  		for _, v := range b.Values {
  1646  			if v.Uses == 0 {
  1647  				// We don't care about dead values.
  1648  				// (There can be some that are CSEd but not removed yet.)
  1649  				continue
  1650  			}
  1651  			switch v.Op {
  1652  			case OpSliceLen:
  1653  				if ft.lens == nil {
  1654  					ft.lens = map[ID]*Value{}
  1655  				}
  1656  				// Set all len Values for the same slice as equal in the poset.
  1657  				// The poset handles transitive relations, so Values related to
  1658  				// any OpSliceLen for this slice will be correctly related to others.
  1659  				if l, ok := ft.lens[v.Args[0].ID]; ok {
  1660  					ft.update(b, v, l, signed, eq)
  1661  				} else {
  1662  					ft.lens[v.Args[0].ID] = v
  1663  				}
  1664  			case OpSliceCap:
  1665  				if ft.caps == nil {
  1666  					ft.caps = map[ID]*Value{}
  1667  				}
  1668  				// Same as case OpSliceLen above, but for slice cap.
  1669  				if c, ok := ft.caps[v.Args[0].ID]; ok {
  1670  					ft.update(b, v, c, signed, eq)
  1671  				} else {
  1672  					ft.caps[v.Args[0].ID] = v
  1673  				}
  1674  			}
  1675  		}
  1676  	}
  1677  
  1678  	// current node state
  1679  	type walkState int
  1680  	const (
  1681  		descend walkState = iota
  1682  		simplify
  1683  	)
  1684  	// work maintains the DFS stack.
  1685  	type bp struct {
  1686  		block *Block    // current handled block
  1687  		state walkState // what's to do
  1688  	}
  1689  	work := make([]bp, 0, 256)
  1690  	work = append(work, bp{
  1691  		block: f.Entry,
  1692  		state: descend,
  1693  	})
  1694  
  1695  	idom := f.Idom()
  1696  	sdom := f.Sdom()
  1697  
  1698  	// DFS on the dominator tree.
  1699  	//
  1700  	// For efficiency, we consider only the dominator tree rather
  1701  	// than the entire flow graph. On the way down, we consider
  1702  	// incoming branches and accumulate conditions that uniquely
  1703  	// dominate the current block. If we discover a contradiction,
  1704  	// we can eliminate the entire block and all of its children.
  1705  	// On the way back up, we consider outgoing branches that
  1706  	// haven't already been considered. This way we consider each
  1707  	// branch condition only once.
  1708  	for len(work) > 0 {
  1709  		node := work[len(work)-1]
  1710  		work = work[:len(work)-1]
  1711  		parent := idom[node.block.ID]
  1712  		branch := getBranch(sdom, parent, node.block)
  1713  
  1714  		switch node.state {
  1715  		case descend:
  1716  			ft.checkpoint()
  1717  
  1718  			// Entering the block, add facts about the induction variable
  1719  			// that is bound to this block.
  1720  			if iv, ok := indVars[node.block]; ok {
  1721  				addIndVarRestrictions(ft, parent, iv)
  1722  			}
  1723  
  1724  			// Add results of reaching this block via a branch from
  1725  			// its immediate dominator (if any).
  1726  			if branch != unknown {
  1727  				addBranchRestrictions(ft, parent, branch)
  1728  			}
  1729  
  1730  			// Add slices of the same length start from current block.
  1731  			addSlicesOfSameLen(ft, node.block)
  1732  
  1733  			if ft.unsat {
  1734  				// node.block is unreachable.
  1735  				// Remove it and don't visit
  1736  				// its children.
  1737  				removeBranch(parent, branch)
  1738  				ft.restore()
  1739  				break
  1740  			}
  1741  			// Otherwise, we can now commit to
  1742  			// taking this branch. We'll restore
  1743  			// ft when we unwind.
  1744  
  1745  			// Add facts about the values in the current block.
  1746  			addLocalFacts(ft, node.block)
  1747  
  1748  			work = append(work, bp{
  1749  				block: node.block,
  1750  				state: simplify,
  1751  			})
  1752  			for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) {
  1753  				work = append(work, bp{
  1754  					block: s,
  1755  					state: descend,
  1756  				})
  1757  			}
  1758  
  1759  		case simplify:
  1760  			simplifyBlock(sdom, ft, node.block)
  1761  			ft.restore()
  1762  		}
  1763  	}
  1764  
  1765  	ft.restore()
  1766  
  1767  	ft.cleanup(f)
  1768  }
  1769  
  1770  // initLimit sets initial constant limit for v.  This limit is based
  1771  // only on the operation itself, not any of its input arguments. This
  1772  // method is only used in two places, once when the prove pass startup
  1773  // and the other when a new ssa value is created, both for init. (unlike
  1774  // flowLimit, below, which computes additional constraints based on
  1775  // ranges of opcode arguments).
  1776  func initLimit(v *Value) limit {
  1777  	if v.Type.IsBoolean() {
  1778  		switch v.Op {
  1779  		case OpConstBool:
  1780  			b := v.AuxInt
  1781  			return limit{min: b, max: b, umin: uint64(b), umax: uint64(b)}
  1782  		default:
  1783  			return limit{min: 0, max: 1, umin: 0, umax: 1}
  1784  		}
  1785  	}
  1786  	if v.Type.IsPtrShaped() { // These are the types that EqPtr/NeqPtr operate on, except uintptr.
  1787  		switch v.Op {
  1788  		case OpConstNil:
  1789  			return limit{min: 0, max: 0, umin: 0, umax: 0}
  1790  		case OpAddr, OpLocalAddr: // TODO: others?
  1791  			l := noLimit
  1792  			l.umin = 1
  1793  			return l
  1794  		default:
  1795  			return noLimit
  1796  		}
  1797  	}
  1798  	if !v.Type.IsInteger() {
  1799  		return noLimit
  1800  	}
  1801  
  1802  	// Default limits based on type.
  1803  	lim := noLimitForBitsize(uint(v.Type.Size()) * 8)
  1804  
  1805  	// Tighter limits on some opcodes.
  1806  	switch v.Op {
  1807  	// constants
  1808  	case OpConst64:
  1809  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(v.AuxInt), umax: uint64(v.AuxInt)}
  1810  	case OpConst32:
  1811  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint32(v.AuxInt)), umax: uint64(uint32(v.AuxInt))}
  1812  	case OpConst16:
  1813  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint16(v.AuxInt)), umax: uint64(uint16(v.AuxInt))}
  1814  	case OpConst8:
  1815  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint8(v.AuxInt)), umax: uint64(uint8(v.AuxInt))}
  1816  
  1817  	// extensions
  1818  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16:
  1819  		lim = lim.signedMinMax(0, 1<<8-1)
  1820  		lim = lim.unsignedMax(1<<8 - 1)
  1821  	case OpZeroExt16to64, OpZeroExt16to32:
  1822  		lim = lim.signedMinMax(0, 1<<16-1)
  1823  		lim = lim.unsignedMax(1<<16 - 1)
  1824  	case OpZeroExt32to64:
  1825  		lim = lim.signedMinMax(0, 1<<32-1)
  1826  		lim = lim.unsignedMax(1<<32 - 1)
  1827  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16:
  1828  		lim = lim.signedMinMax(math.MinInt8, math.MaxInt8)
  1829  	case OpSignExt16to64, OpSignExt16to32:
  1830  		lim = lim.signedMinMax(math.MinInt16, math.MaxInt16)
  1831  	case OpSignExt32to64:
  1832  		lim = lim.signedMinMax(math.MinInt32, math.MaxInt32)
  1833  
  1834  	// math/bits intrinsics
  1835  	case OpCtz64, OpBitLen64, OpPopCount64,
  1836  		OpCtz32, OpBitLen32, OpPopCount32,
  1837  		OpCtz16, OpBitLen16, OpPopCount16,
  1838  		OpCtz8, OpBitLen8, OpPopCount8:
  1839  		lim = lim.unsignedMax(uint64(v.Args[0].Type.Size() * 8))
  1840  
  1841  	// bool to uint8 conversion
  1842  	case OpCvtBoolToUint8:
  1843  		lim = lim.unsignedMax(1)
  1844  
  1845  	// length operations
  1846  	case OpSliceLen, OpSliceCap:
  1847  		f := v.Block.Func
  1848  		elemSize := uint64(v.Args[0].Type.Elem().Size())
  1849  		if elemSize > 0 {
  1850  			heapSize := uint64(1)<<(uint64(f.Config.PtrSize)*8) - 1
  1851  			maximumElementsFittingInHeap := heapSize / elemSize
  1852  			lim = lim.unsignedMax(maximumElementsFittingInHeap)
  1853  		}
  1854  		fallthrough
  1855  	case OpStringLen:
  1856  		lim = lim.signedMin(0)
  1857  	}
  1858  
  1859  	// signed <-> unsigned propagation
  1860  	if lim.min >= 0 {
  1861  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
  1862  	}
  1863  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
  1864  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
  1865  	}
  1866  
  1867  	return lim
  1868  }
  1869  
  1870  // flowLimit updates the known limits of v in ft.
  1871  // flowLimit can use the ranges of input arguments.
  1872  //
  1873  // Note: this calculation only happens at the point the value is defined. We do not reevaluate
  1874  // it later. So for example:
  1875  //
  1876  //	v := x + y
  1877  //	if 0 <= x && x < 5 && 0 <= y && y < 5 { ... use v ... }
  1878  //
  1879  // we don't discover that the range of v is bounded in the conditioned
  1880  // block. We could recompute the range of v once we enter the block so
  1881  // we know that it is 0 <= v <= 8, but we don't have a mechanism to do
  1882  // that right now.
  1883  func (ft *factsTable) flowLimit(v *Value) {
  1884  	if !v.Type.IsInteger() {
  1885  		// TODO: boolean?
  1886  		return
  1887  	}
  1888  
  1889  	// Additional limits based on opcode and argument.
  1890  	// No need to repeat things here already done in initLimit.
  1891  	switch v.Op {
  1892  
  1893  	// extensions
  1894  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64:
  1895  		a := ft.limits[v.Args[0].ID]
  1896  		ft.unsignedMinMax(v, a.umin, a.umax)
  1897  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64:
  1898  		a := ft.limits[v.Args[0].ID]
  1899  		ft.signedMinMax(v, a.min, a.max)
  1900  	case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
  1901  		a := ft.limits[v.Args[0].ID]
  1902  		if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 {
  1903  			ft.unsignedMinMax(v, a.umin, a.umax)
  1904  		}
  1905  
  1906  	// math/bits
  1907  	case OpCtz64:
  1908  		a := ft.limits[v.Args[0].ID]
  1909  		if a.nonzero() {
  1910  			ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
  1911  		}
  1912  	case OpCtz32:
  1913  		a := ft.limits[v.Args[0].ID]
  1914  		if a.nonzero() {
  1915  			ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
  1916  		}
  1917  	case OpCtz16:
  1918  		a := ft.limits[v.Args[0].ID]
  1919  		if a.nonzero() {
  1920  			ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
  1921  		}
  1922  	case OpCtz8:
  1923  		a := ft.limits[v.Args[0].ID]
  1924  		if a.nonzero() {
  1925  			ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
  1926  		}
  1927  
  1928  	case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8:
  1929  		a := ft.limits[v.Args[0].ID]
  1930  		changingBitsCount := uint64(bits.Len64(a.umax ^ a.umin))
  1931  		sharedLeadingMask := ^(uint64(1)<<changingBitsCount - 1)
  1932  		fixedBits := a.umax & sharedLeadingMask
  1933  		min := uint64(bits.OnesCount64(fixedBits))
  1934  		ft.unsignedMinMax(v, min, min+changingBitsCount)
  1935  
  1936  	case OpBitLen64:
  1937  		a := ft.limits[v.Args[0].ID]
  1938  		ft.unsignedMinMax(v,
  1939  			uint64(bits.Len64(a.umin)),
  1940  			uint64(bits.Len64(a.umax)))
  1941  	case OpBitLen32:
  1942  		a := ft.limits[v.Args[0].ID]
  1943  		ft.unsignedMinMax(v,
  1944  			uint64(bits.Len32(uint32(a.umin))),
  1945  			uint64(bits.Len32(uint32(a.umax))))
  1946  	case OpBitLen16:
  1947  		a := ft.limits[v.Args[0].ID]
  1948  		ft.unsignedMinMax(v,
  1949  			uint64(bits.Len16(uint16(a.umin))),
  1950  			uint64(bits.Len16(uint16(a.umax))))
  1951  	case OpBitLen8:
  1952  		a := ft.limits[v.Args[0].ID]
  1953  		ft.unsignedMinMax(v,
  1954  			uint64(bits.Len8(uint8(a.umin))),
  1955  			uint64(bits.Len8(uint8(a.umax))))
  1956  
  1957  	// Masks.
  1958  
  1959  	// TODO: if y.umax and y.umin share a leading bit pattern, y also has that leading bit pattern.
  1960  	// we could compare the patterns of always set bits in a and b and learn more about minimum and maximum.
  1961  	// But I doubt this help any real world code.
  1962  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  1963  		// AND can only make the value smaller.
  1964  		a := ft.limits[v.Args[0].ID]
  1965  		b := ft.limits[v.Args[1].ID]
  1966  		ft.unsignedMax(v, min(a.umax, b.umax))
  1967  	case OpOr64, OpOr32, OpOr16, OpOr8:
  1968  		// OR can only make the value bigger and can't flip bits proved to be zero in both inputs.
  1969  		a := ft.limits[v.Args[0].ID]
  1970  		b := ft.limits[v.Args[1].ID]
  1971  		ft.unsignedMinMax(v,
  1972  			max(a.umin, b.umin),
  1973  			1<<bits.Len64(a.umax|b.umax)-1)
  1974  	case OpXor64, OpXor32, OpXor16, OpXor8:
  1975  		// XOR can't flip bits that are proved to be zero in both inputs.
  1976  		a := ft.limits[v.Args[0].ID]
  1977  		b := ft.limits[v.Args[1].ID]
  1978  		ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
  1979  	case OpCom64, OpCom32, OpCom16, OpCom8:
  1980  		a := ft.limits[v.Args[0].ID]
  1981  		ft.newLimit(v, a.com(uint(v.Type.Size())*8))
  1982  
  1983  	// Arithmetic.
  1984  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  1985  		a := ft.limits[v.Args[0].ID]
  1986  		b := ft.limits[v.Args[1].ID]
  1987  		ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
  1988  	case OpSub64, OpSub32, OpSub16, OpSub8:
  1989  		a := ft.limits[v.Args[0].ID]
  1990  		b := ft.limits[v.Args[1].ID]
  1991  		ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
  1992  		ft.detectMod(v)
  1993  		ft.detectSliceLenRelation(v)
  1994  		ft.detectSubRelations(v)
  1995  	case OpNeg64, OpNeg32, OpNeg16, OpNeg8:
  1996  		a := ft.limits[v.Args[0].ID]
  1997  		bitsize := uint(v.Type.Size()) * 8
  1998  		ft.newLimit(v, a.neg(bitsize))
  1999  	case OpMul64, OpMul32, OpMul16, OpMul8:
  2000  		a := ft.limits[v.Args[0].ID]
  2001  		b := ft.limits[v.Args[1].ID]
  2002  		ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
  2003  	case OpLsh64x64, OpLsh64x32, OpLsh64x16, OpLsh64x8,
  2004  		OpLsh32x64, OpLsh32x32, OpLsh32x16, OpLsh32x8,
  2005  		OpLsh16x64, OpLsh16x32, OpLsh16x16, OpLsh16x8,
  2006  		OpLsh8x64, OpLsh8x32, OpLsh8x16, OpLsh8x8:
  2007  		a := ft.limits[v.Args[0].ID]
  2008  		b := ft.limits[v.Args[1].ID]
  2009  		bitsize := uint(v.Type.Size()) * 8
  2010  		ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
  2011  	case OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8,
  2012  		OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
  2013  		OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
  2014  		OpRsh8x64, OpRsh8x32, OpRsh8x16, OpRsh8x8:
  2015  		a := ft.limits[v.Args[0].ID]
  2016  		b := ft.limits[v.Args[1].ID]
  2017  		if b.min >= 0 {
  2018  			// Shift of negative makes a value closer to 0 (greater),
  2019  			// so if a.min is negative, v.min is a.min>>b.min instead of a.min>>b.max,
  2020  			// and similarly if a.max is negative, v.max is a.max>>b.max.
  2021  			// Easier to compute min and max of both than to write sign logic.
  2022  			vmin := min(a.min>>b.min, a.min>>b.max)
  2023  			vmax := max(a.max>>b.min, a.max>>b.max)
  2024  			ft.signedMinMax(v, vmin, vmax)
  2025  		}
  2026  	case OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8,
  2027  		OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2028  		OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2029  		OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8:
  2030  		a := ft.limits[v.Args[0].ID]
  2031  		b := ft.limits[v.Args[1].ID]
  2032  		if b.min >= 0 {
  2033  			ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min)
  2034  		}
  2035  	case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  2036  		a := ft.limits[v.Args[0].ID]
  2037  		b := ft.limits[v.Args[1].ID]
  2038  		if !(a.nonnegative() && b.nonnegative()) {
  2039  			// TODO: we could handle signed limits but I didn't bother.
  2040  			break
  2041  		}
  2042  		fallthrough
  2043  	case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u:
  2044  		a := ft.limits[v.Args[0].ID]
  2045  		b := ft.limits[v.Args[1].ID]
  2046  		lim := noLimit
  2047  		if b.umax > 0 {
  2048  			lim = lim.unsignedMin(a.umin / b.umax)
  2049  		}
  2050  		if b.umin > 0 {
  2051  			lim = lim.unsignedMax(a.umax / b.umin)
  2052  		}
  2053  		ft.newLimit(v, lim)
  2054  	case OpMod64, OpMod32, OpMod16, OpMod8:
  2055  		ft.modLimit(true, v, v.Args[0], v.Args[1])
  2056  	case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  2057  		ft.modLimit(false, v, v.Args[0], v.Args[1])
  2058  
  2059  	case OpPhi:
  2060  		// Compute the union of all the input phis.
  2061  		// Often this will convey no information, because the block
  2062  		// is not dominated by its predecessors and hence the
  2063  		// phi arguments might not have been processed yet. But if
  2064  		// the values are declared earlier, it may help. e.g., for
  2065  		//    v = phi(c3, c5)
  2066  		// where c3 = OpConst [3] and c5 = OpConst [5] are
  2067  		// defined in the entry block, we can derive [3,5]
  2068  		// as the limit for v.
  2069  		l := ft.limits[v.Args[0].ID]
  2070  		for _, a := range v.Args[1:] {
  2071  			l2 := ft.limits[a.ID]
  2072  			l.min = min(l.min, l2.min)
  2073  			l.max = max(l.max, l2.max)
  2074  			l.umin = min(l.umin, l2.umin)
  2075  			l.umax = max(l.umax, l2.umax)
  2076  		}
  2077  		ft.newLimit(v, l)
  2078  	}
  2079  }
  2080  
  2081  // detectSliceLenRelation matches the pattern where
  2082  //  1. v := slicelen - index, OR v := slicecap - index
  2083  //     AND
  2084  //  2. index <= slicelen - K
  2085  //     THEN
  2086  //
  2087  // slicecap - index >= slicelen - index >= K
  2088  //
  2089  // Note that "index" is not used for indexing in this pattern, but
  2090  // in the motivating example (chunked slice iteration) it is.
  2091  func (ft *factsTable) detectSliceLenRelation(v *Value) {
  2092  	if v.Op != OpSub64 {
  2093  		return
  2094  	}
  2095  
  2096  	if !(v.Args[0].Op == OpSliceLen || v.Args[0].Op == OpStringLen || v.Args[0].Op == OpSliceCap) {
  2097  		return
  2098  	}
  2099  
  2100  	index := v.Args[1]
  2101  	if !ft.isNonNegative(index) {
  2102  		return
  2103  	}
  2104  	slice := v.Args[0].Args[0]
  2105  
  2106  	for o := ft.orderings[index.ID]; o != nil; o = o.next {
  2107  		if o.d != signed {
  2108  			continue
  2109  		}
  2110  		or := o.r
  2111  		if or != lt && or != lt|eq {
  2112  			continue
  2113  		}
  2114  		ow := o.w
  2115  		if ow.Op != OpAdd64 && ow.Op != OpSub64 {
  2116  			continue
  2117  		}
  2118  		var lenOffset *Value
  2119  		if bound := ow.Args[0]; (bound.Op == OpSliceLen || bound.Op == OpStringLen) && bound.Args[0] == slice {
  2120  			lenOffset = ow.Args[1]
  2121  		} else if bound := ow.Args[1]; (bound.Op == OpSliceLen || bound.Op == OpStringLen) && bound.Args[0] == slice {
  2122  			// Do not infer K - slicelen, see issue #76709.
  2123  			if ow.Op == OpAdd64 {
  2124  				lenOffset = ow.Args[0]
  2125  			}
  2126  		}
  2127  		if lenOffset == nil || lenOffset.Op != OpConst64 {
  2128  			continue
  2129  		}
  2130  		K := lenOffset.AuxInt
  2131  		if ow.Op == OpAdd64 {
  2132  			K = -K
  2133  		}
  2134  		if K < 0 {
  2135  			continue
  2136  		}
  2137  		if or == lt {
  2138  			K++
  2139  		}
  2140  		if K < 0 { // We hate thinking about overflow
  2141  			continue
  2142  		}
  2143  		ft.signedMin(v, K)
  2144  	}
  2145  }
  2146  
  2147  // v must be Sub{64,32,16,8}.
  2148  func (ft *factsTable) detectSubRelations(v *Value) {
  2149  	// v = x-y
  2150  	x := v.Args[0]
  2151  	y := v.Args[1]
  2152  	if x == y {
  2153  		ft.signedMinMax(v, 0, 0)
  2154  		return
  2155  	}
  2156  	xLim := ft.limits[x.ID]
  2157  	yLim := ft.limits[y.ID]
  2158  
  2159  	// Check if we might wrap around. If so, give up.
  2160  	width := uint(v.Type.Size()) * 8
  2161  	if _, ok := safeSub(xLim.min, yLim.max, width); !ok {
  2162  		return // x-y might underflow
  2163  	}
  2164  	if _, ok := safeSub(xLim.max, yLim.min, width); !ok {
  2165  		return // x-y might overflow
  2166  	}
  2167  
  2168  	// Subtracting a positive number only makes
  2169  	// things smaller.
  2170  	if yLim.min >= 0 {
  2171  		ft.update(v.Block, v, x, signed, lt|eq)
  2172  		// TODO: is this worth it?
  2173  		//if yLim.min > 0 {
  2174  		//	ft.update(v.Block, v, x, signed, lt)
  2175  		//}
  2176  	}
  2177  
  2178  	// Subtracting a number from a bigger one
  2179  	// can't go below 0.
  2180  	if ft.orderS.OrderedOrEqual(y, x) {
  2181  		ft.setNonNegative(v)
  2182  		// TODO: is this worth it?
  2183  		//if ft.orderS.Ordered(y, x) {
  2184  		//	ft.signedMin(v, 1)
  2185  		//}
  2186  	}
  2187  }
  2188  
  2189  // x%d has been rewritten to x - (x/d)*d.
  2190  func (ft *factsTable) detectMod(v *Value) {
  2191  	var opDiv, opDivU, opMul, opConst Op
  2192  	switch v.Op {
  2193  	case OpSub64:
  2194  		opDiv = OpDiv64
  2195  		opDivU = OpDiv64u
  2196  		opMul = OpMul64
  2197  		opConst = OpConst64
  2198  	case OpSub32:
  2199  		opDiv = OpDiv32
  2200  		opDivU = OpDiv32u
  2201  		opMul = OpMul32
  2202  		opConst = OpConst32
  2203  	case OpSub16:
  2204  		opDiv = OpDiv16
  2205  		opDivU = OpDiv16u
  2206  		opMul = OpMul16
  2207  		opConst = OpConst16
  2208  	case OpSub8:
  2209  		opDiv = OpDiv8
  2210  		opDivU = OpDiv8u
  2211  		opMul = OpMul8
  2212  		opConst = OpConst8
  2213  	}
  2214  
  2215  	mul := v.Args[1]
  2216  	if mul.Op != opMul {
  2217  		return
  2218  	}
  2219  	div, con := mul.Args[0], mul.Args[1]
  2220  	if div.Op == opConst {
  2221  		div, con = con, div
  2222  	}
  2223  	if con.Op != opConst || (div.Op != opDiv && div.Op != opDivU) || div.Args[0] != v.Args[0] || div.Args[1].Op != opConst || div.Args[1].AuxInt != con.AuxInt {
  2224  		return
  2225  	}
  2226  	ft.modLimit(div.Op == opDiv, v, v.Args[0], con)
  2227  }
  2228  
  2229  // modLimit sets v with facts derived from v = p % q.
  2230  func (ft *factsTable) modLimit(signed bool, v, p, q *Value) {
  2231  	a := ft.limits[p.ID]
  2232  	b := ft.limits[q.ID]
  2233  	if signed {
  2234  		if a.min < 0 && b.min > 0 {
  2235  			ft.signedMinMax(v, -(b.max - 1), b.max-1)
  2236  			return
  2237  		}
  2238  		if !(a.nonnegative() && b.nonnegative()) {
  2239  			// TODO: we could handle signed limits but I didn't bother.
  2240  			return
  2241  		}
  2242  		if a.min >= 0 && b.min > 0 {
  2243  			ft.setNonNegative(v)
  2244  		}
  2245  	}
  2246  	// Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit.
  2247  	ft.unsignedMax(v, min(a.umax, b.umax-1))
  2248  }
  2249  
  2250  // getBranch returns the range restrictions added by p
  2251  // when reaching b. p is the immediate dominator of b.
  2252  func getBranch(sdom SparseTree, p *Block, b *Block) branch {
  2253  	if p == nil {
  2254  		return unknown
  2255  	}
  2256  	switch p.Kind {
  2257  	case BlockIf:
  2258  		// If p and p.Succs[0] are dominators it means that every path
  2259  		// from entry to b passes through p and p.Succs[0]. We care that
  2260  		// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
  2261  		// has one predecessor then (apart from the degenerate case),
  2262  		// there is no path from entry that can reach b through p.Succs[1].
  2263  		// TODO: how about p->yes->b->yes, i.e. a loop in yes.
  2264  		if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 {
  2265  			return positive
  2266  		}
  2267  		if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 {
  2268  			return negative
  2269  		}
  2270  	case BlockJumpTable:
  2271  		// TODO: this loop can lead to quadratic behavior, as
  2272  		// getBranch can be called len(p.Succs) times.
  2273  		for i, e := range p.Succs {
  2274  			if sdom.IsAncestorEq(e.b, b) && len(e.b.Preds) == 1 {
  2275  				return jumpTable0 + branch(i)
  2276  			}
  2277  		}
  2278  	}
  2279  	return unknown
  2280  }
  2281  
  2282  // addIndVarRestrictions updates the factsTables ft with the facts
  2283  // learned from the induction variable indVar which drives the loop
  2284  // starting in Block b.
  2285  func addIndVarRestrictions(ft *factsTable, b *Block, iv indVar) {
  2286  	d := signed
  2287  	if ft.isNonNegative(iv.min) && ft.isNonNegative(iv.max) {
  2288  		d |= unsigned
  2289  	}
  2290  
  2291  	if iv.flags&indVarMinExc == 0 {
  2292  		addRestrictions(b, ft, d, iv.min, iv.ind, lt|eq)
  2293  	} else {
  2294  		addRestrictions(b, ft, d, iv.min, iv.ind, lt)
  2295  	}
  2296  
  2297  	if iv.flags&indVarMaxInc == 0 {
  2298  		addRestrictions(b, ft, d, iv.ind, iv.max, lt)
  2299  	} else {
  2300  		addRestrictions(b, ft, d, iv.ind, iv.max, lt|eq)
  2301  	}
  2302  }
  2303  
  2304  // addBranchRestrictions updates the factsTables ft with the facts learned when
  2305  // branching from Block b in direction br.
  2306  func addBranchRestrictions(ft *factsTable, b *Block, br branch) {
  2307  	c := b.Controls[0]
  2308  	switch {
  2309  	case br == negative:
  2310  		ft.booleanFalse(c)
  2311  	case br == positive:
  2312  		ft.booleanTrue(c)
  2313  	case br >= jumpTable0:
  2314  		idx := br - jumpTable0
  2315  		val := int64(idx)
  2316  		if v, off := isConstDelta(c); v != nil {
  2317  			// Establish the bound on the underlying value we're switching on,
  2318  			// not on the offset-ed value used as the jump table index.
  2319  			c = v
  2320  			val -= off
  2321  		}
  2322  		ft.newLimit(c, limit{min: val, max: val, umin: uint64(val), umax: uint64(val)})
  2323  	default:
  2324  		panic("unknown branch")
  2325  	}
  2326  }
  2327  
  2328  // addRestrictions updates restrictions from the immediate
  2329  // dominating block (p) using r.
  2330  func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r relation) {
  2331  	if t == 0 {
  2332  		// Trivial case: nothing to do.
  2333  		// Should not happen, but just in case.
  2334  		return
  2335  	}
  2336  	for i := domain(1); i <= t; i <<= 1 {
  2337  		if t&i == 0 {
  2338  			continue
  2339  		}
  2340  		ft.update(parent, v, w, i, r)
  2341  	}
  2342  }
  2343  
  2344  func unsignedAddOverflows(a, b uint64, t *types.Type) bool {
  2345  	switch t.Size() {
  2346  	case 8:
  2347  		return a+b < a
  2348  	case 4:
  2349  		return a+b > math.MaxUint32
  2350  	case 2:
  2351  		return a+b > math.MaxUint16
  2352  	case 1:
  2353  		return a+b > math.MaxUint8
  2354  	default:
  2355  		panic("unreachable")
  2356  	}
  2357  }
  2358  
  2359  func signedAddOverflowsOrUnderflows(a, b int64, t *types.Type) bool {
  2360  	r := a + b
  2361  	switch t.Size() {
  2362  	case 8:
  2363  		return (a >= 0 && b >= 0 && r < 0) || (a < 0 && b < 0 && r >= 0)
  2364  	case 4:
  2365  		return r < math.MinInt32 || math.MaxInt32 < r
  2366  	case 2:
  2367  		return r < math.MinInt16 || math.MaxInt16 < r
  2368  	case 1:
  2369  		return r < math.MinInt8 || math.MaxInt8 < r
  2370  	default:
  2371  		panic("unreachable")
  2372  	}
  2373  }
  2374  
  2375  func unsignedSubUnderflows(a, b uint64) bool {
  2376  	return a < b
  2377  }
  2378  
  2379  // checkForChunkedIndexBounds looks for index expressions of the form
  2380  // A[i+delta] where delta < K and i <= len(A)-K.  That is, this is a chunked
  2381  // iteration where the index is not directly compared to the length.
  2382  // if isReslice, then delta can be equal to K.
  2383  func checkForChunkedIndexBounds(ft *factsTable, b *Block, index, bound *Value, isReslice bool) bool {
  2384  	if bound.Op != OpSliceLen && bound.Op != OpStringLen && bound.Op != OpSliceCap {
  2385  		return false
  2386  	}
  2387  
  2388  	// this is a slice bounds check against len or capacity,
  2389  	// and refers back to a prior check against length, which
  2390  	// will also work for the cap since that is not smaller
  2391  	// than the length.
  2392  
  2393  	slice := bound.Args[0]
  2394  	lim := ft.limits[index.ID]
  2395  	if lim.min < 0 {
  2396  		return false
  2397  	}
  2398  	i, delta := isConstDelta(index)
  2399  	if i == nil {
  2400  		return false
  2401  	}
  2402  	if delta < 0 {
  2403  		return false
  2404  	}
  2405  	// special case for blocked iteration over a slice.
  2406  	// slicelen > i + delta && <==== if clauses above
  2407  	// && index >= 0           <==== if clause above
  2408  	// delta >= 0 &&           <==== if clause above
  2409  	// slicelen-K >/>= x       <==== checked below
  2410  	// && K >=/> delta         <==== checked below
  2411  	// then v > w
  2412  	// example: i <=/< len - 4/3 means i+{0,1,2,3} are legal indices
  2413  	for o := ft.orderings[i.ID]; o != nil; o = o.next {
  2414  		if o.d != signed {
  2415  			continue
  2416  		}
  2417  		if ow := o.w; ow.Op == OpAdd64 {
  2418  			var lenOffset *Value
  2419  			if bound := ow.Args[0]; (bound.Op == OpSliceLen || bound.Op == OpStringLen) && bound.Args[0] == slice {
  2420  				lenOffset = ow.Args[1]
  2421  			} else if bound := ow.Args[1]; (bound.Op == OpSliceLen || bound.Op == OpStringLen) && bound.Args[0] == slice {
  2422  				lenOffset = ow.Args[0]
  2423  			}
  2424  			if lenOffset == nil || lenOffset.Op != OpConst64 {
  2425  				continue
  2426  			}
  2427  			if K := -lenOffset.AuxInt; K >= 0 {
  2428  				or := o.r
  2429  				if isReslice {
  2430  					K++
  2431  				}
  2432  				if or == lt {
  2433  					or = lt | eq
  2434  					K++
  2435  				}
  2436  				if K < 0 { // We hate thinking about overflow
  2437  					continue
  2438  				}
  2439  
  2440  				if delta < K && or == lt|eq {
  2441  					return true
  2442  				}
  2443  			}
  2444  		}
  2445  	}
  2446  	return false
  2447  }
  2448  
  2449  func addLocalFacts(ft *factsTable, b *Block) {
  2450  	ft.topoSortValuesInBlock(b)
  2451  
  2452  	for _, v := range b.Values {
  2453  		// Propagate constant ranges before relative relations to get
  2454  		// the most up-to-date constant bounds for isNonNegative calls.
  2455  		ft.flowLimit(v)
  2456  
  2457  		switch v.Op {
  2458  		case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2459  			x := ft.limits[v.Args[0].ID]
  2460  			y := ft.limits[v.Args[1].ID]
  2461  			if !unsignedAddOverflows(x.umax, y.umax, v.Type) {
  2462  				r := gt
  2463  				if x.maybeZero() {
  2464  					r |= eq
  2465  				}
  2466  				ft.update(b, v, v.Args[1], unsigned, r)
  2467  				r = gt
  2468  				if y.maybeZero() {
  2469  					r |= eq
  2470  				}
  2471  				ft.update(b, v, v.Args[0], unsigned, r)
  2472  			}
  2473  			if x.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2474  				r := gt
  2475  				if x.maybeZero() {
  2476  					r |= eq
  2477  				}
  2478  				ft.update(b, v, v.Args[1], signed, r)
  2479  			}
  2480  			if y.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2481  				r := gt
  2482  				if y.maybeZero() {
  2483  					r |= eq
  2484  				}
  2485  				ft.update(b, v, v.Args[0], signed, r)
  2486  			}
  2487  			if x.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2488  				r := lt
  2489  				if x.maybeZero() {
  2490  					r |= eq
  2491  				}
  2492  				ft.update(b, v, v.Args[1], signed, r)
  2493  			}
  2494  			if y.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2495  				r := lt
  2496  				if y.maybeZero() {
  2497  					r |= eq
  2498  				}
  2499  				ft.update(b, v, v.Args[0], signed, r)
  2500  			}
  2501  		case OpSub64, OpSub32, OpSub16, OpSub8:
  2502  			x := ft.limits[v.Args[0].ID]
  2503  			y := ft.limits[v.Args[1].ID]
  2504  			if !unsignedSubUnderflows(x.umin, y.umax) {
  2505  				r := lt
  2506  				if y.maybeZero() {
  2507  					r |= eq
  2508  				}
  2509  				ft.update(b, v, v.Args[0], unsigned, r)
  2510  			}
  2511  			// FIXME: we could also do signed facts but the overflow checks are much trickier and I don't need it yet.
  2512  		case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2513  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2514  			ft.update(b, v, v.Args[1], unsigned, lt|eq)
  2515  			if ft.isNonNegative(v.Args[0]) {
  2516  				ft.update(b, v, v.Args[0], signed, lt|eq)
  2517  			}
  2518  			if ft.isNonNegative(v.Args[1]) {
  2519  				ft.update(b, v, v.Args[1], signed, lt|eq)
  2520  			}
  2521  		case OpOr64, OpOr32, OpOr16, OpOr8:
  2522  			// TODO: investigate how to always add facts without much slowdown, see issue #57959
  2523  			//ft.update(b, v, v.Args[0], unsigned, gt|eq)
  2524  			//ft.update(b, v, v.Args[1], unsigned, gt|eq)
  2525  		case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  2526  			if !ft.isNonNegative(v.Args[1]) {
  2527  				break
  2528  			}
  2529  			fallthrough
  2530  		case OpRsh8x64, OpRsh8x32, OpRsh8x16, OpRsh8x8,
  2531  			OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
  2532  			OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
  2533  			OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8:
  2534  			if !ft.isNonNegative(v.Args[0]) {
  2535  				break
  2536  			}
  2537  			fallthrough
  2538  		case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  2539  			OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  2540  			OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2541  			OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2542  			OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8:
  2543  			switch add := v.Args[0]; add.Op {
  2544  			// round-up division pattern; given:
  2545  			// v = (x + y) / z
  2546  			// if y < z then v <= x
  2547  			case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2548  				z := v.Args[1]
  2549  				zl := ft.limits[z.ID]
  2550  				var uminDivisor uint64
  2551  				switch v.Op {
  2552  				case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  2553  					OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  2554  					uminDivisor = zl.umin
  2555  				case OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  2556  					OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2557  					OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2558  					OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8,
  2559  					OpRsh8x64, OpRsh8x32, OpRsh8x16, OpRsh8x8,
  2560  					OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
  2561  					OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
  2562  					OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8:
  2563  					uminDivisor = 1 << zl.umin
  2564  				default:
  2565  					panic("unreachable")
  2566  				}
  2567  
  2568  				x := add.Args[0]
  2569  				xl := ft.limits[x.ID]
  2570  				y := add.Args[1]
  2571  				yl := ft.limits[y.ID]
  2572  				if !unsignedAddOverflows(xl.umax, yl.umax, add.Type) {
  2573  					if xl.umax < uminDivisor {
  2574  						ft.update(b, v, y, unsigned, lt|eq)
  2575  					}
  2576  					if yl.umax < uminDivisor {
  2577  						ft.update(b, v, x, unsigned, lt|eq)
  2578  					}
  2579  				}
  2580  			}
  2581  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2582  		case OpMod64, OpMod32, OpMod16, OpMod8:
  2583  			if !ft.isNonNegative(v.Args[0]) || !ft.isNonNegative(v.Args[1]) {
  2584  				break
  2585  			}
  2586  			fallthrough
  2587  		case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  2588  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2589  			// Note: we have to be careful that this doesn't imply
  2590  			// that the modulus is >0, which isn't true until *after*
  2591  			// the mod instruction executes (and thus panics if the
  2592  			// modulus is 0). See issue 67625.
  2593  			ft.update(b, v, v.Args[1], unsigned, lt)
  2594  		case OpStringLen:
  2595  			if v.Args[0].Op == OpStringMake {
  2596  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2597  			}
  2598  		case OpSliceLen:
  2599  			if v.Args[0].Op == OpSliceMake {
  2600  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2601  			}
  2602  		case OpSliceCap:
  2603  			if v.Args[0].Op == OpSliceMake {
  2604  				ft.update(b, v, v.Args[0].Args[2], signed, eq)
  2605  			}
  2606  		case OpIsInBounds:
  2607  			if checkForChunkedIndexBounds(ft, b, v.Args[0], v.Args[1], false) {
  2608  				if b.Func.pass.debug > 0 {
  2609  					b.Func.Warnl(v.Pos, "Proved %s for blocked indexing", v.Op)
  2610  				}
  2611  				ft.booleanTrue(v)
  2612  			}
  2613  		case OpIsSliceInBounds:
  2614  			if checkForChunkedIndexBounds(ft, b, v.Args[0], v.Args[1], true) {
  2615  				if b.Func.pass.debug > 0 {
  2616  					b.Func.Warnl(v.Pos, "Proved %s for blocked reslicing", v.Op)
  2617  				}
  2618  				ft.booleanTrue(v)
  2619  			}
  2620  		case OpPhi:
  2621  			addLocalFactsPhi(ft, v)
  2622  		}
  2623  	}
  2624  }
  2625  
  2626  func addLocalFactsPhi(ft *factsTable, v *Value) {
  2627  	// Look for phis that implement min/max.
  2628  	//   z:
  2629  	//      c = Less64 x y (or other Less/Leq operation)
  2630  	//      If c -> bx by
  2631  	//   bx: <- z
  2632  	//       -> b ...
  2633  	//   by: <- z
  2634  	//      -> b ...
  2635  	//   b: <- bx by
  2636  	//      v = Phi x y
  2637  	// Then v is either min or max of x,y.
  2638  	// If it is the min, then we deduce v <= x && v <= y.
  2639  	// If it is the max, then we deduce v >= x && v >= y.
  2640  	// The min case is useful for the copy builtin, see issue 16833.
  2641  	if len(v.Args) != 2 {
  2642  		return
  2643  	}
  2644  	b := v.Block
  2645  	x := v.Args[0]
  2646  	y := v.Args[1]
  2647  	bx := b.Preds[0].b
  2648  	by := b.Preds[1].b
  2649  	var z *Block // branch point
  2650  	switch {
  2651  	case bx == by: // bx == by == z case
  2652  		z = bx
  2653  	case by.uniquePred() == bx: // bx == z case
  2654  		z = bx
  2655  	case bx.uniquePred() == by: // by == z case
  2656  		z = by
  2657  	case bx.uniquePred() == by.uniquePred():
  2658  		z = bx.uniquePred()
  2659  	}
  2660  	if z == nil || z.Kind != BlockIf {
  2661  		return
  2662  	}
  2663  	c := z.Controls[0]
  2664  	if len(c.Args) != 2 {
  2665  		return
  2666  	}
  2667  	var isMin bool // if c, a less-than comparison, is true, phi chooses x.
  2668  	if bx == z {
  2669  		isMin = b.Preds[0].i == 0
  2670  	} else {
  2671  		isMin = bx.Preds[0].i == 0
  2672  	}
  2673  	if c.Args[0] == x && c.Args[1] == y {
  2674  		// ok
  2675  	} else if c.Args[0] == y && c.Args[1] == x {
  2676  		// Comparison is reversed from how the values are listed in the Phi.
  2677  		isMin = !isMin
  2678  	} else {
  2679  		// Not comparing x and y.
  2680  		return
  2681  	}
  2682  	var dom domain
  2683  	switch c.Op {
  2684  	case OpLess64, OpLess32, OpLess16, OpLess8, OpLeq64, OpLeq32, OpLeq16, OpLeq8:
  2685  		dom = signed
  2686  	case OpLess64U, OpLess32U, OpLess16U, OpLess8U, OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
  2687  		dom = unsigned
  2688  	default:
  2689  		return
  2690  	}
  2691  	var rel relation
  2692  	if isMin {
  2693  		rel = lt | eq
  2694  	} else {
  2695  		rel = gt | eq
  2696  	}
  2697  	ft.update(b, v, x, dom, rel)
  2698  	ft.update(b, v, y, dom, rel)
  2699  }
  2700  
  2701  var ctzNonZeroOp = map[Op]Op{
  2702  	OpCtz8:  OpCtz8NonZero,
  2703  	OpCtz16: OpCtz16NonZero,
  2704  	OpCtz32: OpCtz32NonZero,
  2705  	OpCtz64: OpCtz64NonZero,
  2706  }
  2707  var mostNegativeDividend = map[Op]int64{
  2708  	OpDiv16: -1 << 15,
  2709  	OpMod16: -1 << 15,
  2710  	OpDiv32: -1 << 31,
  2711  	OpMod32: -1 << 31,
  2712  	OpDiv64: -1 << 63,
  2713  	OpMod64: -1 << 63,
  2714  }
  2715  var unsignedOp = map[Op]Op{
  2716  	OpDiv8:     OpDiv8u,
  2717  	OpDiv16:    OpDiv16u,
  2718  	OpDiv32:    OpDiv32u,
  2719  	OpDiv64:    OpDiv64u,
  2720  	OpMod8:     OpMod8u,
  2721  	OpMod16:    OpMod16u,
  2722  	OpMod32:    OpMod32u,
  2723  	OpMod64:    OpMod64u,
  2724  	OpRsh8x8:   OpRsh8Ux8,
  2725  	OpRsh8x16:  OpRsh8Ux16,
  2726  	OpRsh8x32:  OpRsh8Ux32,
  2727  	OpRsh8x64:  OpRsh8Ux64,
  2728  	OpRsh16x8:  OpRsh16Ux8,
  2729  	OpRsh16x16: OpRsh16Ux16,
  2730  	OpRsh16x32: OpRsh16Ux32,
  2731  	OpRsh16x64: OpRsh16Ux64,
  2732  	OpRsh32x8:  OpRsh32Ux8,
  2733  	OpRsh32x16: OpRsh32Ux16,
  2734  	OpRsh32x32: OpRsh32Ux32,
  2735  	OpRsh32x64: OpRsh32Ux64,
  2736  	OpRsh64x8:  OpRsh64Ux8,
  2737  	OpRsh64x16: OpRsh64Ux16,
  2738  	OpRsh64x32: OpRsh64Ux32,
  2739  	OpRsh64x64: OpRsh64Ux64,
  2740  }
  2741  
  2742  var bytesizeToConst = [...]Op{
  2743  	8 / 8:  OpConst8,
  2744  	16 / 8: OpConst16,
  2745  	32 / 8: OpConst32,
  2746  	64 / 8: OpConst64,
  2747  }
  2748  var bytesizeToNeq = [...]Op{
  2749  	8 / 8:  OpNeq8,
  2750  	16 / 8: OpNeq16,
  2751  	32 / 8: OpNeq32,
  2752  	64 / 8: OpNeq64,
  2753  }
  2754  var bytesizeToAnd = [...]Op{
  2755  	8 / 8:  OpAnd8,
  2756  	16 / 8: OpAnd16,
  2757  	32 / 8: OpAnd32,
  2758  	64 / 8: OpAnd64,
  2759  }
  2760  
  2761  // simplifyBlock simplifies some constant values in b and evaluates
  2762  // branches to non-uniquely dominated successors of b.
  2763  func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
  2764  	for iv, v := range b.Values {
  2765  		switch v.Op {
  2766  		case OpStaticLECall:
  2767  			if b.Func.pass.debug > 0 && len(v.Args) == 2 {
  2768  				fn := auxToCall(v.Aux).Fn
  2769  				if fn != nil && strings.Contains(fn.String(), "prove") {
  2770  					// Print bounds of any argument to single-arg function with "prove" in name,
  2771  					// for debugging and especially for test/prove.go.
  2772  					// (v.Args[1] is mem).
  2773  					x := v.Args[0]
  2774  					b.Func.Warnl(v.Pos, "Proved %v (%v)", ft.limits[x.ID], x)
  2775  				}
  2776  			}
  2777  		case OpSlicemask:
  2778  			// Replace OpSlicemask operations in b with constants where possible.
  2779  			cap := v.Args[0]
  2780  			x, delta := isConstDelta(cap)
  2781  			if x != nil {
  2782  				// slicemask(x + y)
  2783  				// if x is larger than -y (y is negative), then slicemask is -1.
  2784  				lim := ft.limits[x.ID]
  2785  				if lim.umin > uint64(-delta) {
  2786  					if cap.Op == OpAdd64 {
  2787  						v.reset(OpConst64)
  2788  					} else {
  2789  						v.reset(OpConst32)
  2790  					}
  2791  					if b.Func.pass.debug > 0 {
  2792  						b.Func.Warnl(v.Pos, "Proved slicemask not needed")
  2793  					}
  2794  					v.AuxInt = -1
  2795  				}
  2796  				break
  2797  			}
  2798  			lim := ft.limits[cap.ID]
  2799  			if lim.umin > 0 {
  2800  				if cap.Type.Size() == 8 {
  2801  					v.reset(OpConst64)
  2802  				} else {
  2803  					v.reset(OpConst32)
  2804  				}
  2805  				if b.Func.pass.debug > 0 {
  2806  					b.Func.Warnl(v.Pos, "Proved slicemask not needed (by limit)")
  2807  				}
  2808  				v.AuxInt = -1
  2809  			}
  2810  
  2811  		case OpCtz8, OpCtz16, OpCtz32, OpCtz64:
  2812  			// On some architectures, notably amd64, we can generate much better
  2813  			// code for CtzNN if we know that the argument is non-zero.
  2814  			// Capture that information here for use in arch-specific optimizations.
  2815  			x := v.Args[0]
  2816  			lim := ft.limits[x.ID]
  2817  			if lim.umin > 0 || lim.min > 0 || lim.max < 0 {
  2818  				if b.Func.pass.debug > 0 {
  2819  					b.Func.Warnl(v.Pos, "Proved %v non-zero", v.Op)
  2820  				}
  2821  				v.Op = ctzNonZeroOp[v.Op]
  2822  			}
  2823  		case OpRsh8x8, OpRsh8x16, OpRsh8x32, OpRsh8x64,
  2824  			OpRsh16x8, OpRsh16x16, OpRsh16x32, OpRsh16x64,
  2825  			OpRsh32x8, OpRsh32x16, OpRsh32x32, OpRsh32x64,
  2826  			OpRsh64x8, OpRsh64x16, OpRsh64x32, OpRsh64x64:
  2827  			if ft.isNonNegative(v.Args[0]) {
  2828  				if b.Func.pass.debug > 0 {
  2829  					b.Func.Warnl(v.Pos, "Proved %v is unsigned", v.Op)
  2830  				}
  2831  				v.Op = unsignedOp[v.Op]
  2832  			}
  2833  			fallthrough
  2834  		case OpLsh8x8, OpLsh8x16, OpLsh8x32, OpLsh8x64,
  2835  			OpLsh16x8, OpLsh16x16, OpLsh16x32, OpLsh16x64,
  2836  			OpLsh32x8, OpLsh32x16, OpLsh32x32, OpLsh32x64,
  2837  			OpLsh64x8, OpLsh64x16, OpLsh64x32, OpLsh64x64,
  2838  			OpRsh8Ux8, OpRsh8Ux16, OpRsh8Ux32, OpRsh8Ux64,
  2839  			OpRsh16Ux8, OpRsh16Ux16, OpRsh16Ux32, OpRsh16Ux64,
  2840  			OpRsh32Ux8, OpRsh32Ux16, OpRsh32Ux32, OpRsh32Ux64,
  2841  			OpRsh64Ux8, OpRsh64Ux16, OpRsh64Ux32, OpRsh64Ux64:
  2842  			// Check whether, for a << b, we know that b
  2843  			// is strictly less than the number of bits in a.
  2844  			by := v.Args[1]
  2845  			lim := ft.limits[by.ID]
  2846  			bits := 8 * v.Args[0].Type.Size()
  2847  			if lim.umax < uint64(bits) || (lim.max < bits && ft.isNonNegative(by)) {
  2848  				v.AuxInt = 1 // see shiftIsBounded
  2849  				if b.Func.pass.debug > 0 && !by.isGenericIntConst() {
  2850  					b.Func.Warnl(v.Pos, "Proved %v bounded", v.Op)
  2851  				}
  2852  			}
  2853  		case OpDiv8, OpDiv16, OpDiv32, OpDiv64, OpMod8, OpMod16, OpMod32, OpMod64:
  2854  			p, q := ft.limits[v.Args[0].ID], ft.limits[v.Args[1].ID] // p/q
  2855  			if p.nonnegative() && q.nonnegative() {
  2856  				if b.Func.pass.debug > 0 {
  2857  					b.Func.Warnl(v.Pos, "Proved %v is unsigned", v.Op)
  2858  				}
  2859  				v.Op = unsignedOp[v.Op]
  2860  				v.AuxInt = 0
  2861  				break
  2862  			}
  2863  			// Fixup code can be avoided on x86 if we know
  2864  			//  the divisor is not -1 or the dividend > MinIntNN.
  2865  			if v.Op != OpDiv8 && v.Op != OpMod8 && (q.max < -1 || q.min > -1 || p.min > mostNegativeDividend[v.Op]) {
  2866  				// See DivisionNeedsFixUp in rewrite.go.
  2867  				// v.AuxInt = 1 means we have proved that the divisor is not -1
  2868  				// or that the dividend is not the most negative integer,
  2869  				// so we do not need to add fix-up code.
  2870  				if b.Func.pass.debug > 0 {
  2871  					b.Func.Warnl(v.Pos, "Proved %v does not need fix-up", v.Op)
  2872  				}
  2873  				// Only usable on amd64 and 386, and only for ≥ 16-bit ops.
  2874  				// Don't modify AuxInt on other architectures, as that can interfere with CSE.
  2875  				// (Print the debug info above always, so that test/prove.go can be
  2876  				// checked on non-x86 systems.)
  2877  				// TODO: add other architectures?
  2878  				if b.Func.Config.arch == "386" || b.Func.Config.arch == "amd64" {
  2879  					v.AuxInt = 1
  2880  				}
  2881  			}
  2882  		case OpMul64, OpMul32, OpMul16, OpMul8:
  2883  			if vl := ft.limits[v.ID]; vl.min == vl.max || vl.umin == vl.umax {
  2884  				// v is going to be constant folded away; don't "optimize" it.
  2885  				break
  2886  			}
  2887  			x := v.Args[0]
  2888  			xl := ft.limits[x.ID]
  2889  			y := v.Args[1]
  2890  			yl := ft.limits[y.ID]
  2891  			if xl.umin == xl.umax && isUnsignedPowerOfTwo(xl.umin) ||
  2892  				xl.min == xl.max && isPowerOfTwo(xl.min) ||
  2893  				yl.umin == yl.umax && isUnsignedPowerOfTwo(yl.umin) ||
  2894  				yl.min == yl.max && isPowerOfTwo(yl.min) {
  2895  				// 0,1 * a power of two is better done as a shift
  2896  				break
  2897  			}
  2898  			switch xOne, yOne := xl.umax <= 1, yl.umax <= 1; {
  2899  			case xOne && yOne:
  2900  				v.Op = bytesizeToAnd[v.Type.Size()]
  2901  				if b.Func.pass.debug > 0 {
  2902  					b.Func.Warnl(v.Pos, "Rewrote Mul %v into And", v)
  2903  				}
  2904  			case yOne && b.Func.Config.haveCondSelect:
  2905  				x, y = y, x
  2906  				fallthrough
  2907  			case xOne && b.Func.Config.haveCondSelect:
  2908  				if !canCondSelect(v, b.Func.Config.arch, nil) {
  2909  					break
  2910  				}
  2911  				zero := b.Func.constVal(bytesizeToConst[v.Type.Size()], v.Type, 0, true)
  2912  				ft.initLimitForNewValue(zero)
  2913  				check := b.NewValue2(v.Pos, bytesizeToNeq[v.Type.Size()], types.Types[types.TBOOL], zero, x)
  2914  				ft.initLimitForNewValue(check)
  2915  				v.reset(OpCondSelect)
  2916  				v.AddArg3(y, zero, check)
  2917  
  2918  				// FIXME: workaround for go.dev/issues/76060
  2919  				// we need to schedule the Neq before the CondSelect even tho
  2920  				// scheduling is meaningless until we reach the schedule pass.
  2921  				if b.Values[len(b.Values)-1] != check {
  2922  					panic("unreachable; failed sanity check, new value isn't at the end of the block")
  2923  				}
  2924  				b.Values[iv], b.Values[len(b.Values)-1] = b.Values[len(b.Values)-1], b.Values[iv]
  2925  
  2926  				if b.Func.pass.debug > 0 {
  2927  					b.Func.Warnl(v.Pos, "Rewrote Mul %v into CondSelect; %v is bool", v, x)
  2928  				}
  2929  			}
  2930  		}
  2931  
  2932  		// Fold provable constant results.
  2933  		// Helps in cases where we reuse a value after branching on its equality.
  2934  		for i, arg := range v.Args {
  2935  			lim := ft.limits[arg.ID]
  2936  			var constValue int64
  2937  			switch {
  2938  			case lim.min == lim.max:
  2939  				constValue = lim.min
  2940  			case lim.umin == lim.umax:
  2941  				constValue = int64(lim.umin)
  2942  			default:
  2943  				continue
  2944  			}
  2945  			switch arg.Op {
  2946  			case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConstNil:
  2947  				continue
  2948  			}
  2949  			typ := arg.Type
  2950  			f := b.Func
  2951  			var c *Value
  2952  			switch {
  2953  			case typ.IsBoolean():
  2954  				c = f.ConstBool(typ, constValue != 0)
  2955  			case typ.IsInteger() && typ.Size() == 1:
  2956  				c = f.ConstInt8(typ, int8(constValue))
  2957  			case typ.IsInteger() && typ.Size() == 2:
  2958  				c = f.ConstInt16(typ, int16(constValue))
  2959  			case typ.IsInteger() && typ.Size() == 4:
  2960  				c = f.ConstInt32(typ, int32(constValue))
  2961  			case typ.IsInteger() && typ.Size() == 8:
  2962  				c = f.ConstInt64(typ, constValue)
  2963  			case typ.IsPtrShaped():
  2964  				if constValue == 0 {
  2965  					c = f.ConstNil(typ)
  2966  				} else {
  2967  					// Not sure how this might happen, but if it
  2968  					// does, just skip it.
  2969  					continue
  2970  				}
  2971  			default:
  2972  				// Not sure how this might happen, but if it
  2973  				// does, just skip it.
  2974  				continue
  2975  			}
  2976  			v.SetArg(i, c)
  2977  			ft.initLimitForNewValue(c)
  2978  			if b.Func.pass.debug > 1 {
  2979  				b.Func.Warnl(v.Pos, "Proved %v's arg %d (%v) is constant %d", v, i, arg, constValue)
  2980  			}
  2981  		}
  2982  	}
  2983  
  2984  	if b.Kind != BlockIf {
  2985  		return
  2986  	}
  2987  
  2988  	// Consider outgoing edges from this block.
  2989  	parent := b
  2990  	for i, branch := range [...]branch{positive, negative} {
  2991  		child := parent.Succs[i].b
  2992  		if getBranch(sdom, parent, child) != unknown {
  2993  			// For edges to uniquely dominated blocks, we
  2994  			// already did this when we visited the child.
  2995  			continue
  2996  		}
  2997  		// For edges to other blocks, this can trim a branch
  2998  		// even if we couldn't get rid of the child itself.
  2999  		ft.checkpoint()
  3000  		addBranchRestrictions(ft, parent, branch)
  3001  		unsat := ft.unsat
  3002  		ft.restore()
  3003  		if unsat {
  3004  			// This branch is impossible, so remove it
  3005  			// from the block.
  3006  			removeBranch(parent, branch)
  3007  			// No point in considering the other branch.
  3008  			// (It *is* possible for both to be
  3009  			// unsatisfiable since the fact table is
  3010  			// incomplete. We could turn this into a
  3011  			// BlockExit, but it doesn't seem worth it.)
  3012  			break
  3013  		}
  3014  	}
  3015  }
  3016  
  3017  func removeBranch(b *Block, branch branch) {
  3018  	c := b.Controls[0]
  3019  	if b.Func.pass.debug > 0 {
  3020  		verb := "Proved"
  3021  		if branch == positive {
  3022  			verb = "Disproved"
  3023  		}
  3024  		if b.Func.pass.debug > 1 {
  3025  			b.Func.Warnl(b.Pos, "%s %s (%s)", verb, c.Op, c)
  3026  		} else {
  3027  			b.Func.Warnl(b.Pos, "%s %s", verb, c.Op)
  3028  		}
  3029  	}
  3030  	if c != nil && c.Pos.IsStmt() == src.PosIsStmt && c.Pos.SameFileAndLine(b.Pos) {
  3031  		// attempt to preserve statement marker.
  3032  		b.Pos = b.Pos.WithIsStmt()
  3033  	}
  3034  	if branch == positive || branch == negative {
  3035  		b.Kind = BlockFirst
  3036  		b.ResetControls()
  3037  		if branch == positive {
  3038  			b.swapSuccessors()
  3039  		}
  3040  	} else {
  3041  		// TODO: figure out how to remove an entry from a jump table
  3042  	}
  3043  }
  3044  
  3045  // isConstDelta returns non-nil if v is equivalent to w+delta (signed).
  3046  func isConstDelta(v *Value) (w *Value, delta int64) {
  3047  	cop := OpConst64
  3048  	switch v.Op {
  3049  	case OpAdd32, OpSub32:
  3050  		cop = OpConst32
  3051  	case OpAdd16, OpSub16:
  3052  		cop = OpConst16
  3053  	case OpAdd8, OpSub8:
  3054  		cop = OpConst8
  3055  	}
  3056  	switch v.Op {
  3057  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  3058  		if v.Args[0].Op == cop {
  3059  			return v.Args[1], v.Args[0].AuxInt
  3060  		}
  3061  		if v.Args[1].Op == cop {
  3062  			return v.Args[0], v.Args[1].AuxInt
  3063  		}
  3064  	case OpSub64, OpSub32, OpSub16, OpSub8:
  3065  		if v.Args[1].Op == cop {
  3066  			aux := v.Args[1].AuxInt
  3067  			if aux != -aux { // Overflow; too bad
  3068  				return v.Args[0], -aux
  3069  			}
  3070  		}
  3071  	}
  3072  	return nil, 0
  3073  }
  3074  
  3075  // isCleanExt reports whether v is the result of a value-preserving
  3076  // sign or zero extension.
  3077  func isCleanExt(v *Value) bool {
  3078  	switch v.Op {
  3079  	case OpSignExt8to16, OpSignExt8to32, OpSignExt8to64,
  3080  		OpSignExt16to32, OpSignExt16to64, OpSignExt32to64:
  3081  		// signed -> signed is the only value-preserving sign extension
  3082  		return v.Args[0].Type.IsSigned() && v.Type.IsSigned()
  3083  
  3084  	case OpZeroExt8to16, OpZeroExt8to32, OpZeroExt8to64,
  3085  		OpZeroExt16to32, OpZeroExt16to64, OpZeroExt32to64:
  3086  		// unsigned -> signed/unsigned are value-preserving zero extensions
  3087  		return !v.Args[0].Type.IsSigned()
  3088  	}
  3089  	return false
  3090  }
  3091  
  3092  func getDependencyScore(scores []uint, v *Value) (score uint) {
  3093  	if score = scores[v.ID]; score != 0 {
  3094  		return score
  3095  	}
  3096  	defer func() {
  3097  		scores[v.ID] = score
  3098  	}()
  3099  	if v.Op == OpPhi {
  3100  		return 1
  3101  	}
  3102  	score = 2 // NIT(@Jorropo): always order phis first to make GOSSAFUNC pretty.
  3103  	for _, a := range v.Args {
  3104  		if a.Block != v.Block {
  3105  			continue
  3106  		}
  3107  		score = max(score, getDependencyScore(scores, a)+1)
  3108  	}
  3109  	return score
  3110  }
  3111  
  3112  // topoSortValuesInBlock ensure ranging over b.Values visit values before they are being used.
  3113  // It does not consider dependencies with other blocks; thus Phi nodes are considered to not have any dependecies.
  3114  // The result is always determistic and does not depend on the previous slice ordering.
  3115  func (ft *factsTable) topoSortValuesInBlock(b *Block) {
  3116  	f := b.Func
  3117  	want := f.NumValues()
  3118  
  3119  	scores := ft.reusedTopoSortScoresTable
  3120  	if want <= cap(scores) {
  3121  		scores = scores[:want]
  3122  	} else {
  3123  		if cap(scores) > 0 {
  3124  			f.Cache.freeUintSlice(scores)
  3125  		}
  3126  		scores = f.Cache.allocUintSlice(want)
  3127  		ft.reusedTopoSortScoresTable = scores
  3128  	}
  3129  
  3130  	for _, v := range b.Values {
  3131  		scores[v.ID] = 0 // sentinel
  3132  	}
  3133  
  3134  	slices.SortFunc(b.Values, func(a, b *Value) int {
  3135  		dependencyScoreA := getDependencyScore(scores, a)
  3136  		dependencyScoreB := getDependencyScore(scores, b)
  3137  		if dependencyScoreA != dependencyScoreB {
  3138  			return cmp.Compare(dependencyScoreA, dependencyScoreB)
  3139  		}
  3140  		return cmp.Compare(a.ID, b.ID)
  3141  	})
  3142  }
  3143  

View as plain text