Source file src/cmd/compile/internal/ssa/prove.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/src"
    10  	"cmp"
    11  	"fmt"
    12  	"math"
    13  	"math/bits"
    14  	"slices"
    15  	"strings"
    16  )
    17  
    18  type branch int
    19  
    20  const (
    21  	unknown branch = iota
    22  	positive
    23  	negative
    24  	// The outedges from a jump table are jumpTable0,
    25  	// jumpTable0+1, jumpTable0+2, etc. There could be an
    26  	// arbitrary number so we can't list them all here.
    27  	jumpTable0
    28  )
    29  
    30  func (b branch) String() string {
    31  	switch b {
    32  	case unknown:
    33  		return "unk"
    34  	case positive:
    35  		return "pos"
    36  	case negative:
    37  		return "neg"
    38  	default:
    39  		return fmt.Sprintf("jmp%d", b-jumpTable0)
    40  	}
    41  }
    42  
    43  // relation represents the set of possible relations between
    44  // pairs of variables (v, w). Without a priori knowledge the
    45  // mask is lt | eq | gt meaning v can be less than, equal to or
    46  // greater than w. When the execution path branches on the condition
    47  // `v op w` the set of relations is updated to exclude any
    48  // relation not possible due to `v op w` being true (or false).
    49  //
    50  // E.g.
    51  //
    52  //	r := relation(...)
    53  //
    54  //	if v < w {
    55  //	  newR := r & lt
    56  //	}
    57  //	if v >= w {
    58  //	  newR := r & (eq|gt)
    59  //	}
    60  //	if v != w {
    61  //	  newR := r & (lt|gt)
    62  //	}
    63  type relation uint
    64  
    65  const (
    66  	lt relation = 1 << iota
    67  	eq
    68  	gt
    69  )
    70  
    71  var relationStrings = [...]string{
    72  	0: "none", lt: "<", eq: "==", lt | eq: "<=",
    73  	gt: ">", gt | lt: "!=", gt | eq: ">=", gt | eq | lt: "any",
    74  }
    75  
    76  func (r relation) String() string {
    77  	if r < relation(len(relationStrings)) {
    78  		return relationStrings[r]
    79  	}
    80  	return fmt.Sprintf("relation(%d)", uint(r))
    81  }
    82  
    83  // domain represents the domain of a variable pair in which a set
    84  // of relations is known. For example, relations learned for unsigned
    85  // pairs cannot be transferred to signed pairs because the same bit
    86  // representation can mean something else.
    87  type domain uint
    88  
    89  const (
    90  	signed domain = 1 << iota
    91  	unsigned
    92  	pointer
    93  	boolean
    94  )
    95  
    96  var domainStrings = [...]string{
    97  	"signed", "unsigned", "pointer", "boolean",
    98  }
    99  
   100  func (d domain) String() string {
   101  	s := ""
   102  	for i, ds := range domainStrings {
   103  		if d&(1<<uint(i)) != 0 {
   104  			if len(s) != 0 {
   105  				s += "|"
   106  			}
   107  			s += ds
   108  			d &^= 1 << uint(i)
   109  		}
   110  	}
   111  	if d != 0 {
   112  		if len(s) != 0 {
   113  			s += "|"
   114  		}
   115  		s += fmt.Sprintf("0x%x", uint(d))
   116  	}
   117  	return s
   118  }
   119  
   120  // a limit records known upper and lower bounds for a value.
   121  //
   122  // If we have min>max or umin>umax, then this limit is
   123  // called "unsatisfiable". When we encounter such a limit, we
   124  // know that any code for which that limit applies is unreachable.
   125  // We don't particularly care how unsatisfiable limits propagate,
   126  // including becoming satisfiable, because any optimization
   127  // decisions based on those limits only apply to unreachable code.
   128  type limit struct {
   129  	min, max   int64  // min <= value <= max, signed
   130  	umin, umax uint64 // umin <= value <= umax, unsigned
   131  	// For booleans, we use 0==false, 1==true for both ranges
   132  	// For pointers, we use 0,0,0,0 for nil and minInt64,maxInt64,1,maxUint64 for nonnil
   133  }
   134  
   135  func (l limit) String() string {
   136  	return fmt.Sprintf("sm,SM=%d,%d um,UM=%d,%d", l.min, l.max, l.umin, l.umax)
   137  }
   138  
   139  func (l limit) intersect(l2 limit) limit {
   140  	l.min = max(l.min, l2.min)
   141  	l.umin = max(l.umin, l2.umin)
   142  	l.max = min(l.max, l2.max)
   143  	l.umax = min(l.umax, l2.umax)
   144  	return l
   145  }
   146  
   147  func (l limit) signedMin(m int64) limit {
   148  	l.min = max(l.min, m)
   149  	return l
   150  }
   151  
   152  func (l limit) signedMinMax(minimum, maximum int64) limit {
   153  	l.min = max(l.min, minimum)
   154  	l.max = min(l.max, maximum)
   155  	return l
   156  }
   157  
   158  func (l limit) unsignedMin(m uint64) limit {
   159  	l.umin = max(l.umin, m)
   160  	return l
   161  }
   162  func (l limit) unsignedMax(m uint64) limit {
   163  	l.umax = min(l.umax, m)
   164  	return l
   165  }
   166  func (l limit) unsignedMinMax(minimum, maximum uint64) limit {
   167  	l.umin = max(l.umin, minimum)
   168  	l.umax = min(l.umax, maximum)
   169  	return l
   170  }
   171  
   172  func (l limit) nonzero() bool {
   173  	return l.min > 0 || l.umin > 0 || l.max < 0
   174  }
   175  func (l limit) maybeZero() bool {
   176  	return !l.nonzero()
   177  }
   178  func (l limit) nonnegative() bool {
   179  	return l.min >= 0
   180  }
   181  func (l limit) unsat() bool {
   182  	return l.min > l.max || l.umin > l.umax
   183  }
   184  
   185  // If x and y can add without overflow or underflow
   186  // (using b bits), safeAdd returns x+y, true.
   187  // Otherwise, returns 0, false.
   188  func safeAdd(x, y int64, b uint) (int64, bool) {
   189  	s := x + y
   190  	if x >= 0 && y >= 0 && s < 0 {
   191  		return 0, false // 64-bit overflow
   192  	}
   193  	if x < 0 && y < 0 && s >= 0 {
   194  		return 0, false // 64-bit underflow
   195  	}
   196  	if !fitsInBits(s, b) {
   197  		return 0, false
   198  	}
   199  	return s, true
   200  }
   201  
   202  // same as safeAdd for unsigned arithmetic.
   203  func safeAddU(x, y uint64, b uint) (uint64, bool) {
   204  	s := x + y
   205  	if s < x || s < y {
   206  		return 0, false // 64-bit overflow
   207  	}
   208  	if !fitsInBitsU(s, b) {
   209  		return 0, false
   210  	}
   211  	return s, true
   212  }
   213  
   214  // same as safeAdd but for subtraction.
   215  func safeSub(x, y int64, b uint) (int64, bool) {
   216  	if y == math.MinInt64 {
   217  		if x == math.MaxInt64 {
   218  			return 0, false // 64-bit overflow
   219  		}
   220  		x++
   221  		y++
   222  	}
   223  	return safeAdd(x, -y, b)
   224  }
   225  
   226  // same as safeAddU but for subtraction.
   227  func safeSubU(x, y uint64, b uint) (uint64, bool) {
   228  	if x < y {
   229  		return 0, false // 64-bit underflow
   230  	}
   231  	s := x - y
   232  	if !fitsInBitsU(s, b) {
   233  		return 0, false
   234  	}
   235  	return s, true
   236  }
   237  
   238  // fitsInBits reports whether x fits in b bits (signed).
   239  func fitsInBits(x int64, b uint) bool {
   240  	if b == 64 {
   241  		return true
   242  	}
   243  	m := int64(-1) << (b - 1)
   244  	M := -m - 1
   245  	return x >= m && x <= M
   246  }
   247  
   248  // fitsInBitsU reports whether x fits in b bits (unsigned).
   249  func fitsInBitsU(x uint64, b uint) bool {
   250  	return x>>b == 0
   251  }
   252  
   253  func noLimitForBitsize(bitsize uint) limit {
   254  	return limit{min: -(1 << (bitsize - 1)), max: 1<<(bitsize-1) - 1, umin: 0, umax: 1<<bitsize - 1}
   255  }
   256  
   257  func convertIntWithBitsize[Target uint64 | int64, Source uint64 | int64](x Source, bitsize uint) Target {
   258  	switch bitsize {
   259  	case 64:
   260  		return Target(x)
   261  	case 32:
   262  		return Target(int32(x))
   263  	case 16:
   264  		return Target(int16(x))
   265  	case 8:
   266  		return Target(int8(x))
   267  	default:
   268  		panic("unreachable")
   269  	}
   270  }
   271  
   272  // add returns the limit obtained by adding a value with limit l
   273  // to a value with limit l2. The result must fit in b bits.
   274  func (l limit) add(l2 limit, b uint) limit {
   275  	var isLConst, isL2Const bool
   276  	var lConst, l2Const uint64
   277  	if l.min == l.max {
   278  		isLConst = true
   279  		lConst = convertIntWithBitsize[uint64](l.min, b)
   280  	} else if l.umin == l.umax {
   281  		isLConst = true
   282  		lConst = l.umin
   283  	}
   284  	if l2.min == l2.max {
   285  		isL2Const = true
   286  		l2Const = convertIntWithBitsize[uint64](l2.min, b)
   287  	} else if l2.umin == l2.umax {
   288  		isL2Const = true
   289  		l2Const = l2.umin
   290  	}
   291  	if isLConst && isL2Const {
   292  		r := lConst + l2Const
   293  		r &= (uint64(1) << b) - 1
   294  		int64r := convertIntWithBitsize[int64](r, b)
   295  		return limit{min: int64r, max: int64r, umin: r, umax: r}
   296  	}
   297  
   298  	r := noLimit
   299  	min, minOk := safeAdd(l.min, l2.min, b)
   300  	max, maxOk := safeAdd(l.max, l2.max, b)
   301  	if minOk && maxOk {
   302  		r.min = min
   303  		r.max = max
   304  	}
   305  	umin, uminOk := safeAddU(l.umin, l2.umin, b)
   306  	umax, umaxOk := safeAddU(l.umax, l2.umax, b)
   307  	if uminOk && umaxOk {
   308  		r.umin = umin
   309  		r.umax = umax
   310  	}
   311  	return r
   312  }
   313  
   314  // same as add but for subtraction.
   315  func (l limit) sub(l2 limit, b uint) limit {
   316  	r := noLimit
   317  	min, minOk := safeSub(l.min, l2.max, b)
   318  	max, maxOk := safeSub(l.max, l2.min, b)
   319  	if minOk && maxOk {
   320  		r.min = min
   321  		r.max = max
   322  	}
   323  	umin, uminOk := safeSubU(l.umin, l2.umax, b)
   324  	umax, umaxOk := safeSubU(l.umax, l2.umin, b)
   325  	if uminOk && umaxOk {
   326  		r.umin = umin
   327  		r.umax = umax
   328  	}
   329  	return r
   330  }
   331  
   332  // same as add but for multiplication.
   333  func (l limit) mul(l2 limit, b uint) limit {
   334  	r := noLimit
   335  	umaxhi, umaxlo := bits.Mul64(l.umax, l2.umax)
   336  	if umaxhi == 0 && fitsInBitsU(umaxlo, b) {
   337  		r.umax = umaxlo
   338  		r.umin = l.umin * l2.umin
   339  		// Note: if the code containing this multiply is
   340  		// unreachable, then we may have umin>umax, and this
   341  		// multiply may overflow.  But that's ok for
   342  		// unreachable code. If this code is reachable, we
   343  		// know umin<=umax, so this multiply will not overflow
   344  		// because the max multiply didn't.
   345  	}
   346  	// Signed is harder, so don't bother. The only useful
   347  	// case is when we know both multiplicands are nonnegative,
   348  	// but that case is handled above because we would have then
   349  	// previously propagated signed info to the unsigned domain,
   350  	// and will propagate it back after the multiply.
   351  	return r
   352  }
   353  
   354  // Similar to add, but compute 1 << l if it fits without overflow in b bits.
   355  func (l limit) exp2(b uint) limit {
   356  	r := noLimit
   357  	if l.umax < uint64(b) {
   358  		r.umin = 1 << l.umin
   359  		r.umax = 1 << l.umax
   360  		// Same as above in mul, signed<->unsigned propagation
   361  		// will handle the signed case for us.
   362  	}
   363  	return r
   364  }
   365  
   366  // Similar to add, but computes the complement of the limit for bitsize b.
   367  func (l limit) com(b uint) limit {
   368  	switch b {
   369  	case 64:
   370  		return limit{
   371  			min:  ^l.max,
   372  			max:  ^l.min,
   373  			umin: ^l.umax,
   374  			umax: ^l.umin,
   375  		}
   376  	case 32:
   377  		return limit{
   378  			min:  int64(^int32(l.max)),
   379  			max:  int64(^int32(l.min)),
   380  			umin: uint64(^uint32(l.umax)),
   381  			umax: uint64(^uint32(l.umin)),
   382  		}
   383  	case 16:
   384  		return limit{
   385  			min:  int64(^int16(l.max)),
   386  			max:  int64(^int16(l.min)),
   387  			umin: uint64(^uint16(l.umax)),
   388  			umax: uint64(^uint16(l.umin)),
   389  		}
   390  	case 8:
   391  		return limit{
   392  			min:  int64(^int8(l.max)),
   393  			max:  int64(^int8(l.min)),
   394  			umin: uint64(^uint8(l.umax)),
   395  			umax: uint64(^uint8(l.umin)),
   396  		}
   397  	default:
   398  		panic("unreachable")
   399  	}
   400  }
   401  
   402  // Similar to add, but computes the negation of the limit for bitsize b.
   403  func (l limit) neg(b uint) limit {
   404  	return l.com(b).add(limit{min: 1, max: 1, umin: 1, umax: 1}, b)
   405  }
   406  
   407  var noLimit = limit{math.MinInt64, math.MaxInt64, 0, math.MaxUint64}
   408  
   409  // a limitFact is a limit known for a particular value.
   410  type limitFact struct {
   411  	vid   ID
   412  	limit limit
   413  }
   414  
   415  // An ordering encodes facts like v < w.
   416  type ordering struct {
   417  	next *ordering // linked list of all known orderings for v.
   418  	// Note: v is implicit here, determined by which linked list it is in.
   419  	w *Value
   420  	d domain
   421  	r relation // one of ==,!=,<,<=,>,>=
   422  	// if d is boolean or pointer, r can only be ==, !=
   423  }
   424  
   425  // factsTable keeps track of relations between pairs of values.
   426  //
   427  // The fact table logic is sound, but incomplete. Outside of a few
   428  // special cases, it performs no deduction or arithmetic. While there
   429  // are known decision procedures for this, the ad hoc approach taken
   430  // by the facts table is effective for real code while remaining very
   431  // efficient.
   432  type factsTable struct {
   433  	// unsat is true if facts contains a contradiction.
   434  	//
   435  	// Note that the factsTable logic is incomplete, so if unsat
   436  	// is false, the assertions in factsTable could be satisfiable
   437  	// *or* unsatisfiable.
   438  	unsat      bool // true if facts contains a contradiction
   439  	unsatDepth int  // number of unsat checkpoints
   440  
   441  	// order* is a couple of partial order sets that record information
   442  	// about relations between SSA values in the signed and unsigned
   443  	// domain.
   444  	orderS *poset
   445  	orderU *poset
   446  
   447  	// orderings contains a list of known orderings between values.
   448  	// These lists are indexed by v.ID.
   449  	// We do not record transitive orderings. Only explicitly learned
   450  	// orderings are recorded. Transitive orderings can be obtained
   451  	// by walking along the individual orderings.
   452  	orderings map[ID]*ordering
   453  	// stack of IDs which have had an entry added in orderings.
   454  	// In addition, ID==0 are checkpoint markers.
   455  	orderingsStack []ID
   456  	orderingCache  *ordering // unused ordering records
   457  
   458  	// known lower and upper constant bounds on individual values.
   459  	limits       []limit     // indexed by value ID
   460  	limitStack   []limitFact // previous entries
   461  	recurseCheck []bool      // recursion detector for limit propagation
   462  
   463  	// For each slice s, a map from s to a len(s)/cap(s) value (if any)
   464  	// TODO: check if there are cases that matter where we have
   465  	// more than one len(s) for a slice. We could keep a list if necessary.
   466  	lens map[ID]*Value
   467  	caps map[ID]*Value
   468  
   469  	// reusedTopoSortScoresTable recycle allocations for topo-sort
   470  	reusedTopoSortScoresTable []uint
   471  }
   472  
   473  // checkpointBound is an invalid value used for checkpointing
   474  // and restoring factsTable.
   475  var checkpointBound = limitFact{}
   476  
   477  func newFactsTable(f *Func) *factsTable {
   478  	ft := &factsTable{}
   479  	ft.orderS = f.newPoset()
   480  	ft.orderU = f.newPoset()
   481  	ft.orderS.SetUnsigned(false)
   482  	ft.orderU.SetUnsigned(true)
   483  	ft.orderings = make(map[ID]*ordering)
   484  	ft.limits = f.Cache.allocLimitSlice(f.NumValues())
   485  	for _, b := range f.Blocks {
   486  		for _, v := range b.Values {
   487  			ft.limits[v.ID] = initLimit(v)
   488  		}
   489  	}
   490  	ft.limitStack = make([]limitFact, 4)
   491  	ft.recurseCheck = f.Cache.allocBoolSlice(f.NumValues())
   492  	return ft
   493  }
   494  
   495  // initLimitForNewValue initializes the limits for newly created values,
   496  // possibly needing to expand the limits slice. Currently used by
   497  // simplifyBlock when certain provably constant results are folded.
   498  func (ft *factsTable) initLimitForNewValue(v *Value) {
   499  	if int(v.ID) >= len(ft.limits) {
   500  		f := v.Block.Func
   501  		n := f.NumValues()
   502  		if cap(ft.limits) >= n {
   503  			ft.limits = ft.limits[:n]
   504  		} else {
   505  			old := ft.limits
   506  			ft.limits = f.Cache.allocLimitSlice(n)
   507  			copy(ft.limits, old)
   508  			f.Cache.freeLimitSlice(old)
   509  		}
   510  	}
   511  	ft.limits[v.ID] = initLimit(v)
   512  }
   513  
   514  // signedMin records the fact that we know v is at least
   515  // min in the signed domain.
   516  func (ft *factsTable) signedMin(v *Value, min int64) {
   517  	ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
   518  }
   519  
   520  // signedMax records the fact that we know v is at most
   521  // max in the signed domain.
   522  func (ft *factsTable) signedMax(v *Value, max int64) {
   523  	ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
   524  }
   525  func (ft *factsTable) signedMinMax(v *Value, min, max int64) {
   526  	ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
   527  }
   528  
   529  // setNonNegative records the fact that v is known to be non-negative.
   530  func (ft *factsTable) setNonNegative(v *Value) {
   531  	ft.signedMin(v, 0)
   532  }
   533  
   534  // unsignedMin records the fact that we know v is at least
   535  // min in the unsigned domain.
   536  func (ft *factsTable) unsignedMin(v *Value, min uint64) {
   537  	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
   538  }
   539  
   540  // unsignedMax records the fact that we know v is at most
   541  // max in the unsigned domain.
   542  func (ft *factsTable) unsignedMax(v *Value, max uint64) {
   543  	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
   544  }
   545  func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) {
   546  	ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
   547  }
   548  
   549  func (ft *factsTable) booleanFalse(v *Value) {
   550  	ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   551  }
   552  func (ft *factsTable) booleanTrue(v *Value) {
   553  	ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
   554  }
   555  func (ft *factsTable) pointerNil(v *Value) {
   556  	ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   557  }
   558  func (ft *factsTable) pointerNonNil(v *Value) {
   559  	l := noLimit
   560  	l.umin = 1
   561  	ft.newLimit(v, l)
   562  }
   563  
   564  // newLimit adds new limiting information for v.
   565  func (ft *factsTable) newLimit(v *Value, newLim limit) {
   566  	oldLim := ft.limits[v.ID]
   567  
   568  	// Merge old and new information.
   569  	lim := oldLim.intersect(newLim)
   570  
   571  	// signed <-> unsigned propagation
   572  	if lim.min >= 0 {
   573  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
   574  	}
   575  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
   576  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
   577  	}
   578  
   579  	if lim == oldLim {
   580  		return // nothing new to record
   581  	}
   582  
   583  	if lim.unsat() {
   584  		ft.unsat = true
   585  		return
   586  	}
   587  
   588  	// Check for recursion. This normally happens because in unsatisfiable
   589  	// cases we have a < b < a, and every update to a's limits returns
   590  	// here again with the limit increased by 2.
   591  	// Normally this is caught early by the orderS/orderU posets, but in
   592  	// cases where the comparisons jump between signed and unsigned domains,
   593  	// the posets will not notice.
   594  	if ft.recurseCheck[v.ID] {
   595  		// This should only happen for unsatisfiable cases. TODO: check
   596  		return
   597  	}
   598  	ft.recurseCheck[v.ID] = true
   599  	defer func() {
   600  		ft.recurseCheck[v.ID] = false
   601  	}()
   602  
   603  	// Record undo information.
   604  	ft.limitStack = append(ft.limitStack, limitFact{v.ID, oldLim})
   605  	// Record new information.
   606  	ft.limits[v.ID] = lim
   607  	if v.Block.Func.pass.debug > 2 {
   608  		// TODO: pos is probably wrong. This is the position where v is defined,
   609  		// not the position where we learned the fact about it (which was
   610  		// probably some subsequent compare+branch).
   611  		v.Block.Func.Warnl(v.Pos, "new limit %s %s unsat=%v", v, lim.String(), ft.unsat)
   612  	}
   613  
   614  	// Propagate this new constant range to other values
   615  	// that we know are ordered with respect to this one.
   616  	// Note overflow/underflow in the arithmetic below is ok,
   617  	// it will just lead to imprecision (undetected unsatisfiability).
   618  	for o := ft.orderings[v.ID]; o != nil; o = o.next {
   619  		switch o.d {
   620  		case signed:
   621  			switch o.r {
   622  			case eq: // v == w
   623  				ft.signedMinMax(o.w, lim.min, lim.max)
   624  			case lt | eq: // v <= w
   625  				ft.signedMin(o.w, lim.min)
   626  			case lt: // v < w
   627  				ft.signedMin(o.w, lim.min+1)
   628  			case gt | eq: // v >= w
   629  				ft.signedMax(o.w, lim.max)
   630  			case gt: // v > w
   631  				ft.signedMax(o.w, lim.max-1)
   632  			case lt | gt: // v != w
   633  				if lim.min == lim.max { // v is a constant
   634  					c := lim.min
   635  					if ft.limits[o.w.ID].min == c {
   636  						ft.signedMin(o.w, c+1)
   637  					}
   638  					if ft.limits[o.w.ID].max == c {
   639  						ft.signedMax(o.w, c-1)
   640  					}
   641  				}
   642  			}
   643  		case unsigned:
   644  			switch o.r {
   645  			case eq: // v == w
   646  				ft.unsignedMinMax(o.w, lim.umin, lim.umax)
   647  			case lt | eq: // v <= w
   648  				ft.unsignedMin(o.w, lim.umin)
   649  			case lt: // v < w
   650  				ft.unsignedMin(o.w, lim.umin+1)
   651  			case gt | eq: // v >= w
   652  				ft.unsignedMax(o.w, lim.umax)
   653  			case gt: // v > w
   654  				ft.unsignedMax(o.w, lim.umax-1)
   655  			case lt | gt: // v != w
   656  				if lim.umin == lim.umax { // v is a constant
   657  					c := lim.umin
   658  					if ft.limits[o.w.ID].umin == c {
   659  						ft.unsignedMin(o.w, c+1)
   660  					}
   661  					if ft.limits[o.w.ID].umax == c {
   662  						ft.unsignedMax(o.w, c-1)
   663  					}
   664  				}
   665  			}
   666  		case boolean:
   667  			switch o.r {
   668  			case eq:
   669  				if lim.min == 0 && lim.max == 0 { // constant false
   670  					ft.booleanFalse(o.w)
   671  				}
   672  				if lim.min == 1 && lim.max == 1 { // constant true
   673  					ft.booleanTrue(o.w)
   674  				}
   675  			case lt | gt:
   676  				if lim.min == 0 && lim.max == 0 { // constant false
   677  					ft.booleanTrue(o.w)
   678  				}
   679  				if lim.min == 1 && lim.max == 1 { // constant true
   680  					ft.booleanFalse(o.w)
   681  				}
   682  			}
   683  		case pointer:
   684  			switch o.r {
   685  			case eq:
   686  				if lim.umax == 0 { // nil
   687  					ft.pointerNil(o.w)
   688  				}
   689  				if lim.umin > 0 { // non-nil
   690  					ft.pointerNonNil(o.w)
   691  				}
   692  			case lt | gt:
   693  				if lim.umax == 0 { // nil
   694  					ft.pointerNonNil(o.w)
   695  				}
   696  				// note: not equal to non-nil doesn't tell us anything.
   697  			}
   698  		}
   699  	}
   700  
   701  	// If this is new known constant for a boolean value,
   702  	// extract relation between its args. For example, if
   703  	// We learn v is false, and v is defined as a<b, then we learn a>=b.
   704  	if v.Type.IsBoolean() {
   705  		// If we reach here, it is because we have a more restrictive
   706  		// value for v than the default. The only two such values
   707  		// are constant true or constant false.
   708  		if lim.min != lim.max {
   709  			v.Block.Func.Fatalf("boolean not constant %v", v)
   710  		}
   711  		isTrue := lim.min == 1
   712  		if dr, ok := domainRelationTable[v.Op]; ok && v.Op != OpIsInBounds && v.Op != OpIsSliceInBounds {
   713  			d := dr.d
   714  			r := dr.r
   715  			if d == signed && ft.isNonNegative(v.Args[0]) && ft.isNonNegative(v.Args[1]) {
   716  				d |= unsigned
   717  			}
   718  			if !isTrue {
   719  				r ^= lt | gt | eq
   720  			}
   721  			// TODO: v.Block is wrong?
   722  			addRestrictions(v.Block, ft, d, v.Args[0], v.Args[1], r)
   723  		}
   724  		switch v.Op {
   725  		case OpIsNonNil:
   726  			if isTrue {
   727  				ft.pointerNonNil(v.Args[0])
   728  			} else {
   729  				ft.pointerNil(v.Args[0])
   730  			}
   731  		case OpIsInBounds, OpIsSliceInBounds:
   732  			// 0 <= a0 < a1 (or 0 <= a0 <= a1)
   733  			r := lt
   734  			if v.Op == OpIsSliceInBounds {
   735  				r |= eq
   736  			}
   737  			if isTrue {
   738  				// On the positive branch, we learn:
   739  				//   signed: 0 <= a0 < a1 (or 0 <= a0 <= a1)
   740  				//   unsigned:    a0 < a1 (or a0 <= a1)
   741  				ft.setNonNegative(v.Args[0])
   742  				ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   743  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   744  			} else {
   745  				// On the negative branch, we learn (0 > a0 ||
   746  				// a0 >= a1). In the unsigned domain, this is
   747  				// simply a0 >= a1 (which is the reverse of the
   748  				// positive branch, so nothing surprising).
   749  				// But in the signed domain, we can't express the ||
   750  				// condition, so check if a0 is non-negative instead,
   751  				// to be able to learn something.
   752  				r ^= lt | gt | eq // >= (index) or > (slice)
   753  				if ft.isNonNegative(v.Args[0]) {
   754  					ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   755  				}
   756  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   757  				// TODO: v.Block is wrong here
   758  			}
   759  		}
   760  	}
   761  }
   762  
   763  func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) {
   764  	o := ft.orderingCache
   765  	if o == nil {
   766  		o = &ordering{}
   767  	} else {
   768  		ft.orderingCache = o.next
   769  	}
   770  	o.w = w
   771  	o.d = d
   772  	o.r = r
   773  	o.next = ft.orderings[v.ID]
   774  	ft.orderings[v.ID] = o
   775  	ft.orderingsStack = append(ft.orderingsStack, v.ID)
   776  }
   777  
   778  // update updates the set of relations between v and w in domain d
   779  // restricting it to r.
   780  func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) {
   781  	if parent.Func.pass.debug > 2 {
   782  		parent.Func.Warnl(parent.Pos, "parent=%s, update %s %s %s", parent, v, w, r)
   783  	}
   784  	// No need to do anything else if we already found unsat.
   785  	if ft.unsat {
   786  		return
   787  	}
   788  
   789  	// Self-fact. It's wasteful to register it into the facts
   790  	// table, so just note whether it's satisfiable
   791  	if v == w {
   792  		if r&eq == 0 {
   793  			ft.unsat = true
   794  		}
   795  		return
   796  	}
   797  
   798  	if d == signed || d == unsigned {
   799  		var ok bool
   800  		order := ft.orderS
   801  		if d == unsigned {
   802  			order = ft.orderU
   803  		}
   804  		switch r {
   805  		case lt:
   806  			ok = order.SetOrder(v, w)
   807  		case gt:
   808  			ok = order.SetOrder(w, v)
   809  		case lt | eq:
   810  			ok = order.SetOrderOrEqual(v, w)
   811  		case gt | eq:
   812  			ok = order.SetOrderOrEqual(w, v)
   813  		case eq:
   814  			ok = order.SetEqual(v, w)
   815  		case lt | gt:
   816  			ok = order.SetNonEqual(v, w)
   817  		default:
   818  			panic("unknown relation")
   819  		}
   820  		ft.addOrdering(v, w, d, r)
   821  		ft.addOrdering(w, v, d, reverseBits[r])
   822  
   823  		if !ok {
   824  			if parent.Func.pass.debug > 2 {
   825  				parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r)
   826  			}
   827  			ft.unsat = true
   828  			return
   829  		}
   830  	}
   831  	if d == boolean || d == pointer {
   832  		for o := ft.orderings[v.ID]; o != nil; o = o.next {
   833  			if o.d == d && o.w == w {
   834  				// We already know a relationship between v and w.
   835  				// Either it is a duplicate, or it is a contradiction,
   836  				// as we only allow eq and lt|gt for these domains,
   837  				if o.r != r {
   838  					ft.unsat = true
   839  				}
   840  				return
   841  			}
   842  		}
   843  		// TODO: this does not do transitive equality.
   844  		// We could use a poset like above, but somewhat degenerate (==,!= only).
   845  		ft.addOrdering(v, w, d, r)
   846  		ft.addOrdering(w, v, d, r) // note: reverseBits unnecessary for eq and lt|gt.
   847  	}
   848  
   849  	// Extract new constant limits based on the comparison.
   850  	vLimit := ft.limits[v.ID]
   851  	wLimit := ft.limits[w.ID]
   852  	// Note: all the +1/-1 below could overflow/underflow. Either will
   853  	// still generate correct results, it will just lead to imprecision.
   854  	// In fact if there is overflow/underflow, the corresponding
   855  	// code is unreachable because the known range is outside the range
   856  	// of the value's type.
   857  	switch d {
   858  	case signed:
   859  		switch r {
   860  		case eq: // v == w
   861  			ft.signedMinMax(v, wLimit.min, wLimit.max)
   862  			ft.signedMinMax(w, vLimit.min, vLimit.max)
   863  		case lt: // v < w
   864  			ft.signedMax(v, wLimit.max-1)
   865  			ft.signedMin(w, vLimit.min+1)
   866  		case lt | eq: // v <= w
   867  			ft.signedMax(v, wLimit.max)
   868  			ft.signedMin(w, vLimit.min)
   869  		case gt: // v > w
   870  			ft.signedMin(v, wLimit.min+1)
   871  			ft.signedMax(w, vLimit.max-1)
   872  		case gt | eq: // v >= w
   873  			ft.signedMin(v, wLimit.min)
   874  			ft.signedMax(w, vLimit.max)
   875  		case lt | gt: // v != w
   876  			if vLimit.min == vLimit.max { // v is a constant
   877  				c := vLimit.min
   878  				if wLimit.min == c {
   879  					ft.signedMin(w, c+1)
   880  				}
   881  				if wLimit.max == c {
   882  					ft.signedMax(w, c-1)
   883  				}
   884  			}
   885  			if wLimit.min == wLimit.max { // w is a constant
   886  				c := wLimit.min
   887  				if vLimit.min == c {
   888  					ft.signedMin(v, c+1)
   889  				}
   890  				if vLimit.max == c {
   891  					ft.signedMax(v, c-1)
   892  				}
   893  			}
   894  		}
   895  	case unsigned:
   896  		switch r {
   897  		case eq: // v == w
   898  			ft.unsignedMinMax(v, wLimit.umin, wLimit.umax)
   899  			ft.unsignedMinMax(w, vLimit.umin, vLimit.umax)
   900  		case lt: // v < w
   901  			ft.unsignedMax(v, wLimit.umax-1)
   902  			ft.unsignedMin(w, vLimit.umin+1)
   903  		case lt | eq: // v <= w
   904  			ft.unsignedMax(v, wLimit.umax)
   905  			ft.unsignedMin(w, vLimit.umin)
   906  		case gt: // v > w
   907  			ft.unsignedMin(v, wLimit.umin+1)
   908  			ft.unsignedMax(w, vLimit.umax-1)
   909  		case gt | eq: // v >= w
   910  			ft.unsignedMin(v, wLimit.umin)
   911  			ft.unsignedMax(w, vLimit.umax)
   912  		case lt | gt: // v != w
   913  			if vLimit.umin == vLimit.umax { // v is a constant
   914  				c := vLimit.umin
   915  				if wLimit.umin == c {
   916  					ft.unsignedMin(w, c+1)
   917  				}
   918  				if wLimit.umax == c {
   919  					ft.unsignedMax(w, c-1)
   920  				}
   921  			}
   922  			if wLimit.umin == wLimit.umax { // w is a constant
   923  				c := wLimit.umin
   924  				if vLimit.umin == c {
   925  					ft.unsignedMin(v, c+1)
   926  				}
   927  				if vLimit.umax == c {
   928  					ft.unsignedMax(v, c-1)
   929  				}
   930  			}
   931  		}
   932  	case boolean:
   933  		switch r {
   934  		case eq: // v == w
   935  			if vLimit.min == 1 { // v is true
   936  				ft.booleanTrue(w)
   937  			}
   938  			if vLimit.max == 0 { // v is false
   939  				ft.booleanFalse(w)
   940  			}
   941  			if wLimit.min == 1 { // w is true
   942  				ft.booleanTrue(v)
   943  			}
   944  			if wLimit.max == 0 { // w is false
   945  				ft.booleanFalse(v)
   946  			}
   947  		case lt | gt: // v != w
   948  			if vLimit.min == 1 { // v is true
   949  				ft.booleanFalse(w)
   950  			}
   951  			if vLimit.max == 0 { // v is false
   952  				ft.booleanTrue(w)
   953  			}
   954  			if wLimit.min == 1 { // w is true
   955  				ft.booleanFalse(v)
   956  			}
   957  			if wLimit.max == 0 { // w is false
   958  				ft.booleanTrue(v)
   959  			}
   960  		}
   961  	case pointer:
   962  		switch r {
   963  		case eq: // v == w
   964  			if vLimit.umax == 0 { // v is nil
   965  				ft.pointerNil(w)
   966  			}
   967  			if vLimit.umin > 0 { // v is non-nil
   968  				ft.pointerNonNil(w)
   969  			}
   970  			if wLimit.umax == 0 { // w is nil
   971  				ft.pointerNil(v)
   972  			}
   973  			if wLimit.umin > 0 { // w is non-nil
   974  				ft.pointerNonNil(v)
   975  			}
   976  		case lt | gt: // v != w
   977  			if vLimit.umax == 0 { // v is nil
   978  				ft.pointerNonNil(w)
   979  			}
   980  			if wLimit.umax == 0 { // w is nil
   981  				ft.pointerNonNil(v)
   982  			}
   983  			// Note: the other direction doesn't work.
   984  			// Being not equal to a non-nil pointer doesn't
   985  			// make you (necessarily) a nil pointer.
   986  		}
   987  	}
   988  
   989  	// Derived facts below here are only about numbers.
   990  	if d != signed && d != unsigned {
   991  		return
   992  	}
   993  
   994  	// Additional facts we know given the relationship between len and cap.
   995  	//
   996  	// TODO: Since prove now derives transitive relations, it
   997  	// should be sufficient to learn that len(w) <= cap(w) at the
   998  	// beginning of prove where we look for all len/cap ops.
   999  	if v.Op == OpSliceLen && r&lt == 0 && ft.caps[v.Args[0].ID] != nil {
  1000  		// len(s) > w implies cap(s) > w
  1001  		// len(s) >= w implies cap(s) >= w
  1002  		// len(s) == w implies cap(s) >= w
  1003  		ft.update(parent, ft.caps[v.Args[0].ID], w, d, r|gt)
  1004  	}
  1005  	if w.Op == OpSliceLen && r&gt == 0 && ft.caps[w.Args[0].ID] != nil {
  1006  		// same, length on the RHS.
  1007  		ft.update(parent, v, ft.caps[w.Args[0].ID], d, r|lt)
  1008  	}
  1009  	if v.Op == OpSliceCap && r&gt == 0 && ft.lens[v.Args[0].ID] != nil {
  1010  		// cap(s) < w implies len(s) < w
  1011  		// cap(s) <= w implies len(s) <= w
  1012  		// cap(s) == w implies len(s) <= w
  1013  		ft.update(parent, ft.lens[v.Args[0].ID], w, d, r|lt)
  1014  	}
  1015  	if w.Op == OpSliceCap && r&lt == 0 && ft.lens[w.Args[0].ID] != nil {
  1016  		// same, capacity on the RHS.
  1017  		ft.update(parent, v, ft.lens[w.Args[0].ID], d, r|gt)
  1018  	}
  1019  
  1020  	// Process fence-post implications.
  1021  	//
  1022  	// First, make the condition > or >=.
  1023  	if r == lt || r == lt|eq {
  1024  		v, w = w, v
  1025  		r = reverseBits[r]
  1026  	}
  1027  	switch r {
  1028  	case gt:
  1029  		if x, delta := isConstDelta(v); x != nil && delta == 1 {
  1030  			// x+1 > w  ⇒  x >= w
  1031  			//
  1032  			// This is useful for eliminating the
  1033  			// growslice branch of append.
  1034  			ft.update(parent, x, w, d, gt|eq)
  1035  		} else if x, delta := isConstDelta(w); x != nil && delta == -1 {
  1036  			// v > x-1  ⇒  v >= x
  1037  			ft.update(parent, v, x, d, gt|eq)
  1038  		}
  1039  	case gt | eq:
  1040  		if x, delta := isConstDelta(v); x != nil && delta == -1 {
  1041  			// x-1 >= w && x > min  ⇒  x > w
  1042  			//
  1043  			// Useful for i > 0; s[i-1].
  1044  			lim := ft.limits[x.ID]
  1045  			if (d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0) {
  1046  				ft.update(parent, x, w, d, gt)
  1047  			}
  1048  		} else if x, delta := isConstDelta(w); x != nil && delta == 1 {
  1049  			// v >= x+1 && x < max  ⇒  v > x
  1050  			lim := ft.limits[x.ID]
  1051  			if (d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op]) {
  1052  				ft.update(parent, v, x, d, gt)
  1053  			}
  1054  		}
  1055  	}
  1056  
  1057  	// Process: x+delta > w (with delta constant)
  1058  	// Only signed domain for now (useful for accesses to slices in loops).
  1059  	if r == gt || r == gt|eq {
  1060  		if x, delta := isConstDelta(v); x != nil && d == signed {
  1061  			if parent.Func.pass.debug > 1 {
  1062  				parent.Func.Warnl(parent.Pos, "x+d %s w; x:%v %v delta:%v w:%v d:%v", r, x, parent.String(), delta, w.AuxInt, d)
  1063  			}
  1064  			underflow := true
  1065  			if delta < 0 {
  1066  				l := ft.limits[x.ID]
  1067  				if (x.Type.Size() == 8 && l.min >= math.MinInt64-delta) ||
  1068  					(x.Type.Size() == 4 && l.min >= math.MinInt32-delta) {
  1069  					underflow = false
  1070  				}
  1071  			}
  1072  			if delta < 0 && !underflow {
  1073  				// If delta < 0 and x+delta cannot underflow then x > x+delta (that is, x > v)
  1074  				ft.update(parent, x, v, signed, gt)
  1075  			}
  1076  			if !w.isGenericIntConst() {
  1077  				// If we know that x+delta > w but w is not constant, we can derive:
  1078  				//    if delta < 0 and x+delta cannot underflow, then x > w
  1079  				// This is useful for loops with bounds "len(slice)-K" (delta = -K)
  1080  				if delta < 0 && !underflow {
  1081  					ft.update(parent, x, w, signed, r)
  1082  				}
  1083  			} else {
  1084  				// With w,delta constants, we want to derive: x+delta > w  ⇒  x > w-delta
  1085  				//
  1086  				// We compute (using integers of the correct size):
  1087  				//    min = w - delta
  1088  				//    max = MaxInt - delta
  1089  				//
  1090  				// And we prove that:
  1091  				//    if min<max: min < x AND x <= max
  1092  				//    if min>max: min < x OR  x <= max
  1093  				//
  1094  				// This is always correct, even in case of overflow.
  1095  				//
  1096  				// If the initial fact is x+delta >= w instead, the derived conditions are:
  1097  				//    if min<max: min <= x AND x <= max
  1098  				//    if min>max: min <= x OR  x <= max
  1099  				//
  1100  				// Notice the conditions for max are still <=, as they handle overflows.
  1101  				var min, max int64
  1102  				switch x.Type.Size() {
  1103  				case 8:
  1104  					min = w.AuxInt - delta
  1105  					max = int64(^uint64(0)>>1) - delta
  1106  				case 4:
  1107  					min = int64(int32(w.AuxInt) - int32(delta))
  1108  					max = int64(int32(^uint32(0)>>1) - int32(delta))
  1109  				case 2:
  1110  					min = int64(int16(w.AuxInt) - int16(delta))
  1111  					max = int64(int16(^uint16(0)>>1) - int16(delta))
  1112  				case 1:
  1113  					min = int64(int8(w.AuxInt) - int8(delta))
  1114  					max = int64(int8(^uint8(0)>>1) - int8(delta))
  1115  				default:
  1116  					panic("unimplemented")
  1117  				}
  1118  
  1119  				if min < max {
  1120  					// Record that x > min and max >= x
  1121  					if r == gt {
  1122  						min++
  1123  					}
  1124  					ft.signedMinMax(x, min, max)
  1125  				} else {
  1126  					// We know that either x>min OR x<=max. factsTable cannot record OR conditions,
  1127  					// so let's see if we can already prove that one of them is false, in which case
  1128  					// the other must be true
  1129  					l := ft.limits[x.ID]
  1130  					if l.max <= min {
  1131  						if r&eq == 0 || l.max < min {
  1132  							// x>min (x>=min) is impossible, so it must be x<=max
  1133  							ft.signedMax(x, max)
  1134  						}
  1135  					} else if l.min > max {
  1136  						// x<=max is impossible, so it must be x>min
  1137  						if r == gt {
  1138  							min++
  1139  						}
  1140  						ft.signedMin(x, min)
  1141  					}
  1142  				}
  1143  			}
  1144  		}
  1145  	}
  1146  
  1147  	// Look through value-preserving extensions.
  1148  	// If the domain is appropriate for the pre-extension Type,
  1149  	// repeat the update with the pre-extension Value.
  1150  	if isCleanExt(v) {
  1151  		switch {
  1152  		case d == signed && v.Args[0].Type.IsSigned():
  1153  			fallthrough
  1154  		case d == unsigned && !v.Args[0].Type.IsSigned():
  1155  			ft.update(parent, v.Args[0], w, d, r)
  1156  		}
  1157  	}
  1158  	if isCleanExt(w) {
  1159  		switch {
  1160  		case d == signed && w.Args[0].Type.IsSigned():
  1161  			fallthrough
  1162  		case d == unsigned && !w.Args[0].Type.IsSigned():
  1163  			ft.update(parent, v, w.Args[0], d, r)
  1164  		}
  1165  	}
  1166  }
  1167  
  1168  var opMin = map[Op]int64{
  1169  	OpAdd64: math.MinInt64, OpSub64: math.MinInt64,
  1170  	OpAdd32: math.MinInt32, OpSub32: math.MinInt32,
  1171  }
  1172  
  1173  var opMax = map[Op]int64{
  1174  	OpAdd64: math.MaxInt64, OpSub64: math.MaxInt64,
  1175  	OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32,
  1176  }
  1177  
  1178  var opUMax = map[Op]uint64{
  1179  	OpAdd64: math.MaxUint64, OpSub64: math.MaxUint64,
  1180  	OpAdd32: math.MaxUint32, OpSub32: math.MaxUint32,
  1181  }
  1182  
  1183  // isNonNegative reports whether v is known to be non-negative.
  1184  func (ft *factsTable) isNonNegative(v *Value) bool {
  1185  	return ft.limits[v.ID].min >= 0
  1186  }
  1187  
  1188  // checkpoint saves the current state of known relations.
  1189  // Called when descending on a branch.
  1190  func (ft *factsTable) checkpoint() {
  1191  	if ft.unsat {
  1192  		ft.unsatDepth++
  1193  	}
  1194  	ft.limitStack = append(ft.limitStack, checkpointBound)
  1195  	ft.orderS.Checkpoint()
  1196  	ft.orderU.Checkpoint()
  1197  	ft.orderingsStack = append(ft.orderingsStack, 0)
  1198  }
  1199  
  1200  // restore restores known relation to the state just
  1201  // before the previous checkpoint.
  1202  // Called when backing up on a branch.
  1203  func (ft *factsTable) restore() {
  1204  	if ft.unsatDepth > 0 {
  1205  		ft.unsatDepth--
  1206  	} else {
  1207  		ft.unsat = false
  1208  	}
  1209  	for {
  1210  		old := ft.limitStack[len(ft.limitStack)-1]
  1211  		ft.limitStack = ft.limitStack[:len(ft.limitStack)-1]
  1212  		if old.vid == 0 { // checkpointBound
  1213  			break
  1214  		}
  1215  		ft.limits[old.vid] = old.limit
  1216  	}
  1217  	ft.orderS.Undo()
  1218  	ft.orderU.Undo()
  1219  	for {
  1220  		id := ft.orderingsStack[len(ft.orderingsStack)-1]
  1221  		ft.orderingsStack = ft.orderingsStack[:len(ft.orderingsStack)-1]
  1222  		if id == 0 { // checkpoint marker
  1223  			break
  1224  		}
  1225  		o := ft.orderings[id]
  1226  		ft.orderings[id] = o.next
  1227  		o.next = ft.orderingCache
  1228  		ft.orderingCache = o
  1229  	}
  1230  }
  1231  
  1232  var (
  1233  	reverseBits = [...]relation{0, 4, 2, 6, 1, 5, 3, 7}
  1234  
  1235  	// maps what we learn when the positive branch is taken.
  1236  	// For example:
  1237  	//      OpLess8:   {signed, lt},
  1238  	//	v1 = (OpLess8 v2 v3).
  1239  	// If we learn that v1 is true, then we can deduce that v2<v3
  1240  	// in the signed domain.
  1241  	domainRelationTable = map[Op]struct {
  1242  		d domain
  1243  		r relation
  1244  	}{
  1245  		OpEq8:   {signed | unsigned, eq},
  1246  		OpEq16:  {signed | unsigned, eq},
  1247  		OpEq32:  {signed | unsigned, eq},
  1248  		OpEq64:  {signed | unsigned, eq},
  1249  		OpEqPtr: {pointer, eq},
  1250  		OpEqB:   {boolean, eq},
  1251  
  1252  		OpNeq8:   {signed | unsigned, lt | gt},
  1253  		OpNeq16:  {signed | unsigned, lt | gt},
  1254  		OpNeq32:  {signed | unsigned, lt | gt},
  1255  		OpNeq64:  {signed | unsigned, lt | gt},
  1256  		OpNeqPtr: {pointer, lt | gt},
  1257  		OpNeqB:   {boolean, lt | gt},
  1258  
  1259  		OpLess8:   {signed, lt},
  1260  		OpLess8U:  {unsigned, lt},
  1261  		OpLess16:  {signed, lt},
  1262  		OpLess16U: {unsigned, lt},
  1263  		OpLess32:  {signed, lt},
  1264  		OpLess32U: {unsigned, lt},
  1265  		OpLess64:  {signed, lt},
  1266  		OpLess64U: {unsigned, lt},
  1267  
  1268  		OpLeq8:   {signed, lt | eq},
  1269  		OpLeq8U:  {unsigned, lt | eq},
  1270  		OpLeq16:  {signed, lt | eq},
  1271  		OpLeq16U: {unsigned, lt | eq},
  1272  		OpLeq32:  {signed, lt | eq},
  1273  		OpLeq32U: {unsigned, lt | eq},
  1274  		OpLeq64:  {signed, lt | eq},
  1275  		OpLeq64U: {unsigned, lt | eq},
  1276  	}
  1277  )
  1278  
  1279  // cleanup returns the posets to the free list
  1280  func (ft *factsTable) cleanup(f *Func) {
  1281  	for _, po := range []*poset{ft.orderS, ft.orderU} {
  1282  		// Make sure it's empty as it should be. A non-empty poset
  1283  		// might cause errors and miscompilations if reused.
  1284  		if checkEnabled {
  1285  			if err := po.CheckEmpty(); err != nil {
  1286  				f.Fatalf("poset not empty after function %s: %v", f.Name, err)
  1287  			}
  1288  		}
  1289  		f.retPoset(po)
  1290  	}
  1291  	f.Cache.freeLimitSlice(ft.limits)
  1292  	f.Cache.freeBoolSlice(ft.recurseCheck)
  1293  	if cap(ft.reusedTopoSortScoresTable) > 0 {
  1294  		f.Cache.freeUintSlice(ft.reusedTopoSortScoresTable)
  1295  	}
  1296  }
  1297  
  1298  // addSlicesOfSameLen finds the slices that are in the same block and whose Op
  1299  // is OpPhi and always have the same length, then add the equality relationship
  1300  // between them to ft. If two slices start out with the same length and decrease
  1301  // in length by the same amount on each round of the loop (or in the if block),
  1302  // then we think their lengths are always equal.
  1303  //
  1304  // See https://go.dev/issues/75144
  1305  //
  1306  // In fact, we are just propagating the equality
  1307  //
  1308  //	if len(a) == len(b) { // from here
  1309  //		for len(a) > 4 {
  1310  //			a = a[4:]
  1311  //			b = b[4:]
  1312  //		}
  1313  //		if len(a) == len(b) { // to here
  1314  //			return true
  1315  //		}
  1316  //	}
  1317  //
  1318  // or change the for to if:
  1319  //
  1320  //	if len(a) == len(b) { // from here
  1321  //		if len(a) > 4 {
  1322  //			a = a[4:]
  1323  //			b = b[4:]
  1324  //		}
  1325  //		if len(a) == len(b) { // to here
  1326  //			return true
  1327  //		}
  1328  //	}
  1329  func addSlicesOfSameLen(ft *factsTable, b *Block) {
  1330  	// Let w points to the first value we're interested in, and then we
  1331  	// only process those values ​​that appear to be the same length as w,
  1332  	// looping only once. This should be enough in most cases. And u is
  1333  	// similar to w, see comment for predIndex.
  1334  	var u, w *Value
  1335  	var i, j, k sliceInfo
  1336  	isInterested := func(v *Value) bool {
  1337  		j = getSliceInfo(v)
  1338  		return j.sliceWhere != sliceUnknown
  1339  	}
  1340  	for _, v := range b.Values {
  1341  		if v.Uses == 0 {
  1342  			continue
  1343  		}
  1344  		if v.Op == OpPhi && len(v.Args) == 2 && ft.lens[v.ID] != nil && isInterested(v) {
  1345  			if j.predIndex == 1 && ft.lens[v.Args[0].ID] != nil {
  1346  				// found v = (Phi x (SliceMake _ (Add64 (Const64 [n]) (SliceLen x)) _))) or
  1347  				// v = (Phi x (SliceMake _ (Add64 (Const64 [n]) (SliceLen v)) _)))
  1348  				if w == nil {
  1349  					k = j
  1350  					w = v
  1351  					continue
  1352  				}
  1353  				// propagate the equality
  1354  				if j == k && ft.orderS.Equal(ft.lens[v.Args[0].ID], ft.lens[w.Args[0].ID]) {
  1355  					ft.update(b, ft.lens[v.ID], ft.lens[w.ID], signed, eq)
  1356  				}
  1357  			} else if j.predIndex == 0 && ft.lens[v.Args[1].ID] != nil {
  1358  				// found v = (Phi (SliceMake _ (Add64 (Const64 [n]) (SliceLen x)) _)) x) or
  1359  				// v = (Phi (SliceMake _ (Add64 (Const64 [n]) (SliceLen v)) _)) x)
  1360  				if u == nil {
  1361  					i = j
  1362  					u = v
  1363  					continue
  1364  				}
  1365  				// propagate the equality
  1366  				if j == i && ft.orderS.Equal(ft.lens[v.Args[1].ID], ft.lens[u.Args[1].ID]) {
  1367  					ft.update(b, ft.lens[v.ID], ft.lens[u.ID], signed, eq)
  1368  				}
  1369  			}
  1370  		}
  1371  	}
  1372  }
  1373  
  1374  type sliceWhere int
  1375  
  1376  const (
  1377  	sliceUnknown sliceWhere = iota
  1378  	sliceInFor
  1379  	sliceInIf
  1380  )
  1381  
  1382  // predIndex is used to indicate the branch represented by the predecessor
  1383  // block in which the slicing operation occurs.
  1384  type predIndex int
  1385  
  1386  type sliceInfo struct {
  1387  	lengthDiff int64
  1388  	sliceWhere
  1389  	predIndex
  1390  }
  1391  
  1392  // getSliceInfo returns the negative increment of the slice length in a slice
  1393  // operation by examine the Phi node at the merge block. So, we only interest
  1394  // in the slice operation if it is inside a for block or an if block.
  1395  // Otherwise it returns sliceInfo{0, sliceUnknown, 0}.
  1396  //
  1397  // For the following for block:
  1398  //
  1399  //	for len(a) > 4 {
  1400  //	    a = a[4:]
  1401  //	}
  1402  //
  1403  // vp = (Phi v3 v9)
  1404  // v5 = (SliceLen vp)
  1405  // v7 = (Add64 (Const64 [-4]) v5)
  1406  // v9 = (SliceMake _ v7 _)
  1407  //
  1408  // returns sliceInfo{-4, sliceInFor, 1}
  1409  //
  1410  // For a subsequent merge block after an if block:
  1411  //
  1412  //	if len(a) > 4 {
  1413  //	    a = a[4:]
  1414  //	}
  1415  //	a // here
  1416  //
  1417  // vp = (Phi v3 v9)
  1418  // v5 = (SliceLen v3)
  1419  // v7 = (Add64 (Const64 [-4]) v5)
  1420  // v9 = (SliceMake _ v7 _)
  1421  //
  1422  // returns sliceInfo{-4, sliceInIf, 1}
  1423  //
  1424  // Returns sliceInfo{0, sliceUnknown, 0} if it is not the slice
  1425  // operation we are interested in.
  1426  func getSliceInfo(vp *Value) (inf sliceInfo) {
  1427  	if vp.Op != OpPhi || len(vp.Args) != 2 {
  1428  		return
  1429  	}
  1430  	var i predIndex
  1431  	var l *Value // length for OpSliceMake
  1432  	if vp.Args[0].Op != OpSliceMake && vp.Args[1].Op == OpSliceMake {
  1433  		l = vp.Args[1].Args[1]
  1434  		i = 1
  1435  	} else if vp.Args[0].Op == OpSliceMake && vp.Args[1].Op != OpSliceMake {
  1436  		l = vp.Args[0].Args[1]
  1437  		i = 0
  1438  	} else {
  1439  		return
  1440  	}
  1441  	var op Op
  1442  	switch l.Op {
  1443  	case OpAdd64:
  1444  		op = OpConst64
  1445  	case OpAdd32:
  1446  		op = OpConst32
  1447  	default:
  1448  		return
  1449  	}
  1450  	if l.Args[0].Op == op && l.Args[1].Op == OpSliceLen && l.Args[1].Args[0] == vp {
  1451  		return sliceInfo{l.Args[0].AuxInt, sliceInFor, i}
  1452  	}
  1453  	if l.Args[1].Op == op && l.Args[0].Op == OpSliceLen && l.Args[0].Args[0] == vp {
  1454  		return sliceInfo{l.Args[1].AuxInt, sliceInFor, i}
  1455  	}
  1456  	if l.Args[0].Op == op && l.Args[1].Op == OpSliceLen && l.Args[1].Args[0] == vp.Args[1-i] {
  1457  		return sliceInfo{l.Args[0].AuxInt, sliceInIf, i}
  1458  	}
  1459  	if l.Args[1].Op == op && l.Args[0].Op == OpSliceLen && l.Args[0].Args[0] == vp.Args[1-i] {
  1460  		return sliceInfo{l.Args[1].AuxInt, sliceInIf, i}
  1461  	}
  1462  	return
  1463  }
  1464  
  1465  // prove removes redundant BlockIf branches that can be inferred
  1466  // from previous dominating comparisons.
  1467  //
  1468  // By far, the most common redundant pair are generated by bounds checking.
  1469  // For example for the code:
  1470  //
  1471  //	a[i] = 4
  1472  //	foo(a[i])
  1473  //
  1474  // The compiler will generate the following code:
  1475  //
  1476  //	if i >= len(a) {
  1477  //	    panic("not in bounds")
  1478  //	}
  1479  //	a[i] = 4
  1480  //	if i >= len(a) {
  1481  //	    panic("not in bounds")
  1482  //	}
  1483  //	foo(a[i])
  1484  //
  1485  // The second comparison i >= len(a) is clearly redundant because if the
  1486  // else branch of the first comparison is executed, we already know that i < len(a).
  1487  // The code for the second panic can be removed.
  1488  //
  1489  // prove works by finding contradictions and trimming branches whose
  1490  // conditions are unsatisfiable given the branches leading up to them.
  1491  // It tracks a "fact table" of branch conditions. For each branching
  1492  // block, it asserts the branch conditions that uniquely dominate that
  1493  // block, and then separately asserts the block's branch condition and
  1494  // its negation. If either leads to a contradiction, it can trim that
  1495  // successor.
  1496  func prove(f *Func) {
  1497  	// Find induction variables. Currently, findIndVars
  1498  	// is limited to one induction variable per block.
  1499  	var indVars map[*Block]indVar
  1500  	for _, v := range findIndVar(f) {
  1501  		ind := v.ind
  1502  		if len(ind.Args) != 2 {
  1503  			// the rewrite code assumes there is only ever two parents to loops
  1504  			panic("unexpected induction with too many parents")
  1505  		}
  1506  
  1507  		nxt := v.nxt
  1508  		if !(ind.Uses == 2 && // 2 used by comparison and next
  1509  			nxt.Uses == 1) { // 1 used by induction
  1510  			// ind or nxt is used inside the loop, add it for the facts table
  1511  			if indVars == nil {
  1512  				indVars = make(map[*Block]indVar)
  1513  			}
  1514  			indVars[v.entry] = v
  1515  			continue
  1516  		} else {
  1517  			// Since this induction variable is not used for anything but counting the iterations,
  1518  			// no point in putting it into the facts table.
  1519  		}
  1520  
  1521  		// try to rewrite to a downward counting loop checking against start if the
  1522  		// loop body does not depend on ind or nxt and end is known before the loop.
  1523  		// This reduces pressure on the register allocator because this does not need
  1524  		// to use end on each iteration anymore. We compare against the start constant instead.
  1525  		// That means this code:
  1526  		//
  1527  		//	loop:
  1528  		//		ind = (Phi (Const [x]) nxt),
  1529  		//		if ind < end
  1530  		//		then goto enter_loop
  1531  		//		else goto exit_loop
  1532  		//
  1533  		//	enter_loop:
  1534  		//		do something without using ind nor nxt
  1535  		//		nxt = inc + ind
  1536  		//		goto loop
  1537  		//
  1538  		//	exit_loop:
  1539  		//
  1540  		// is rewritten to:
  1541  		//
  1542  		//	loop:
  1543  		//		ind = (Phi end nxt)
  1544  		//		if (Const [x]) < ind
  1545  		//		then goto enter_loop
  1546  		//		else goto exit_loop
  1547  		//
  1548  		//	enter_loop:
  1549  		//		do something without using ind nor nxt
  1550  		//		nxt = ind - inc
  1551  		//		goto loop
  1552  		//
  1553  		//	exit_loop:
  1554  		//
  1555  		// this is better because it only requires to keep ind then nxt alive while looping,
  1556  		// while the original form keeps ind then nxt and end alive
  1557  		start, end := v.min, v.max
  1558  		if v.flags&indVarCountDown != 0 {
  1559  			start, end = end, start
  1560  		}
  1561  
  1562  		if !start.isGenericIntConst() {
  1563  			// if start is not a constant we would be winning nothing from inverting the loop
  1564  			continue
  1565  		}
  1566  		if end.isGenericIntConst() {
  1567  			// TODO: if both start and end are constants we should rewrite such that the comparison
  1568  			// is against zero and nxt is ++ or -- operation
  1569  			// That means:
  1570  			//	for i := 2; i < 11; i += 2 {
  1571  			// should be rewritten to:
  1572  			//	for i := 5; 0 < i; i-- {
  1573  			continue
  1574  		}
  1575  
  1576  		if end.Block == ind.Block {
  1577  			// we can't rewrite loops where the condition depends on the loop body
  1578  			// this simple check is forced to work because if this is true a Phi in ind.Block must exist
  1579  			continue
  1580  		}
  1581  
  1582  		check := ind.Block.Controls[0]
  1583  		// invert the check
  1584  		check.Args[0], check.Args[1] = check.Args[1], check.Args[0]
  1585  
  1586  		// swap start and end in the loop
  1587  		for i, v := range check.Args {
  1588  			if v != end {
  1589  				continue
  1590  			}
  1591  
  1592  			check.SetArg(i, start)
  1593  			goto replacedEnd
  1594  		}
  1595  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1596  	replacedEnd:
  1597  
  1598  		for i, v := range ind.Args {
  1599  			if v != start {
  1600  				continue
  1601  			}
  1602  
  1603  			ind.SetArg(i, end)
  1604  			goto replacedStart
  1605  		}
  1606  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1607  	replacedStart:
  1608  
  1609  		if nxt.Args[0] != ind {
  1610  			// unlike additions subtractions are not commutative so be sure we get it right
  1611  			nxt.Args[0], nxt.Args[1] = nxt.Args[1], nxt.Args[0]
  1612  		}
  1613  
  1614  		switch nxt.Op {
  1615  		case OpAdd8:
  1616  			nxt.Op = OpSub8
  1617  		case OpAdd16:
  1618  			nxt.Op = OpSub16
  1619  		case OpAdd32:
  1620  			nxt.Op = OpSub32
  1621  		case OpAdd64:
  1622  			nxt.Op = OpSub64
  1623  		case OpSub8:
  1624  			nxt.Op = OpAdd8
  1625  		case OpSub16:
  1626  			nxt.Op = OpAdd16
  1627  		case OpSub32:
  1628  			nxt.Op = OpAdd32
  1629  		case OpSub64:
  1630  			nxt.Op = OpAdd64
  1631  		default:
  1632  			panic("unreachable")
  1633  		}
  1634  
  1635  		if f.pass.debug > 0 {
  1636  			f.Warnl(ind.Pos, "Inverted loop iteration")
  1637  		}
  1638  	}
  1639  
  1640  	ft := newFactsTable(f)
  1641  	ft.checkpoint()
  1642  
  1643  	// Find length and capacity ops.
  1644  	for _, b := range f.Blocks {
  1645  		for _, v := range b.Values {
  1646  			if v.Uses == 0 {
  1647  				// We don't care about dead values.
  1648  				// (There can be some that are CSEd but not removed yet.)
  1649  				continue
  1650  			}
  1651  			switch v.Op {
  1652  			case OpSliceLen:
  1653  				if ft.lens == nil {
  1654  					ft.lens = map[ID]*Value{}
  1655  				}
  1656  				// Set all len Values for the same slice as equal in the poset.
  1657  				// The poset handles transitive relations, so Values related to
  1658  				// any OpSliceLen for this slice will be correctly related to others.
  1659  				if l, ok := ft.lens[v.Args[0].ID]; ok {
  1660  					ft.update(b, v, l, signed, eq)
  1661  				} else {
  1662  					ft.lens[v.Args[0].ID] = v
  1663  				}
  1664  			case OpSliceCap:
  1665  				if ft.caps == nil {
  1666  					ft.caps = map[ID]*Value{}
  1667  				}
  1668  				// Same as case OpSliceLen above, but for slice cap.
  1669  				if c, ok := ft.caps[v.Args[0].ID]; ok {
  1670  					ft.update(b, v, c, signed, eq)
  1671  				} else {
  1672  					ft.caps[v.Args[0].ID] = v
  1673  				}
  1674  			}
  1675  		}
  1676  	}
  1677  
  1678  	// current node state
  1679  	type walkState int
  1680  	const (
  1681  		descend walkState = iota
  1682  		simplify
  1683  	)
  1684  	// work maintains the DFS stack.
  1685  	type bp struct {
  1686  		block *Block    // current handled block
  1687  		state walkState // what's to do
  1688  	}
  1689  	work := make([]bp, 0, 256)
  1690  	work = append(work, bp{
  1691  		block: f.Entry,
  1692  		state: descend,
  1693  	})
  1694  
  1695  	idom := f.Idom()
  1696  	sdom := f.Sdom()
  1697  
  1698  	// DFS on the dominator tree.
  1699  	//
  1700  	// For efficiency, we consider only the dominator tree rather
  1701  	// than the entire flow graph. On the way down, we consider
  1702  	// incoming branches and accumulate conditions that uniquely
  1703  	// dominate the current block. If we discover a contradiction,
  1704  	// we can eliminate the entire block and all of its children.
  1705  	// On the way back up, we consider outgoing branches that
  1706  	// haven't already been considered. This way we consider each
  1707  	// branch condition only once.
  1708  	for len(work) > 0 {
  1709  		node := work[len(work)-1]
  1710  		work = work[:len(work)-1]
  1711  		parent := idom[node.block.ID]
  1712  		branch := getBranch(sdom, parent, node.block)
  1713  
  1714  		switch node.state {
  1715  		case descend:
  1716  			ft.checkpoint()
  1717  
  1718  			// Entering the block, add facts about the induction variable
  1719  			// that is bound to this block.
  1720  			if iv, ok := indVars[node.block]; ok {
  1721  				addIndVarRestrictions(ft, parent, iv)
  1722  			}
  1723  
  1724  			// Add results of reaching this block via a branch from
  1725  			// its immediate dominator (if any).
  1726  			if branch != unknown {
  1727  				addBranchRestrictions(ft, parent, branch)
  1728  			}
  1729  
  1730  			// Add slices of the same length start from current block.
  1731  			addSlicesOfSameLen(ft, node.block)
  1732  
  1733  			if ft.unsat {
  1734  				// node.block is unreachable.
  1735  				// Remove it and don't visit
  1736  				// its children.
  1737  				removeBranch(parent, branch)
  1738  				ft.restore()
  1739  				break
  1740  			}
  1741  			// Otherwise, we can now commit to
  1742  			// taking this branch. We'll restore
  1743  			// ft when we unwind.
  1744  
  1745  			// Add facts about the values in the current block.
  1746  			addLocalFacts(ft, node.block)
  1747  
  1748  			work = append(work, bp{
  1749  				block: node.block,
  1750  				state: simplify,
  1751  			})
  1752  			for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) {
  1753  				work = append(work, bp{
  1754  					block: s,
  1755  					state: descend,
  1756  				})
  1757  			}
  1758  
  1759  		case simplify:
  1760  			simplifyBlock(sdom, ft, node.block)
  1761  			ft.restore()
  1762  		}
  1763  	}
  1764  
  1765  	ft.restore()
  1766  
  1767  	ft.cleanup(f)
  1768  }
  1769  
  1770  // initLimit sets initial constant limit for v.  This limit is based
  1771  // only on the operation itself, not any of its input arguments. This
  1772  // method is only used in two places, once when the prove pass startup
  1773  // and the other when a new ssa value is created, both for init. (unlike
  1774  // flowLimit, below, which computes additional constraints based on
  1775  // ranges of opcode arguments).
  1776  func initLimit(v *Value) limit {
  1777  	if v.Type.IsBoolean() {
  1778  		switch v.Op {
  1779  		case OpConstBool:
  1780  			b := v.AuxInt
  1781  			return limit{min: b, max: b, umin: uint64(b), umax: uint64(b)}
  1782  		default:
  1783  			return limit{min: 0, max: 1, umin: 0, umax: 1}
  1784  		}
  1785  	}
  1786  	if v.Type.IsPtrShaped() { // These are the types that EqPtr/NeqPtr operate on, except uintptr.
  1787  		switch v.Op {
  1788  		case OpConstNil:
  1789  			return limit{min: 0, max: 0, umin: 0, umax: 0}
  1790  		case OpAddr, OpLocalAddr: // TODO: others?
  1791  			l := noLimit
  1792  			l.umin = 1
  1793  			return l
  1794  		default:
  1795  			return noLimit
  1796  		}
  1797  	}
  1798  	if !v.Type.IsInteger() {
  1799  		return noLimit
  1800  	}
  1801  
  1802  	// Default limits based on type.
  1803  	lim := noLimitForBitsize(uint(v.Type.Size()) * 8)
  1804  
  1805  	// Tighter limits on some opcodes.
  1806  	switch v.Op {
  1807  	// constants
  1808  	case OpConst64:
  1809  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(v.AuxInt), umax: uint64(v.AuxInt)}
  1810  	case OpConst32:
  1811  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint32(v.AuxInt)), umax: uint64(uint32(v.AuxInt))}
  1812  	case OpConst16:
  1813  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint16(v.AuxInt)), umax: uint64(uint16(v.AuxInt))}
  1814  	case OpConst8:
  1815  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint8(v.AuxInt)), umax: uint64(uint8(v.AuxInt))}
  1816  
  1817  	// extensions
  1818  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16:
  1819  		lim = lim.signedMinMax(0, 1<<8-1)
  1820  		lim = lim.unsignedMax(1<<8 - 1)
  1821  	case OpZeroExt16to64, OpZeroExt16to32:
  1822  		lim = lim.signedMinMax(0, 1<<16-1)
  1823  		lim = lim.unsignedMax(1<<16 - 1)
  1824  	case OpZeroExt32to64:
  1825  		lim = lim.signedMinMax(0, 1<<32-1)
  1826  		lim = lim.unsignedMax(1<<32 - 1)
  1827  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16:
  1828  		lim = lim.signedMinMax(math.MinInt8, math.MaxInt8)
  1829  	case OpSignExt16to64, OpSignExt16to32:
  1830  		lim = lim.signedMinMax(math.MinInt16, math.MaxInt16)
  1831  	case OpSignExt32to64:
  1832  		lim = lim.signedMinMax(math.MinInt32, math.MaxInt32)
  1833  
  1834  	// math/bits intrinsics
  1835  	case OpCtz64, OpBitLen64, OpPopCount64,
  1836  		OpCtz32, OpBitLen32, OpPopCount32,
  1837  		OpCtz16, OpBitLen16, OpPopCount16,
  1838  		OpCtz8, OpBitLen8, OpPopCount8:
  1839  		lim = lim.unsignedMax(uint64(v.Args[0].Type.Size() * 8))
  1840  
  1841  	// bool to uint8 conversion
  1842  	case OpCvtBoolToUint8:
  1843  		lim = lim.unsignedMax(1)
  1844  
  1845  	// length operations
  1846  	case OpSliceLen, OpSliceCap:
  1847  		f := v.Block.Func
  1848  		elemSize := uint64(v.Args[0].Type.Elem().Size())
  1849  		if elemSize > 0 {
  1850  			heapSize := uint64(1)<<(uint64(f.Config.PtrSize)*8) - 1
  1851  			maximumElementsFittingInHeap := heapSize / elemSize
  1852  			lim = lim.unsignedMax(maximumElementsFittingInHeap)
  1853  		}
  1854  		fallthrough
  1855  	case OpStringLen:
  1856  		lim = lim.signedMin(0)
  1857  	}
  1858  
  1859  	// signed <-> unsigned propagation
  1860  	if lim.min >= 0 {
  1861  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
  1862  	}
  1863  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
  1864  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
  1865  	}
  1866  
  1867  	return lim
  1868  }
  1869  
  1870  // flowLimit updates the known limits of v in ft.
  1871  // flowLimit can use the ranges of input arguments.
  1872  //
  1873  // Note: this calculation only happens at the point the value is defined. We do not reevaluate
  1874  // it later. So for example:
  1875  //
  1876  //	v := x + y
  1877  //	if 0 <= x && x < 5 && 0 <= y && y < 5 { ... use v ... }
  1878  //
  1879  // we don't discover that the range of v is bounded in the conditioned
  1880  // block. We could recompute the range of v once we enter the block so
  1881  // we know that it is 0 <= v <= 8, but we don't have a mechanism to do
  1882  // that right now.
  1883  func (ft *factsTable) flowLimit(v *Value) {
  1884  	if !v.Type.IsInteger() {
  1885  		// TODO: boolean?
  1886  		return
  1887  	}
  1888  
  1889  	// Additional limits based on opcode and argument.
  1890  	// No need to repeat things here already done in initLimit.
  1891  	switch v.Op {
  1892  
  1893  	// extensions
  1894  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64:
  1895  		a := ft.limits[v.Args[0].ID]
  1896  		ft.unsignedMinMax(v, a.umin, a.umax)
  1897  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64:
  1898  		a := ft.limits[v.Args[0].ID]
  1899  		ft.signedMinMax(v, a.min, a.max)
  1900  	case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
  1901  		a := ft.limits[v.Args[0].ID]
  1902  		if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 {
  1903  			ft.unsignedMinMax(v, a.umin, a.umax)
  1904  		}
  1905  
  1906  	// math/bits
  1907  	case OpCtz64:
  1908  		a := ft.limits[v.Args[0].ID]
  1909  		if a.nonzero() {
  1910  			ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
  1911  		}
  1912  	case OpCtz32:
  1913  		a := ft.limits[v.Args[0].ID]
  1914  		if a.nonzero() {
  1915  			ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
  1916  		}
  1917  	case OpCtz16:
  1918  		a := ft.limits[v.Args[0].ID]
  1919  		if a.nonzero() {
  1920  			ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
  1921  		}
  1922  	case OpCtz8:
  1923  		a := ft.limits[v.Args[0].ID]
  1924  		if a.nonzero() {
  1925  			ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
  1926  		}
  1927  
  1928  	case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8:
  1929  		a := ft.limits[v.Args[0].ID]
  1930  		changingBitsCount := uint64(bits.Len64(a.umax ^ a.umin))
  1931  		sharedLeadingMask := ^(uint64(1)<<changingBitsCount - 1)
  1932  		fixedBits := a.umax & sharedLeadingMask
  1933  		min := uint64(bits.OnesCount64(fixedBits))
  1934  		ft.unsignedMinMax(v, min, min+changingBitsCount)
  1935  
  1936  	case OpBitLen64:
  1937  		a := ft.limits[v.Args[0].ID]
  1938  		ft.unsignedMinMax(v,
  1939  			uint64(bits.Len64(a.umin)),
  1940  			uint64(bits.Len64(a.umax)))
  1941  	case OpBitLen32:
  1942  		a := ft.limits[v.Args[0].ID]
  1943  		ft.unsignedMinMax(v,
  1944  			uint64(bits.Len32(uint32(a.umin))),
  1945  			uint64(bits.Len32(uint32(a.umax))))
  1946  	case OpBitLen16:
  1947  		a := ft.limits[v.Args[0].ID]
  1948  		ft.unsignedMinMax(v,
  1949  			uint64(bits.Len16(uint16(a.umin))),
  1950  			uint64(bits.Len16(uint16(a.umax))))
  1951  	case OpBitLen8:
  1952  		a := ft.limits[v.Args[0].ID]
  1953  		ft.unsignedMinMax(v,
  1954  			uint64(bits.Len8(uint8(a.umin))),
  1955  			uint64(bits.Len8(uint8(a.umax))))
  1956  
  1957  	// Masks.
  1958  
  1959  	// TODO: if y.umax and y.umin share a leading bit pattern, y also has that leading bit pattern.
  1960  	// we could compare the patterns of always set bits in a and b and learn more about minimum and maximum.
  1961  	// But I doubt this help any real world code.
  1962  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  1963  		// AND can only make the value smaller.
  1964  		a := ft.limits[v.Args[0].ID]
  1965  		b := ft.limits[v.Args[1].ID]
  1966  		ft.unsignedMax(v, min(a.umax, b.umax))
  1967  	case OpOr64, OpOr32, OpOr16, OpOr8:
  1968  		// OR can only make the value bigger and can't flip bits proved to be zero in both inputs.
  1969  		a := ft.limits[v.Args[0].ID]
  1970  		b := ft.limits[v.Args[1].ID]
  1971  		ft.unsignedMinMax(v,
  1972  			max(a.umin, b.umin),
  1973  			1<<bits.Len64(a.umax|b.umax)-1)
  1974  	case OpXor64, OpXor32, OpXor16, OpXor8:
  1975  		// XOR can't flip bits that are proved to be zero in both inputs.
  1976  		a := ft.limits[v.Args[0].ID]
  1977  		b := ft.limits[v.Args[1].ID]
  1978  		ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
  1979  	case OpCom64, OpCom32, OpCom16, OpCom8:
  1980  		a := ft.limits[v.Args[0].ID]
  1981  		ft.newLimit(v, a.com(uint(v.Type.Size())*8))
  1982  
  1983  	// Arithmetic.
  1984  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  1985  		a := ft.limits[v.Args[0].ID]
  1986  		b := ft.limits[v.Args[1].ID]
  1987  		ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
  1988  	case OpSub64, OpSub32, OpSub16, OpSub8:
  1989  		a := ft.limits[v.Args[0].ID]
  1990  		b := ft.limits[v.Args[1].ID]
  1991  		ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
  1992  		ft.detectMod(v)
  1993  		ft.detectSliceLenRelation(v)
  1994  		ft.detectSubRelations(v)
  1995  	case OpNeg64, OpNeg32, OpNeg16, OpNeg8:
  1996  		a := ft.limits[v.Args[0].ID]
  1997  		bitsize := uint(v.Type.Size()) * 8
  1998  		ft.newLimit(v, a.neg(bitsize))
  1999  	case OpMul64, OpMul32, OpMul16, OpMul8:
  2000  		a := ft.limits[v.Args[0].ID]
  2001  		b := ft.limits[v.Args[1].ID]
  2002  		ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
  2003  	case OpLsh64x64, OpLsh64x32, OpLsh64x16, OpLsh64x8,
  2004  		OpLsh32x64, OpLsh32x32, OpLsh32x16, OpLsh32x8,
  2005  		OpLsh16x64, OpLsh16x32, OpLsh16x16, OpLsh16x8,
  2006  		OpLsh8x64, OpLsh8x32, OpLsh8x16, OpLsh8x8:
  2007  		a := ft.limits[v.Args[0].ID]
  2008  		b := ft.limits[v.Args[1].ID]
  2009  		bitsize := uint(v.Type.Size()) * 8
  2010  		ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
  2011  	case OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8,
  2012  		OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
  2013  		OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
  2014  		OpRsh8x64, OpRsh8x32, OpRsh8x16, OpRsh8x8:
  2015  		a := ft.limits[v.Args[0].ID]
  2016  		b := ft.limits[v.Args[1].ID]
  2017  		if b.min >= 0 {
  2018  			// Shift of negative makes a value closer to 0 (greater),
  2019  			// so if a.min is negative, v.min is a.min>>b.min instead of a.min>>b.max,
  2020  			// and similarly if a.max is negative, v.max is a.max>>b.max.
  2021  			// Easier to compute min and max of both than to write sign logic.
  2022  			vmin := min(a.min>>b.min, a.min>>b.max)
  2023  			vmax := max(a.max>>b.min, a.max>>b.max)
  2024  			ft.signedMinMax(v, vmin, vmax)
  2025  		}
  2026  	case OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8,
  2027  		OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2028  		OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2029  		OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8:
  2030  		a := ft.limits[v.Args[0].ID]
  2031  		b := ft.limits[v.Args[1].ID]
  2032  		if b.min >= 0 {
  2033  			ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min)
  2034  		}
  2035  	case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  2036  		a := ft.limits[v.Args[0].ID]
  2037  		b := ft.limits[v.Args[1].ID]
  2038  		if !(a.nonnegative() && b.nonnegative()) {
  2039  			// TODO: we could handle signed limits but I didn't bother.
  2040  			break
  2041  		}
  2042  		fallthrough
  2043  	case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u:
  2044  		a := ft.limits[v.Args[0].ID]
  2045  		b := ft.limits[v.Args[1].ID]
  2046  		lim := noLimit
  2047  		if b.umax > 0 {
  2048  			lim = lim.unsignedMin(a.umin / b.umax)
  2049  		}
  2050  		if b.umin > 0 {
  2051  			lim = lim.unsignedMax(a.umax / b.umin)
  2052  		}
  2053  		ft.newLimit(v, lim)
  2054  	case OpMod64, OpMod32, OpMod16, OpMod8:
  2055  		ft.modLimit(true, v, v.Args[0], v.Args[1])
  2056  	case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  2057  		ft.modLimit(false, v, v.Args[0], v.Args[1])
  2058  
  2059  	case OpPhi:
  2060  		// Compute the union of all the input phis.
  2061  		// Often this will convey no information, because the block
  2062  		// is not dominated by its predecessors and hence the
  2063  		// phi arguments might not have been processed yet. But if
  2064  		// the values are declared earlier, it may help. e.g., for
  2065  		//    v = phi(c3, c5)
  2066  		// where c3 = OpConst [3] and c5 = OpConst [5] are
  2067  		// defined in the entry block, we can derive [3,5]
  2068  		// as the limit for v.
  2069  		l := ft.limits[v.Args[0].ID]
  2070  		for _, a := range v.Args[1:] {
  2071  			l2 := ft.limits[a.ID]
  2072  			l.min = min(l.min, l2.min)
  2073  			l.max = max(l.max, l2.max)
  2074  			l.umin = min(l.umin, l2.umin)
  2075  			l.umax = max(l.umax, l2.umax)
  2076  		}
  2077  		ft.newLimit(v, l)
  2078  	}
  2079  }
  2080  
  2081  // detectSliceLenRelation matches the pattern where
  2082  //  1. v := slicelen - index, OR v := slicecap - index
  2083  //     AND
  2084  //  2. index <= slicelen - K
  2085  //     THEN
  2086  //
  2087  // slicecap - index >= slicelen - index >= K
  2088  //
  2089  // Note that "index" is not used for indexing in this pattern, but
  2090  // in the motivating example (chunked slice iteration) it is.
  2091  func (ft *factsTable) detectSliceLenRelation(v *Value) {
  2092  	if v.Op != OpSub64 {
  2093  		return
  2094  	}
  2095  
  2096  	if !(v.Args[0].Op == OpSliceLen || v.Args[0].Op == OpStringLen || v.Args[0].Op == OpSliceCap) {
  2097  		return
  2098  	}
  2099  
  2100  	index := v.Args[1]
  2101  	if !ft.isNonNegative(index) {
  2102  		return
  2103  	}
  2104  	slice := v.Args[0].Args[0]
  2105  
  2106  	for o := ft.orderings[index.ID]; o != nil; o = o.next {
  2107  		if o.d != signed {
  2108  			continue
  2109  		}
  2110  		or := o.r
  2111  		if or != lt && or != lt|eq {
  2112  			continue
  2113  		}
  2114  		ow := o.w
  2115  		if ow.Op != OpAdd64 && ow.Op != OpSub64 {
  2116  			continue
  2117  		}
  2118  		var lenOffset *Value
  2119  		if bound := ow.Args[0]; (bound.Op == OpSliceLen || bound.Op == OpStringLen) && bound.Args[0] == slice {
  2120  			lenOffset = ow.Args[1]
  2121  		} else if bound := ow.Args[1]; (bound.Op == OpSliceLen || bound.Op == OpStringLen) && bound.Args[0] == slice {
  2122  			lenOffset = ow.Args[0]
  2123  		}
  2124  		if lenOffset == nil || lenOffset.Op != OpConst64 {
  2125  			continue
  2126  		}
  2127  		K := lenOffset.AuxInt
  2128  		if ow.Op == OpAdd64 {
  2129  			K = -K
  2130  		}
  2131  		if K < 0 {
  2132  			continue
  2133  		}
  2134  		if or == lt {
  2135  			K++
  2136  		}
  2137  		if K < 0 { // We hate thinking about overflow
  2138  			continue
  2139  		}
  2140  		ft.signedMin(v, K)
  2141  	}
  2142  }
  2143  
  2144  // v must be Sub{64,32,16,8}.
  2145  func (ft *factsTable) detectSubRelations(v *Value) {
  2146  	// v = x-y
  2147  	x := v.Args[0]
  2148  	y := v.Args[1]
  2149  	if x == y {
  2150  		ft.signedMinMax(v, 0, 0)
  2151  		return
  2152  	}
  2153  	xLim := ft.limits[x.ID]
  2154  	yLim := ft.limits[y.ID]
  2155  
  2156  	// Check if we might wrap around. If so, give up.
  2157  	width := uint(v.Type.Size()) * 8
  2158  	if _, ok := safeSub(xLim.min, yLim.max, width); !ok {
  2159  		return // x-y might underflow
  2160  	}
  2161  	if _, ok := safeSub(xLim.max, yLim.min, width); !ok {
  2162  		return // x-y might overflow
  2163  	}
  2164  
  2165  	// Subtracting a positive number only makes
  2166  	// things smaller.
  2167  	if yLim.min >= 0 {
  2168  		ft.update(v.Block, v, x, signed, lt|eq)
  2169  		// TODO: is this worth it?
  2170  		//if yLim.min > 0 {
  2171  		//	ft.update(v.Block, v, x, signed, lt)
  2172  		//}
  2173  	}
  2174  
  2175  	// Subtracting a number from a bigger one
  2176  	// can't go below 0.
  2177  	if ft.orderS.OrderedOrEqual(y, x) {
  2178  		ft.setNonNegative(v)
  2179  		// TODO: is this worth it?
  2180  		//if ft.orderS.Ordered(y, x) {
  2181  		//	ft.signedMin(v, 1)
  2182  		//}
  2183  	}
  2184  }
  2185  
  2186  // x%d has been rewritten to x - (x/d)*d.
  2187  func (ft *factsTable) detectMod(v *Value) {
  2188  	var opDiv, opDivU, opMul, opConst Op
  2189  	switch v.Op {
  2190  	case OpSub64:
  2191  		opDiv = OpDiv64
  2192  		opDivU = OpDiv64u
  2193  		opMul = OpMul64
  2194  		opConst = OpConst64
  2195  	case OpSub32:
  2196  		opDiv = OpDiv32
  2197  		opDivU = OpDiv32u
  2198  		opMul = OpMul32
  2199  		opConst = OpConst32
  2200  	case OpSub16:
  2201  		opDiv = OpDiv16
  2202  		opDivU = OpDiv16u
  2203  		opMul = OpMul16
  2204  		opConst = OpConst16
  2205  	case OpSub8:
  2206  		opDiv = OpDiv8
  2207  		opDivU = OpDiv8u
  2208  		opMul = OpMul8
  2209  		opConst = OpConst8
  2210  	}
  2211  
  2212  	mul := v.Args[1]
  2213  	if mul.Op != opMul {
  2214  		return
  2215  	}
  2216  	div, con := mul.Args[0], mul.Args[1]
  2217  	if div.Op == opConst {
  2218  		div, con = con, div
  2219  	}
  2220  	if con.Op != opConst || (div.Op != opDiv && div.Op != opDivU) || div.Args[0] != v.Args[0] || div.Args[1].Op != opConst || div.Args[1].AuxInt != con.AuxInt {
  2221  		return
  2222  	}
  2223  	ft.modLimit(div.Op == opDiv, v, v.Args[0], con)
  2224  }
  2225  
  2226  // modLimit sets v with facts derived from v = p % q.
  2227  func (ft *factsTable) modLimit(signed bool, v, p, q *Value) {
  2228  	a := ft.limits[p.ID]
  2229  	b := ft.limits[q.ID]
  2230  	if signed {
  2231  		if a.min < 0 && b.min > 0 {
  2232  			ft.signedMinMax(v, -(b.max - 1), b.max-1)
  2233  			return
  2234  		}
  2235  		if !(a.nonnegative() && b.nonnegative()) {
  2236  			// TODO: we could handle signed limits but I didn't bother.
  2237  			return
  2238  		}
  2239  		if a.min >= 0 && b.min > 0 {
  2240  			ft.setNonNegative(v)
  2241  		}
  2242  	}
  2243  	// Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit.
  2244  	ft.unsignedMax(v, min(a.umax, b.umax-1))
  2245  }
  2246  
  2247  // getBranch returns the range restrictions added by p
  2248  // when reaching b. p is the immediate dominator of b.
  2249  func getBranch(sdom SparseTree, p *Block, b *Block) branch {
  2250  	if p == nil {
  2251  		return unknown
  2252  	}
  2253  	switch p.Kind {
  2254  	case BlockIf:
  2255  		// If p and p.Succs[0] are dominators it means that every path
  2256  		// from entry to b passes through p and p.Succs[0]. We care that
  2257  		// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
  2258  		// has one predecessor then (apart from the degenerate case),
  2259  		// there is no path from entry that can reach b through p.Succs[1].
  2260  		// TODO: how about p->yes->b->yes, i.e. a loop in yes.
  2261  		if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 {
  2262  			return positive
  2263  		}
  2264  		if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 {
  2265  			return negative
  2266  		}
  2267  	case BlockJumpTable:
  2268  		// TODO: this loop can lead to quadratic behavior, as
  2269  		// getBranch can be called len(p.Succs) times.
  2270  		for i, e := range p.Succs {
  2271  			if sdom.IsAncestorEq(e.b, b) && len(e.b.Preds) == 1 {
  2272  				return jumpTable0 + branch(i)
  2273  			}
  2274  		}
  2275  	}
  2276  	return unknown
  2277  }
  2278  
  2279  // addIndVarRestrictions updates the factsTables ft with the facts
  2280  // learned from the induction variable indVar which drives the loop
  2281  // starting in Block b.
  2282  func addIndVarRestrictions(ft *factsTable, b *Block, iv indVar) {
  2283  	d := signed
  2284  	if ft.isNonNegative(iv.min) && ft.isNonNegative(iv.max) {
  2285  		d |= unsigned
  2286  	}
  2287  
  2288  	if iv.flags&indVarMinExc == 0 {
  2289  		addRestrictions(b, ft, d, iv.min, iv.ind, lt|eq)
  2290  	} else {
  2291  		addRestrictions(b, ft, d, iv.min, iv.ind, lt)
  2292  	}
  2293  
  2294  	if iv.flags&indVarMaxInc == 0 {
  2295  		addRestrictions(b, ft, d, iv.ind, iv.max, lt)
  2296  	} else {
  2297  		addRestrictions(b, ft, d, iv.ind, iv.max, lt|eq)
  2298  	}
  2299  }
  2300  
  2301  // addBranchRestrictions updates the factsTables ft with the facts learned when
  2302  // branching from Block b in direction br.
  2303  func addBranchRestrictions(ft *factsTable, b *Block, br branch) {
  2304  	c := b.Controls[0]
  2305  	switch {
  2306  	case br == negative:
  2307  		ft.booleanFalse(c)
  2308  	case br == positive:
  2309  		ft.booleanTrue(c)
  2310  	case br >= jumpTable0:
  2311  		idx := br - jumpTable0
  2312  		val := int64(idx)
  2313  		if v, off := isConstDelta(c); v != nil {
  2314  			// Establish the bound on the underlying value we're switching on,
  2315  			// not on the offset-ed value used as the jump table index.
  2316  			c = v
  2317  			val -= off
  2318  		}
  2319  		ft.newLimit(c, limit{min: val, max: val, umin: uint64(val), umax: uint64(val)})
  2320  	default:
  2321  		panic("unknown branch")
  2322  	}
  2323  }
  2324  
  2325  // addRestrictions updates restrictions from the immediate
  2326  // dominating block (p) using r.
  2327  func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r relation) {
  2328  	if t == 0 {
  2329  		// Trivial case: nothing to do.
  2330  		// Should not happen, but just in case.
  2331  		return
  2332  	}
  2333  	for i := domain(1); i <= t; i <<= 1 {
  2334  		if t&i == 0 {
  2335  			continue
  2336  		}
  2337  		ft.update(parent, v, w, i, r)
  2338  	}
  2339  }
  2340  
  2341  func unsignedAddOverflows(a, b uint64, t *types.Type) bool {
  2342  	switch t.Size() {
  2343  	case 8:
  2344  		return a+b < a
  2345  	case 4:
  2346  		return a+b > math.MaxUint32
  2347  	case 2:
  2348  		return a+b > math.MaxUint16
  2349  	case 1:
  2350  		return a+b > math.MaxUint8
  2351  	default:
  2352  		panic("unreachable")
  2353  	}
  2354  }
  2355  
  2356  func signedAddOverflowsOrUnderflows(a, b int64, t *types.Type) bool {
  2357  	r := a + b
  2358  	switch t.Size() {
  2359  	case 8:
  2360  		return (a >= 0 && b >= 0 && r < 0) || (a < 0 && b < 0 && r >= 0)
  2361  	case 4:
  2362  		return r < math.MinInt32 || math.MaxInt32 < r
  2363  	case 2:
  2364  		return r < math.MinInt16 || math.MaxInt16 < r
  2365  	case 1:
  2366  		return r < math.MinInt8 || math.MaxInt8 < r
  2367  	default:
  2368  		panic("unreachable")
  2369  	}
  2370  }
  2371  
  2372  func unsignedSubUnderflows(a, b uint64) bool {
  2373  	return a < b
  2374  }
  2375  
  2376  // checkForChunkedIndexBounds looks for index expressions of the form
  2377  // A[i+delta] where delta < K and i <= len(A)-K.  That is, this is a chunked
  2378  // iteration where the index is not directly compared to the length.
  2379  // if isReslice, then delta can be equal to K.
  2380  func checkForChunkedIndexBounds(ft *factsTable, b *Block, index, bound *Value, isReslice bool) bool {
  2381  	if bound.Op != OpSliceLen && bound.Op != OpStringLen && bound.Op != OpSliceCap {
  2382  		return false
  2383  	}
  2384  
  2385  	// this is a slice bounds check against len or capacity,
  2386  	// and refers back to a prior check against length, which
  2387  	// will also work for the cap since that is not smaller
  2388  	// than the length.
  2389  
  2390  	slice := bound.Args[0]
  2391  	lim := ft.limits[index.ID]
  2392  	if lim.min < 0 {
  2393  		return false
  2394  	}
  2395  	i, delta := isConstDelta(index)
  2396  	if i == nil {
  2397  		return false
  2398  	}
  2399  	if delta < 0 {
  2400  		return false
  2401  	}
  2402  	// special case for blocked iteration over a slice.
  2403  	// slicelen > i + delta && <==== if clauses above
  2404  	// && index >= 0           <==== if clause above
  2405  	// delta >= 0 &&           <==== if clause above
  2406  	// slicelen-K >/>= x       <==== checked below
  2407  	// && K >=/> delta         <==== checked below
  2408  	// then v > w
  2409  	// example: i <=/< len - 4/3 means i+{0,1,2,3} are legal indices
  2410  	for o := ft.orderings[i.ID]; o != nil; o = o.next {
  2411  		if o.d != signed {
  2412  			continue
  2413  		}
  2414  		if ow := o.w; ow.Op == OpAdd64 {
  2415  			var lenOffset *Value
  2416  			if bound := ow.Args[0]; (bound.Op == OpSliceLen || bound.Op == OpStringLen) && bound.Args[0] == slice {
  2417  				lenOffset = ow.Args[1]
  2418  			} else if bound := ow.Args[1]; (bound.Op == OpSliceLen || bound.Op == OpStringLen) && bound.Args[0] == slice {
  2419  				lenOffset = ow.Args[0]
  2420  			}
  2421  			if lenOffset == nil || lenOffset.Op != OpConst64 {
  2422  				continue
  2423  			}
  2424  			if K := -lenOffset.AuxInt; K >= 0 {
  2425  				or := o.r
  2426  				if isReslice {
  2427  					K++
  2428  				}
  2429  				if or == lt {
  2430  					or = lt | eq
  2431  					K++
  2432  				}
  2433  				if K < 0 { // We hate thinking about overflow
  2434  					continue
  2435  				}
  2436  
  2437  				if delta < K && or == lt|eq {
  2438  					return true
  2439  				}
  2440  			}
  2441  		}
  2442  	}
  2443  	return false
  2444  }
  2445  
  2446  func addLocalFacts(ft *factsTable, b *Block) {
  2447  	ft.topoSortValuesInBlock(b)
  2448  
  2449  	for _, v := range b.Values {
  2450  		// Propagate constant ranges before relative relations to get
  2451  		// the most up-to-date constant bounds for isNonNegative calls.
  2452  		ft.flowLimit(v)
  2453  
  2454  		switch v.Op {
  2455  		case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2456  			x := ft.limits[v.Args[0].ID]
  2457  			y := ft.limits[v.Args[1].ID]
  2458  			if !unsignedAddOverflows(x.umax, y.umax, v.Type) {
  2459  				r := gt
  2460  				if x.maybeZero() {
  2461  					r |= eq
  2462  				}
  2463  				ft.update(b, v, v.Args[1], unsigned, r)
  2464  				r = gt
  2465  				if y.maybeZero() {
  2466  					r |= eq
  2467  				}
  2468  				ft.update(b, v, v.Args[0], unsigned, r)
  2469  			}
  2470  			if x.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2471  				r := gt
  2472  				if x.maybeZero() {
  2473  					r |= eq
  2474  				}
  2475  				ft.update(b, v, v.Args[1], signed, r)
  2476  			}
  2477  			if y.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2478  				r := gt
  2479  				if y.maybeZero() {
  2480  					r |= eq
  2481  				}
  2482  				ft.update(b, v, v.Args[0], signed, r)
  2483  			}
  2484  			if x.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2485  				r := lt
  2486  				if x.maybeZero() {
  2487  					r |= eq
  2488  				}
  2489  				ft.update(b, v, v.Args[1], signed, r)
  2490  			}
  2491  			if y.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2492  				r := lt
  2493  				if y.maybeZero() {
  2494  					r |= eq
  2495  				}
  2496  				ft.update(b, v, v.Args[0], signed, r)
  2497  			}
  2498  		case OpSub64, OpSub32, OpSub16, OpSub8:
  2499  			x := ft.limits[v.Args[0].ID]
  2500  			y := ft.limits[v.Args[1].ID]
  2501  			if !unsignedSubUnderflows(x.umin, y.umax) {
  2502  				r := lt
  2503  				if y.maybeZero() {
  2504  					r |= eq
  2505  				}
  2506  				ft.update(b, v, v.Args[0], unsigned, r)
  2507  			}
  2508  			// FIXME: we could also do signed facts but the overflow checks are much trickier and I don't need it yet.
  2509  		case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2510  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2511  			ft.update(b, v, v.Args[1], unsigned, lt|eq)
  2512  			if ft.isNonNegative(v.Args[0]) {
  2513  				ft.update(b, v, v.Args[0], signed, lt|eq)
  2514  			}
  2515  			if ft.isNonNegative(v.Args[1]) {
  2516  				ft.update(b, v, v.Args[1], signed, lt|eq)
  2517  			}
  2518  		case OpOr64, OpOr32, OpOr16, OpOr8:
  2519  			// TODO: investigate how to always add facts without much slowdown, see issue #57959
  2520  			//ft.update(b, v, v.Args[0], unsigned, gt|eq)
  2521  			//ft.update(b, v, v.Args[1], unsigned, gt|eq)
  2522  		case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  2523  			if !ft.isNonNegative(v.Args[1]) {
  2524  				break
  2525  			}
  2526  			fallthrough
  2527  		case OpRsh8x64, OpRsh8x32, OpRsh8x16, OpRsh8x8,
  2528  			OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
  2529  			OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
  2530  			OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8:
  2531  			if !ft.isNonNegative(v.Args[0]) {
  2532  				break
  2533  			}
  2534  			fallthrough
  2535  		case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  2536  			OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  2537  			OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2538  			OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2539  			OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8:
  2540  			switch add := v.Args[0]; add.Op {
  2541  			// round-up division pattern; given:
  2542  			// v = (x + y) / z
  2543  			// if y < z then v <= x
  2544  			case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2545  				z := v.Args[1]
  2546  				zl := ft.limits[z.ID]
  2547  				var uminDivisor uint64
  2548  				switch v.Op {
  2549  				case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  2550  					OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  2551  					uminDivisor = zl.umin
  2552  				case OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  2553  					OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2554  					OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2555  					OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8,
  2556  					OpRsh8x64, OpRsh8x32, OpRsh8x16, OpRsh8x8,
  2557  					OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
  2558  					OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
  2559  					OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8:
  2560  					uminDivisor = 1 << zl.umin
  2561  				default:
  2562  					panic("unreachable")
  2563  				}
  2564  
  2565  				x := add.Args[0]
  2566  				xl := ft.limits[x.ID]
  2567  				y := add.Args[1]
  2568  				yl := ft.limits[y.ID]
  2569  				if !unsignedAddOverflows(xl.umax, yl.umax, add.Type) {
  2570  					if xl.umax < uminDivisor {
  2571  						ft.update(b, v, y, unsigned, lt|eq)
  2572  					}
  2573  					if yl.umax < uminDivisor {
  2574  						ft.update(b, v, x, unsigned, lt|eq)
  2575  					}
  2576  				}
  2577  			}
  2578  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2579  		case OpMod64, OpMod32, OpMod16, OpMod8:
  2580  			if !ft.isNonNegative(v.Args[0]) || !ft.isNonNegative(v.Args[1]) {
  2581  				break
  2582  			}
  2583  			fallthrough
  2584  		case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  2585  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2586  			// Note: we have to be careful that this doesn't imply
  2587  			// that the modulus is >0, which isn't true until *after*
  2588  			// the mod instruction executes (and thus panics if the
  2589  			// modulus is 0). See issue 67625.
  2590  			ft.update(b, v, v.Args[1], unsigned, lt)
  2591  		case OpStringLen:
  2592  			if v.Args[0].Op == OpStringMake {
  2593  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2594  			}
  2595  		case OpSliceLen:
  2596  			if v.Args[0].Op == OpSliceMake {
  2597  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2598  			}
  2599  		case OpSliceCap:
  2600  			if v.Args[0].Op == OpSliceMake {
  2601  				ft.update(b, v, v.Args[0].Args[2], signed, eq)
  2602  			}
  2603  		case OpIsInBounds:
  2604  			if checkForChunkedIndexBounds(ft, b, v.Args[0], v.Args[1], false) {
  2605  				if b.Func.pass.debug > 0 {
  2606  					b.Func.Warnl(v.Pos, "Proved %s for blocked indexing", v.Op)
  2607  				}
  2608  				ft.booleanTrue(v)
  2609  			}
  2610  		case OpIsSliceInBounds:
  2611  			if checkForChunkedIndexBounds(ft, b, v.Args[0], v.Args[1], true) {
  2612  				if b.Func.pass.debug > 0 {
  2613  					b.Func.Warnl(v.Pos, "Proved %s for blocked reslicing", v.Op)
  2614  				}
  2615  				ft.booleanTrue(v)
  2616  			}
  2617  		case OpPhi:
  2618  			addLocalFactsPhi(ft, v)
  2619  		}
  2620  	}
  2621  }
  2622  
  2623  func addLocalFactsPhi(ft *factsTable, v *Value) {
  2624  	// Look for phis that implement min/max.
  2625  	//   z:
  2626  	//      c = Less64 x y (or other Less/Leq operation)
  2627  	//      If c -> bx by
  2628  	//   bx: <- z
  2629  	//       -> b ...
  2630  	//   by: <- z
  2631  	//      -> b ...
  2632  	//   b: <- bx by
  2633  	//      v = Phi x y
  2634  	// Then v is either min or max of x,y.
  2635  	// If it is the min, then we deduce v <= x && v <= y.
  2636  	// If it is the max, then we deduce v >= x && v >= y.
  2637  	// The min case is useful for the copy builtin, see issue 16833.
  2638  	if len(v.Args) != 2 {
  2639  		return
  2640  	}
  2641  	b := v.Block
  2642  	x := v.Args[0]
  2643  	y := v.Args[1]
  2644  	bx := b.Preds[0].b
  2645  	by := b.Preds[1].b
  2646  	var z *Block // branch point
  2647  	switch {
  2648  	case bx == by: // bx == by == z case
  2649  		z = bx
  2650  	case by.uniquePred() == bx: // bx == z case
  2651  		z = bx
  2652  	case bx.uniquePred() == by: // by == z case
  2653  		z = by
  2654  	case bx.uniquePred() == by.uniquePred():
  2655  		z = bx.uniquePred()
  2656  	}
  2657  	if z == nil || z.Kind != BlockIf {
  2658  		return
  2659  	}
  2660  	c := z.Controls[0]
  2661  	if len(c.Args) != 2 {
  2662  		return
  2663  	}
  2664  	var isMin bool // if c, a less-than comparison, is true, phi chooses x.
  2665  	if bx == z {
  2666  		isMin = b.Preds[0].i == 0
  2667  	} else {
  2668  		isMin = bx.Preds[0].i == 0
  2669  	}
  2670  	if c.Args[0] == x && c.Args[1] == y {
  2671  		// ok
  2672  	} else if c.Args[0] == y && c.Args[1] == x {
  2673  		// Comparison is reversed from how the values are listed in the Phi.
  2674  		isMin = !isMin
  2675  	} else {
  2676  		// Not comparing x and y.
  2677  		return
  2678  	}
  2679  	var dom domain
  2680  	switch c.Op {
  2681  	case OpLess64, OpLess32, OpLess16, OpLess8, OpLeq64, OpLeq32, OpLeq16, OpLeq8:
  2682  		dom = signed
  2683  	case OpLess64U, OpLess32U, OpLess16U, OpLess8U, OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
  2684  		dom = unsigned
  2685  	default:
  2686  		return
  2687  	}
  2688  	var rel relation
  2689  	if isMin {
  2690  		rel = lt | eq
  2691  	} else {
  2692  		rel = gt | eq
  2693  	}
  2694  	ft.update(b, v, x, dom, rel)
  2695  	ft.update(b, v, y, dom, rel)
  2696  }
  2697  
  2698  var ctzNonZeroOp = map[Op]Op{
  2699  	OpCtz8:  OpCtz8NonZero,
  2700  	OpCtz16: OpCtz16NonZero,
  2701  	OpCtz32: OpCtz32NonZero,
  2702  	OpCtz64: OpCtz64NonZero,
  2703  }
  2704  var mostNegativeDividend = map[Op]int64{
  2705  	OpDiv16: -1 << 15,
  2706  	OpMod16: -1 << 15,
  2707  	OpDiv32: -1 << 31,
  2708  	OpMod32: -1 << 31,
  2709  	OpDiv64: -1 << 63,
  2710  	OpMod64: -1 << 63,
  2711  }
  2712  var unsignedOp = map[Op]Op{
  2713  	OpDiv8:     OpDiv8u,
  2714  	OpDiv16:    OpDiv16u,
  2715  	OpDiv32:    OpDiv32u,
  2716  	OpDiv64:    OpDiv64u,
  2717  	OpMod8:     OpMod8u,
  2718  	OpMod16:    OpMod16u,
  2719  	OpMod32:    OpMod32u,
  2720  	OpMod64:    OpMod64u,
  2721  	OpRsh8x8:   OpRsh8Ux8,
  2722  	OpRsh8x16:  OpRsh8Ux16,
  2723  	OpRsh8x32:  OpRsh8Ux32,
  2724  	OpRsh8x64:  OpRsh8Ux64,
  2725  	OpRsh16x8:  OpRsh16Ux8,
  2726  	OpRsh16x16: OpRsh16Ux16,
  2727  	OpRsh16x32: OpRsh16Ux32,
  2728  	OpRsh16x64: OpRsh16Ux64,
  2729  	OpRsh32x8:  OpRsh32Ux8,
  2730  	OpRsh32x16: OpRsh32Ux16,
  2731  	OpRsh32x32: OpRsh32Ux32,
  2732  	OpRsh32x64: OpRsh32Ux64,
  2733  	OpRsh64x8:  OpRsh64Ux8,
  2734  	OpRsh64x16: OpRsh64Ux16,
  2735  	OpRsh64x32: OpRsh64Ux32,
  2736  	OpRsh64x64: OpRsh64Ux64,
  2737  }
  2738  
  2739  var bytesizeToConst = [...]Op{
  2740  	8 / 8:  OpConst8,
  2741  	16 / 8: OpConst16,
  2742  	32 / 8: OpConst32,
  2743  	64 / 8: OpConst64,
  2744  }
  2745  var bytesizeToNeq = [...]Op{
  2746  	8 / 8:  OpNeq8,
  2747  	16 / 8: OpNeq16,
  2748  	32 / 8: OpNeq32,
  2749  	64 / 8: OpNeq64,
  2750  }
  2751  var bytesizeToAnd = [...]Op{
  2752  	8 / 8:  OpAnd8,
  2753  	16 / 8: OpAnd16,
  2754  	32 / 8: OpAnd32,
  2755  	64 / 8: OpAnd64,
  2756  }
  2757  
  2758  // simplifyBlock simplifies some constant values in b and evaluates
  2759  // branches to non-uniquely dominated successors of b.
  2760  func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
  2761  	for iv, v := range b.Values {
  2762  		switch v.Op {
  2763  		case OpStaticLECall:
  2764  			if b.Func.pass.debug > 0 && len(v.Args) == 2 {
  2765  				fn := auxToCall(v.Aux).Fn
  2766  				if fn != nil && strings.Contains(fn.String(), "prove") {
  2767  					// Print bounds of any argument to single-arg function with "prove" in name,
  2768  					// for debugging and especially for test/prove.go.
  2769  					// (v.Args[1] is mem).
  2770  					x := v.Args[0]
  2771  					b.Func.Warnl(v.Pos, "Proved %v (%v)", ft.limits[x.ID], x)
  2772  				}
  2773  			}
  2774  		case OpSlicemask:
  2775  			// Replace OpSlicemask operations in b with constants where possible.
  2776  			cap := v.Args[0]
  2777  			x, delta := isConstDelta(cap)
  2778  			if x != nil {
  2779  				// slicemask(x + y)
  2780  				// if x is larger than -y (y is negative), then slicemask is -1.
  2781  				lim := ft.limits[x.ID]
  2782  				if lim.umin > uint64(-delta) {
  2783  					if cap.Op == OpAdd64 {
  2784  						v.reset(OpConst64)
  2785  					} else {
  2786  						v.reset(OpConst32)
  2787  					}
  2788  					if b.Func.pass.debug > 0 {
  2789  						b.Func.Warnl(v.Pos, "Proved slicemask not needed")
  2790  					}
  2791  					v.AuxInt = -1
  2792  				}
  2793  				break
  2794  			}
  2795  			lim := ft.limits[cap.ID]
  2796  			if lim.umin > 0 {
  2797  				if cap.Type.Size() == 8 {
  2798  					v.reset(OpConst64)
  2799  				} else {
  2800  					v.reset(OpConst32)
  2801  				}
  2802  				if b.Func.pass.debug > 0 {
  2803  					b.Func.Warnl(v.Pos, "Proved slicemask not needed (by limit)")
  2804  				}
  2805  				v.AuxInt = -1
  2806  			}
  2807  
  2808  		case OpCtz8, OpCtz16, OpCtz32, OpCtz64:
  2809  			// On some architectures, notably amd64, we can generate much better
  2810  			// code for CtzNN if we know that the argument is non-zero.
  2811  			// Capture that information here for use in arch-specific optimizations.
  2812  			x := v.Args[0]
  2813  			lim := ft.limits[x.ID]
  2814  			if lim.umin > 0 || lim.min > 0 || lim.max < 0 {
  2815  				if b.Func.pass.debug > 0 {
  2816  					b.Func.Warnl(v.Pos, "Proved %v non-zero", v.Op)
  2817  				}
  2818  				v.Op = ctzNonZeroOp[v.Op]
  2819  			}
  2820  		case OpRsh8x8, OpRsh8x16, OpRsh8x32, OpRsh8x64,
  2821  			OpRsh16x8, OpRsh16x16, OpRsh16x32, OpRsh16x64,
  2822  			OpRsh32x8, OpRsh32x16, OpRsh32x32, OpRsh32x64,
  2823  			OpRsh64x8, OpRsh64x16, OpRsh64x32, OpRsh64x64:
  2824  			if ft.isNonNegative(v.Args[0]) {
  2825  				if b.Func.pass.debug > 0 {
  2826  					b.Func.Warnl(v.Pos, "Proved %v is unsigned", v.Op)
  2827  				}
  2828  				v.Op = unsignedOp[v.Op]
  2829  			}
  2830  			fallthrough
  2831  		case OpLsh8x8, OpLsh8x16, OpLsh8x32, OpLsh8x64,
  2832  			OpLsh16x8, OpLsh16x16, OpLsh16x32, OpLsh16x64,
  2833  			OpLsh32x8, OpLsh32x16, OpLsh32x32, OpLsh32x64,
  2834  			OpLsh64x8, OpLsh64x16, OpLsh64x32, OpLsh64x64,
  2835  			OpRsh8Ux8, OpRsh8Ux16, OpRsh8Ux32, OpRsh8Ux64,
  2836  			OpRsh16Ux8, OpRsh16Ux16, OpRsh16Ux32, OpRsh16Ux64,
  2837  			OpRsh32Ux8, OpRsh32Ux16, OpRsh32Ux32, OpRsh32Ux64,
  2838  			OpRsh64Ux8, OpRsh64Ux16, OpRsh64Ux32, OpRsh64Ux64:
  2839  			// Check whether, for a << b, we know that b
  2840  			// is strictly less than the number of bits in a.
  2841  			by := v.Args[1]
  2842  			lim := ft.limits[by.ID]
  2843  			bits := 8 * v.Args[0].Type.Size()
  2844  			if lim.umax < uint64(bits) || (lim.max < bits && ft.isNonNegative(by)) {
  2845  				v.AuxInt = 1 // see shiftIsBounded
  2846  				if b.Func.pass.debug > 0 && !by.isGenericIntConst() {
  2847  					b.Func.Warnl(v.Pos, "Proved %v bounded", v.Op)
  2848  				}
  2849  			}
  2850  		case OpDiv8, OpDiv16, OpDiv32, OpDiv64, OpMod8, OpMod16, OpMod32, OpMod64:
  2851  			p, q := ft.limits[v.Args[0].ID], ft.limits[v.Args[1].ID] // p/q
  2852  			if p.nonnegative() && q.nonnegative() {
  2853  				if b.Func.pass.debug > 0 {
  2854  					b.Func.Warnl(v.Pos, "Proved %v is unsigned", v.Op)
  2855  				}
  2856  				v.Op = unsignedOp[v.Op]
  2857  				v.AuxInt = 0
  2858  				break
  2859  			}
  2860  			// Fixup code can be avoided on x86 if we know
  2861  			//  the divisor is not -1 or the dividend > MinIntNN.
  2862  			if v.Op != OpDiv8 && v.Op != OpMod8 && (q.max < -1 || q.min > -1 || p.min > mostNegativeDividend[v.Op]) {
  2863  				// See DivisionNeedsFixUp in rewrite.go.
  2864  				// v.AuxInt = 1 means we have proved that the divisor is not -1
  2865  				// or that the dividend is not the most negative integer,
  2866  				// so we do not need to add fix-up code.
  2867  				if b.Func.pass.debug > 0 {
  2868  					b.Func.Warnl(v.Pos, "Proved %v does not need fix-up", v.Op)
  2869  				}
  2870  				// Only usable on amd64 and 386, and only for ≥ 16-bit ops.
  2871  				// Don't modify AuxInt on other architectures, as that can interfere with CSE.
  2872  				// (Print the debug info above always, so that test/prove.go can be
  2873  				// checked on non-x86 systems.)
  2874  				// TODO: add other architectures?
  2875  				if b.Func.Config.arch == "386" || b.Func.Config.arch == "amd64" {
  2876  					v.AuxInt = 1
  2877  				}
  2878  			}
  2879  		case OpMul64, OpMul32, OpMul16, OpMul8:
  2880  			if vl := ft.limits[v.ID]; vl.min == vl.max || vl.umin == vl.umax {
  2881  				// v is going to be constant folded away; don't "optimize" it.
  2882  				break
  2883  			}
  2884  			x := v.Args[0]
  2885  			xl := ft.limits[x.ID]
  2886  			y := v.Args[1]
  2887  			yl := ft.limits[y.ID]
  2888  			if xl.umin == xl.umax && isPowerOfTwo(int64(xl.umin)) ||
  2889  				xl.min == xl.max && isPowerOfTwo(xl.min) ||
  2890  				yl.umin == yl.umax && isPowerOfTwo(int64(yl.umin)) ||
  2891  				yl.min == yl.max && isPowerOfTwo(yl.min) {
  2892  				// 0,1 * a power of two is better done as a shift
  2893  				break
  2894  			}
  2895  			switch xOne, yOne := xl.umax <= 1, yl.umax <= 1; {
  2896  			case xOne && yOne:
  2897  				v.Op = bytesizeToAnd[v.Type.Size()]
  2898  				if b.Func.pass.debug > 0 {
  2899  					b.Func.Warnl(v.Pos, "Rewrote Mul %v into And", v)
  2900  				}
  2901  			case yOne && b.Func.Config.haveCondSelect:
  2902  				x, y = y, x
  2903  				fallthrough
  2904  			case xOne && b.Func.Config.haveCondSelect:
  2905  				if !canCondSelect(v, b.Func.Config.arch, nil) {
  2906  					break
  2907  				}
  2908  				zero := b.Func.constVal(bytesizeToConst[v.Type.Size()], v.Type, 0, true)
  2909  				ft.initLimitForNewValue(zero)
  2910  				check := b.NewValue2(v.Pos, bytesizeToNeq[v.Type.Size()], types.Types[types.TBOOL], zero, x)
  2911  				ft.initLimitForNewValue(check)
  2912  				v.reset(OpCondSelect)
  2913  				v.AddArg3(y, zero, check)
  2914  
  2915  				// FIXME: workaround for go.dev/issues/76060
  2916  				// we need to schedule the Neq before the CondSelect even tho
  2917  				// scheduling is meaningless until we reach the schedule pass.
  2918  				if b.Values[len(b.Values)-1] != check {
  2919  					panic("unreachable; failed sanity check, new value isn't at the end of the block")
  2920  				}
  2921  				b.Values[iv], b.Values[len(b.Values)-1] = b.Values[len(b.Values)-1], b.Values[iv]
  2922  
  2923  				if b.Func.pass.debug > 0 {
  2924  					b.Func.Warnl(v.Pos, "Rewrote Mul %v into CondSelect; %v is bool", v, x)
  2925  				}
  2926  			}
  2927  		}
  2928  
  2929  		// Fold provable constant results.
  2930  		// Helps in cases where we reuse a value after branching on its equality.
  2931  		for i, arg := range v.Args {
  2932  			lim := ft.limits[arg.ID]
  2933  			var constValue int64
  2934  			switch {
  2935  			case lim.min == lim.max:
  2936  				constValue = lim.min
  2937  			case lim.umin == lim.umax:
  2938  				constValue = int64(lim.umin)
  2939  			default:
  2940  				continue
  2941  			}
  2942  			switch arg.Op {
  2943  			case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConstNil:
  2944  				continue
  2945  			}
  2946  			typ := arg.Type
  2947  			f := b.Func
  2948  			var c *Value
  2949  			switch {
  2950  			case typ.IsBoolean():
  2951  				c = f.ConstBool(typ, constValue != 0)
  2952  			case typ.IsInteger() && typ.Size() == 1:
  2953  				c = f.ConstInt8(typ, int8(constValue))
  2954  			case typ.IsInteger() && typ.Size() == 2:
  2955  				c = f.ConstInt16(typ, int16(constValue))
  2956  			case typ.IsInteger() && typ.Size() == 4:
  2957  				c = f.ConstInt32(typ, int32(constValue))
  2958  			case typ.IsInteger() && typ.Size() == 8:
  2959  				c = f.ConstInt64(typ, constValue)
  2960  			case typ.IsPtrShaped():
  2961  				if constValue == 0 {
  2962  					c = f.ConstNil(typ)
  2963  				} else {
  2964  					// Not sure how this might happen, but if it
  2965  					// does, just skip it.
  2966  					continue
  2967  				}
  2968  			default:
  2969  				// Not sure how this might happen, but if it
  2970  				// does, just skip it.
  2971  				continue
  2972  			}
  2973  			v.SetArg(i, c)
  2974  			ft.initLimitForNewValue(c)
  2975  			if b.Func.pass.debug > 1 {
  2976  				b.Func.Warnl(v.Pos, "Proved %v's arg %d (%v) is constant %d", v, i, arg, constValue)
  2977  			}
  2978  		}
  2979  	}
  2980  
  2981  	if b.Kind != BlockIf {
  2982  		return
  2983  	}
  2984  
  2985  	// Consider outgoing edges from this block.
  2986  	parent := b
  2987  	for i, branch := range [...]branch{positive, negative} {
  2988  		child := parent.Succs[i].b
  2989  		if getBranch(sdom, parent, child) != unknown {
  2990  			// For edges to uniquely dominated blocks, we
  2991  			// already did this when we visited the child.
  2992  			continue
  2993  		}
  2994  		// For edges to other blocks, this can trim a branch
  2995  		// even if we couldn't get rid of the child itself.
  2996  		ft.checkpoint()
  2997  		addBranchRestrictions(ft, parent, branch)
  2998  		unsat := ft.unsat
  2999  		ft.restore()
  3000  		if unsat {
  3001  			// This branch is impossible, so remove it
  3002  			// from the block.
  3003  			removeBranch(parent, branch)
  3004  			// No point in considering the other branch.
  3005  			// (It *is* possible for both to be
  3006  			// unsatisfiable since the fact table is
  3007  			// incomplete. We could turn this into a
  3008  			// BlockExit, but it doesn't seem worth it.)
  3009  			break
  3010  		}
  3011  	}
  3012  }
  3013  
  3014  func removeBranch(b *Block, branch branch) {
  3015  	c := b.Controls[0]
  3016  	if b.Func.pass.debug > 0 {
  3017  		verb := "Proved"
  3018  		if branch == positive {
  3019  			verb = "Disproved"
  3020  		}
  3021  		if b.Func.pass.debug > 1 {
  3022  			b.Func.Warnl(b.Pos, "%s %s (%s)", verb, c.Op, c)
  3023  		} else {
  3024  			b.Func.Warnl(b.Pos, "%s %s", verb, c.Op)
  3025  		}
  3026  	}
  3027  	if c != nil && c.Pos.IsStmt() == src.PosIsStmt && c.Pos.SameFileAndLine(b.Pos) {
  3028  		// attempt to preserve statement marker.
  3029  		b.Pos = b.Pos.WithIsStmt()
  3030  	}
  3031  	if branch == positive || branch == negative {
  3032  		b.Kind = BlockFirst
  3033  		b.ResetControls()
  3034  		if branch == positive {
  3035  			b.swapSuccessors()
  3036  		}
  3037  	} else {
  3038  		// TODO: figure out how to remove an entry from a jump table
  3039  	}
  3040  }
  3041  
  3042  // isConstDelta returns non-nil if v is equivalent to w+delta (signed).
  3043  func isConstDelta(v *Value) (w *Value, delta int64) {
  3044  	cop := OpConst64
  3045  	switch v.Op {
  3046  	case OpAdd32, OpSub32:
  3047  		cop = OpConst32
  3048  	case OpAdd16, OpSub16:
  3049  		cop = OpConst16
  3050  	case OpAdd8, OpSub8:
  3051  		cop = OpConst8
  3052  	}
  3053  	switch v.Op {
  3054  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  3055  		if v.Args[0].Op == cop {
  3056  			return v.Args[1], v.Args[0].AuxInt
  3057  		}
  3058  		if v.Args[1].Op == cop {
  3059  			return v.Args[0], v.Args[1].AuxInt
  3060  		}
  3061  	case OpSub64, OpSub32, OpSub16, OpSub8:
  3062  		if v.Args[1].Op == cop {
  3063  			aux := v.Args[1].AuxInt
  3064  			if aux != -aux { // Overflow; too bad
  3065  				return v.Args[0], -aux
  3066  			}
  3067  		}
  3068  	}
  3069  	return nil, 0
  3070  }
  3071  
  3072  // isCleanExt reports whether v is the result of a value-preserving
  3073  // sign or zero extension.
  3074  func isCleanExt(v *Value) bool {
  3075  	switch v.Op {
  3076  	case OpSignExt8to16, OpSignExt8to32, OpSignExt8to64,
  3077  		OpSignExt16to32, OpSignExt16to64, OpSignExt32to64:
  3078  		// signed -> signed is the only value-preserving sign extension
  3079  		return v.Args[0].Type.IsSigned() && v.Type.IsSigned()
  3080  
  3081  	case OpZeroExt8to16, OpZeroExt8to32, OpZeroExt8to64,
  3082  		OpZeroExt16to32, OpZeroExt16to64, OpZeroExt32to64:
  3083  		// unsigned -> signed/unsigned are value-preserving zero extensions
  3084  		return !v.Args[0].Type.IsSigned()
  3085  	}
  3086  	return false
  3087  }
  3088  
  3089  func getDependencyScore(scores []uint, v *Value) (score uint) {
  3090  	if score = scores[v.ID]; score != 0 {
  3091  		return score
  3092  	}
  3093  	defer func() {
  3094  		scores[v.ID] = score
  3095  	}()
  3096  	if v.Op == OpPhi {
  3097  		return 1
  3098  	}
  3099  	score = 2 // NIT(@Jorropo): always order phis first to make GOSSAFUNC pretty.
  3100  	for _, a := range v.Args {
  3101  		if a.Block != v.Block {
  3102  			continue
  3103  		}
  3104  		score = max(score, getDependencyScore(scores, a)+1)
  3105  	}
  3106  	return score
  3107  }
  3108  
  3109  // topoSortValuesInBlock ensure ranging over b.Values visit values before they are being used.
  3110  // It does not consider dependencies with other blocks; thus Phi nodes are considered to not have any dependecies.
  3111  // The result is always determistic and does not depend on the previous slice ordering.
  3112  func (ft *factsTable) topoSortValuesInBlock(b *Block) {
  3113  	f := b.Func
  3114  	want := f.NumValues()
  3115  
  3116  	scores := ft.reusedTopoSortScoresTable
  3117  	if want <= cap(scores) {
  3118  		scores = scores[:want]
  3119  	} else {
  3120  		if cap(scores) > 0 {
  3121  			f.Cache.freeUintSlice(scores)
  3122  		}
  3123  		scores = f.Cache.allocUintSlice(want)
  3124  		ft.reusedTopoSortScoresTable = scores
  3125  	}
  3126  
  3127  	for _, v := range b.Values {
  3128  		scores[v.ID] = 0 // sentinel
  3129  	}
  3130  
  3131  	slices.SortFunc(b.Values, func(a, b *Value) int {
  3132  		dependencyScoreA := getDependencyScore(scores, a)
  3133  		dependencyScoreB := getDependencyScore(scores, b)
  3134  		if dependencyScoreA != dependencyScoreB {
  3135  			return cmp.Compare(dependencyScoreA, dependencyScoreB)
  3136  		}
  3137  		return cmp.Compare(a.ID, b.ID)
  3138  	})
  3139  }
  3140  

View as plain text