Source file src/cmd/compile/internal/ssa/prove.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/src"
    10  	"fmt"
    11  	"math"
    12  	"math/bits"
    13  )
    14  
    15  type branch int
    16  
    17  const (
    18  	unknown branch = iota
    19  	positive
    20  	negative
    21  	// The outedges from a jump table are jumpTable0,
    22  	// jumpTable0+1, jumpTable0+2, etc. There could be an
    23  	// arbitrary number so we can't list them all here.
    24  	jumpTable0
    25  )
    26  
    27  func (b branch) String() string {
    28  	switch b {
    29  	case unknown:
    30  		return "unk"
    31  	case positive:
    32  		return "pos"
    33  	case negative:
    34  		return "neg"
    35  	default:
    36  		return fmt.Sprintf("jmp%d", b-jumpTable0)
    37  	}
    38  }
    39  
    40  // relation represents the set of possible relations between
    41  // pairs of variables (v, w). Without a priori knowledge the
    42  // mask is lt | eq | gt meaning v can be less than, equal to or
    43  // greater than w. When the execution path branches on the condition
    44  // `v op w` the set of relations is updated to exclude any
    45  // relation not possible due to `v op w` being true (or false).
    46  //
    47  // E.g.
    48  //
    49  //	r := relation(...)
    50  //
    51  //	if v < w {
    52  //	  newR := r & lt
    53  //	}
    54  //	if v >= w {
    55  //	  newR := r & (eq|gt)
    56  //	}
    57  //	if v != w {
    58  //	  newR := r & (lt|gt)
    59  //	}
    60  type relation uint
    61  
    62  const (
    63  	lt relation = 1 << iota
    64  	eq
    65  	gt
    66  )
    67  
    68  var relationStrings = [...]string{
    69  	0: "none", lt: "<", eq: "==", lt | eq: "<=",
    70  	gt: ">", gt | lt: "!=", gt | eq: ">=", gt | eq | lt: "any",
    71  }
    72  
    73  func (r relation) String() string {
    74  	if r < relation(len(relationStrings)) {
    75  		return relationStrings[r]
    76  	}
    77  	return fmt.Sprintf("relation(%d)", uint(r))
    78  }
    79  
    80  // domain represents the domain of a variable pair in which a set
    81  // of relations is known. For example, relations learned for unsigned
    82  // pairs cannot be transferred to signed pairs because the same bit
    83  // representation can mean something else.
    84  type domain uint
    85  
    86  const (
    87  	signed domain = 1 << iota
    88  	unsigned
    89  	pointer
    90  	boolean
    91  )
    92  
    93  var domainStrings = [...]string{
    94  	"signed", "unsigned", "pointer", "boolean",
    95  }
    96  
    97  func (d domain) String() string {
    98  	s := ""
    99  	for i, ds := range domainStrings {
   100  		if d&(1<<uint(i)) != 0 {
   101  			if len(s) != 0 {
   102  				s += "|"
   103  			}
   104  			s += ds
   105  			d &^= 1 << uint(i)
   106  		}
   107  	}
   108  	if d != 0 {
   109  		if len(s) != 0 {
   110  			s += "|"
   111  		}
   112  		s += fmt.Sprintf("0x%x", uint(d))
   113  	}
   114  	return s
   115  }
   116  
   117  // a limit records known upper and lower bounds for a value.
   118  //
   119  // If we have min>max or umin>umax, then this limit is
   120  // called "unsatisfiable". When we encounter such a limit, we
   121  // know that any code for which that limit applies is unreachable.
   122  // We don't particularly care how unsatisfiable limits propagate,
   123  // including becoming satisfiable, because any optimization
   124  // decisions based on those limits only apply to unreachable code.
   125  type limit struct {
   126  	min, max   int64  // min <= value <= max, signed
   127  	umin, umax uint64 // umin <= value <= umax, unsigned
   128  	// For booleans, we use 0==false, 1==true for both ranges
   129  	// For pointers, we use 0,0,0,0 for nil and minInt64,maxInt64,1,maxUint64 for nonnil
   130  }
   131  
   132  func (l limit) String() string {
   133  	return fmt.Sprintf("sm,SM,um,UM=%d,%d,%d,%d", l.min, l.max, l.umin, l.umax)
   134  }
   135  
   136  func (l limit) intersect(l2 limit) limit {
   137  	l.min = max(l.min, l2.min)
   138  	l.umin = max(l.umin, l2.umin)
   139  	l.max = min(l.max, l2.max)
   140  	l.umax = min(l.umax, l2.umax)
   141  	return l
   142  }
   143  
   144  func (l limit) signedMin(m int64) limit {
   145  	l.min = max(l.min, m)
   146  	return l
   147  }
   148  
   149  func (l limit) signedMinMax(minimum, maximum int64) limit {
   150  	l.min = max(l.min, minimum)
   151  	l.max = min(l.max, maximum)
   152  	return l
   153  }
   154  
   155  func (l limit) unsignedMin(m uint64) limit {
   156  	l.umin = max(l.umin, m)
   157  	return l
   158  }
   159  func (l limit) unsignedMax(m uint64) limit {
   160  	l.umax = min(l.umax, m)
   161  	return l
   162  }
   163  func (l limit) unsignedMinMax(minimum, maximum uint64) limit {
   164  	l.umin = max(l.umin, minimum)
   165  	l.umax = min(l.umax, maximum)
   166  	return l
   167  }
   168  
   169  func (l limit) nonzero() bool {
   170  	return l.min > 0 || l.umin > 0 || l.max < 0
   171  }
   172  func (l limit) nonnegative() bool {
   173  	return l.min >= 0
   174  }
   175  func (l limit) unsat() bool {
   176  	return l.min > l.max || l.umin > l.umax
   177  }
   178  
   179  // If x and y can add without overflow or underflow
   180  // (using b bits), safeAdd returns x+y, true.
   181  // Otherwise, returns 0, false.
   182  func safeAdd(x, y int64, b uint) (int64, bool) {
   183  	s := x + y
   184  	if x >= 0 && y >= 0 && s < 0 {
   185  		return 0, false // 64-bit overflow
   186  	}
   187  	if x < 0 && y < 0 && s >= 0 {
   188  		return 0, false // 64-bit underflow
   189  	}
   190  	if !fitsInBits(s, b) {
   191  		return 0, false
   192  	}
   193  	return s, true
   194  }
   195  
   196  // same as safeAdd for unsigned arithmetic.
   197  func safeAddU(x, y uint64, b uint) (uint64, bool) {
   198  	s := x + y
   199  	if s < x || s < y {
   200  		return 0, false // 64-bit overflow
   201  	}
   202  	if !fitsInBitsU(s, b) {
   203  		return 0, false
   204  	}
   205  	return s, true
   206  }
   207  
   208  // same as safeAdd but for subtraction.
   209  func safeSub(x, y int64, b uint) (int64, bool) {
   210  	if y == math.MinInt64 {
   211  		if x == math.MaxInt64 {
   212  			return 0, false // 64-bit overflow
   213  		}
   214  		x++
   215  		y++
   216  	}
   217  	return safeAdd(x, -y, b)
   218  }
   219  
   220  // same as safeAddU but for subtraction.
   221  func safeSubU(x, y uint64, b uint) (uint64, bool) {
   222  	if x < y {
   223  		return 0, false // 64-bit underflow
   224  	}
   225  	s := x - y
   226  	if !fitsInBitsU(s, b) {
   227  		return 0, false
   228  	}
   229  	return s, true
   230  }
   231  
   232  // fitsInBits reports whether x fits in b bits (signed).
   233  func fitsInBits(x int64, b uint) bool {
   234  	if b == 64 {
   235  		return true
   236  	}
   237  	m := int64(-1) << (b - 1)
   238  	M := -m - 1
   239  	return x >= m && x <= M
   240  }
   241  
   242  // fitsInBitsU reports whether x fits in b bits (unsigned).
   243  func fitsInBitsU(x uint64, b uint) bool {
   244  	return x>>b == 0
   245  }
   246  
   247  // add returns the limit obtained by adding a value with limit l
   248  // to a value with limit l2. The result must fit in b bits.
   249  func (l limit) add(l2 limit, b uint) limit {
   250  	r := noLimit
   251  	min, minOk := safeAdd(l.min, l2.min, b)
   252  	max, maxOk := safeAdd(l.max, l2.max, b)
   253  	if minOk && maxOk {
   254  		r.min = min
   255  		r.max = max
   256  	}
   257  	umin, uminOk := safeAddU(l.umin, l2.umin, b)
   258  	umax, umaxOk := safeAddU(l.umax, l2.umax, b)
   259  	if uminOk && umaxOk {
   260  		r.umin = umin
   261  		r.umax = umax
   262  	}
   263  	return r
   264  }
   265  
   266  // same as add but for subtraction.
   267  func (l limit) sub(l2 limit, b uint) limit {
   268  	r := noLimit
   269  	min, minOk := safeSub(l.min, l2.max, b)
   270  	max, maxOk := safeSub(l.max, l2.min, b)
   271  	if minOk && maxOk {
   272  		r.min = min
   273  		r.max = max
   274  	}
   275  	umin, uminOk := safeSubU(l.umin, l2.umax, b)
   276  	umax, umaxOk := safeSubU(l.umax, l2.umin, b)
   277  	if uminOk && umaxOk {
   278  		r.umin = umin
   279  		r.umax = umax
   280  	}
   281  	return r
   282  }
   283  
   284  // same as add but for multiplication.
   285  func (l limit) mul(l2 limit, b uint) limit {
   286  	r := noLimit
   287  	umaxhi, umaxlo := bits.Mul64(l.umax, l2.umax)
   288  	if umaxhi == 0 && fitsInBitsU(umaxlo, b) {
   289  		r.umax = umaxlo
   290  		r.umin = l.umin * l2.umin
   291  		// Note: if the code containing this multiply is
   292  		// unreachable, then we may have umin>umax, and this
   293  		// multiply may overflow.  But that's ok for
   294  		// unreachable code. If this code is reachable, we
   295  		// know umin<=umax, so this multiply will not overflow
   296  		// because the max multiply didn't.
   297  	}
   298  	// Signed is harder, so don't bother. The only useful
   299  	// case is when we know both multiplicands are nonnegative,
   300  	// but that case is handled above because we would have then
   301  	// previously propagated signed info to the unsigned domain,
   302  	// and will propagate it back after the multiply.
   303  	return r
   304  }
   305  
   306  // Similar to add, but compute 1 << l if it fits without overflow in b bits.
   307  func (l limit) exp2(b uint) limit {
   308  	r := noLimit
   309  	if l.umax < uint64(b) {
   310  		r.umin = 1 << l.umin
   311  		r.umax = 1 << l.umax
   312  		// Same as above in mul, signed<->unsigned propagation
   313  		// will handle the signed case for us.
   314  	}
   315  	return r
   316  }
   317  
   318  // Similar to add, but computes the complement of the limit for bitsize b.
   319  func (l limit) com(b uint) limit {
   320  	switch b {
   321  	case 64:
   322  		return limit{
   323  			min:  ^l.max,
   324  			max:  ^l.min,
   325  			umin: ^l.umax,
   326  			umax: ^l.umin,
   327  		}
   328  	case 32:
   329  		return limit{
   330  			min:  int64(^int32(l.max)),
   331  			max:  int64(^int32(l.min)),
   332  			umin: uint64(^uint32(l.umax)),
   333  			umax: uint64(^uint32(l.umin)),
   334  		}
   335  	case 16:
   336  		return limit{
   337  			min:  int64(^int16(l.max)),
   338  			max:  int64(^int16(l.min)),
   339  			umin: uint64(^uint16(l.umax)),
   340  			umax: uint64(^uint16(l.umin)),
   341  		}
   342  	case 8:
   343  		return limit{
   344  			min:  int64(^int8(l.max)),
   345  			max:  int64(^int8(l.min)),
   346  			umin: uint64(^uint8(l.umax)),
   347  			umax: uint64(^uint8(l.umin)),
   348  		}
   349  	default:
   350  		panic("unreachable")
   351  	}
   352  }
   353  
   354  var noLimit = limit{math.MinInt64, math.MaxInt64, 0, math.MaxUint64}
   355  
   356  // a limitFact is a limit known for a particular value.
   357  type limitFact struct {
   358  	vid   ID
   359  	limit limit
   360  }
   361  
   362  // An ordering encodes facts like v < w.
   363  type ordering struct {
   364  	next *ordering // linked list of all known orderings for v.
   365  	// Note: v is implicit here, determined by which linked list it is in.
   366  	w *Value
   367  	d domain
   368  	r relation // one of ==,!=,<,<=,>,>=
   369  	// if d is boolean or pointer, r can only be ==, !=
   370  }
   371  
   372  // factsTable keeps track of relations between pairs of values.
   373  //
   374  // The fact table logic is sound, but incomplete. Outside of a few
   375  // special cases, it performs no deduction or arithmetic. While there
   376  // are known decision procedures for this, the ad hoc approach taken
   377  // by the facts table is effective for real code while remaining very
   378  // efficient.
   379  type factsTable struct {
   380  	// unsat is true if facts contains a contradiction.
   381  	//
   382  	// Note that the factsTable logic is incomplete, so if unsat
   383  	// is false, the assertions in factsTable could be satisfiable
   384  	// *or* unsatisfiable.
   385  	unsat      bool // true if facts contains a contradiction
   386  	unsatDepth int  // number of unsat checkpoints
   387  
   388  	// order* is a couple of partial order sets that record information
   389  	// about relations between SSA values in the signed and unsigned
   390  	// domain.
   391  	orderS *poset
   392  	orderU *poset
   393  
   394  	// orderings contains a list of known orderings between values.
   395  	// These lists are indexed by v.ID.
   396  	// We do not record transitive orderings. Only explicitly learned
   397  	// orderings are recorded. Transitive orderings can be obtained
   398  	// by walking along the individual orderings.
   399  	orderings map[ID]*ordering
   400  	// stack of IDs which have had an entry added in orderings.
   401  	// In addition, ID==0 are checkpoint markers.
   402  	orderingsStack []ID
   403  	orderingCache  *ordering // unused ordering records
   404  
   405  	// known lower and upper constant bounds on individual values.
   406  	limits       []limit     // indexed by value ID
   407  	limitStack   []limitFact // previous entries
   408  	recurseCheck []bool      // recursion detector for limit propagation
   409  
   410  	// For each slice s, a map from s to a len(s)/cap(s) value (if any)
   411  	// TODO: check if there are cases that matter where we have
   412  	// more than one len(s) for a slice. We could keep a list if necessary.
   413  	lens map[ID]*Value
   414  	caps map[ID]*Value
   415  }
   416  
   417  // checkpointBound is an invalid value used for checkpointing
   418  // and restoring factsTable.
   419  var checkpointBound = limitFact{}
   420  
   421  func newFactsTable(f *Func) *factsTable {
   422  	ft := &factsTable{}
   423  	ft.orderS = f.newPoset()
   424  	ft.orderU = f.newPoset()
   425  	ft.orderS.SetUnsigned(false)
   426  	ft.orderU.SetUnsigned(true)
   427  	ft.orderings = make(map[ID]*ordering)
   428  	ft.limits = f.Cache.allocLimitSlice(f.NumValues())
   429  	for _, b := range f.Blocks {
   430  		for _, v := range b.Values {
   431  			ft.limits[v.ID] = initLimit(v)
   432  		}
   433  	}
   434  	ft.limitStack = make([]limitFact, 4)
   435  	ft.recurseCheck = f.Cache.allocBoolSlice(f.NumValues())
   436  	return ft
   437  }
   438  
   439  // initLimitForNewValue initializes the limits for newly created values,
   440  // possibly needing to expand the limits slice. Currently used by
   441  // simplifyBlock when certain provably constant results are folded.
   442  func (ft *factsTable) initLimitForNewValue(v *Value) {
   443  	if int(v.ID) >= len(ft.limits) {
   444  		f := v.Block.Func
   445  		n := f.NumValues()
   446  		if cap(ft.limits) >= n {
   447  			ft.limits = ft.limits[:n]
   448  		} else {
   449  			old := ft.limits
   450  			ft.limits = f.Cache.allocLimitSlice(n)
   451  			copy(ft.limits, old)
   452  			f.Cache.freeLimitSlice(old)
   453  		}
   454  	}
   455  	ft.limits[v.ID] = initLimit(v)
   456  }
   457  
   458  // signedMin records the fact that we know v is at least
   459  // min in the signed domain.
   460  func (ft *factsTable) signedMin(v *Value, min int64) bool {
   461  	return ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
   462  }
   463  
   464  // signedMax records the fact that we know v is at most
   465  // max in the signed domain.
   466  func (ft *factsTable) signedMax(v *Value, max int64) bool {
   467  	return ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
   468  }
   469  func (ft *factsTable) signedMinMax(v *Value, min, max int64) bool {
   470  	return ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
   471  }
   472  
   473  // setNonNegative records the fact that v is known to be non-negative.
   474  func (ft *factsTable) setNonNegative(v *Value) bool {
   475  	return ft.signedMin(v, 0)
   476  }
   477  
   478  // unsignedMin records the fact that we know v is at least
   479  // min in the unsigned domain.
   480  func (ft *factsTable) unsignedMin(v *Value, min uint64) bool {
   481  	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
   482  }
   483  
   484  // unsignedMax records the fact that we know v is at most
   485  // max in the unsigned domain.
   486  func (ft *factsTable) unsignedMax(v *Value, max uint64) bool {
   487  	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
   488  }
   489  func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) bool {
   490  	return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
   491  }
   492  
   493  func (ft *factsTable) booleanFalse(v *Value) bool {
   494  	return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   495  }
   496  func (ft *factsTable) booleanTrue(v *Value) bool {
   497  	return ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
   498  }
   499  func (ft *factsTable) pointerNil(v *Value) bool {
   500  	return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
   501  }
   502  func (ft *factsTable) pointerNonNil(v *Value) bool {
   503  	l := noLimit
   504  	l.umin = 1
   505  	return ft.newLimit(v, l)
   506  }
   507  
   508  // newLimit adds new limiting information for v.
   509  // Returns true if the new limit added any new information.
   510  func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
   511  	oldLim := ft.limits[v.ID]
   512  
   513  	// Merge old and new information.
   514  	lim := oldLim.intersect(newLim)
   515  
   516  	// signed <-> unsigned propagation
   517  	if lim.min >= 0 {
   518  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
   519  	}
   520  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
   521  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
   522  	}
   523  
   524  	if lim == oldLim {
   525  		return false // nothing new to record
   526  	}
   527  
   528  	if lim.unsat() {
   529  		r := !ft.unsat
   530  		ft.unsat = true
   531  		return r
   532  	}
   533  
   534  	// Check for recursion. This normally happens because in unsatisfiable
   535  	// cases we have a < b < a, and every update to a's limits returns
   536  	// here again with the limit increased by 2.
   537  	// Normally this is caught early by the orderS/orderU posets, but in
   538  	// cases where the comparisons jump between signed and unsigned domains,
   539  	// the posets will not notice.
   540  	if ft.recurseCheck[v.ID] {
   541  		// This should only happen for unsatisfiable cases. TODO: check
   542  		return false
   543  	}
   544  	ft.recurseCheck[v.ID] = true
   545  	defer func() {
   546  		ft.recurseCheck[v.ID] = false
   547  	}()
   548  
   549  	// Record undo information.
   550  	ft.limitStack = append(ft.limitStack, limitFact{v.ID, oldLim})
   551  	// Record new information.
   552  	ft.limits[v.ID] = lim
   553  	if v.Block.Func.pass.debug > 2 {
   554  		// TODO: pos is probably wrong. This is the position where v is defined,
   555  		// not the position where we learned the fact about it (which was
   556  		// probably some subsequent compare+branch).
   557  		v.Block.Func.Warnl(v.Pos, "new limit %s %s unsat=%v", v, lim.String(), ft.unsat)
   558  	}
   559  
   560  	// Propagate this new constant range to other values
   561  	// that we know are ordered with respect to this one.
   562  	// Note overflow/underflow in the arithmetic below is ok,
   563  	// it will just lead to imprecision (undetected unsatisfiability).
   564  	for o := ft.orderings[v.ID]; o != nil; o = o.next {
   565  		switch o.d {
   566  		case signed:
   567  			switch o.r {
   568  			case eq: // v == w
   569  				ft.signedMinMax(o.w, lim.min, lim.max)
   570  			case lt | eq: // v <= w
   571  				ft.signedMin(o.w, lim.min)
   572  			case lt: // v < w
   573  				ft.signedMin(o.w, lim.min+1)
   574  			case gt | eq: // v >= w
   575  				ft.signedMax(o.w, lim.max)
   576  			case gt: // v > w
   577  				ft.signedMax(o.w, lim.max-1)
   578  			case lt | gt: // v != w
   579  				if lim.min == lim.max { // v is a constant
   580  					c := lim.min
   581  					if ft.limits[o.w.ID].min == c {
   582  						ft.signedMin(o.w, c+1)
   583  					}
   584  					if ft.limits[o.w.ID].max == c {
   585  						ft.signedMax(o.w, c-1)
   586  					}
   587  				}
   588  			}
   589  		case unsigned:
   590  			switch o.r {
   591  			case eq: // v == w
   592  				ft.unsignedMinMax(o.w, lim.umin, lim.umax)
   593  			case lt | eq: // v <= w
   594  				ft.unsignedMin(o.w, lim.umin)
   595  			case lt: // v < w
   596  				ft.unsignedMin(o.w, lim.umin+1)
   597  			case gt | eq: // v >= w
   598  				ft.unsignedMax(o.w, lim.umax)
   599  			case gt: // v > w
   600  				ft.unsignedMax(o.w, lim.umax-1)
   601  			case lt | gt: // v != w
   602  				if lim.umin == lim.umax { // v is a constant
   603  					c := lim.umin
   604  					if ft.limits[o.w.ID].umin == c {
   605  						ft.unsignedMin(o.w, c+1)
   606  					}
   607  					if ft.limits[o.w.ID].umax == c {
   608  						ft.unsignedMax(o.w, c-1)
   609  					}
   610  				}
   611  			}
   612  		case boolean:
   613  			switch o.r {
   614  			case eq:
   615  				if lim.min == 0 && lim.max == 0 { // constant false
   616  					ft.booleanFalse(o.w)
   617  				}
   618  				if lim.min == 1 && lim.max == 1 { // constant true
   619  					ft.booleanTrue(o.w)
   620  				}
   621  			case lt | gt:
   622  				if lim.min == 0 && lim.max == 0 { // constant false
   623  					ft.booleanTrue(o.w)
   624  				}
   625  				if lim.min == 1 && lim.max == 1 { // constant true
   626  					ft.booleanFalse(o.w)
   627  				}
   628  			}
   629  		case pointer:
   630  			switch o.r {
   631  			case eq:
   632  				if lim.umax == 0 { // nil
   633  					ft.pointerNil(o.w)
   634  				}
   635  				if lim.umin > 0 { // non-nil
   636  					ft.pointerNonNil(o.w)
   637  				}
   638  			case lt | gt:
   639  				if lim.umax == 0 { // nil
   640  					ft.pointerNonNil(o.w)
   641  				}
   642  				// note: not equal to non-nil doesn't tell us anything.
   643  			}
   644  		}
   645  	}
   646  
   647  	// If this is new known constant for a boolean value,
   648  	// extract relation between its args. For example, if
   649  	// We learn v is false, and v is defined as a<b, then we learn a>=b.
   650  	if v.Type.IsBoolean() {
   651  		// If we reach here, it is because we have a more restrictive
   652  		// value for v than the default. The only two such values
   653  		// are constant true or constant false.
   654  		if lim.min != lim.max {
   655  			v.Block.Func.Fatalf("boolean not constant %v", v)
   656  		}
   657  		isTrue := lim.min == 1
   658  		if dr, ok := domainRelationTable[v.Op]; ok && v.Op != OpIsInBounds && v.Op != OpIsSliceInBounds {
   659  			d := dr.d
   660  			r := dr.r
   661  			if d == signed && ft.isNonNegative(v.Args[0]) && ft.isNonNegative(v.Args[1]) {
   662  				d |= unsigned
   663  			}
   664  			if !isTrue {
   665  				r ^= lt | gt | eq
   666  			}
   667  			// TODO: v.Block is wrong?
   668  			addRestrictions(v.Block, ft, d, v.Args[0], v.Args[1], r)
   669  		}
   670  		switch v.Op {
   671  		case OpIsNonNil:
   672  			if isTrue {
   673  				ft.pointerNonNil(v.Args[0])
   674  			} else {
   675  				ft.pointerNil(v.Args[0])
   676  			}
   677  		case OpIsInBounds, OpIsSliceInBounds:
   678  			// 0 <= a0 < a1 (or 0 <= a0 <= a1)
   679  			r := lt
   680  			if v.Op == OpIsSliceInBounds {
   681  				r |= eq
   682  			}
   683  			if isTrue {
   684  				// On the positive branch, we learn:
   685  				//   signed: 0 <= a0 < a1 (or 0 <= a0 <= a1)
   686  				//   unsigned:    a0 < a1 (or a0 <= a1)
   687  				ft.setNonNegative(v.Args[0])
   688  				ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   689  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   690  			} else {
   691  				// On the negative branch, we learn (0 > a0 ||
   692  				// a0 >= a1). In the unsigned domain, this is
   693  				// simply a0 >= a1 (which is the reverse of the
   694  				// positive branch, so nothing surprising).
   695  				// But in the signed domain, we can't express the ||
   696  				// condition, so check if a0 is non-negative instead,
   697  				// to be able to learn something.
   698  				r ^= lt | gt | eq // >= (index) or > (slice)
   699  				if ft.isNonNegative(v.Args[0]) {
   700  					ft.update(v.Block, v.Args[0], v.Args[1], signed, r)
   701  				}
   702  				ft.update(v.Block, v.Args[0], v.Args[1], unsigned, r)
   703  				// TODO: v.Block is wrong here
   704  			}
   705  		}
   706  	}
   707  
   708  	return true
   709  }
   710  
   711  func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) {
   712  	o := ft.orderingCache
   713  	if o == nil {
   714  		o = &ordering{}
   715  	} else {
   716  		ft.orderingCache = o.next
   717  	}
   718  	o.w = w
   719  	o.d = d
   720  	o.r = r
   721  	o.next = ft.orderings[v.ID]
   722  	ft.orderings[v.ID] = o
   723  	ft.orderingsStack = append(ft.orderingsStack, v.ID)
   724  }
   725  
   726  // update updates the set of relations between v and w in domain d
   727  // restricting it to r.
   728  func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) {
   729  	if parent.Func.pass.debug > 2 {
   730  		parent.Func.Warnl(parent.Pos, "parent=%s, update %s %s %s", parent, v, w, r)
   731  	}
   732  	// No need to do anything else if we already found unsat.
   733  	if ft.unsat {
   734  		return
   735  	}
   736  
   737  	// Self-fact. It's wasteful to register it into the facts
   738  	// table, so just note whether it's satisfiable
   739  	if v == w {
   740  		if r&eq == 0 {
   741  			ft.unsat = true
   742  		}
   743  		return
   744  	}
   745  
   746  	if d == signed || d == unsigned {
   747  		var ok bool
   748  		order := ft.orderS
   749  		if d == unsigned {
   750  			order = ft.orderU
   751  		}
   752  		switch r {
   753  		case lt:
   754  			ok = order.SetOrder(v, w)
   755  		case gt:
   756  			ok = order.SetOrder(w, v)
   757  		case lt | eq:
   758  			ok = order.SetOrderOrEqual(v, w)
   759  		case gt | eq:
   760  			ok = order.SetOrderOrEqual(w, v)
   761  		case eq:
   762  			ok = order.SetEqual(v, w)
   763  		case lt | gt:
   764  			ok = order.SetNonEqual(v, w)
   765  		default:
   766  			panic("unknown relation")
   767  		}
   768  		ft.addOrdering(v, w, d, r)
   769  		ft.addOrdering(w, v, d, reverseBits[r])
   770  
   771  		if !ok {
   772  			if parent.Func.pass.debug > 2 {
   773  				parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r)
   774  			}
   775  			ft.unsat = true
   776  			return
   777  		}
   778  	}
   779  	if d == boolean || d == pointer {
   780  		for o := ft.orderings[v.ID]; o != nil; o = o.next {
   781  			if o.d == d && o.w == w {
   782  				// We already know a relationship between v and w.
   783  				// Either it is a duplicate, or it is a contradiction,
   784  				// as we only allow eq and lt|gt for these domains,
   785  				if o.r != r {
   786  					ft.unsat = true
   787  				}
   788  				return
   789  			}
   790  		}
   791  		// TODO: this does not do transitive equality.
   792  		// We could use a poset like above, but somewhat degenerate (==,!= only).
   793  		ft.addOrdering(v, w, d, r)
   794  		ft.addOrdering(w, v, d, r) // note: reverseBits unnecessary for eq and lt|gt.
   795  	}
   796  
   797  	// Extract new constant limits based on the comparison.
   798  	vLimit := ft.limits[v.ID]
   799  	wLimit := ft.limits[w.ID]
   800  	// Note: all the +1/-1 below could overflow/underflow. Either will
   801  	// still generate correct results, it will just lead to imprecision.
   802  	// In fact if there is overflow/underflow, the corresponding
   803  	// code is unreachable because the known range is outside the range
   804  	// of the value's type.
   805  	switch d {
   806  	case signed:
   807  		switch r {
   808  		case eq: // v == w
   809  			ft.signedMinMax(v, wLimit.min, wLimit.max)
   810  			ft.signedMinMax(w, vLimit.min, vLimit.max)
   811  		case lt: // v < w
   812  			ft.signedMax(v, wLimit.max-1)
   813  			ft.signedMin(w, vLimit.min+1)
   814  		case lt | eq: // v <= w
   815  			ft.signedMax(v, wLimit.max)
   816  			ft.signedMin(w, vLimit.min)
   817  		case gt: // v > w
   818  			ft.signedMin(v, wLimit.min+1)
   819  			ft.signedMax(w, vLimit.max-1)
   820  		case gt | eq: // v >= w
   821  			ft.signedMin(v, wLimit.min)
   822  			ft.signedMax(w, vLimit.max)
   823  		case lt | gt: // v != w
   824  			if vLimit.min == vLimit.max { // v is a constant
   825  				c := vLimit.min
   826  				if wLimit.min == c {
   827  					ft.signedMin(w, c+1)
   828  				}
   829  				if wLimit.max == c {
   830  					ft.signedMax(w, c-1)
   831  				}
   832  			}
   833  			if wLimit.min == wLimit.max { // w is a constant
   834  				c := wLimit.min
   835  				if vLimit.min == c {
   836  					ft.signedMin(v, c+1)
   837  				}
   838  				if vLimit.max == c {
   839  					ft.signedMax(v, c-1)
   840  				}
   841  			}
   842  		}
   843  	case unsigned:
   844  		switch r {
   845  		case eq: // v == w
   846  			ft.unsignedMinMax(v, wLimit.umin, wLimit.umax)
   847  			ft.unsignedMinMax(w, vLimit.umin, vLimit.umax)
   848  		case lt: // v < w
   849  			ft.unsignedMax(v, wLimit.umax-1)
   850  			ft.unsignedMin(w, vLimit.umin+1)
   851  		case lt | eq: // v <= w
   852  			ft.unsignedMax(v, wLimit.umax)
   853  			ft.unsignedMin(w, vLimit.umin)
   854  		case gt: // v > w
   855  			ft.unsignedMin(v, wLimit.umin+1)
   856  			ft.unsignedMax(w, vLimit.umax-1)
   857  		case gt | eq: // v >= w
   858  			ft.unsignedMin(v, wLimit.umin)
   859  			ft.unsignedMax(w, vLimit.umax)
   860  		case lt | gt: // v != w
   861  			if vLimit.umin == vLimit.umax { // v is a constant
   862  				c := vLimit.umin
   863  				if wLimit.umin == c {
   864  					ft.unsignedMin(w, c+1)
   865  				}
   866  				if wLimit.umax == c {
   867  					ft.unsignedMax(w, c-1)
   868  				}
   869  			}
   870  			if wLimit.umin == wLimit.umax { // w is a constant
   871  				c := wLimit.umin
   872  				if vLimit.umin == c {
   873  					ft.unsignedMin(v, c+1)
   874  				}
   875  				if vLimit.umax == c {
   876  					ft.unsignedMax(v, c-1)
   877  				}
   878  			}
   879  		}
   880  	case boolean:
   881  		switch r {
   882  		case eq: // v == w
   883  			if vLimit.min == 1 { // v is true
   884  				ft.booleanTrue(w)
   885  			}
   886  			if vLimit.max == 0 { // v is false
   887  				ft.booleanFalse(w)
   888  			}
   889  			if wLimit.min == 1 { // w is true
   890  				ft.booleanTrue(v)
   891  			}
   892  			if wLimit.max == 0 { // w is false
   893  				ft.booleanFalse(v)
   894  			}
   895  		case lt | gt: // v != w
   896  			if vLimit.min == 1 { // v is true
   897  				ft.booleanFalse(w)
   898  			}
   899  			if vLimit.max == 0 { // v is false
   900  				ft.booleanTrue(w)
   901  			}
   902  			if wLimit.min == 1 { // w is true
   903  				ft.booleanFalse(v)
   904  			}
   905  			if wLimit.max == 0 { // w is false
   906  				ft.booleanTrue(v)
   907  			}
   908  		}
   909  	case pointer:
   910  		switch r {
   911  		case eq: // v == w
   912  			if vLimit.umax == 0 { // v is nil
   913  				ft.pointerNil(w)
   914  			}
   915  			if vLimit.umin > 0 { // v is non-nil
   916  				ft.pointerNonNil(w)
   917  			}
   918  			if wLimit.umax == 0 { // w is nil
   919  				ft.pointerNil(v)
   920  			}
   921  			if wLimit.umin > 0 { // w is non-nil
   922  				ft.pointerNonNil(v)
   923  			}
   924  		case lt | gt: // v != w
   925  			if vLimit.umax == 0 { // v is nil
   926  				ft.pointerNonNil(w)
   927  			}
   928  			if wLimit.umax == 0 { // w is nil
   929  				ft.pointerNonNil(v)
   930  			}
   931  			// Note: the other direction doesn't work.
   932  			// Being not equal to a non-nil pointer doesn't
   933  			// make you (necessarily) a nil pointer.
   934  		}
   935  	}
   936  
   937  	// Derived facts below here are only about numbers.
   938  	if d != signed && d != unsigned {
   939  		return
   940  	}
   941  
   942  	// Additional facts we know given the relationship between len and cap.
   943  	//
   944  	// TODO: Since prove now derives transitive relations, it
   945  	// should be sufficient to learn that len(w) <= cap(w) at the
   946  	// beginning of prove where we look for all len/cap ops.
   947  	if v.Op == OpSliceLen && r&lt == 0 && ft.caps[v.Args[0].ID] != nil {
   948  		// len(s) > w implies cap(s) > w
   949  		// len(s) >= w implies cap(s) >= w
   950  		// len(s) == w implies cap(s) >= w
   951  		ft.update(parent, ft.caps[v.Args[0].ID], w, d, r|gt)
   952  	}
   953  	if w.Op == OpSliceLen && r&gt == 0 && ft.caps[w.Args[0].ID] != nil {
   954  		// same, length on the RHS.
   955  		ft.update(parent, v, ft.caps[w.Args[0].ID], d, r|lt)
   956  	}
   957  	if v.Op == OpSliceCap && r&gt == 0 && ft.lens[v.Args[0].ID] != nil {
   958  		// cap(s) < w implies len(s) < w
   959  		// cap(s) <= w implies len(s) <= w
   960  		// cap(s) == w implies len(s) <= w
   961  		ft.update(parent, ft.lens[v.Args[0].ID], w, d, r|lt)
   962  	}
   963  	if w.Op == OpSliceCap && r&lt == 0 && ft.lens[w.Args[0].ID] != nil {
   964  		// same, capacity on the RHS.
   965  		ft.update(parent, v, ft.lens[w.Args[0].ID], d, r|gt)
   966  	}
   967  
   968  	// Process fence-post implications.
   969  	//
   970  	// First, make the condition > or >=.
   971  	if r == lt || r == lt|eq {
   972  		v, w = w, v
   973  		r = reverseBits[r]
   974  	}
   975  	switch r {
   976  	case gt:
   977  		if x, delta := isConstDelta(v); x != nil && delta == 1 {
   978  			// x+1 > w  ⇒  x >= w
   979  			//
   980  			// This is useful for eliminating the
   981  			// growslice branch of append.
   982  			ft.update(parent, x, w, d, gt|eq)
   983  		} else if x, delta := isConstDelta(w); x != nil && delta == -1 {
   984  			// v > x-1  ⇒  v >= x
   985  			ft.update(parent, v, x, d, gt|eq)
   986  		}
   987  	case gt | eq:
   988  		if x, delta := isConstDelta(v); x != nil && delta == -1 {
   989  			// x-1 >= w && x > min  ⇒  x > w
   990  			//
   991  			// Useful for i > 0; s[i-1].
   992  			lim := ft.limits[x.ID]
   993  			if (d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0) {
   994  				ft.update(parent, x, w, d, gt)
   995  			}
   996  		} else if x, delta := isConstDelta(w); x != nil && delta == 1 {
   997  			// v >= x+1 && x < max  ⇒  v > x
   998  			lim := ft.limits[x.ID]
   999  			if (d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op]) {
  1000  				ft.update(parent, v, x, d, gt)
  1001  			}
  1002  		}
  1003  	}
  1004  
  1005  	// Process: x+delta > w (with delta constant)
  1006  	// Only signed domain for now (useful for accesses to slices in loops).
  1007  	if r == gt || r == gt|eq {
  1008  		if x, delta := isConstDelta(v); x != nil && d == signed {
  1009  			if parent.Func.pass.debug > 1 {
  1010  				parent.Func.Warnl(parent.Pos, "x+d %s w; x:%v %v delta:%v w:%v d:%v", r, x, parent.String(), delta, w.AuxInt, d)
  1011  			}
  1012  			underflow := true
  1013  			if delta < 0 {
  1014  				l := ft.limits[x.ID]
  1015  				if (x.Type.Size() == 8 && l.min >= math.MinInt64-delta) ||
  1016  					(x.Type.Size() == 4 && l.min >= math.MinInt32-delta) {
  1017  					underflow = false
  1018  				}
  1019  			}
  1020  			if delta < 0 && !underflow {
  1021  				// If delta < 0 and x+delta cannot underflow then x > x+delta (that is, x > v)
  1022  				ft.update(parent, x, v, signed, gt)
  1023  			}
  1024  			if !w.isGenericIntConst() {
  1025  				// If we know that x+delta > w but w is not constant, we can derive:
  1026  				//    if delta < 0 and x+delta cannot underflow, then x > w
  1027  				// This is useful for loops with bounds "len(slice)-K" (delta = -K)
  1028  				if delta < 0 && !underflow {
  1029  					ft.update(parent, x, w, signed, r)
  1030  				}
  1031  			} else {
  1032  				// With w,delta constants, we want to derive: x+delta > w  ⇒  x > w-delta
  1033  				//
  1034  				// We compute (using integers of the correct size):
  1035  				//    min = w - delta
  1036  				//    max = MaxInt - delta
  1037  				//
  1038  				// And we prove that:
  1039  				//    if min<max: min < x AND x <= max
  1040  				//    if min>max: min < x OR  x <= max
  1041  				//
  1042  				// This is always correct, even in case of overflow.
  1043  				//
  1044  				// If the initial fact is x+delta >= w instead, the derived conditions are:
  1045  				//    if min<max: min <= x AND x <= max
  1046  				//    if min>max: min <= x OR  x <= max
  1047  				//
  1048  				// Notice the conditions for max are still <=, as they handle overflows.
  1049  				var min, max int64
  1050  				switch x.Type.Size() {
  1051  				case 8:
  1052  					min = w.AuxInt - delta
  1053  					max = int64(^uint64(0)>>1) - delta
  1054  				case 4:
  1055  					min = int64(int32(w.AuxInt) - int32(delta))
  1056  					max = int64(int32(^uint32(0)>>1) - int32(delta))
  1057  				case 2:
  1058  					min = int64(int16(w.AuxInt) - int16(delta))
  1059  					max = int64(int16(^uint16(0)>>1) - int16(delta))
  1060  				case 1:
  1061  					min = int64(int8(w.AuxInt) - int8(delta))
  1062  					max = int64(int8(^uint8(0)>>1) - int8(delta))
  1063  				default:
  1064  					panic("unimplemented")
  1065  				}
  1066  
  1067  				if min < max {
  1068  					// Record that x > min and max >= x
  1069  					if r == gt {
  1070  						min++
  1071  					}
  1072  					ft.signedMinMax(x, min, max)
  1073  				} else {
  1074  					// We know that either x>min OR x<=max. factsTable cannot record OR conditions,
  1075  					// so let's see if we can already prove that one of them is false, in which case
  1076  					// the other must be true
  1077  					l := ft.limits[x.ID]
  1078  					if l.max <= min {
  1079  						if r&eq == 0 || l.max < min {
  1080  							// x>min (x>=min) is impossible, so it must be x<=max
  1081  							ft.signedMax(x, max)
  1082  						}
  1083  					} else if l.min > max {
  1084  						// x<=max is impossible, so it must be x>min
  1085  						if r == gt {
  1086  							min++
  1087  						}
  1088  						ft.signedMin(x, min)
  1089  					}
  1090  				}
  1091  			}
  1092  		}
  1093  	}
  1094  
  1095  	// Look through value-preserving extensions.
  1096  	// If the domain is appropriate for the pre-extension Type,
  1097  	// repeat the update with the pre-extension Value.
  1098  	if isCleanExt(v) {
  1099  		switch {
  1100  		case d == signed && v.Args[0].Type.IsSigned():
  1101  			fallthrough
  1102  		case d == unsigned && !v.Args[0].Type.IsSigned():
  1103  			ft.update(parent, v.Args[0], w, d, r)
  1104  		}
  1105  	}
  1106  	if isCleanExt(w) {
  1107  		switch {
  1108  		case d == signed && w.Args[0].Type.IsSigned():
  1109  			fallthrough
  1110  		case d == unsigned && !w.Args[0].Type.IsSigned():
  1111  			ft.update(parent, v, w.Args[0], d, r)
  1112  		}
  1113  	}
  1114  }
  1115  
  1116  var opMin = map[Op]int64{
  1117  	OpAdd64: math.MinInt64, OpSub64: math.MinInt64,
  1118  	OpAdd32: math.MinInt32, OpSub32: math.MinInt32,
  1119  }
  1120  
  1121  var opMax = map[Op]int64{
  1122  	OpAdd64: math.MaxInt64, OpSub64: math.MaxInt64,
  1123  	OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32,
  1124  }
  1125  
  1126  var opUMax = map[Op]uint64{
  1127  	OpAdd64: math.MaxUint64, OpSub64: math.MaxUint64,
  1128  	OpAdd32: math.MaxUint32, OpSub32: math.MaxUint32,
  1129  }
  1130  
  1131  // isNonNegative reports whether v is known to be non-negative.
  1132  func (ft *factsTable) isNonNegative(v *Value) bool {
  1133  	return ft.limits[v.ID].min >= 0
  1134  }
  1135  
  1136  // checkpoint saves the current state of known relations.
  1137  // Called when descending on a branch.
  1138  func (ft *factsTable) checkpoint() {
  1139  	if ft.unsat {
  1140  		ft.unsatDepth++
  1141  	}
  1142  	ft.limitStack = append(ft.limitStack, checkpointBound)
  1143  	ft.orderS.Checkpoint()
  1144  	ft.orderU.Checkpoint()
  1145  	ft.orderingsStack = append(ft.orderingsStack, 0)
  1146  }
  1147  
  1148  // restore restores known relation to the state just
  1149  // before the previous checkpoint.
  1150  // Called when backing up on a branch.
  1151  func (ft *factsTable) restore() {
  1152  	if ft.unsatDepth > 0 {
  1153  		ft.unsatDepth--
  1154  	} else {
  1155  		ft.unsat = false
  1156  	}
  1157  	for {
  1158  		old := ft.limitStack[len(ft.limitStack)-1]
  1159  		ft.limitStack = ft.limitStack[:len(ft.limitStack)-1]
  1160  		if old.vid == 0 { // checkpointBound
  1161  			break
  1162  		}
  1163  		ft.limits[old.vid] = old.limit
  1164  	}
  1165  	ft.orderS.Undo()
  1166  	ft.orderU.Undo()
  1167  	for {
  1168  		id := ft.orderingsStack[len(ft.orderingsStack)-1]
  1169  		ft.orderingsStack = ft.orderingsStack[:len(ft.orderingsStack)-1]
  1170  		if id == 0 { // checkpoint marker
  1171  			break
  1172  		}
  1173  		o := ft.orderings[id]
  1174  		ft.orderings[id] = o.next
  1175  		o.next = ft.orderingCache
  1176  		ft.orderingCache = o
  1177  	}
  1178  }
  1179  
  1180  var (
  1181  	reverseBits = [...]relation{0, 4, 2, 6, 1, 5, 3, 7}
  1182  
  1183  	// maps what we learn when the positive branch is taken.
  1184  	// For example:
  1185  	//      OpLess8:   {signed, lt},
  1186  	//	v1 = (OpLess8 v2 v3).
  1187  	// If we learn that v1 is true, then we can deduce that v2<v3
  1188  	// in the signed domain.
  1189  	domainRelationTable = map[Op]struct {
  1190  		d domain
  1191  		r relation
  1192  	}{
  1193  		OpEq8:   {signed | unsigned, eq},
  1194  		OpEq16:  {signed | unsigned, eq},
  1195  		OpEq32:  {signed | unsigned, eq},
  1196  		OpEq64:  {signed | unsigned, eq},
  1197  		OpEqPtr: {pointer, eq},
  1198  		OpEqB:   {boolean, eq},
  1199  
  1200  		OpNeq8:   {signed | unsigned, lt | gt},
  1201  		OpNeq16:  {signed | unsigned, lt | gt},
  1202  		OpNeq32:  {signed | unsigned, lt | gt},
  1203  		OpNeq64:  {signed | unsigned, lt | gt},
  1204  		OpNeqPtr: {pointer, lt | gt},
  1205  		OpNeqB:   {boolean, lt | gt},
  1206  
  1207  		OpLess8:   {signed, lt},
  1208  		OpLess8U:  {unsigned, lt},
  1209  		OpLess16:  {signed, lt},
  1210  		OpLess16U: {unsigned, lt},
  1211  		OpLess32:  {signed, lt},
  1212  		OpLess32U: {unsigned, lt},
  1213  		OpLess64:  {signed, lt},
  1214  		OpLess64U: {unsigned, lt},
  1215  
  1216  		OpLeq8:   {signed, lt | eq},
  1217  		OpLeq8U:  {unsigned, lt | eq},
  1218  		OpLeq16:  {signed, lt | eq},
  1219  		OpLeq16U: {unsigned, lt | eq},
  1220  		OpLeq32:  {signed, lt | eq},
  1221  		OpLeq32U: {unsigned, lt | eq},
  1222  		OpLeq64:  {signed, lt | eq},
  1223  		OpLeq64U: {unsigned, lt | eq},
  1224  	}
  1225  )
  1226  
  1227  // cleanup returns the posets to the free list
  1228  func (ft *factsTable) cleanup(f *Func) {
  1229  	for _, po := range []*poset{ft.orderS, ft.orderU} {
  1230  		// Make sure it's empty as it should be. A non-empty poset
  1231  		// might cause errors and miscompilations if reused.
  1232  		if checkEnabled {
  1233  			if err := po.CheckEmpty(); err != nil {
  1234  				f.Fatalf("poset not empty after function %s: %v", f.Name, err)
  1235  			}
  1236  		}
  1237  		f.retPoset(po)
  1238  	}
  1239  	f.Cache.freeLimitSlice(ft.limits)
  1240  	f.Cache.freeBoolSlice(ft.recurseCheck)
  1241  }
  1242  
  1243  // prove removes redundant BlockIf branches that can be inferred
  1244  // from previous dominating comparisons.
  1245  //
  1246  // By far, the most common redundant pair are generated by bounds checking.
  1247  // For example for the code:
  1248  //
  1249  //	a[i] = 4
  1250  //	foo(a[i])
  1251  //
  1252  // The compiler will generate the following code:
  1253  //
  1254  //	if i >= len(a) {
  1255  //	    panic("not in bounds")
  1256  //	}
  1257  //	a[i] = 4
  1258  //	if i >= len(a) {
  1259  //	    panic("not in bounds")
  1260  //	}
  1261  //	foo(a[i])
  1262  //
  1263  // The second comparison i >= len(a) is clearly redundant because if the
  1264  // else branch of the first comparison is executed, we already know that i < len(a).
  1265  // The code for the second panic can be removed.
  1266  //
  1267  // prove works by finding contradictions and trimming branches whose
  1268  // conditions are unsatisfiable given the branches leading up to them.
  1269  // It tracks a "fact table" of branch conditions. For each branching
  1270  // block, it asserts the branch conditions that uniquely dominate that
  1271  // block, and then separately asserts the block's branch condition and
  1272  // its negation. If either leads to a contradiction, it can trim that
  1273  // successor.
  1274  func prove(f *Func) {
  1275  	// Find induction variables. Currently, findIndVars
  1276  	// is limited to one induction variable per block.
  1277  	var indVars map[*Block]indVar
  1278  	for _, v := range findIndVar(f) {
  1279  		ind := v.ind
  1280  		if len(ind.Args) != 2 {
  1281  			// the rewrite code assumes there is only ever two parents to loops
  1282  			panic("unexpected induction with too many parents")
  1283  		}
  1284  
  1285  		nxt := v.nxt
  1286  		if !(ind.Uses == 2 && // 2 used by comparison and next
  1287  			nxt.Uses == 1) { // 1 used by induction
  1288  			// ind or nxt is used inside the loop, add it for the facts table
  1289  			if indVars == nil {
  1290  				indVars = make(map[*Block]indVar)
  1291  			}
  1292  			indVars[v.entry] = v
  1293  			continue
  1294  		} else {
  1295  			// Since this induction variable is not used for anything but counting the iterations,
  1296  			// no point in putting it into the facts table.
  1297  		}
  1298  
  1299  		// try to rewrite to a downward counting loop checking against start if the
  1300  		// loop body does not depend on ind or nxt and end is known before the loop.
  1301  		// This reduces pressure on the register allocator because this does not need
  1302  		// to use end on each iteration anymore. We compare against the start constant instead.
  1303  		// That means this code:
  1304  		//
  1305  		//	loop:
  1306  		//		ind = (Phi (Const [x]) nxt),
  1307  		//		if ind < end
  1308  		//		then goto enter_loop
  1309  		//		else goto exit_loop
  1310  		//
  1311  		//	enter_loop:
  1312  		//		do something without using ind nor nxt
  1313  		//		nxt = inc + ind
  1314  		//		goto loop
  1315  		//
  1316  		//	exit_loop:
  1317  		//
  1318  		// is rewritten to:
  1319  		//
  1320  		//	loop:
  1321  		//		ind = (Phi end nxt)
  1322  		//		if (Const [x]) < ind
  1323  		//		then goto enter_loop
  1324  		//		else goto exit_loop
  1325  		//
  1326  		//	enter_loop:
  1327  		//		do something without using ind nor nxt
  1328  		//		nxt = ind - inc
  1329  		//		goto loop
  1330  		//
  1331  		//	exit_loop:
  1332  		//
  1333  		// this is better because it only requires to keep ind then nxt alive while looping,
  1334  		// while the original form keeps ind then nxt and end alive
  1335  		start, end := v.min, v.max
  1336  		if v.flags&indVarCountDown != 0 {
  1337  			start, end = end, start
  1338  		}
  1339  
  1340  		if !start.isGenericIntConst() {
  1341  			// if start is not a constant we would be winning nothing from inverting the loop
  1342  			continue
  1343  		}
  1344  		if end.isGenericIntConst() {
  1345  			// TODO: if both start and end are constants we should rewrite such that the comparison
  1346  			// is against zero and nxt is ++ or -- operation
  1347  			// That means:
  1348  			//	for i := 2; i < 11; i += 2 {
  1349  			// should be rewritten to:
  1350  			//	for i := 5; 0 < i; i-- {
  1351  			continue
  1352  		}
  1353  
  1354  		if end.Block == ind.Block {
  1355  			// we can't rewrite loops where the condition depends on the loop body
  1356  			// this simple check is forced to work because if this is true a Phi in ind.Block must exist
  1357  			continue
  1358  		}
  1359  
  1360  		check := ind.Block.Controls[0]
  1361  		// invert the check
  1362  		check.Args[0], check.Args[1] = check.Args[1], check.Args[0]
  1363  
  1364  		// swap start and end in the loop
  1365  		for i, v := range check.Args {
  1366  			if v != end {
  1367  				continue
  1368  			}
  1369  
  1370  			check.SetArg(i, start)
  1371  			goto replacedEnd
  1372  		}
  1373  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1374  	replacedEnd:
  1375  
  1376  		for i, v := range ind.Args {
  1377  			if v != start {
  1378  				continue
  1379  			}
  1380  
  1381  			ind.SetArg(i, end)
  1382  			goto replacedStart
  1383  		}
  1384  		panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end))
  1385  	replacedStart:
  1386  
  1387  		if nxt.Args[0] != ind {
  1388  			// unlike additions subtractions are not commutative so be sure we get it right
  1389  			nxt.Args[0], nxt.Args[1] = nxt.Args[1], nxt.Args[0]
  1390  		}
  1391  
  1392  		switch nxt.Op {
  1393  		case OpAdd8:
  1394  			nxt.Op = OpSub8
  1395  		case OpAdd16:
  1396  			nxt.Op = OpSub16
  1397  		case OpAdd32:
  1398  			nxt.Op = OpSub32
  1399  		case OpAdd64:
  1400  			nxt.Op = OpSub64
  1401  		case OpSub8:
  1402  			nxt.Op = OpAdd8
  1403  		case OpSub16:
  1404  			nxt.Op = OpAdd16
  1405  		case OpSub32:
  1406  			nxt.Op = OpAdd32
  1407  		case OpSub64:
  1408  			nxt.Op = OpAdd64
  1409  		default:
  1410  			panic("unreachable")
  1411  		}
  1412  
  1413  		if f.pass.debug > 0 {
  1414  			f.Warnl(ind.Pos, "Inverted loop iteration")
  1415  		}
  1416  	}
  1417  
  1418  	ft := newFactsTable(f)
  1419  	ft.checkpoint()
  1420  
  1421  	// Find length and capacity ops.
  1422  	for _, b := range f.Blocks {
  1423  		for _, v := range b.Values {
  1424  			if v.Uses == 0 {
  1425  				// We don't care about dead values.
  1426  				// (There can be some that are CSEd but not removed yet.)
  1427  				continue
  1428  			}
  1429  			switch v.Op {
  1430  			case OpSliceLen:
  1431  				if ft.lens == nil {
  1432  					ft.lens = map[ID]*Value{}
  1433  				}
  1434  				// Set all len Values for the same slice as equal in the poset.
  1435  				// The poset handles transitive relations, so Values related to
  1436  				// any OpSliceLen for this slice will be correctly related to others.
  1437  				if l, ok := ft.lens[v.Args[0].ID]; ok {
  1438  					ft.update(b, v, l, signed, eq)
  1439  				} else {
  1440  					ft.lens[v.Args[0].ID] = v
  1441  				}
  1442  			case OpSliceCap:
  1443  				if ft.caps == nil {
  1444  					ft.caps = map[ID]*Value{}
  1445  				}
  1446  				// Same as case OpSliceLen above, but for slice cap.
  1447  				if c, ok := ft.caps[v.Args[0].ID]; ok {
  1448  					ft.update(b, v, c, signed, eq)
  1449  				} else {
  1450  					ft.caps[v.Args[0].ID] = v
  1451  				}
  1452  			}
  1453  		}
  1454  	}
  1455  
  1456  	// current node state
  1457  	type walkState int
  1458  	const (
  1459  		descend walkState = iota
  1460  		simplify
  1461  	)
  1462  	// work maintains the DFS stack.
  1463  	type bp struct {
  1464  		block *Block    // current handled block
  1465  		state walkState // what's to do
  1466  	}
  1467  	work := make([]bp, 0, 256)
  1468  	work = append(work, bp{
  1469  		block: f.Entry,
  1470  		state: descend,
  1471  	})
  1472  
  1473  	idom := f.Idom()
  1474  	sdom := f.Sdom()
  1475  
  1476  	// DFS on the dominator tree.
  1477  	//
  1478  	// For efficiency, we consider only the dominator tree rather
  1479  	// than the entire flow graph. On the way down, we consider
  1480  	// incoming branches and accumulate conditions that uniquely
  1481  	// dominate the current block. If we discover a contradiction,
  1482  	// we can eliminate the entire block and all of its children.
  1483  	// On the way back up, we consider outgoing branches that
  1484  	// haven't already been considered. This way we consider each
  1485  	// branch condition only once.
  1486  	for len(work) > 0 {
  1487  		node := work[len(work)-1]
  1488  		work = work[:len(work)-1]
  1489  		parent := idom[node.block.ID]
  1490  		branch := getBranch(sdom, parent, node.block)
  1491  
  1492  		switch node.state {
  1493  		case descend:
  1494  			ft.checkpoint()
  1495  
  1496  			// Entering the block, add facts about the induction variable
  1497  			// that is bound to this block.
  1498  			if iv, ok := indVars[node.block]; ok {
  1499  				addIndVarRestrictions(ft, parent, iv)
  1500  			}
  1501  
  1502  			// Add results of reaching this block via a branch from
  1503  			// its immediate dominator (if any).
  1504  			if branch != unknown {
  1505  				addBranchRestrictions(ft, parent, branch)
  1506  			}
  1507  
  1508  			if ft.unsat {
  1509  				// node.block is unreachable.
  1510  				// Remove it and don't visit
  1511  				// its children.
  1512  				removeBranch(parent, branch)
  1513  				ft.restore()
  1514  				break
  1515  			}
  1516  			// Otherwise, we can now commit to
  1517  			// taking this branch. We'll restore
  1518  			// ft when we unwind.
  1519  
  1520  			// Add facts about the values in the current block.
  1521  			addLocalFacts(ft, node.block)
  1522  
  1523  			work = append(work, bp{
  1524  				block: node.block,
  1525  				state: simplify,
  1526  			})
  1527  			for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) {
  1528  				work = append(work, bp{
  1529  					block: s,
  1530  					state: descend,
  1531  				})
  1532  			}
  1533  
  1534  		case simplify:
  1535  			simplifyBlock(sdom, ft, node.block)
  1536  			ft.restore()
  1537  		}
  1538  	}
  1539  
  1540  	ft.restore()
  1541  
  1542  	ft.cleanup(f)
  1543  }
  1544  
  1545  // initLimit sets initial constant limit for v.  This limit is based
  1546  // only on the operation itself, not any of its input arguments. This
  1547  // method is only used in two places, once when the prove pass startup
  1548  // and the other when a new ssa value is created, both for init. (unlike
  1549  // flowLimit, below, which computes additional constraints based on
  1550  // ranges of opcode arguments).
  1551  func initLimit(v *Value) limit {
  1552  	if v.Type.IsBoolean() {
  1553  		switch v.Op {
  1554  		case OpConstBool:
  1555  			b := v.AuxInt
  1556  			return limit{min: b, max: b, umin: uint64(b), umax: uint64(b)}
  1557  		default:
  1558  			return limit{min: 0, max: 1, umin: 0, umax: 1}
  1559  		}
  1560  	}
  1561  	if v.Type.IsPtrShaped() { // These are the types that EqPtr/NeqPtr operate on, except uintptr.
  1562  		switch v.Op {
  1563  		case OpConstNil:
  1564  			return limit{min: 0, max: 0, umin: 0, umax: 0}
  1565  		case OpAddr, OpLocalAddr: // TODO: others?
  1566  			l := noLimit
  1567  			l.umin = 1
  1568  			return l
  1569  		default:
  1570  			return noLimit
  1571  		}
  1572  	}
  1573  	if !v.Type.IsInteger() {
  1574  		return noLimit
  1575  	}
  1576  
  1577  	// Default limits based on type.
  1578  	bitsize := v.Type.Size() * 8
  1579  	lim := limit{min: -(1 << (bitsize - 1)), max: 1<<(bitsize-1) - 1, umin: 0, umax: 1<<bitsize - 1}
  1580  
  1581  	// Tighter limits on some opcodes.
  1582  	switch v.Op {
  1583  	// constants
  1584  	case OpConst64:
  1585  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(v.AuxInt), umax: uint64(v.AuxInt)}
  1586  	case OpConst32:
  1587  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint32(v.AuxInt)), umax: uint64(uint32(v.AuxInt))}
  1588  	case OpConst16:
  1589  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint16(v.AuxInt)), umax: uint64(uint16(v.AuxInt))}
  1590  	case OpConst8:
  1591  		lim = limit{min: v.AuxInt, max: v.AuxInt, umin: uint64(uint8(v.AuxInt)), umax: uint64(uint8(v.AuxInt))}
  1592  
  1593  	// extensions
  1594  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16:
  1595  		lim = lim.signedMinMax(0, 1<<8-1)
  1596  		lim = lim.unsignedMax(1<<8 - 1)
  1597  	case OpZeroExt16to64, OpZeroExt16to32:
  1598  		lim = lim.signedMinMax(0, 1<<16-1)
  1599  		lim = lim.unsignedMax(1<<16 - 1)
  1600  	case OpZeroExt32to64:
  1601  		lim = lim.signedMinMax(0, 1<<32-1)
  1602  		lim = lim.unsignedMax(1<<32 - 1)
  1603  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16:
  1604  		lim = lim.signedMinMax(math.MinInt8, math.MaxInt8)
  1605  	case OpSignExt16to64, OpSignExt16to32:
  1606  		lim = lim.signedMinMax(math.MinInt16, math.MaxInt16)
  1607  	case OpSignExt32to64:
  1608  		lim = lim.signedMinMax(math.MinInt32, math.MaxInt32)
  1609  
  1610  	// math/bits intrinsics
  1611  	case OpCtz64, OpBitLen64, OpPopCount64,
  1612  		OpCtz32, OpBitLen32, OpPopCount32,
  1613  		OpCtz16, OpBitLen16, OpPopCount16,
  1614  		OpCtz8, OpBitLen8, OpPopCount8:
  1615  		lim = lim.unsignedMax(uint64(v.Args[0].Type.Size() * 8))
  1616  
  1617  	// bool to uint8 conversion
  1618  	case OpCvtBoolToUint8:
  1619  		lim = lim.unsignedMax(1)
  1620  
  1621  	// length operations
  1622  	case OpSliceLen, OpSliceCap:
  1623  		f := v.Block.Func
  1624  		elemSize := uint64(v.Args[0].Type.Elem().Size())
  1625  		if elemSize > 0 {
  1626  			heapSize := uint64(1)<<(uint64(f.Config.PtrSize)*8) - 1
  1627  			maximumElementsFittingInHeap := heapSize / elemSize
  1628  			lim = lim.unsignedMax(maximumElementsFittingInHeap)
  1629  		}
  1630  		fallthrough
  1631  	case OpStringLen:
  1632  		lim = lim.signedMin(0)
  1633  	}
  1634  
  1635  	// signed <-> unsigned propagation
  1636  	if lim.min >= 0 {
  1637  		lim = lim.unsignedMinMax(uint64(lim.min), uint64(lim.max))
  1638  	}
  1639  	if fitsInBitsU(lim.umax, uint(8*v.Type.Size()-1)) {
  1640  		lim = lim.signedMinMax(int64(lim.umin), int64(lim.umax))
  1641  	}
  1642  
  1643  	return lim
  1644  }
  1645  
  1646  // flowLimit updates the known limits of v in ft. Returns true if anything changed.
  1647  // flowLimit can use the ranges of input arguments.
  1648  //
  1649  // Note: this calculation only happens at the point the value is defined. We do not reevaluate
  1650  // it later. So for example:
  1651  //
  1652  //	v := x + y
  1653  //	if 0 <= x && x < 5 && 0 <= y && y < 5 { ... use v ... }
  1654  //
  1655  // we don't discover that the range of v is bounded in the conditioned
  1656  // block. We could recompute the range of v once we enter the block so
  1657  // we know that it is 0 <= v <= 8, but we don't have a mechanism to do
  1658  // that right now.
  1659  func (ft *factsTable) flowLimit(v *Value) bool {
  1660  	if !v.Type.IsInteger() {
  1661  		// TODO: boolean?
  1662  		return false
  1663  	}
  1664  
  1665  	// Additional limits based on opcode and argument.
  1666  	// No need to repeat things here already done in initLimit.
  1667  	switch v.Op {
  1668  
  1669  	// extensions
  1670  	case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64:
  1671  		a := ft.limits[v.Args[0].ID]
  1672  		return ft.unsignedMinMax(v, a.umin, a.umax)
  1673  	case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64:
  1674  		a := ft.limits[v.Args[0].ID]
  1675  		return ft.signedMinMax(v, a.min, a.max)
  1676  	case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
  1677  		a := ft.limits[v.Args[0].ID]
  1678  		if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 {
  1679  			return ft.unsignedMinMax(v, a.umin, a.umax)
  1680  		}
  1681  
  1682  	// math/bits
  1683  	case OpCtz64:
  1684  		a := ft.limits[v.Args[0].ID]
  1685  		if a.nonzero() {
  1686  			return ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
  1687  		}
  1688  	case OpCtz32:
  1689  		a := ft.limits[v.Args[0].ID]
  1690  		if a.nonzero() {
  1691  			return ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
  1692  		}
  1693  	case OpCtz16:
  1694  		a := ft.limits[v.Args[0].ID]
  1695  		if a.nonzero() {
  1696  			return ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
  1697  		}
  1698  	case OpCtz8:
  1699  		a := ft.limits[v.Args[0].ID]
  1700  		if a.nonzero() {
  1701  			return ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
  1702  		}
  1703  
  1704  	case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8:
  1705  		a := ft.limits[v.Args[0].ID]
  1706  		changingBitsCount := uint64(bits.Len64(a.umax ^ a.umin))
  1707  		sharedLeadingMask := ^(uint64(1)<<changingBitsCount - 1)
  1708  		fixedBits := a.umax & sharedLeadingMask
  1709  		min := uint64(bits.OnesCount64(fixedBits))
  1710  		return ft.unsignedMinMax(v, min, min+changingBitsCount)
  1711  
  1712  	case OpBitLen64:
  1713  		a := ft.limits[v.Args[0].ID]
  1714  		return ft.unsignedMinMax(v,
  1715  			uint64(bits.Len64(a.umin)),
  1716  			uint64(bits.Len64(a.umax)))
  1717  	case OpBitLen32:
  1718  		a := ft.limits[v.Args[0].ID]
  1719  		return ft.unsignedMinMax(v,
  1720  			uint64(bits.Len32(uint32(a.umin))),
  1721  			uint64(bits.Len32(uint32(a.umax))))
  1722  	case OpBitLen16:
  1723  		a := ft.limits[v.Args[0].ID]
  1724  		return ft.unsignedMinMax(v,
  1725  			uint64(bits.Len16(uint16(a.umin))),
  1726  			uint64(bits.Len16(uint16(a.umax))))
  1727  	case OpBitLen8:
  1728  		a := ft.limits[v.Args[0].ID]
  1729  		return ft.unsignedMinMax(v,
  1730  			uint64(bits.Len8(uint8(a.umin))),
  1731  			uint64(bits.Len8(uint8(a.umax))))
  1732  
  1733  	// Masks.
  1734  
  1735  	// TODO: if y.umax and y.umin share a leading bit pattern, y also has that leading bit pattern.
  1736  	// we could compare the patterns of always set bits in a and b and learn more about minimum and maximum.
  1737  	// But I doubt this help any real world code.
  1738  	case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  1739  		// AND can only make the value smaller.
  1740  		a := ft.limits[v.Args[0].ID]
  1741  		b := ft.limits[v.Args[1].ID]
  1742  		return ft.unsignedMax(v, min(a.umax, b.umax))
  1743  	case OpOr64, OpOr32, OpOr16, OpOr8:
  1744  		// OR can only make the value bigger and can't flip bits proved to be zero in both inputs.
  1745  		a := ft.limits[v.Args[0].ID]
  1746  		b := ft.limits[v.Args[1].ID]
  1747  		return ft.unsignedMinMax(v,
  1748  			max(a.umin, b.umin),
  1749  			1<<bits.Len64(a.umax|b.umax)-1)
  1750  	case OpXor64, OpXor32, OpXor16, OpXor8:
  1751  		// XOR can't flip bits that are proved to be zero in both inputs.
  1752  		a := ft.limits[v.Args[0].ID]
  1753  		b := ft.limits[v.Args[1].ID]
  1754  		return ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
  1755  	case OpCom64, OpCom32, OpCom16, OpCom8:
  1756  		a := ft.limits[v.Args[0].ID]
  1757  		return ft.newLimit(v, a.com(uint(v.Type.Size())*8))
  1758  
  1759  	// Arithmetic.
  1760  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  1761  		a := ft.limits[v.Args[0].ID]
  1762  		b := ft.limits[v.Args[1].ID]
  1763  		return ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
  1764  	case OpSub64, OpSub32, OpSub16, OpSub8:
  1765  		a := ft.limits[v.Args[0].ID]
  1766  		b := ft.limits[v.Args[1].ID]
  1767  		sub := ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
  1768  		mod := ft.detectSignedMod(v)
  1769  		return sub || mod
  1770  	case OpNeg64, OpNeg32, OpNeg16, OpNeg8:
  1771  		a := ft.limits[v.Args[0].ID]
  1772  		bitsize := uint(v.Type.Size()) * 8
  1773  		return ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize))
  1774  	case OpMul64, OpMul32, OpMul16, OpMul8:
  1775  		a := ft.limits[v.Args[0].ID]
  1776  		b := ft.limits[v.Args[1].ID]
  1777  		return ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
  1778  	case OpLsh64x64, OpLsh64x32, OpLsh64x16, OpLsh64x8,
  1779  		OpLsh32x64, OpLsh32x32, OpLsh32x16, OpLsh32x8,
  1780  		OpLsh16x64, OpLsh16x32, OpLsh16x16, OpLsh16x8,
  1781  		OpLsh8x64, OpLsh8x32, OpLsh8x16, OpLsh8x8:
  1782  		a := ft.limits[v.Args[0].ID]
  1783  		b := ft.limits[v.Args[1].ID]
  1784  		bitsize := uint(v.Type.Size()) * 8
  1785  		return ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
  1786  	case OpMod64, OpMod32, OpMod16, OpMod8:
  1787  		a := ft.limits[v.Args[0].ID]
  1788  		b := ft.limits[v.Args[1].ID]
  1789  		if !(a.nonnegative() && b.nonnegative()) {
  1790  			// TODO: we could handle signed limits but I didn't bother.
  1791  			break
  1792  		}
  1793  		fallthrough
  1794  	case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  1795  		a := ft.limits[v.Args[0].ID]
  1796  		b := ft.limits[v.Args[1].ID]
  1797  		// Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit.
  1798  		return ft.unsignedMax(v, min(a.umax, b.umax-1))
  1799  	case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
  1800  		a := ft.limits[v.Args[0].ID]
  1801  		b := ft.limits[v.Args[1].ID]
  1802  		if !(a.nonnegative() && b.nonnegative()) {
  1803  			// TODO: we could handle signed limits but I didn't bother.
  1804  			break
  1805  		}
  1806  		fallthrough
  1807  	case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u:
  1808  		a := ft.limits[v.Args[0].ID]
  1809  		b := ft.limits[v.Args[1].ID]
  1810  		lim := noLimit
  1811  		if b.umax > 0 {
  1812  			lim = lim.unsignedMin(a.umin / b.umax)
  1813  		}
  1814  		if b.umin > 0 {
  1815  			lim = lim.unsignedMax(a.umax / b.umin)
  1816  		}
  1817  		return ft.newLimit(v, lim)
  1818  
  1819  	case OpPhi:
  1820  		{
  1821  			// Work around for go.dev/issue/68857, look for min(x, y) and max(x, y).
  1822  			b := v.Block
  1823  			if len(b.Preds) != 2 {
  1824  				goto notMinNorMax
  1825  			}
  1826  			// FIXME: this code searches for the following losange pattern
  1827  			// because that what ssagen produce for min and max builtins:
  1828  			// conditionBlock → (firstBlock, secondBlock) → v.Block
  1829  			// there are three non losange equivalent constructions
  1830  			// we could match for, but I didn't bother:
  1831  			// conditionBlock → (v.Block, secondBlock → v.Block)
  1832  			// conditionBlock → (firstBlock → v.Block, v.Block)
  1833  			// conditionBlock → (v.Block, v.Block)
  1834  			firstBlock, secondBlock := b.Preds[0].b, b.Preds[1].b
  1835  			if firstBlock.Kind != BlockPlain || secondBlock.Kind != BlockPlain {
  1836  				goto notMinNorMax
  1837  			}
  1838  			if len(firstBlock.Preds) != 1 || len(secondBlock.Preds) != 1 {
  1839  				goto notMinNorMax
  1840  			}
  1841  			conditionBlock := firstBlock.Preds[0].b
  1842  			if conditionBlock != secondBlock.Preds[0].b {
  1843  				goto notMinNorMax
  1844  			}
  1845  			if conditionBlock.Kind != BlockIf {
  1846  				goto notMinNorMax
  1847  			}
  1848  
  1849  			less := conditionBlock.Controls[0]
  1850  			var unsigned bool
  1851  			switch less.Op {
  1852  			case OpLess64U, OpLess32U, OpLess16U, OpLess8U,
  1853  				OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
  1854  				unsigned = true
  1855  			case OpLess64, OpLess32, OpLess16, OpLess8,
  1856  				OpLeq64, OpLeq32, OpLeq16, OpLeq8:
  1857  			default:
  1858  				goto notMinNorMax
  1859  			}
  1860  			small, big := less.Args[0], less.Args[1]
  1861  			truev, falsev := v.Args[0], v.Args[1]
  1862  			if conditionBlock.Succs[0].b == secondBlock {
  1863  				truev, falsev = falsev, truev
  1864  			}
  1865  
  1866  			bigl, smalll := ft.limits[big.ID], ft.limits[small.ID]
  1867  			if truev == big {
  1868  				if falsev == small {
  1869  					// v := big if small <¿=? big else small
  1870  					if unsigned {
  1871  						maximum := max(bigl.umax, smalll.umax)
  1872  						minimum := max(bigl.umin, smalll.umin)
  1873  						return ft.unsignedMinMax(v, minimum, maximum)
  1874  					} else {
  1875  						maximum := max(bigl.max, smalll.max)
  1876  						minimum := max(bigl.min, smalll.min)
  1877  						return ft.signedMinMax(v, minimum, maximum)
  1878  					}
  1879  				} else {
  1880  					goto notMinNorMax
  1881  				}
  1882  			} else if truev == small {
  1883  				if falsev == big {
  1884  					// v := small if small <¿=? big else big
  1885  					if unsigned {
  1886  						maximum := min(bigl.umax, smalll.umax)
  1887  						minimum := min(bigl.umin, smalll.umin)
  1888  						return ft.unsignedMinMax(v, minimum, maximum)
  1889  					} else {
  1890  						maximum := min(bigl.max, smalll.max)
  1891  						minimum := min(bigl.min, smalll.min)
  1892  						return ft.signedMinMax(v, minimum, maximum)
  1893  					}
  1894  				} else {
  1895  					goto notMinNorMax
  1896  				}
  1897  			} else {
  1898  				goto notMinNorMax
  1899  			}
  1900  		}
  1901  	notMinNorMax:
  1902  
  1903  		// Compute the union of all the input phis.
  1904  		// Often this will convey no information, because the block
  1905  		// is not dominated by its predecessors and hence the
  1906  		// phi arguments might not have been processed yet. But if
  1907  		// the values are declared earlier, it may help. e.g., for
  1908  		//    v = phi(c3, c5)
  1909  		// where c3 = OpConst [3] and c5 = OpConst [5] are
  1910  		// defined in the entry block, we can derive [3,5]
  1911  		// as the limit for v.
  1912  		l := ft.limits[v.Args[0].ID]
  1913  		for _, a := range v.Args[1:] {
  1914  			l2 := ft.limits[a.ID]
  1915  			l.min = min(l.min, l2.min)
  1916  			l.max = max(l.max, l2.max)
  1917  			l.umin = min(l.umin, l2.umin)
  1918  			l.umax = max(l.umax, l2.umax)
  1919  		}
  1920  		return ft.newLimit(v, l)
  1921  	}
  1922  	return false
  1923  }
  1924  
  1925  // See if we can get any facts because v is the result of signed mod by a constant.
  1926  // The mod operation has already been rewritten, so we have to try and reconstruct it.
  1927  //
  1928  //	x % d
  1929  //
  1930  // is rewritten as
  1931  //
  1932  //	x - (x / d) * d
  1933  //
  1934  // furthermore, the divide itself gets rewritten. If d is a power of 2 (d == 1<<k), we do
  1935  //
  1936  //	(x / d) * d = ((x + adj) >> k) << k
  1937  //	            = (x + adj) & (-1<<k)
  1938  //
  1939  // with adj being an adjustment in case x is negative (see below).
  1940  // if d is not a power of 2, we do
  1941  //
  1942  //	x / d = ... TODO ...
  1943  func (ft *factsTable) detectSignedMod(v *Value) bool {
  1944  	if ft.detectSignedModByPowerOfTwo(v) {
  1945  		return true
  1946  	}
  1947  	// TODO: non-powers-of-2
  1948  	return false
  1949  }
  1950  func (ft *factsTable) detectSignedModByPowerOfTwo(v *Value) bool {
  1951  	// We're looking for:
  1952  	//
  1953  	//   x % d ==
  1954  	//   x - (x / d) * d
  1955  	//
  1956  	// which for d a power of 2, d == 1<<k, is done as
  1957  	//
  1958  	//   x - ((x + (x>>(w-1))>>>(w-k)) & (-1<<k))
  1959  	//
  1960  	// w = bit width of x.
  1961  	// (>> = signed shift, >>> = unsigned shift).
  1962  	// See ./_gen/generic.rules, search for "Signed divide by power of 2".
  1963  
  1964  	var w int64
  1965  	var addOp, andOp, constOp, sshiftOp, ushiftOp Op
  1966  	switch v.Op {
  1967  	case OpSub64:
  1968  		w = 64
  1969  		addOp = OpAdd64
  1970  		andOp = OpAnd64
  1971  		constOp = OpConst64
  1972  		sshiftOp = OpRsh64x64
  1973  		ushiftOp = OpRsh64Ux64
  1974  	case OpSub32:
  1975  		w = 32
  1976  		addOp = OpAdd32
  1977  		andOp = OpAnd32
  1978  		constOp = OpConst32
  1979  		sshiftOp = OpRsh32x64
  1980  		ushiftOp = OpRsh32Ux64
  1981  	case OpSub16:
  1982  		w = 16
  1983  		addOp = OpAdd16
  1984  		andOp = OpAnd16
  1985  		constOp = OpConst16
  1986  		sshiftOp = OpRsh16x64
  1987  		ushiftOp = OpRsh16Ux64
  1988  	case OpSub8:
  1989  		w = 8
  1990  		addOp = OpAdd8
  1991  		andOp = OpAnd8
  1992  		constOp = OpConst8
  1993  		sshiftOp = OpRsh8x64
  1994  		ushiftOp = OpRsh8Ux64
  1995  	default:
  1996  		return false
  1997  	}
  1998  
  1999  	x := v.Args[0]
  2000  	and := v.Args[1]
  2001  	if and.Op != andOp {
  2002  		return false
  2003  	}
  2004  	var add, mask *Value
  2005  	if and.Args[0].Op == addOp && and.Args[1].Op == constOp {
  2006  		add = and.Args[0]
  2007  		mask = and.Args[1]
  2008  	} else if and.Args[1].Op == addOp && and.Args[0].Op == constOp {
  2009  		add = and.Args[1]
  2010  		mask = and.Args[0]
  2011  	} else {
  2012  		return false
  2013  	}
  2014  	var ushift *Value
  2015  	if add.Args[0] == x {
  2016  		ushift = add.Args[1]
  2017  	} else if add.Args[1] == x {
  2018  		ushift = add.Args[0]
  2019  	} else {
  2020  		return false
  2021  	}
  2022  	if ushift.Op != ushiftOp {
  2023  		return false
  2024  	}
  2025  	if ushift.Args[1].Op != OpConst64 {
  2026  		return false
  2027  	}
  2028  	k := w - ushift.Args[1].AuxInt // Now we know k!
  2029  	d := int64(1) << k             // divisor
  2030  	sshift := ushift.Args[0]
  2031  	if sshift.Op != sshiftOp {
  2032  		return false
  2033  	}
  2034  	if sshift.Args[0] != x {
  2035  		return false
  2036  	}
  2037  	if sshift.Args[1].Op != OpConst64 || sshift.Args[1].AuxInt != w-1 {
  2038  		return false
  2039  	}
  2040  	if mask.AuxInt != -d {
  2041  		return false
  2042  	}
  2043  
  2044  	// All looks ok. x % d is at most +/- d-1.
  2045  	return ft.signedMinMax(v, -d+1, d-1)
  2046  }
  2047  
  2048  // getBranch returns the range restrictions added by p
  2049  // when reaching b. p is the immediate dominator of b.
  2050  func getBranch(sdom SparseTree, p *Block, b *Block) branch {
  2051  	if p == nil {
  2052  		return unknown
  2053  	}
  2054  	switch p.Kind {
  2055  	case BlockIf:
  2056  		// If p and p.Succs[0] are dominators it means that every path
  2057  		// from entry to b passes through p and p.Succs[0]. We care that
  2058  		// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
  2059  		// has one predecessor then (apart from the degenerate case),
  2060  		// there is no path from entry that can reach b through p.Succs[1].
  2061  		// TODO: how about p->yes->b->yes, i.e. a loop in yes.
  2062  		if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 {
  2063  			return positive
  2064  		}
  2065  		if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 {
  2066  			return negative
  2067  		}
  2068  	case BlockJumpTable:
  2069  		// TODO: this loop can lead to quadratic behavior, as
  2070  		// getBranch can be called len(p.Succs) times.
  2071  		for i, e := range p.Succs {
  2072  			if sdom.IsAncestorEq(e.b, b) && len(e.b.Preds) == 1 {
  2073  				return jumpTable0 + branch(i)
  2074  			}
  2075  		}
  2076  	}
  2077  	return unknown
  2078  }
  2079  
  2080  // addIndVarRestrictions updates the factsTables ft with the facts
  2081  // learned from the induction variable indVar which drives the loop
  2082  // starting in Block b.
  2083  func addIndVarRestrictions(ft *factsTable, b *Block, iv indVar) {
  2084  	d := signed
  2085  	if ft.isNonNegative(iv.min) && ft.isNonNegative(iv.max) {
  2086  		d |= unsigned
  2087  	}
  2088  
  2089  	if iv.flags&indVarMinExc == 0 {
  2090  		addRestrictions(b, ft, d, iv.min, iv.ind, lt|eq)
  2091  	} else {
  2092  		addRestrictions(b, ft, d, iv.min, iv.ind, lt)
  2093  	}
  2094  
  2095  	if iv.flags&indVarMaxInc == 0 {
  2096  		addRestrictions(b, ft, d, iv.ind, iv.max, lt)
  2097  	} else {
  2098  		addRestrictions(b, ft, d, iv.ind, iv.max, lt|eq)
  2099  	}
  2100  }
  2101  
  2102  // addBranchRestrictions updates the factsTables ft with the facts learned when
  2103  // branching from Block b in direction br.
  2104  func addBranchRestrictions(ft *factsTable, b *Block, br branch) {
  2105  	c := b.Controls[0]
  2106  	switch {
  2107  	case br == negative:
  2108  		ft.booleanFalse(c)
  2109  	case br == positive:
  2110  		ft.booleanTrue(c)
  2111  	case br >= jumpTable0:
  2112  		idx := br - jumpTable0
  2113  		val := int64(idx)
  2114  		if v, off := isConstDelta(c); v != nil {
  2115  			// Establish the bound on the underlying value we're switching on,
  2116  			// not on the offset-ed value used as the jump table index.
  2117  			c = v
  2118  			val -= off
  2119  		}
  2120  		ft.newLimit(c, limit{min: val, max: val, umin: uint64(val), umax: uint64(val)})
  2121  	default:
  2122  		panic("unknown branch")
  2123  	}
  2124  }
  2125  
  2126  // addRestrictions updates restrictions from the immediate
  2127  // dominating block (p) using r.
  2128  func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r relation) {
  2129  	if t == 0 {
  2130  		// Trivial case: nothing to do.
  2131  		// Should not happen, but just in case.
  2132  		return
  2133  	}
  2134  	for i := domain(1); i <= t; i <<= 1 {
  2135  		if t&i == 0 {
  2136  			continue
  2137  		}
  2138  		ft.update(parent, v, w, i, r)
  2139  	}
  2140  }
  2141  
  2142  func unsignedAddOverflows(a, b uint64, t *types.Type) bool {
  2143  	switch t.Size() {
  2144  	case 8:
  2145  		return a+b < a
  2146  	case 4:
  2147  		return a+b > math.MaxUint32
  2148  	case 2:
  2149  		return a+b > math.MaxUint16
  2150  	case 1:
  2151  		return a+b > math.MaxUint8
  2152  	default:
  2153  		panic("unreachable")
  2154  	}
  2155  }
  2156  
  2157  func signedAddOverflowsOrUnderflows(a, b int64, t *types.Type) bool {
  2158  	r := a + b
  2159  	switch t.Size() {
  2160  	case 8:
  2161  		return (a >= 0 && b >= 0 && r < 0) || (a < 0 && b < 0 && r >= 0)
  2162  	case 4:
  2163  		return r < math.MinInt32 || math.MaxInt32 < r
  2164  	case 2:
  2165  		return r < math.MinInt16 || math.MaxInt16 < r
  2166  	case 1:
  2167  		return r < math.MinInt8 || math.MaxInt8 < r
  2168  	default:
  2169  		panic("unreachable")
  2170  	}
  2171  }
  2172  
  2173  func unsignedSubUnderflows(a, b uint64) bool {
  2174  	return a < b
  2175  }
  2176  
  2177  func addLocalFacts(ft *factsTable, b *Block) {
  2178  	// Propagate constant ranges among values in this block.
  2179  	// We do this before the second loop so that we have the
  2180  	// most up-to-date constant bounds for isNonNegative calls.
  2181  	for {
  2182  		changed := false
  2183  		for _, v := range b.Values {
  2184  			changed = ft.flowLimit(v) || changed
  2185  		}
  2186  		if !changed {
  2187  			break
  2188  		}
  2189  	}
  2190  
  2191  	// Add facts about individual operations.
  2192  	for _, v := range b.Values {
  2193  		// FIXME(go.dev/issue/68857): this loop only set up limits properly when b.Values is in topological order.
  2194  		// flowLimit can also depend on limits given by this loop which right now is not handled.
  2195  		switch v.Op {
  2196  		case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2197  			x := ft.limits[v.Args[0].ID]
  2198  			y := ft.limits[v.Args[1].ID]
  2199  			if !unsignedAddOverflows(x.umax, y.umax, v.Type) {
  2200  				r := gt
  2201  				if !x.nonzero() {
  2202  					r |= eq
  2203  				}
  2204  				ft.update(b, v, v.Args[1], unsigned, r)
  2205  				r = gt
  2206  				if !y.nonzero() {
  2207  					r |= eq
  2208  				}
  2209  				ft.update(b, v, v.Args[0], unsigned, r)
  2210  			}
  2211  			if x.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2212  				r := gt
  2213  				if !x.nonzero() {
  2214  					r |= eq
  2215  				}
  2216  				ft.update(b, v, v.Args[1], signed, r)
  2217  			}
  2218  			if y.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
  2219  				r := gt
  2220  				if !y.nonzero() {
  2221  					r |= eq
  2222  				}
  2223  				ft.update(b, v, v.Args[0], signed, r)
  2224  			}
  2225  			if x.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2226  				r := lt
  2227  				if !x.nonzero() {
  2228  					r |= eq
  2229  				}
  2230  				ft.update(b, v, v.Args[1], signed, r)
  2231  			}
  2232  			if y.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
  2233  				r := lt
  2234  				if !y.nonzero() {
  2235  					r |= eq
  2236  				}
  2237  				ft.update(b, v, v.Args[0], signed, r)
  2238  			}
  2239  		case OpSub64, OpSub32, OpSub16, OpSub8:
  2240  			x := ft.limits[v.Args[0].ID]
  2241  			y := ft.limits[v.Args[1].ID]
  2242  			if !unsignedSubUnderflows(x.umin, y.umax) {
  2243  				r := lt
  2244  				if !y.nonzero() {
  2245  					r |= eq
  2246  				}
  2247  				ft.update(b, v, v.Args[0], unsigned, r)
  2248  			}
  2249  			// FIXME: we could also do signed facts but the overflow checks are much trickier and I don't need it yet.
  2250  		case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
  2251  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2252  			ft.update(b, v, v.Args[1], unsigned, lt|eq)
  2253  			if ft.isNonNegative(v.Args[0]) {
  2254  				ft.update(b, v, v.Args[0], signed, lt|eq)
  2255  			}
  2256  			if ft.isNonNegative(v.Args[1]) {
  2257  				ft.update(b, v, v.Args[1], signed, lt|eq)
  2258  			}
  2259  		case OpOr64, OpOr32, OpOr16, OpOr8:
  2260  			// TODO: investigate how to always add facts without much slowdown, see issue #57959
  2261  			//ft.update(b, v, v.Args[0], unsigned, gt|eq)
  2262  			//ft.update(b, v, v.Args[1], unsigned, gt|eq)
  2263  		case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u,
  2264  			OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8,
  2265  			OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8,
  2266  			OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
  2267  			OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8:
  2268  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2269  		case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
  2270  			ft.update(b, v, v.Args[0], unsigned, lt|eq)
  2271  			// Note: we have to be careful that this doesn't imply
  2272  			// that the modulus is >0, which isn't true until *after*
  2273  			// the mod instruction executes (and thus panics if the
  2274  			// modulus is 0). See issue 67625.
  2275  			ft.update(b, v, v.Args[1], unsigned, lt)
  2276  		case OpStringLen:
  2277  			if v.Args[0].Op == OpStringMake {
  2278  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2279  			}
  2280  		case OpSliceLen:
  2281  			if v.Args[0].Op == OpSliceMake {
  2282  				ft.update(b, v, v.Args[0].Args[1], signed, eq)
  2283  			}
  2284  		case OpSliceCap:
  2285  			if v.Args[0].Op == OpSliceMake {
  2286  				ft.update(b, v, v.Args[0].Args[2], signed, eq)
  2287  			}
  2288  		case OpPhi:
  2289  			addLocalFactsPhi(ft, v)
  2290  		}
  2291  	}
  2292  }
  2293  
  2294  func addLocalFactsPhi(ft *factsTable, v *Value) {
  2295  	// Look for phis that implement min/max.
  2296  	//   z:
  2297  	//      c = Less64 x y (or other Less/Leq operation)
  2298  	//      If c -> bx by
  2299  	//   bx: <- z
  2300  	//       -> b ...
  2301  	//   by: <- z
  2302  	//      -> b ...
  2303  	//   b: <- bx by
  2304  	//      v = Phi x y
  2305  	// Then v is either min or max of x,y.
  2306  	// If it is the min, then we deduce v <= x && v <= y.
  2307  	// If it is the max, then we deduce v >= x && v >= y.
  2308  	// The min case is useful for the copy builtin, see issue 16833.
  2309  	if len(v.Args) != 2 {
  2310  		return
  2311  	}
  2312  	b := v.Block
  2313  	x := v.Args[0]
  2314  	y := v.Args[1]
  2315  	bx := b.Preds[0].b
  2316  	by := b.Preds[1].b
  2317  	var z *Block // branch point
  2318  	switch {
  2319  	case bx == by: // bx == by == z case
  2320  		z = bx
  2321  	case by.uniquePred() == bx: // bx == z case
  2322  		z = bx
  2323  	case bx.uniquePred() == by: // by == z case
  2324  		z = by
  2325  	case bx.uniquePred() == by.uniquePred():
  2326  		z = bx.uniquePred()
  2327  	}
  2328  	if z == nil || z.Kind != BlockIf {
  2329  		return
  2330  	}
  2331  	c := z.Controls[0]
  2332  	if len(c.Args) != 2 {
  2333  		return
  2334  	}
  2335  	var isMin bool // if c, a less-than comparison, is true, phi chooses x.
  2336  	if bx == z {
  2337  		isMin = b.Preds[0].i == 0
  2338  	} else {
  2339  		isMin = bx.Preds[0].i == 0
  2340  	}
  2341  	if c.Args[0] == x && c.Args[1] == y {
  2342  		// ok
  2343  	} else if c.Args[0] == y && c.Args[1] == x {
  2344  		// Comparison is reversed from how the values are listed in the Phi.
  2345  		isMin = !isMin
  2346  	} else {
  2347  		// Not comparing x and y.
  2348  		return
  2349  	}
  2350  	var dom domain
  2351  	switch c.Op {
  2352  	case OpLess64, OpLess32, OpLess16, OpLess8, OpLeq64, OpLeq32, OpLeq16, OpLeq8:
  2353  		dom = signed
  2354  	case OpLess64U, OpLess32U, OpLess16U, OpLess8U, OpLeq64U, OpLeq32U, OpLeq16U, OpLeq8U:
  2355  		dom = unsigned
  2356  	default:
  2357  		return
  2358  	}
  2359  	var rel relation
  2360  	if isMin {
  2361  		rel = lt | eq
  2362  	} else {
  2363  		rel = gt | eq
  2364  	}
  2365  	ft.update(b, v, x, dom, rel)
  2366  	ft.update(b, v, y, dom, rel)
  2367  }
  2368  
  2369  var ctzNonZeroOp = map[Op]Op{OpCtz8: OpCtz8NonZero, OpCtz16: OpCtz16NonZero, OpCtz32: OpCtz32NonZero, OpCtz64: OpCtz64NonZero}
  2370  var mostNegativeDividend = map[Op]int64{
  2371  	OpDiv16: -1 << 15,
  2372  	OpMod16: -1 << 15,
  2373  	OpDiv32: -1 << 31,
  2374  	OpMod32: -1 << 31,
  2375  	OpDiv64: -1 << 63,
  2376  	OpMod64: -1 << 63}
  2377  
  2378  // simplifyBlock simplifies some constant values in b and evaluates
  2379  // branches to non-uniquely dominated successors of b.
  2380  func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) {
  2381  	for _, v := range b.Values {
  2382  		switch v.Op {
  2383  		case OpSlicemask:
  2384  			// Replace OpSlicemask operations in b with constants where possible.
  2385  			x, delta := isConstDelta(v.Args[0])
  2386  			if x == nil {
  2387  				break
  2388  			}
  2389  			// slicemask(x + y)
  2390  			// if x is larger than -y (y is negative), then slicemask is -1.
  2391  			lim := ft.limits[x.ID]
  2392  			if lim.umin > uint64(-delta) {
  2393  				if v.Args[0].Op == OpAdd64 {
  2394  					v.reset(OpConst64)
  2395  				} else {
  2396  					v.reset(OpConst32)
  2397  				}
  2398  				if b.Func.pass.debug > 0 {
  2399  					b.Func.Warnl(v.Pos, "Proved slicemask not needed")
  2400  				}
  2401  				v.AuxInt = -1
  2402  			}
  2403  		case OpCtz8, OpCtz16, OpCtz32, OpCtz64:
  2404  			// On some architectures, notably amd64, we can generate much better
  2405  			// code for CtzNN if we know that the argument is non-zero.
  2406  			// Capture that information here for use in arch-specific optimizations.
  2407  			x := v.Args[0]
  2408  			lim := ft.limits[x.ID]
  2409  			if lim.umin > 0 || lim.min > 0 || lim.max < 0 {
  2410  				if b.Func.pass.debug > 0 {
  2411  					b.Func.Warnl(v.Pos, "Proved %v non-zero", v.Op)
  2412  				}
  2413  				v.Op = ctzNonZeroOp[v.Op]
  2414  			}
  2415  		case OpRsh8x8, OpRsh8x16, OpRsh8x32, OpRsh8x64,
  2416  			OpRsh16x8, OpRsh16x16, OpRsh16x32, OpRsh16x64,
  2417  			OpRsh32x8, OpRsh32x16, OpRsh32x32, OpRsh32x64,
  2418  			OpRsh64x8, OpRsh64x16, OpRsh64x32, OpRsh64x64:
  2419  			// Check whether, for a >> b, we know that a is non-negative
  2420  			// and b is all of a's bits except the MSB. If so, a is shifted to zero.
  2421  			bits := 8 * v.Args[0].Type.Size()
  2422  			if v.Args[1].isGenericIntConst() && v.Args[1].AuxInt >= bits-1 && ft.isNonNegative(v.Args[0]) {
  2423  				if b.Func.pass.debug > 0 {
  2424  					b.Func.Warnl(v.Pos, "Proved %v shifts to zero", v.Op)
  2425  				}
  2426  				switch bits {
  2427  				case 64:
  2428  					v.reset(OpConst64)
  2429  				case 32:
  2430  					v.reset(OpConst32)
  2431  				case 16:
  2432  					v.reset(OpConst16)
  2433  				case 8:
  2434  					v.reset(OpConst8)
  2435  				default:
  2436  					panic("unexpected integer size")
  2437  				}
  2438  				v.AuxInt = 0
  2439  				break // Be sure not to fallthrough - this is no longer OpRsh.
  2440  			}
  2441  			// If the Rsh hasn't been replaced with 0, still check if it is bounded.
  2442  			fallthrough
  2443  		case OpLsh8x8, OpLsh8x16, OpLsh8x32, OpLsh8x64,
  2444  			OpLsh16x8, OpLsh16x16, OpLsh16x32, OpLsh16x64,
  2445  			OpLsh32x8, OpLsh32x16, OpLsh32x32, OpLsh32x64,
  2446  			OpLsh64x8, OpLsh64x16, OpLsh64x32, OpLsh64x64,
  2447  			OpRsh8Ux8, OpRsh8Ux16, OpRsh8Ux32, OpRsh8Ux64,
  2448  			OpRsh16Ux8, OpRsh16Ux16, OpRsh16Ux32, OpRsh16Ux64,
  2449  			OpRsh32Ux8, OpRsh32Ux16, OpRsh32Ux32, OpRsh32Ux64,
  2450  			OpRsh64Ux8, OpRsh64Ux16, OpRsh64Ux32, OpRsh64Ux64:
  2451  			// Check whether, for a << b, we know that b
  2452  			// is strictly less than the number of bits in a.
  2453  			by := v.Args[1]
  2454  			lim := ft.limits[by.ID]
  2455  			bits := 8 * v.Args[0].Type.Size()
  2456  			if lim.umax < uint64(bits) || (lim.max < bits && ft.isNonNegative(by)) {
  2457  				v.AuxInt = 1 // see shiftIsBounded
  2458  				if b.Func.pass.debug > 0 && !by.isGenericIntConst() {
  2459  					b.Func.Warnl(v.Pos, "Proved %v bounded", v.Op)
  2460  				}
  2461  			}
  2462  		case OpDiv16, OpDiv32, OpDiv64, OpMod16, OpMod32, OpMod64:
  2463  			// On amd64 and 386 fix-up code can be avoided if we know
  2464  			//  the divisor is not -1 or the dividend > MinIntNN.
  2465  			// Don't modify AuxInt on other architectures,
  2466  			// as that can interfere with CSE.
  2467  			// TODO: add other architectures?
  2468  			if b.Func.Config.arch != "386" && b.Func.Config.arch != "amd64" {
  2469  				break
  2470  			}
  2471  			divr := v.Args[1]
  2472  			divrLim := ft.limits[divr.ID]
  2473  			divd := v.Args[0]
  2474  			divdLim := ft.limits[divd.ID]
  2475  			if divrLim.max < -1 || divrLim.min > -1 || divdLim.min > mostNegativeDividend[v.Op] {
  2476  				// See DivisionNeedsFixUp in rewrite.go.
  2477  				// v.AuxInt = 1 means we have proved both that the divisor is not -1
  2478  				// and that the dividend is not the most negative integer,
  2479  				// so we do not need to add fix-up code.
  2480  				v.AuxInt = 1
  2481  				if b.Func.pass.debug > 0 {
  2482  					b.Func.Warnl(v.Pos, "Proved %v does not need fix-up", v.Op)
  2483  				}
  2484  			}
  2485  		}
  2486  		// Fold provable constant results.
  2487  		// Helps in cases where we reuse a value after branching on its equality.
  2488  		for i, arg := range v.Args {
  2489  			lim := ft.limits[arg.ID]
  2490  			var constValue int64
  2491  			switch {
  2492  			case lim.min == lim.max:
  2493  				constValue = lim.min
  2494  			case lim.umin == lim.umax:
  2495  				constValue = int64(lim.umin)
  2496  			default:
  2497  				continue
  2498  			}
  2499  			switch arg.Op {
  2500  			case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConstNil:
  2501  				continue
  2502  			}
  2503  			typ := arg.Type
  2504  			f := b.Func
  2505  			var c *Value
  2506  			switch {
  2507  			case typ.IsBoolean():
  2508  				c = f.ConstBool(typ, constValue != 0)
  2509  			case typ.IsInteger() && typ.Size() == 1:
  2510  				c = f.ConstInt8(typ, int8(constValue))
  2511  			case typ.IsInteger() && typ.Size() == 2:
  2512  				c = f.ConstInt16(typ, int16(constValue))
  2513  			case typ.IsInteger() && typ.Size() == 4:
  2514  				c = f.ConstInt32(typ, int32(constValue))
  2515  			case typ.IsInteger() && typ.Size() == 8:
  2516  				c = f.ConstInt64(typ, constValue)
  2517  			case typ.IsPtrShaped():
  2518  				if constValue == 0 {
  2519  					c = f.ConstNil(typ)
  2520  				} else {
  2521  					// Not sure how this might happen, but if it
  2522  					// does, just skip it.
  2523  					continue
  2524  				}
  2525  			default:
  2526  				// Not sure how this might happen, but if it
  2527  				// does, just skip it.
  2528  				continue
  2529  			}
  2530  			v.SetArg(i, c)
  2531  			ft.initLimitForNewValue(c)
  2532  			if b.Func.pass.debug > 1 {
  2533  				b.Func.Warnl(v.Pos, "Proved %v's arg %d (%v) is constant %d", v, i, arg, constValue)
  2534  			}
  2535  		}
  2536  	}
  2537  
  2538  	if b.Kind != BlockIf {
  2539  		return
  2540  	}
  2541  
  2542  	// Consider outgoing edges from this block.
  2543  	parent := b
  2544  	for i, branch := range [...]branch{positive, negative} {
  2545  		child := parent.Succs[i].b
  2546  		if getBranch(sdom, parent, child) != unknown {
  2547  			// For edges to uniquely dominated blocks, we
  2548  			// already did this when we visited the child.
  2549  			continue
  2550  		}
  2551  		// For edges to other blocks, this can trim a branch
  2552  		// even if we couldn't get rid of the child itself.
  2553  		ft.checkpoint()
  2554  		addBranchRestrictions(ft, parent, branch)
  2555  		unsat := ft.unsat
  2556  		ft.restore()
  2557  		if unsat {
  2558  			// This branch is impossible, so remove it
  2559  			// from the block.
  2560  			removeBranch(parent, branch)
  2561  			// No point in considering the other branch.
  2562  			// (It *is* possible for both to be
  2563  			// unsatisfiable since the fact table is
  2564  			// incomplete. We could turn this into a
  2565  			// BlockExit, but it doesn't seem worth it.)
  2566  			break
  2567  		}
  2568  	}
  2569  }
  2570  
  2571  func removeBranch(b *Block, branch branch) {
  2572  	c := b.Controls[0]
  2573  	if b.Func.pass.debug > 0 {
  2574  		verb := "Proved"
  2575  		if branch == positive {
  2576  			verb = "Disproved"
  2577  		}
  2578  		if b.Func.pass.debug > 1 {
  2579  			b.Func.Warnl(b.Pos, "%s %s (%s)", verb, c.Op, c)
  2580  		} else {
  2581  			b.Func.Warnl(b.Pos, "%s %s", verb, c.Op)
  2582  		}
  2583  	}
  2584  	if c != nil && c.Pos.IsStmt() == src.PosIsStmt && c.Pos.SameFileAndLine(b.Pos) {
  2585  		// attempt to preserve statement marker.
  2586  		b.Pos = b.Pos.WithIsStmt()
  2587  	}
  2588  	if branch == positive || branch == negative {
  2589  		b.Kind = BlockFirst
  2590  		b.ResetControls()
  2591  		if branch == positive {
  2592  			b.swapSuccessors()
  2593  		}
  2594  	} else {
  2595  		// TODO: figure out how to remove an entry from a jump table
  2596  	}
  2597  }
  2598  
  2599  // isConstDelta returns non-nil if v is equivalent to w+delta (signed).
  2600  func isConstDelta(v *Value) (w *Value, delta int64) {
  2601  	cop := OpConst64
  2602  	switch v.Op {
  2603  	case OpAdd32, OpSub32:
  2604  		cop = OpConst32
  2605  	case OpAdd16, OpSub16:
  2606  		cop = OpConst16
  2607  	case OpAdd8, OpSub8:
  2608  		cop = OpConst8
  2609  	}
  2610  	switch v.Op {
  2611  	case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
  2612  		if v.Args[0].Op == cop {
  2613  			return v.Args[1], v.Args[0].AuxInt
  2614  		}
  2615  		if v.Args[1].Op == cop {
  2616  			return v.Args[0], v.Args[1].AuxInt
  2617  		}
  2618  	case OpSub64, OpSub32, OpSub16, OpSub8:
  2619  		if v.Args[1].Op == cop {
  2620  			aux := v.Args[1].AuxInt
  2621  			if aux != -aux { // Overflow; too bad
  2622  				return v.Args[0], -aux
  2623  			}
  2624  		}
  2625  	}
  2626  	return nil, 0
  2627  }
  2628  
  2629  // isCleanExt reports whether v is the result of a value-preserving
  2630  // sign or zero extension.
  2631  func isCleanExt(v *Value) bool {
  2632  	switch v.Op {
  2633  	case OpSignExt8to16, OpSignExt8to32, OpSignExt8to64,
  2634  		OpSignExt16to32, OpSignExt16to64, OpSignExt32to64:
  2635  		// signed -> signed is the only value-preserving sign extension
  2636  		return v.Args[0].Type.IsSigned() && v.Type.IsSigned()
  2637  
  2638  	case OpZeroExt8to16, OpZeroExt8to32, OpZeroExt8to64,
  2639  		OpZeroExt16to32, OpZeroExt16to64, OpZeroExt32to64:
  2640  		// unsigned -> signed/unsigned are value-preserving zero extensions
  2641  		return !v.Args[0].Type.IsSigned()
  2642  	}
  2643  	return false
  2644  }
  2645  

View as plain text