Source file src/cmd/compile/internal/ssa/cse.go

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssa
     6  
     7  import (
     8  	"cmd/compile/internal/types"
     9  	"cmd/internal/src"
    10  	"cmp"
    11  	"fmt"
    12  	"slices"
    13  )
    14  
    15  // cse does common-subexpression elimination on the Function.
    16  // Values are just relinked, nothing is deleted. A subsequent deadcode
    17  // pass is required to actually remove duplicate expressions.
    18  func cse(f *Func) {
    19  	// Two values are equivalent if they satisfy the following definition:
    20  	// equivalent(v, w):
    21  	//   v.op == w.op
    22  	//   v.type == w.type
    23  	//   v.aux == w.aux
    24  	//   v.auxint == w.auxint
    25  	//   len(v.args) == len(w.args)
    26  	//   v.block == w.block if v.op == OpPhi
    27  	//   equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1
    28  
    29  	// The algorithm searches for a partition of f's values into
    30  	// equivalence classes using the above definition.
    31  	// It starts with a coarse partition and iteratively refines it
    32  	// until it reaches a fixed point.
    33  
    34  	// Make initial coarse partitions by using a subset of the conditions above.
    35  	a := f.Cache.allocValueSlice(f.NumValues())
    36  	defer func() { f.Cache.freeValueSlice(a) }() // inside closure to use final value of a
    37  	a = a[:0]
    38  	o := f.Cache.allocInt32Slice(f.NumValues()) // the ordering score for stores
    39  	defer func() { f.Cache.freeInt32Slice(o) }()
    40  	if f.auxmap == nil {
    41  		f.auxmap = auxmap{}
    42  	}
    43  	for _, b := range f.Blocks {
    44  		for _, v := range b.Values {
    45  			if v.Type.IsMemory() {
    46  				continue // memory values can never cse
    47  			}
    48  			if f.auxmap[v.Aux] == 0 {
    49  				f.auxmap[v.Aux] = int32(len(f.auxmap)) + 1
    50  			}
    51  			a = append(a, v)
    52  		}
    53  	}
    54  	partition := partitionValues(a, f.auxmap)
    55  
    56  	// map from value id back to eqclass id
    57  	valueEqClass := f.Cache.allocIDSlice(f.NumValues())
    58  	defer f.Cache.freeIDSlice(valueEqClass)
    59  	for _, b := range f.Blocks {
    60  		for _, v := range b.Values {
    61  			// Use negative equivalence class #s for unique values.
    62  			valueEqClass[v.ID] = -v.ID
    63  		}
    64  	}
    65  	var pNum ID = 1
    66  	for _, e := range partition {
    67  		if f.pass.debug > 1 && len(e) > 500 {
    68  			fmt.Printf("CSE.large partition (%d): ", len(e))
    69  			for j := 0; j < 3; j++ {
    70  				fmt.Printf("%s ", e[j].LongString())
    71  			}
    72  			fmt.Println()
    73  		}
    74  
    75  		for _, v := range e {
    76  			valueEqClass[v.ID] = pNum
    77  		}
    78  		if f.pass.debug > 2 && len(e) > 1 {
    79  			fmt.Printf("CSE.partition #%d:", pNum)
    80  			for _, v := range e {
    81  				fmt.Printf(" %s", v.String())
    82  			}
    83  			fmt.Printf("\n")
    84  		}
    85  		pNum++
    86  	}
    87  
    88  	// Split equivalence classes at points where they have
    89  	// non-equivalent arguments.  Repeat until we can't find any
    90  	// more splits.
    91  	var splitPoints []int
    92  	for {
    93  		changed := false
    94  
    95  		// partition can grow in the loop. By not using a range loop here,
    96  		// we process new additions as they arrive, avoiding O(n^2) behavior.
    97  		for i := 0; i < len(partition); i++ {
    98  			e := partition[i]
    99  
   100  			if opcodeTable[e[0].Op].commutative {
   101  				// Order the first two args before comparison.
   102  				for _, v := range e {
   103  					if valueEqClass[v.Args[0].ID] > valueEqClass[v.Args[1].ID] {
   104  						v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
   105  					}
   106  				}
   107  			}
   108  
   109  			// Sort by eq class of arguments.
   110  			slices.SortFunc(e, func(v, w *Value) int {
   111  				for i, a := range v.Args {
   112  					b := w.Args[i]
   113  					if valueEqClass[a.ID] < valueEqClass[b.ID] {
   114  						return -1
   115  					}
   116  					if valueEqClass[a.ID] > valueEqClass[b.ID] {
   117  						return +1
   118  					}
   119  				}
   120  				return 0
   121  			})
   122  
   123  			// Find split points.
   124  			splitPoints = append(splitPoints[:0], 0)
   125  			for j := 1; j < len(e); j++ {
   126  				v, w := e[j-1], e[j]
   127  				// Note: commutative args already correctly ordered by byArgClass.
   128  				eqArgs := true
   129  				for k, a := range v.Args {
   130  					if v.Op == OpLocalAddr && k == 1 {
   131  						continue
   132  					}
   133  					b := w.Args[k]
   134  					if valueEqClass[a.ID] != valueEqClass[b.ID] {
   135  						eqArgs = false
   136  						break
   137  					}
   138  				}
   139  				if !eqArgs {
   140  					splitPoints = append(splitPoints, j)
   141  				}
   142  			}
   143  			if len(splitPoints) == 1 {
   144  				continue // no splits, leave equivalence class alone.
   145  			}
   146  
   147  			// Move another equivalence class down in place of e.
   148  			partition[i] = partition[len(partition)-1]
   149  			partition = partition[:len(partition)-1]
   150  			i--
   151  
   152  			// Add new equivalence classes for the parts of e we found.
   153  			splitPoints = append(splitPoints, len(e))
   154  			for j := 0; j < len(splitPoints)-1; j++ {
   155  				f := e[splitPoints[j]:splitPoints[j+1]]
   156  				if len(f) == 1 {
   157  					// Don't add singletons.
   158  					valueEqClass[f[0].ID] = -f[0].ID
   159  					continue
   160  				}
   161  				for _, v := range f {
   162  					valueEqClass[v.ID] = pNum
   163  				}
   164  				pNum++
   165  				partition = append(partition, f)
   166  			}
   167  			changed = true
   168  		}
   169  
   170  		if !changed {
   171  			break
   172  		}
   173  	}
   174  
   175  	sdom := f.Sdom()
   176  
   177  	// Compute substitutions we would like to do. We substitute v for w
   178  	// if v and w are in the same equivalence class and v dominates w.
   179  	rewrite := f.Cache.allocValueSlice(f.NumValues())
   180  	defer f.Cache.freeValueSlice(rewrite)
   181  	for _, e := range partition {
   182  		slices.SortFunc(e, func(v, w *Value) int {
   183  			c := cmp.Compare(sdom.domorder(v.Block), sdom.domorder(w.Block))
   184  			if c != 0 {
   185  				return c
   186  			}
   187  			if v.Op == OpLocalAddr {
   188  				// compare the memory args for OpLocalAddrs in the same block
   189  				vm := v.Args[1]
   190  				wm := w.Args[1]
   191  				if vm == wm {
   192  					return 0
   193  				}
   194  				// if the two OpLocalAddrs are in the same block, and one's memory
   195  				// arg also in the same block, but the other one's memory arg not,
   196  				// the latter must be in an ancestor block
   197  				if vm.Block != v.Block {
   198  					return -1
   199  				}
   200  				if wm.Block != w.Block {
   201  					return +1
   202  				}
   203  				// use store order if the memory args are in the same block
   204  				vs := storeOrdering(vm, o)
   205  				ws := storeOrdering(wm, o)
   206  				if vs <= 0 {
   207  					f.Fatalf("unable to determine the order of %s", vm.LongString())
   208  				}
   209  				if ws <= 0 {
   210  					f.Fatalf("unable to determine the order of %s", wm.LongString())
   211  				}
   212  				return cmp.Compare(vs, ws)
   213  			}
   214  			vStmt := v.Pos.IsStmt() == src.PosIsStmt
   215  			wStmt := w.Pos.IsStmt() == src.PosIsStmt
   216  			if vStmt != wStmt {
   217  				if vStmt {
   218  					return -1
   219  				}
   220  				return +1
   221  			}
   222  			return 0
   223  		})
   224  
   225  		for i := 0; i < len(e)-1; i++ {
   226  			// e is sorted by domorder, so a maximal dominant element is first in the slice
   227  			v := e[i]
   228  			if v == nil {
   229  				continue
   230  			}
   231  
   232  			e[i] = nil
   233  			// Replace all elements of e which v dominates
   234  			for j := i + 1; j < len(e); j++ {
   235  				w := e[j]
   236  				if w == nil {
   237  					continue
   238  				}
   239  				if sdom.IsAncestorEq(v.Block, w.Block) {
   240  					rewrite[w.ID] = v
   241  					e[j] = nil
   242  				} else {
   243  					// e is sorted by domorder, so v.Block doesn't dominate any subsequent blocks in e
   244  					break
   245  				}
   246  			}
   247  		}
   248  	}
   249  
   250  	rewrites := int64(0)
   251  
   252  	// Apply substitutions
   253  	for _, b := range f.Blocks {
   254  		for _, v := range b.Values {
   255  			for i, w := range v.Args {
   256  				if x := rewrite[w.ID]; x != nil {
   257  					if w.Pos.IsStmt() == src.PosIsStmt {
   258  						// about to lose a statement marker, w
   259  						// w is an input to v; if they're in the same block
   260  						// and the same line, v is a good-enough new statement boundary.
   261  						if w.Block == v.Block && w.Pos.Line() == v.Pos.Line() {
   262  							v.Pos = v.Pos.WithIsStmt()
   263  							w.Pos = w.Pos.WithNotStmt()
   264  						} // TODO and if this fails?
   265  					}
   266  					v.SetArg(i, x)
   267  					rewrites++
   268  				}
   269  			}
   270  		}
   271  		for i, v := range b.ControlValues() {
   272  			if x := rewrite[v.ID]; x != nil {
   273  				if v.Op == OpNilCheck {
   274  					// nilcheck pass will remove the nil checks and log
   275  					// them appropriately, so don't mess with them here.
   276  					continue
   277  				}
   278  				b.ReplaceControl(i, x)
   279  			}
   280  		}
   281  	}
   282  
   283  	if f.pass.stats > 0 {
   284  		f.LogStat("CSE REWRITES", rewrites)
   285  	}
   286  }
   287  
   288  // storeOrdering computes the order for stores by iterate over the store
   289  // chain, assigns a score to each store. The scores only make sense for
   290  // stores within the same block, and the first store by store order has
   291  // the lowest score. The cache was used to ensure only compute once.
   292  func storeOrdering(v *Value, cache []int32) int32 {
   293  	const minScore int32 = 1
   294  	score := minScore
   295  	w := v
   296  	for {
   297  		if s := cache[w.ID]; s >= minScore {
   298  			score += s
   299  			break
   300  		}
   301  		if w.Op == OpPhi || w.Op == OpInitMem {
   302  			break
   303  		}
   304  		a := w.MemoryArg()
   305  		if a.Block != w.Block {
   306  			break
   307  		}
   308  		w = a
   309  		score++
   310  	}
   311  	w = v
   312  	for cache[w.ID] == 0 {
   313  		cache[w.ID] = score
   314  		if score == minScore {
   315  			break
   316  		}
   317  		w = w.MemoryArg()
   318  		score--
   319  	}
   320  	return cache[v.ID]
   321  }
   322  
// An eqclass approximates an equivalence class: it is a slice of the
// Values currently believed to be equivalent. While the algorithm is
// still refining the partition, one eqclass may represent the union of
// several of the final equivalence classes.
type eqclass []*Value
   327  
   328  // partitionValues partitions the values into equivalence classes
   329  // based on having all the following features match:
   330  //   - opcode
   331  //   - type
   332  //   - auxint
   333  //   - aux
   334  //   - nargs
   335  //   - block # if a phi op
   336  //   - first two arg's opcodes and auxint
   337  //   - NOT first two arg's aux; that can break CSE.
   338  //
   339  // partitionValues returns a list of equivalence classes, each
   340  // being a sorted by ID list of *Values. The eqclass slices are
   341  // backed by the same storage as the input slice.
   342  // Equivalence classes of size 1 are ignored.
   343  func partitionValues(a []*Value, auxIDs auxmap) []eqclass {
   344  	slices.SortFunc(a, func(v, w *Value) int {
   345  		switch cmpVal(v, w, auxIDs) {
   346  		case types.CMPlt:
   347  			return -1
   348  		case types.CMPgt:
   349  			return +1
   350  		default:
   351  			// Sort by value ID last to keep the sort result deterministic.
   352  			return cmp.Compare(v.ID, w.ID)
   353  		}
   354  	})
   355  
   356  	var partition []eqclass
   357  	for len(a) > 0 {
   358  		v := a[0]
   359  		j := 1
   360  		for ; j < len(a); j++ {
   361  			w := a[j]
   362  			if cmpVal(v, w, auxIDs) != types.CMPeq {
   363  				break
   364  			}
   365  		}
   366  		if j > 1 {
   367  			partition = append(partition, a[:j])
   368  		}
   369  		a = a[j:]
   370  	}
   371  
   372  	return partition
   373  }
   374  func lt2Cmp(isLt bool) types.Cmp {
   375  	if isLt {
   376  		return types.CMPlt
   377  	}
   378  	return types.CMPgt
   379  }
   380  
// auxmap associates an int32 ID with each distinct Aux value. cse hands
// out IDs starting at 1 and treats an ID of 0 as "not assigned yet";
// cmpVal compares these IDs to order two values whose Aux fields differ.
type auxmap map[Aux]int32
   382  
   383  func cmpVal(v, w *Value, auxIDs auxmap) types.Cmp {
   384  	// Try to order these comparison by cost (cheaper first)
   385  	if v.Op != w.Op {
   386  		return lt2Cmp(v.Op < w.Op)
   387  	}
   388  	if v.AuxInt != w.AuxInt {
   389  		return lt2Cmp(v.AuxInt < w.AuxInt)
   390  	}
   391  	if len(v.Args) != len(w.Args) {
   392  		return lt2Cmp(len(v.Args) < len(w.Args))
   393  	}
   394  	if v.Op == OpPhi && v.Block != w.Block {
   395  		return lt2Cmp(v.Block.ID < w.Block.ID)
   396  	}
   397  	if v.Type.IsMemory() {
   398  		// We will never be able to CSE two values
   399  		// that generate memory.
   400  		return lt2Cmp(v.ID < w.ID)
   401  	}
   402  	// OpSelect is a pseudo-op. We need to be more aggressive
   403  	// regarding CSE to keep multiple OpSelect's of the same
   404  	// argument from existing.
   405  	if v.Op != OpSelect0 && v.Op != OpSelect1 && v.Op != OpSelectN {
   406  		if tc := v.Type.Compare(w.Type); tc != types.CMPeq {
   407  			return tc
   408  		}
   409  	}
   410  
   411  	if v.Aux != w.Aux {
   412  		if v.Aux == nil {
   413  			return types.CMPlt
   414  		}
   415  		if w.Aux == nil {
   416  			return types.CMPgt
   417  		}
   418  		return lt2Cmp(auxIDs[v.Aux] < auxIDs[w.Aux])
   419  	}
   420  
   421  	return types.CMPeq
   422  }
   423  

View as plain text