Source file src/cmd/compile/internal/ssa/cse.go

// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/compile/internal/types"
	"cmd/internal/src"
	"cmp"
	"fmt"
	"slices"
)

// cse does common-subexpression elimination on the Function.
// Values are just relinked, nothing is deleted. A subsequent deadcode
// pass is required to actually remove duplicate expressions.
func cse(f *Func) {
	// Two values are equivalent if they satisfy the following definition:
	// equivalent(v, w):
	//   v.op == w.op
	//   v.type == w.type
	//   v.aux == w.aux
	//   v.auxint == w.auxint
	//   len(v.args) == len(w.args)
	//   v.block == w.block if v.op == OpPhi
	//   equivalent(v.args[i], w.args[i]) for i in 0..len(v.args)-1

	// The algorithm searches for a partition of f's values into
	// equivalence classes using the above definition.
	// It starts with a coarse partition and iteratively refines it
	// until it reaches a fixed point.
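	//
	// As an illustrative sketch (hypothetical values, not from a real
	// function), consider:
	//
	//	v1 = Arg <int> {n}
	//	v2 = Const64 <int> [1]
	//	v3 = Add64 <int> v1 v2
	//	v4 = Const64 <int> [1]
	//	v5 = Add64 <int> v1 v4
	//
	// The initial coarse partition groups {v2, v4} and {v3, v5} (same
	// op, type, aux, auxint and number of args). Refinement keeps
	// {v3, v5} together because their arguments fall into equal
	// classes, so v5 can later be rewritten to v3.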

	// Make initial coarse partitions by using a subset of the conditions above.
	a := f.Cache.allocValueSlice(f.NumValues())
	defer func() { f.Cache.freeValueSlice(a) }() // inside closure to use final value of a
	a = a[:0]
	o := f.Cache.allocInt32Slice(f.NumValues()) // the ordering score for stores
	defer func() { f.Cache.freeInt32Slice(o) }()
	if f.auxmap == nil {
		f.auxmap = auxmap{}
	}
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			if v.Type.IsMemory() {
				continue // memory values can never be CSEd
			}
			if f.auxmap[v.Aux] == 0 {
				f.auxmap[v.Aux] = int32(len(f.auxmap)) + 1
			}
			a = append(a, v)
		}
	}
	partition := partitionValues(a, f.auxmap)

	// map from value id back to eqclass id
	valueEqClass := f.Cache.allocIDSlice(f.NumValues())
	defer f.Cache.freeIDSlice(valueEqClass)
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			// Use negative equivalence class #s for unique values.
			valueEqClass[v.ID] = -v.ID
		}
	}
	var pNum ID = 1
	for _, e := range partition {
		if f.pass.debug > 1 && len(e) > 500 {
			fmt.Printf("CSE.large partition (%d): ", len(e))
			for j := 0; j < 3; j++ {
				fmt.Printf("%s ", e[j].LongString())
			}
			fmt.Println()
		}

		for _, v := range e {
			valueEqClass[v.ID] = pNum
		}
		if f.pass.debug > 2 && len(e) > 1 {
			fmt.Printf("CSE.partition #%d:", pNum)
			for _, v := range e {
				fmt.Printf(" %s", v.String())
			}
			fmt.Printf("\n")
		}
		pNum++
	}
	// Keep a table that remaps the memory operand of any memory user which
	// does not produce a memory result (such as a regular load) to some
	// dominating memory operation, skipping memory defs that do not alias
	// with it.
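	//
	// For example (an illustrative sketch with hypothetical values):
	//
	//	v1   = Load <int> ptr mem0
	//	mem1 = Store {int} otherptr x mem0
	//	v2   = Load <int> ptr mem1
	//
	// If otherptr's store is provably disjoint from ptr's load, v2's
	// memory operand is treated as mem0, allowing v2 to be CSEd with v1.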
	memTable := f.Cache.allocInt32Slice(f.NumValues())
	defer f.Cache.freeInt32Slice(memTable)

	// Split equivalence classes at points where they have
	// non-equivalent arguments.  Repeat until we can't find any
	// more splits.
	var splitPoints []int
	for {
		changed := false

		// partition can grow in the loop. By not using a range loop here,
		// we process new additions as they arrive, avoiding O(n^2) behavior.
		for i := 0; i < len(partition); i++ {
			e := partition[i]

			if opcodeTable[e[0].Op].commutative {
				// Order the first two args before comparison.
				for _, v := range e {
					if valueEqClass[v.Args[0].ID] > valueEqClass[v.Args[1].ID] {
						v.Args[0], v.Args[1] = v.Args[1], v.Args[0]
					}
				}
			}

			// Sort by eq class of arguments.
			slices.SortFunc(e, func(v, w *Value) int {
				_, idxMem, _, _ := isMemUser(v)
				for i, a := range v.Args {
					var aId, bId ID
					if i != idxMem {
						b := w.Args[i]
						aId = a.ID
						bId = b.ID
					} else {
						// A memory user's mem argument may be remapped to allow matching
						// identical load-like instructions across disjoint stores.
						aId, _ = getEffectiveMemoryArg(memTable, v)
						bId, _ = getEffectiveMemoryArg(memTable, w)
					}
					if valueEqClass[aId] < valueEqClass[bId] {
						return -1
					}
					if valueEqClass[aId] > valueEqClass[bId] {
						return +1
					}
				}
				return 0
			})

			// Find split points.
			splitPoints = append(splitPoints[:0], 0)
			for j := 1; j < len(e); j++ {
				v, w := e[j-1], e[j]
				// Note: commutative args were already ordered above by eq class.
				eqArgs := true
				_, idxMem, _, _ := isMemUser(v)
				for k, a := range v.Args {
					if v.Op == OpLocalAddr && k == 1 {
						continue
					}
					var aId, bId ID
					if k != idxMem {
						b := w.Args[k]
						aId = a.ID
						bId = b.ID
					} else {
						// A memory user's mem argument may be remapped to allow matching
						// identical load-like instructions across disjoint stores.
						aId, _ = getEffectiveMemoryArg(memTable, v)
						bId, _ = getEffectiveMemoryArg(memTable, w)
					}
					if valueEqClass[aId] != valueEqClass[bId] {
						eqArgs = false
						break
					}
				}
				if !eqArgs {
					splitPoints = append(splitPoints, j)
				}
			}
			if len(splitPoints) == 1 {
				continue // no splits, leave equivalence class alone.
			}

			// Move another equivalence class down in place of e.
			partition[i] = partition[len(partition)-1]
			partition = partition[:len(partition)-1]
			i--

			// Add new equivalence classes for the parts of e we found.
			splitPoints = append(splitPoints, len(e))
			for j := 0; j < len(splitPoints)-1; j++ {
				f := e[splitPoints[j]:splitPoints[j+1]]
				if len(f) == 1 {
					// Don't add singletons.
					valueEqClass[f[0].ID] = -f[0].ID
					continue
				}
				for _, v := range f {
					valueEqClass[v.ID] = pNum
				}
				pNum++
				partition = append(partition, f)
			}
			changed = true
		}

		if !changed {
			break
		}
	}

	sdom := f.Sdom()

	// Compute substitutions we would like to do. We substitute v for w
	// if v and w are in the same equivalence class and v dominates w.
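	//
	// For example (sketch): if v and w are equivalent, v is in block b1,
	// w is in block b3, and b1 dominates b3, then uses of w are redirected
	// to v and w is left for the deadcode pass to delete.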
	rewrite := f.Cache.allocValueSlice(f.NumValues())
	defer f.Cache.freeValueSlice(rewrite)
	for _, e := range partition {
		slices.SortFunc(e, func(v, w *Value) int {
			if c := cmp.Compare(sdom.domorder(v.Block), sdom.domorder(w.Block)); c != 0 {
				return c
			}
			if _, _, _, ok := isMemUser(v); ok {
				// Additional ordering among the memory users within one block:
				// prefer the earliest possible value among the set of equivalent
				// values, that is, the one with the lowest skip count (the lowest
				// number of memory defs skipped until their common def).
				_, vSkips := getEffectiveMemoryArg(memTable, v)
				_, wSkips := getEffectiveMemoryArg(memTable, w)
				if c := cmp.Compare(vSkips, wSkips); c != 0 {
					return c
				}
			}
			if v.Op == OpLocalAddr {
				// Compare the memory args of OpLocalAddrs in the same block.
				vm := v.Args[1]
				wm := w.Args[1]
				if vm == wm {
					return 0
				}
				// If the two OpLocalAddrs are in the same block and one's memory
				// arg is also in that block but the other's is not, the latter
				// memory arg must be in an ancestor block.
				if vm.Block != v.Block {
					return -1
				}
				if wm.Block != w.Block {
					return +1
				}
				// Use store order if the memory args are in the same block.
				vs := storeOrdering(vm, o)
				ws := storeOrdering(wm, o)
				if vs <= 0 {
					f.Fatalf("unable to determine the order of %s", vm.LongString())
				}
				if ws <= 0 {
					f.Fatalf("unable to determine the order of %s", wm.LongString())
				}
				return cmp.Compare(vs, ws)
			}
			vStmt := v.Pos.IsStmt() == src.PosIsStmt
			wStmt := w.Pos.IsStmt() == src.PosIsStmt
			if vStmt != wStmt {
				if vStmt {
					return -1
				}
				return +1
			}
			return 0
		})

		for i := 0; i < len(e)-1; i++ {
			// e is sorted by domorder, so a maximal dominant element is first in the slice
			v := e[i]
			if v == nil {
				continue
			}

			e[i] = nil
			// Replace all elements of e which v dominates
			for j := i + 1; j < len(e); j++ {
				w := e[j]
				if w == nil {
					continue
				}
				if sdom.IsAncestorEq(v.Block, w.Block) {
					rewrite[w.ID] = v
					e[j] = nil
				} else {
					// e is sorted by domorder, so v.Block doesn't dominate any subsequent blocks in e
					break
				}
			}
		}
	}

	rewrites := int64(0)

	// Apply substitutions
	for _, b := range f.Blocks {
		for _, v := range b.Values {
			for i, w := range v.Args {
				if x := rewrite[w.ID]; x != nil {
					if w.Pos.IsStmt() == src.PosIsStmt && w.Op != OpNilCheck {
						// We're about to lose a statement marker on w.
						// w is an input to v; if they're in the same block
						// and on the same line, v is a good-enough new statement boundary.
						if w.Block == v.Block && w.Pos.Line() == v.Pos.Line() {
							v.Pos = v.Pos.WithIsStmt()
							w.Pos = w.Pos.WithNotStmt()
						} // TODO and if this fails?
					}
					v.SetArg(i, x)
					rewrites++
				}
			}
		}
		for i, v := range b.ControlValues() {
			if x := rewrite[v.ID]; x != nil {
				if v.Op == OpNilCheck {
					// nilcheck pass will remove the nil checks and log
					// them appropriately, so don't mess with them here.
					continue
				}
				b.ReplaceControl(i, x)
			}
		}
	}

	if f.pass.stats > 0 {
		f.LogStat("CSE REWRITES", rewrites)
	}
}

// storeOrdering computes an order for stores by iterating over the store
// chain and assigning a score to each store. The scores are only meaningful
// for stores within the same block, and the first store in store order has
// the lowest score. The cache ensures each score is computed only once.
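//
// For example (a sketch with hypothetical values), given a block containing
// the store chain
//
//	mem1 = Store {t} p1 x1 mem0
//	mem2 = Store {t} p2 x2 mem1
//	mem3 = Store {t} p3 x3 mem2
//
// where mem0 comes from another block, storeOrdering assigns mem1, mem2
// and mem3 the scores 1, 2 and 3 respectively.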
func storeOrdering(v *Value, cache []int32) int32 {
	const minScore int32 = 1
	score := minScore
	w := v
	for {
		if s := cache[w.ID]; s >= minScore {
			score += s
			break
		}
		if w.Op == OpPhi || w.Op == OpInitMem {
			break
		}
		a := w.MemoryArg()
		if a.Block != w.Block {
			break
		}
		w = a
		score++
	}
	w = v
	for cache[w.ID] == 0 {
		cache[w.ID] = score
		if score == minScore {
			break
		}
		w = w.MemoryArg()
		score--
	}
	return cache[v.ID]
}

// An eqclass approximates an equivalence class. During the
// algorithm it may represent the union of several of the
// final equivalence classes.
type eqclass []*Value

// partitionValues partitions the values into equivalence classes
// based on having all the following features match:
//   - opcode
//   - type
//   - auxint
//   - aux
//   - nargs
//   - block # if a phi op
//   - the first two args' opcodes and auxints
//   - NOT the first two args' aux; using that can break CSE.
//
// partitionValues returns a list of equivalence classes, each
// a list of *Values sorted by ID. The eqclass slices are
// backed by the same storage as the input slice.
// Equivalence classes of size 1 are ignored.
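//
// For example (sketch): if the input sorts to [c1 c2 c3 a1 l1], where
// c1, c2 and c3 compare equal under cmpVal and a1 and l1 are singletons,
// the result is the single class [c1 c2 c3].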
func partitionValues(a []*Value, auxIDs auxmap) []eqclass {
	slices.SortFunc(a, func(v, w *Value) int {
		switch cmpVal(v, w, auxIDs) {
		case types.CMPlt:
			return -1
		case types.CMPgt:
			return +1
		default:
			// Sort by value ID last to keep the sort result deterministic.
			return cmp.Compare(v.ID, w.ID)
		}
	})

	var partition []eqclass
	for len(a) > 0 {
		v := a[0]
		j := 1
		for ; j < len(a); j++ {
			w := a[j]
			if cmpVal(v, w, auxIDs) != types.CMPeq {
				break
			}
		}
		if j > 1 {
			partition = append(partition, a[:j])
		}
		a = a[j:]
	}

	return partition
}

func lt2Cmp(isLt bool) types.Cmp {
	if isLt {
		return types.CMPlt
	}
	return types.CMPgt
}

type auxmap map[Aux]int32

func cmpVal(v, w *Value, auxIDs auxmap) types.Cmp {
	// Try to order these comparisons by cost (cheaper first).
	if v.Op != w.Op {
		return lt2Cmp(v.Op < w.Op)
	}
	if v.AuxInt != w.AuxInt {
		return lt2Cmp(v.AuxInt < w.AuxInt)
	}
	if len(v.Args) != len(w.Args) {
		return lt2Cmp(len(v.Args) < len(w.Args))
	}
	if v.Op == OpPhi && v.Block != w.Block {
		return lt2Cmp(v.Block.ID < w.Block.ID)
	}
	if v.Type.IsMemory() {
		// We will never be able to CSE two values
		// that generate memory.
		return lt2Cmp(v.ID < w.ID)
	}
	// OpSelect is a pseudo-op. We need to be more aggressive
	// regarding CSE to keep multiple OpSelect's of the same
	// argument from existing.
	if v.Op != OpSelect0 && v.Op != OpSelect1 && v.Op != OpSelectN {
		if tc := v.Type.Compare(w.Type); tc != types.CMPeq {
			return tc
		}
	}

	if v.Aux != w.Aux {
		if v.Aux == nil {
			return types.CMPlt
		}
		if w.Aux == nil {
			return types.CMPgt
		}
		return lt2Cmp(auxIDs[v.Aux] < auxIDs[w.Aux])
	}

	return types.CMPeq
}

// isMemUser reports whether the given instruction only uses its "memory"
// argument, so that we may try to skip memory "defs" that do not alias
// with its address. It returns the index of the pointer argument, the index
// of the "memory" argument, the access width, and true for such
// instructions; otherwise it returns (-1, -1, 0, false).
func isMemUser(v *Value) (int, int, int64, bool) {
	switch v.Op {
	case OpLoad:
		return 0, 1, v.Type.Size(), true
	case OpNilCheck:
		return 0, 1, 0, true
	default:
		return -1, -1, 0, false
	}
}

// isMemDef reports whether the given "memory"-defining instruction's memory
// destination can be analyzed for aliasing with memory "user" instructions.
// It returns the index of the pointer argument, the index of the "memory"
// argument, the access width, and true for such instructions; otherwise it
// returns (-1, -1, 0, false).
func isMemDef(v *Value) (int, int, int64, bool) {
	switch v.Op {
	case OpStore:
		return 0, 2, auxToType(v.Aux).Size(), true
	default:
		return -1, -1, 0, false
	}
}

// The mem table keeps the memTableSkipBits low bits to store the number of
// skips of the "memory" operand, and the remaining bits to store the ID of
// the destination "memory"-producing instruction.
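//
// For example, an effective memory arg with ID 7 reached by skipping two
// disjoint stores is encoded as 7<<memTableSkipBits | 2.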
const memTableSkipBits = 8

// maxId is the maximum ID value we are able to store in the memTable;
// for larger IDs we fall back to the unremapped memory arg.
const maxId = ID(1<<(31-memTableSkipBits)) - 1

// getEffectiveMemoryArg returns the ID of the first possibly-aliasing memory
// def along the memory chain starting at v's memory argument, and the number
// of non-aliasing stores skipped.
func getEffectiveMemoryArg(memTable []int32, v *Value) (ID, uint32) {
	if code := uint32(memTable[v.ID]); code != 0 {
		return ID(code >> memTableSkipBits), code & ((1 << memTableSkipBits) - 1)
	}
	if idxPtr, idxMem, width, ok := isMemUser(v); ok {
		// TODO: We could early return some predefined value if width==0
		memId := v.Args[idxMem].ID
		if memId > maxId {
			return memId, 0
		}
		mem, skips := skipDisjointMemDefs(v, idxPtr, idxMem, width)
		if mem.ID <= maxId {
			memId = mem.ID
		} else {
			skips = 0 // avoid the skip
		}
		memTable[v.ID] = int32(memId<<memTableSkipBits) | int32(skips)
		return memId, skips
	} else {
		v.Block.Func.Fatalf("expected memory user instruction: %v", v.LongString())
	}
	return 0, 0
}

// skipDisjointMemDefs finds a memory def that is not trivially disjoint
// with the user instruction, counting the number of "skips" along the path.
// It returns that memory def's value and the number of skips.
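//
// For example (a sketch with hypothetical values), given
//
//	mem1 = Store {int64} p x mem0
//	v    = Load <int32> q mem1
//
// where p's 8-byte store is provably disjoint from q's 4-byte load,
// skipDisjointMemDefs(v, 0, 1, 4) returns (mem0, 1): one store skipped.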
func skipDisjointMemDefs(user *Value, idxUserPtr, idxUserMem int, useWidth int64) (*Value, uint32) {
	usePtr, mem := user.Args[idxUserPtr], user.Args[idxUserMem]
	const maxSkips = (1 << memTableSkipBits) - 1
	var skips uint32
	for skips = 0; skips < maxSkips; skips++ {
		if idxPtr, idxMem, width, ok := isMemDef(mem); ok {
			if mem.Args[idxMem].Uses > 50 {
				// Skipping a memory def with a lot of uses may potentially increase register pressure.
				break
			}
			defPtr := mem.Args[idxPtr]
			if disjoint(defPtr, width, usePtr, useWidth) {
				mem = mem.Args[idxMem]
				continue
			}
		}
		break
	}
	return mem, skips
}