Source file src/cmd/compile/internal/liveness/mergelocals.go

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package liveness
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/bitvec"
    10  	"cmd/compile/internal/ir"
    11  	"cmd/compile/internal/ssa"
    12  	"cmd/internal/src"
    13  	"fmt"
    14  	"os"
    15  	"path/filepath"
    16  	"slices"
    17  	"sort"
    18  	"strings"
    19  )
    20  
    21  // MergeLocalsState encapsulates information about which AUTO
    22  // (stack-allocated) variables within a function can be safely
    23  // merged/overlapped, e.g. share a stack slot with some other auto).
    24  // An instance of MergeLocalsState is produced by MergeLocals() below
    25  // and then consumed in ssagen.AllocFrame. The map 'partition'
    26  // contains entries of the form <N,SL> where N is an *ir.Name and SL
    27  // is a slice holding the indices (within 'vars') of other variables
    28  // that share the same slot, specifically the slot of the first
    29  // element in the partition, which we'll call the "leader". For
    30  // example, if a function contains five variables where v1/v2/v3 are
    31  // safe to overlap and v4/v5 are safe to overlap, the MergeLocalsState
    32  // content might look like
    33  //
    34  //	vars: [v1, v2, v3, v4, v5]
    35  //	partition: v1 -> [1, 0, 2], v2 -> [1, 0, 2], v3 -> [1, 0, 2]
    36  //	           v4 -> [3, 4], v5 -> [3, 4]
    37  //
    38  // A nil MergeLocalsState indicates that no local variables meet the
    39  // necessary criteria for overlap.
    40  type MergeLocalsState struct {
    41  	// contains auto vars that participate in overlapping
    42  	vars []*ir.Name
    43  	// maps auto variable to overlap partition
    44  	partition map[*ir.Name][]int
    45  }
    46  
    47  // candRegion is a sub-range (start, end) corresponding to an interval
    48  // [st,en] within the list of candidate variables.
    49  type candRegion struct {
    50  	st, en int
    51  }
    52  
    53  // cstate holds state information we'll need during the analysis
    54  // phase of stack slot merging but can be discarded when the analysis
    55  // is done.
    56  type cstate struct {
    57  	fn             *ir.Func
    58  	f              *ssa.Func
    59  	lv             *liveness
    60  	cands          []*ir.Name
    61  	nameToSlot     map[*ir.Name]int32
    62  	regions        []candRegion
    63  	indirectUE     map[ssa.ID][]*ir.Name
    64  	ivs            []Intervals
    65  	hashDeselected map[*ir.Name]bool
    66  	trace          int // debug trace level
    67  }
    68  
    69  // MergeLocals analyzes the specified ssa function f to determine which
    70  // of its auto variables can safely share the same stack slot, returning
    71  // a state object that describes how the overlap should be done.
    72  func MergeLocals(fn *ir.Func, f *ssa.Func) *MergeLocalsState {
    73  
    74  	// Create a container object for useful state info and then
    75  	// call collectMergeCandidates to see if there are vars suitable
    76  	// for stack slot merging.
    77  	cs := &cstate{
    78  		fn:    fn,
    79  		f:     f,
    80  		trace: base.Debug.MergeLocalsTrace,
    81  	}
    82  	cs.collectMergeCandidates()
    83  	if len(cs.regions) == 0 {
    84  		return nil
    85  	}
    86  
    87  	// Kick off liveness analysis.
    88  	//
    89  	// If we have a local variable such as "r2" below that's written
    90  	// but then not read, something like:
    91  	//
    92  	//      vardef r1
    93  	//      r1.x = ...
    94  	//      vardef r2
    95  	//      r2.x = 0
    96  	//      r2.y = ...
    97  	//      <call foo>
    98  	//      // no subsequent use of r2
    99  	//      ... = r1.x
   100  	//
   101  	// then for the purpose of calculating stack maps at the call, we
   102  	// can ignore "r2" completely during liveness analysis for stack
   103  	// maps, however for stack slock merging we most definitely want
   104  	// to treat the writes as "uses".
   105  	cs.lv = newliveness(fn, f, cs.cands, cs.nameToSlot, 0)
   106  	cs.lv.conservativeWrites = true
   107  	cs.lv.prologue()
   108  	cs.lv.solve()
   109  
   110  	// Compute intervals for each candidate based on the liveness and
   111  	// on block effects.
   112  	cs.computeIntervals()
   113  
   114  	// Perform merging within each region of the candidates list.
   115  	rv := cs.performMerging()
   116  	if err := rv.check(); err != nil {
   117  		base.FatalfAt(fn.Pos(), "invalid mergelocals state: %v", err)
   118  	}
   119  	return rv
   120  }
   121  
   122  // Subsumed returns whether variable n is subsumed, e.g. appears
   123  // in an overlap position but is not the leader in that partition.
   124  func (mls *MergeLocalsState) Subsumed(n *ir.Name) bool {
   125  	if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] != n {
   126  		return true
   127  	}
   128  	return false
   129  }
   130  
   131  // IsLeader returns whether a variable n is the leader (first element)
   132  // in a sharing partition.
   133  func (mls *MergeLocalsState) IsLeader(n *ir.Name) bool {
   134  	if sl, ok := mls.partition[n]; ok && mls.vars[sl[0]] == n {
   135  		return true
   136  	}
   137  	return false
   138  }
   139  
   140  // Leader returns the leader variable for subsumed var n.
   141  func (mls *MergeLocalsState) Leader(n *ir.Name) *ir.Name {
   142  	if sl, ok := mls.partition[n]; ok {
   143  		if mls.vars[sl[0]] == n {
   144  			panic("variable is not subsumed")
   145  		}
   146  		return mls.vars[sl[0]]
   147  	}
   148  	panic("not a merge candidate")
   149  }
   150  
   151  // Followers writes a list of the followers for leader n into the slice tmp.
   152  func (mls *MergeLocalsState) Followers(n *ir.Name, tmp []*ir.Name) []*ir.Name {
   153  	tmp = tmp[:0]
   154  	sl, ok := mls.partition[n]
   155  	if !ok {
   156  		panic("no entry for leader")
   157  	}
   158  	if mls.vars[sl[0]] != n {
   159  		panic("followers invoked on subsumed var")
   160  	}
   161  	for _, k := range sl[1:] {
   162  		tmp = append(tmp, mls.vars[k])
   163  	}
   164  	slices.SortStableFunc(tmp, func(a, b *ir.Name) int {
   165  		return strings.Compare(a.Sym().Name, b.Sym().Name)
   166  	})
   167  	return tmp
   168  }
   169  
   170  // EstSavings returns the estimated reduction in stack size (number of bytes) for
   171  // the given merge locals state via a pair of ints, the first for non-pointer types and the second for pointer types.
   172  func (mls *MergeLocalsState) EstSavings() (int, int) {
   173  	totnp := 0
   174  	totp := 0
   175  	for n := range mls.partition {
   176  		if mls.Subsumed(n) {
   177  			sz := int(n.Type().Size())
   178  			if n.Type().HasPointers() {
   179  				totp += sz
   180  			} else {
   181  				totnp += sz
   182  			}
   183  		}
   184  	}
   185  	return totnp, totp
   186  }
   187  
   188  // check tests for various inconsistencies and problems in mls,
   189  // returning an error if any problems are found.
   190  func (mls *MergeLocalsState) check() error {
   191  	if mls == nil {
   192  		return nil
   193  	}
   194  	used := make(map[int]bool)
   195  	seenv := make(map[*ir.Name]int)
   196  	for ii, v := range mls.vars {
   197  		if prev, ok := seenv[v]; ok {
   198  			return fmt.Errorf("duplicate var %q in vslots: %d and %d\n",
   199  				v.Sym().Name, ii, prev)
   200  		}
   201  		seenv[v] = ii
   202  	}
   203  	for k, sl := range mls.partition {
   204  		// length of slice value needs to be more than 1
   205  		if len(sl) < 2 {
   206  			return fmt.Errorf("k=%q v=%+v slice len %d invalid",
   207  				k.Sym().Name, sl, len(sl))
   208  		}
   209  		// values in the slice need to be var indices
   210  		for i, v := range sl {
   211  			if v < 0 || v > len(mls.vars)-1 {
   212  				return fmt.Errorf("k=%q v=+%v slpos %d vslot %d out of range of m.v", k.Sym().Name, sl, i, v)
   213  			}
   214  		}
   215  	}
   216  	for k, sl := range mls.partition {
   217  		foundk := false
   218  		for i, v := range sl {
   219  			vv := mls.vars[v]
   220  			if i == 0 {
   221  				if !mls.IsLeader(vv) {
   222  					return fmt.Errorf("k=%s v=+%v slpos 0 vslot %d IsLeader(%q) is false should be true", k.Sym().Name, sl, v, vv.Sym().Name)
   223  				}
   224  			} else {
   225  				if !mls.Subsumed(vv) {
   226  					return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Subsumed(%q) is false should be true", k.Sym().Name, sl, i, v, vv.Sym().Name)
   227  				}
   228  				if mls.Leader(vv) != mls.vars[sl[0]] {
   229  					return fmt.Errorf("k=%s v=+%v slpos %d vslot %d Leader(%q) got %v want %v", k.Sym().Name, sl, i, v, vv.Sym().Name, mls.Leader(vv), mls.vars[sl[0]])
   230  				}
   231  			}
   232  			if vv == k {
   233  				foundk = true
   234  				if used[v] {
   235  					return fmt.Errorf("k=%s v=+%v val slice used violation at slpos %d vslot %d", k.Sym().Name, sl, i, v)
   236  				}
   237  				used[v] = true
   238  			}
   239  		}
   240  		if !foundk {
   241  			return fmt.Errorf("k=%s v=+%v slice value missing k", k.Sym().Name, sl)
   242  		}
   243  		vl := mls.vars[sl[0]]
   244  		for _, v := range sl[1:] {
   245  			vv := mls.vars[v]
   246  			if vv.Type().Size() > vl.Type().Size() {
   247  				return fmt.Errorf("k=%s v=+%v follower %s size %d larger than leader %s size %d", k.Sym().Name, sl, vv.Sym().Name, vv.Type().Size(), vl.Sym().Name, vl.Type().Size())
   248  			}
   249  			if vv.Type().HasPointers() && !vl.Type().HasPointers() {
   250  				return fmt.Errorf("k=%s v=+%v follower %s hasptr=true but leader %s hasptr=false", k.Sym().Name, sl, vv.Sym().Name, vl.Sym().Name)
   251  			}
   252  			if vv.Type().Alignment() > vl.Type().Alignment() {
   253  				return fmt.Errorf("k=%s v=+%v follower %s align %d greater than leader %s align %d", k.Sym().Name, sl, vv.Sym().Name, vv.Type().Alignment(), vl.Sym().Name, vl.Type().Alignment())
   254  			}
   255  		}
   256  	}
   257  	for i := range used {
   258  		if !used[i] {
   259  			return fmt.Errorf("pos %d var %q unused", i, mls.vars[i])
   260  		}
   261  	}
   262  	return nil
   263  }
   264  
   265  func (mls *MergeLocalsState) String() string {
   266  	var leaders []*ir.Name
   267  	for n, sl := range mls.partition {
   268  		if n == mls.vars[sl[0]] {
   269  			leaders = append(leaders, n)
   270  		}
   271  	}
   272  	slices.SortFunc(leaders, func(a, b *ir.Name) int {
   273  		return strings.Compare(a.Sym().Name, b.Sym().Name)
   274  	})
   275  	var sb strings.Builder
   276  	for _, n := range leaders {
   277  		sb.WriteString(n.Sym().Name + ":")
   278  		sl := mls.partition[n]
   279  		for _, k := range sl[1:] {
   280  			n := mls.vars[k]
   281  			sb.WriteString(" " + n.Sym().Name)
   282  		}
   283  		sb.WriteString("\n")
   284  	}
   285  	return sb.String()
   286  }
   287  
   288  // collectMergeCandidates visits all of the AUTO vars declared in
   289  // function fn and identifies a list of candidate variables for
   290  // merging / overlapping. On return the "cands" field of cs will be
   291  // filled in with our set of potentially overlappable candidate
   292  // variables, the "regions" field will hold regions/sequence of
   293  // compatible vars within the candidates list, "nameToSlot" field will
   294  // be populated, and the "indirectUE" field will be filled in with
   295  // information about indirect upwards-exposed uses in the func.
   296  func (cs *cstate) collectMergeCandidates() {
   297  	var cands []*ir.Name
   298  
   299  	// Collect up the available set of appropriate AUTOs in the
   300  	// function as a first step, and bail if we have fewer than
   301  	// two candidates.
   302  	for _, n := range cs.fn.Dcl {
   303  		if !n.Used() {
   304  			continue
   305  		}
   306  		if !ssa.IsMergeCandidate(n) {
   307  			continue
   308  		}
   309  		cands = append(cands, n)
   310  	}
   311  	if len(cands) < 2 {
   312  		return
   313  	}
   314  
   315  	// Sort by pointerness, size, and then name.
   316  	sort.SliceStable(cands, func(i, j int) bool {
   317  		return nameLess(cands[i], cands[j])
   318  	})
   319  
   320  	if cs.trace > 1 {
   321  		fmt.Fprintf(os.Stderr, "=-= raw cand list for func %v:\n", cs.fn)
   322  		for i := range cands {
   323  			dumpCand(cands[i], i)
   324  		}
   325  	}
   326  
   327  	// Now generate an initial pruned candidate list and regions list.
   328  	// This may be empty if we don't have enough compatible candidates.
   329  	initial, _ := cs.genRegions(cands)
   330  	if len(initial) < 2 {
   331  		return
   332  	}
   333  
   334  	// Set up for hash bisection if enabled.
   335  	cs.setupHashBisection(initial)
   336  
   337  	// Create and populate an indirect use table that we'll use
   338  	// during interval construction. As part of this process we may
   339  	// wind up tossing out additional candidates, so check to make
   340  	// sure we still have something to work with.
   341  	cs.cands, cs.regions = cs.populateIndirectUseTable(initial)
   342  	if len(cs.cands) < 2 {
   343  		return
   344  	}
   345  
   346  	// At this point we have a final pruned set of candidates and a
   347  	// corresponding set of regions for the candidates. Build a
   348  	// name-to-slot map for the candidates.
   349  	cs.nameToSlot = make(map[*ir.Name]int32)
   350  	for i, n := range cs.cands {
   351  		cs.nameToSlot[n] = int32(i)
   352  	}
   353  
   354  	if cs.trace > 1 {
   355  		fmt.Fprintf(os.Stderr, "=-= pruned candidate list for fn %v:\n", cs.fn)
   356  		for i := range cs.cands {
   357  			dumpCand(cs.cands[i], i)
   358  		}
   359  	}
   360  }
   361  
   362  // genRegions generates a set of regions within cands corresponding
   363  // to potentially overlappable/mergeable variables.
   364  func (cs *cstate) genRegions(cands []*ir.Name) ([]*ir.Name, []candRegion) {
   365  	var pruned []*ir.Name
   366  	var regions []candRegion
   367  	st := 0
   368  	for {
   369  		en := nextRegion(cands, st)
   370  		if en == -1 {
   371  			break
   372  		}
   373  		if st == en {
   374  			// region has just one element, we can skip it
   375  			st++
   376  			continue
   377  		}
   378  		pst := len(pruned)
   379  		pen := pst + (en - st)
   380  		if cs.trace > 1 {
   381  			fmt.Fprintf(os.Stderr, "=-= addregion st=%d en=%d: add part %d -> %d\n", st, en, pst, pen)
   382  		}
   383  
   384  		// non-empty region, add to pruned
   385  		pruned = append(pruned, cands[st:en+1]...)
   386  		regions = append(regions, candRegion{st: pst, en: pen})
   387  		st = en + 1
   388  	}
   389  	if len(pruned) < 2 {
   390  		return nil, nil
   391  	}
   392  	return pruned, regions
   393  }
   394  
   395  func (cs *cstate) dumpFunc() {
   396  	fmt.Fprintf(os.Stderr, "=-= mergelocalsdumpfunc %v:\n", cs.fn)
   397  	ii := 0
   398  	for k, b := range cs.f.Blocks {
   399  		fmt.Fprintf(os.Stderr, "b%d:\n", k)
   400  		for _, v := range b.Values {
   401  			pos := base.Ctxt.PosTable.Pos(v.Pos)
   402  			fmt.Fprintf(os.Stderr, "=-= %d L%d|C%d %s\n", ii, pos.RelLine(), pos.RelCol(), v.LongString())
   403  			ii++
   404  		}
   405  	}
   406  }
   407  
   408  func (cs *cstate) dumpFuncIfSelected() {
   409  	if base.Debug.MergeLocalsDumpFunc == "" {
   410  		return
   411  	}
   412  	if !strings.HasSuffix(fmt.Sprintf("%v", cs.fn),
   413  		base.Debug.MergeLocalsDumpFunc) {
   414  		return
   415  	}
   416  	cs.dumpFunc()
   417  }
   418  
   419  // setupHashBisection checks to see if any of the candidate
   420  // variables have been de-selected by our hash debug. Here
   421  // we also implement the -d=mergelocalshtrace flag, which turns
   422  // on debug tracing only if we have at least two candidates
   423  // selected by the hash debug for this function.
   424  func (cs *cstate) setupHashBisection(cands []*ir.Name) {
   425  	if base.Debug.MergeLocalsHash == "" {
   426  		return
   427  	}
   428  	deselected := make(map[*ir.Name]bool)
   429  	selCount := 0
   430  	for _, cand := range cands {
   431  		if !base.MergeLocalsHash.MatchPosWithInfo(cand.Pos(), "mergelocals", nil) {
   432  			deselected[cand] = true
   433  		} else {
   434  			deselected[cand] = false
   435  			selCount++
   436  		}
   437  	}
   438  	if selCount < len(cands) {
   439  		cs.hashDeselected = deselected
   440  	}
   441  	if base.Debug.MergeLocalsHTrace != 0 && selCount >= 2 {
   442  		cs.trace = base.Debug.MergeLocalsHTrace
   443  	}
   444  }
   445  
   446  // populateIndirectUseTable creates and populates the "indirectUE" table
   447  // within cs by doing some additional analysis of how the vars in
   448  // cands are accessed in the function.
   449  //
   450  // It is possible to have situations where a given ir.Name is
   451  // non-address-taken at the source level, but whose address is
   452  // materialized in order to accommodate the needs of
   453  // architecture-dependent operations or one sort or another (examples
   454  // include things like LoweredZero/DuffZero, etc). The issue here is
   455  // that the SymAddr op will show up as touching a variable of
   456  // interest, but the subsequent memory op will not. This is generally
   457  // not an issue for computing whether something is live across a call,
   458  // but it is problematic for collecting the more fine-grained live
   459  // interval info that drives stack slot merging.
   460  //
   461  // To handle this problem, make a forward pass over each basic block
   462  // looking for instructions of the form vK := SymAddr(N) where N is a
   463  // raw candidate. Create an entry in a map at that point from vK to
   464  // its use count. Continue the walk, looking for uses of vK: when we
   465  // see one, record it in a side table as an upwards exposed use of N.
   466  // Each time we see a use, decrement the use count in the map, and if
   467  // we hit zero, remove the map entry. If we hit the end of the basic
   468  // block and we still have map entries, then evict the name in
   469  // question from the candidate set.
   470  func (cs *cstate) populateIndirectUseTable(cands []*ir.Name) ([]*ir.Name, []candRegion) {
   471  
   472  	// main indirect UE table, this is what we're producing in this func
   473  	indirectUE := make(map[ssa.ID][]*ir.Name)
   474  
   475  	// this map holds the current set of candidates; the set may
   476  	// shrink if we have to evict any candidates.
   477  	rawcands := make(map[*ir.Name]struct{})
   478  
   479  	// maps ssa value V to the ir.Name it is taking the addr of,
   480  	// plus a count of the uses we've seen of V during a block walk.
   481  	pendingUses := make(map[ssa.ID]nameCount)
   482  
   483  	// A temporary indirect UE tab just for the current block
   484  	// being processed; used to help with evictions.
   485  	blockIndirectUE := make(map[ssa.ID][]*ir.Name)
   486  
   487  	// temporary map used to record evictions in a given block.
   488  	evicted := make(map[*ir.Name]bool)
   489  	for _, n := range cands {
   490  		rawcands[n] = struct{}{}
   491  	}
   492  	for k := 0; k < len(cs.f.Blocks); k++ {
   493  		clear(pendingUses)
   494  		clear(blockIndirectUE)
   495  		b := cs.f.Blocks[k]
   496  		for _, v := range b.Values {
   497  			if n, e := affectedVar(v); n != nil {
   498  				if _, ok := rawcands[n]; ok {
   499  					if e&ssa.SymAddr != 0 && v.Uses != 0 {
   500  						// we're taking the address of candidate var n
   501  						if _, ok := pendingUses[v.ID]; ok {
   502  							// should never happen
   503  							base.FatalfAt(v.Pos, "internal error: apparent multiple defs for SSA value %d", v.ID)
   504  						}
   505  						// Stash an entry in pendingUses recording
   506  						// that we took the address of "n" via this
   507  						// val.
   508  						pendingUses[v.ID] = nameCount{n: n, count: v.Uses}
   509  						if cs.trace > 2 {
   510  							fmt.Fprintf(os.Stderr, "=-= SymAddr(%s) on %s\n",
   511  								n.Sym().Name, v.LongString())
   512  						}
   513  					}
   514  				}
   515  			}
   516  			for _, arg := range v.Args {
   517  				if nc, ok := pendingUses[arg.ID]; ok {
   518  					// We found a use of some value that took the
   519  					// address of nc.n. Record this inst as a
   520  					// potential indirect use.
   521  					if cs.trace > 2 {
   522  						fmt.Fprintf(os.Stderr, "=-= add indirectUE(%s) count=%d on %s\n", nc.n.Sym().Name, nc.count, v.LongString())
   523  					}
   524  					blockIndirectUE[v.ID] = append(blockIndirectUE[v.ID], nc.n)
   525  					nc.count--
   526  					if nc.count == 0 {
   527  						// That was the last use of the value. Clean
   528  						// up the entry in pendingUses.
   529  						if cs.trace > 2 {
   530  							fmt.Fprintf(os.Stderr, "=-= last use of v%d\n",
   531  								arg.ID)
   532  						}
   533  						delete(pendingUses, arg.ID)
   534  					} else {
   535  						// Not the last use; record the decremented
   536  						// use count and move on.
   537  						pendingUses[arg.ID] = nc
   538  					}
   539  				}
   540  			}
   541  		}
   542  
   543  		// We've reached the end of this basic block: if we have any
   544  		// leftover entries in pendingUses, then evict the
   545  		// corresponding names from the candidate set. The idea here
   546  		// is that if we materialized the address of some local and
   547  		// that value is flowing out of the block off somewhere else,
   548  		// we're going to treat that local as truly address-taken and
   549  		// not have it be a merge candidate.
   550  		clear(evicted)
   551  		if len(pendingUses) != 0 {
   552  			for id, nc := range pendingUses {
   553  				if cs.trace > 2 {
   554  					fmt.Fprintf(os.Stderr, "=-= evicting %q due to pendingUse %d count %d\n", nc.n.Sym().Name, id, nc.count)
   555  				}
   556  				delete(rawcands, nc.n)
   557  				evicted[nc.n] = true
   558  			}
   559  		}
   560  		// Copy entries from blockIndirectUE into final indirectUE. Skip
   561  		// anything that we evicted in the loop above.
   562  		for id, sl := range blockIndirectUE {
   563  			for _, n := range sl {
   564  				if evicted[n] {
   565  					continue
   566  				}
   567  				indirectUE[id] = append(indirectUE[id], n)
   568  				if cs.trace > 2 {
   569  					fmt.Fprintf(os.Stderr, "=-= add final indUE v%d name %s\n", id, n.Sym().Name)
   570  				}
   571  			}
   572  		}
   573  	}
   574  	if len(rawcands) < 2 {
   575  		return nil, nil
   576  	}
   577  	cs.indirectUE = indirectUE
   578  	if cs.trace > 2 {
   579  		fmt.Fprintf(os.Stderr, "=-= iuetab:\n")
   580  		ids := make([]ssa.ID, 0, len(indirectUE))
   581  		for k := range indirectUE {
   582  			ids = append(ids, k)
   583  		}
   584  		slices.Sort(ids)
   585  		for _, id := range ids {
   586  			fmt.Fprintf(os.Stderr, "  v%d:", id)
   587  			for _, n := range indirectUE[id] {
   588  				fmt.Fprintf(os.Stderr, " %s", n.Sym().Name)
   589  			}
   590  			fmt.Fprintf(os.Stderr, "\n")
   591  		}
   592  	}
   593  
   594  	pruned := cands[:0]
   595  	for k := range rawcands {
   596  		pruned = append(pruned, k)
   597  	}
   598  	sort.Slice(pruned, func(i, j int) bool {
   599  		return nameLess(pruned[i], pruned[j])
   600  	})
   601  	var regions []candRegion
   602  	pruned, regions = cs.genRegions(pruned)
   603  	if len(pruned) < 2 {
   604  		return nil, nil
   605  	}
   606  	return pruned, regions
   607  }
   608  
   609  type nameCount struct {
   610  	n     *ir.Name
   611  	count int32
   612  }
   613  
   614  // nameLess compares ci with cj to see if ci should be less than cj in
   615  // a relative ordering of candidate variables. This is used to sort
   616  // vars by pointerness (variables with pointers first), then in order
   617  // of decreasing alignment, then by decreasing size. We are assuming a
   618  // merging algorithm that merges later entries in the list into
   619  // earlier entries. An example ordered candidate list produced by
   620  // nameLess:
   621  //
   622  //	idx   name    type       align    size
   623  //	0:    abc     [10]*int   8        80
   624  //	1:    xyz     [9]*int    8        72
   625  //	2:    qrs     [2]*int    8        16
   626  //	3:    tuv     [9]int     8        72
   627  //	4:    wxy     [9]int32   4        36
   628  //	5:    jkl     [8]int32   4        32
   629  func nameLess(ci, cj *ir.Name) bool {
   630  	if ci.Type().HasPointers() != cj.Type().HasPointers() {
   631  		return ci.Type().HasPointers()
   632  	}
   633  	if ci.Type().Alignment() != cj.Type().Alignment() {
   634  		return cj.Type().Alignment() < ci.Type().Alignment()
   635  	}
   636  	if ci.Type().Size() != cj.Type().Size() {
   637  		return cj.Type().Size() < ci.Type().Size()
   638  	}
   639  	if ci.Sym().Name != cj.Sym().Name {
   640  		return ci.Sym().Name < cj.Sym().Name
   641  	}
   642  	return fmt.Sprintf("%v", ci.Pos()) < fmt.Sprintf("%v", cj.Pos())
   643  }
   644  
   645  // nextRegion starts at location idx and walks forward in the cands
   646  // slice looking for variables that are "compatible" (potentially
   647  // overlappable, in the sense that they could potentially share the
   648  // stack slot of cands[idx]); it returns the end of the new region
   649  // (range of compatible variables starting at idx).
   650  func nextRegion(cands []*ir.Name, idx int) int {
   651  	n := len(cands)
   652  	if idx >= n {
   653  		return -1
   654  	}
   655  	c0 := cands[idx]
   656  	szprev := c0.Type().Size()
   657  	alnprev := c0.Type().Alignment()
   658  	for j := idx + 1; j < n; j++ {
   659  		cj := cands[j]
   660  		szj := cj.Type().Size()
   661  		if szj > szprev {
   662  			return j - 1
   663  		}
   664  		alnj := cj.Type().Alignment()
   665  		if alnj > alnprev {
   666  			return j - 1
   667  		}
   668  		szprev = szj
   669  		alnprev = alnj
   670  	}
   671  	return n - 1
   672  }
   673  
   674  // mergeVisitRegion tries to perform overlapping of variables with a
   675  // given subrange of cands described by st and en (indices into our
   676  // candidate var list), where the variables within this range have
   677  // already been determined to be compatible with respect to type,
   678  // size, etc. Overlapping is done in a greedy fashion: we select the
   679  // first element in the st->en range, then walk the rest of the
   680  // elements adding in vars whose lifetimes don't overlap with the
   681  // first element, then repeat the process until we run out of work.
   682  // Ordering of the candidates within the region [st,en] is important;
   683  // within the list the assumption is that if we overlap two variables
   684  // X and Y where X precedes Y in the list, we need to make X the
   685  // "leader" (keep X's slot and set Y's frame offset to X's) as opposed
   686  // to the other way around, since it's possible that Y is smaller in
   687  // size than X.
   688  func (cs *cstate) mergeVisitRegion(mls *MergeLocalsState, st, en int) {
   689  	if cs.trace > 1 {
   690  		fmt.Fprintf(os.Stderr, "=-= mergeVisitRegion(st=%d, en=%d)\n", st, en)
   691  	}
   692  	n := en - st + 1
   693  	used := bitvec.New(int32(n))
   694  
   695  	nxt := func(slot int) int {
   696  		for c := slot - st; c < n; c++ {
   697  			if used.Get(int32(c)) {
   698  				continue
   699  			}
   700  			return c + st
   701  		}
   702  		return -1
   703  	}
   704  
   705  	navail := n
   706  	cands := cs.cands
   707  	ivs := cs.ivs
   708  	if cs.trace > 1 {
   709  		fmt.Fprintf(os.Stderr, "  =-= navail = %d\n", navail)
   710  	}
   711  	for navail >= 2 {
   712  		leader := nxt(st)
   713  		used.Set(int32(leader - st))
   714  		navail--
   715  
   716  		if cs.trace > 1 {
   717  			fmt.Fprintf(os.Stderr, "  =-= begin leader %d used=%s\n", leader,
   718  				used.String())
   719  		}
   720  		elems := []int{leader}
   721  		lints := ivs[leader]
   722  
   723  		for succ := nxt(leader + 1); succ != -1; succ = nxt(succ + 1) {
   724  
   725  			// Skip if de-selected by merge locals hash.
   726  			if cs.hashDeselected != nil && cs.hashDeselected[cands[succ]] {
   727  				continue
   728  			}
   729  			// Skip if already used.
   730  			if used.Get(int32(succ - st)) {
   731  				continue
   732  			}
   733  			if cs.trace > 1 {
   734  				fmt.Fprintf(os.Stderr, "  =-= overlap of %d[%v] {%s} with %d[%v] {%s} is: %v\n", leader, cands[leader], lints.String(), succ, cands[succ], ivs[succ].String(), lints.Overlaps(ivs[succ]))
   735  			}
   736  
   737  			// Can we overlap leader with this var?
   738  			if lints.Overlaps(ivs[succ]) {
   739  				continue
   740  			} else {
   741  				// Add to overlap set.
   742  				elems = append(elems, succ)
   743  				lints = lints.Merge(ivs[succ])
   744  			}
   745  		}
   746  		if len(elems) > 1 {
   747  			// We found some things to overlap with leader. Add the
   748  			// candidate elements to "vars" and update "partition".
   749  			off := len(mls.vars)
   750  			sl := make([]int, len(elems))
   751  			for i, candslot := range elems {
   752  				sl[i] = off + i
   753  				mls.vars = append(mls.vars, cands[candslot])
   754  				mls.partition[cands[candslot]] = sl
   755  			}
   756  			navail -= (len(elems) - 1)
   757  			for i := range elems {
   758  				used.Set(int32(elems[i] - st))
   759  			}
   760  			if cs.trace > 1 {
   761  				fmt.Fprintf(os.Stderr, "=-= overlapping %+v:\n", sl)
   762  				for i := range sl {
   763  					dumpCand(mls.vars[sl[i]], sl[i])
   764  				}
   765  				for i, v := range elems {
   766  					fmt.Fprintf(os.Stderr, "=-= %d: sl=%d %s\n", i, v, ivs[v])
   767  				}
   768  			}
   769  		}
   770  	}
   771  }
   772  
   773  // performMerging carries out variable merging within each of the
   774  // candidate ranges in regions, returning a state object
   775  // that describes the variable overlaps.
   776  func (cs *cstate) performMerging() *MergeLocalsState {
   777  	cands := cs.cands
   778  
   779  	mls := &MergeLocalsState{
   780  		partition: make(map[*ir.Name][]int),
   781  	}
   782  
   783  	// Dump state before attempting overlap.
   784  	if cs.trace > 1 {
   785  		fmt.Fprintf(os.Stderr, "=-= cands live before overlap:\n")
   786  		for i := range cands {
   787  			c := cands[i]
   788  			fmt.Fprintf(os.Stderr, "%d: %v sz=%d ivs=%s\n",
   789  				i, c.Sym().Name, c.Type().Size(), cs.ivs[i].String())
   790  		}
   791  		fmt.Fprintf(os.Stderr, "=-= regions (%d): ", len(cs.regions))
   792  		for _, cr := range cs.regions {
   793  			fmt.Fprintf(os.Stderr, " [%d,%d]", cr.st, cr.en)
   794  		}
   795  		fmt.Fprintf(os.Stderr, "\n")
   796  	}
   797  
   798  	// Apply a greedy merge/overlap strategy within each region
   799  	// of compatible variables.
   800  	for _, cr := range cs.regions {
   801  		cs.mergeVisitRegion(mls, cr.st, cr.en)
   802  	}
   803  	if len(mls.vars) == 0 {
   804  		return nil
   805  	}
   806  	return mls
   807  }
   808  
   809  // computeIntervals performs a backwards sweep over the instructions
   810  // of the function we're compiling, building up an Intervals object
   811  // for each candidate variable by looking for upwards exposed uses
   812  // and kills.
   813  func (cs *cstate) computeIntervals() {
   814  	lv := cs.lv
   815  	ibuilders := make([]IntervalsBuilder, len(cs.cands))
   816  	nvars := int32(len(lv.vars))
   817  	liveout := bitvec.New(nvars)
   818  
   819  	cs.dumpFuncIfSelected()
   820  
   821  	// Count instructions.
   822  	ninstr := 0
   823  	for _, b := range lv.f.Blocks {
   824  		ninstr += len(b.Values)
   825  	}
   826  	// current instruction index during backwards walk
   827  	iidx := ninstr - 1
   828  
   829  	// Make a backwards pass over all blocks
   830  	for k := len(lv.f.Blocks) - 1; k >= 0; k-- {
   831  		b := lv.f.Blocks[k]
   832  		be := lv.blockEffects(b)
   833  
   834  		if cs.trace > 2 {
   835  			fmt.Fprintf(os.Stderr, "=-= liveout from tail of b%d: ", k)
   836  			for j := range lv.vars {
   837  				if be.liveout.Get(int32(j)) {
   838  					fmt.Fprintf(os.Stderr, " %q", lv.vars[j].Sym().Name)
   839  				}
   840  			}
   841  			fmt.Fprintf(os.Stderr, "\n")
   842  		}
   843  
   844  		// Take into account effects taking place at end of this basic
   845  		// block by comparing our current live set with liveout for
   846  		// the block. If a given var was not live before and is now
   847  		// becoming live we need to mark this transition with a
   848  		// builder "Live" call; similarly if a var was live before and
   849  		// is now no longer live, we need a "Kill" call.
   850  		for j := range lv.vars {
   851  			isLive := liveout.Get(int32(j))
   852  			blockLiveOut := be.liveout.Get(int32(j))
   853  			if isLive {
   854  				if !blockLiveOut {
   855  					if cs.trace > 2 {
   856  						fmt.Fprintf(os.Stderr, "=+= at instr %d block boundary kill of %v\n", iidx, lv.vars[j])
   857  					}
   858  					ibuilders[j].Kill(iidx)
   859  				}
   860  			} else if blockLiveOut {
   861  				if cs.trace > 2 {
   862  					fmt.Fprintf(os.Stderr, "=+= at block-end instr %d %v becomes live\n",
   863  						iidx, lv.vars[j])
   864  				}
   865  				ibuilders[j].Live(iidx)
   866  			}
   867  		}
   868  
   869  		// Set our working "currently live" set to the previously
   870  		// computed live out set for the block.
   871  		liveout.Copy(be.liveout)
   872  
   873  		// Now walk backwards through this block.
   874  		for i := len(b.Values) - 1; i >= 0; i-- {
   875  			v := b.Values[i]
   876  
   877  			if cs.trace > 2 {
   878  				fmt.Fprintf(os.Stderr, "=-= b%d instr %d: %s\n", k, iidx, v.LongString())
   879  			}
   880  
   881  			// Update liveness based on what we see happening in this
   882  			// instruction.
   883  			pos, e := lv.valueEffects(v)
   884  			becomeslive := e&uevar != 0
   885  			iskilled := e&varkill != 0
   886  			if becomeslive && iskilled {
   887  				// we do not ever expect to see both a kill and an
   888  				// upwards exposed use given our size constraints.
   889  				panic("should never happen")
   890  			}
   891  			if iskilled && liveout.Get(pos) {
   892  				ibuilders[pos].Kill(iidx)
   893  				liveout.Unset(pos)
   894  				if cs.trace > 2 {
   895  					fmt.Fprintf(os.Stderr, "=+= at instr %d kill of %v\n",
   896  						iidx, lv.vars[pos])
   897  				}
   898  			} else if becomeslive && !liveout.Get(pos) {
   899  				ibuilders[pos].Live(iidx)
   900  				liveout.Set(pos)
   901  				if cs.trace > 2 {
   902  					fmt.Fprintf(os.Stderr, "=+= at instr %d upwards-exposed use of %v\n",
   903  						iidx, lv.vars[pos])
   904  				}
   905  			}
   906  
   907  			if cs.indirectUE != nil {
   908  				// Now handle "indirect" upwards-exposed uses.
   909  				ues := cs.indirectUE[v.ID]
   910  				for _, n := range ues {
   911  					if pos, ok := lv.idx[n]; ok {
   912  						if !liveout.Get(pos) {
   913  							ibuilders[pos].Live(iidx)
   914  							liveout.Set(pos)
   915  							if cs.trace > 2 {
   916  								fmt.Fprintf(os.Stderr, "=+= at instr %d v%d indirect upwards-exposed use of %v\n", iidx, v.ID, lv.vars[pos])
   917  							}
   918  						}
   919  					}
   920  				}
   921  			}
   922  			iidx--
   923  		}
   924  
   925  		// This check disabled for now due to the way scheduling works
   926  		// for ops that materialize values of local variables. For
   927  		// many architecture we have rewrite rules of this form:
   928  		//
   929  		// (LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVDaddr {sym} (SPanchored base mem))
   930  		// (LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (MOVDaddr {sym} base)
   931  		//
   932  		// which are designed to ensure that if you have a pointerful
   933  		// variable "abc" sequence
   934  		//
   935  		//    v30 = VarDef <mem> {abc} v21
   936  		//    v31 = LocalAddr <*SB> {abc} v2 v30
   937  		//    v32 = Zero <mem> {SB} [2056] v31 v30
   938  		//
   939  		// this will be lowered into
   940  		//
   941  		//    v30 = VarDef <mem> {sb} v21
   942  		//   v106 = SPanchored <uintptr> v2 v30
   943  		//    v31 = MOVDaddr <*SB> {sb} v106
   944  		//     v3 = DUFFZERO <mem> [2056] v31 v30
   945  		//
   946  		// Note the SPanchored: this ensures that the scheduler won't
   947  		// move the MOVDaddr earlier than the vardef. With a variable
   948  		// "xyz" that has no pointers, however, if we start with
   949  		//
   950  		//    v66 = VarDef <mem> {t2} v65
   951  		//    v67 = LocalAddr <*T> {t2} v2 v66
   952  		//    v68 = Zero <mem> {T} [2056] v67 v66
   953  		//
   954  		// we might lower to
   955  		//
   956  		//    v66 = VarDef <mem> {t2} v65
   957  		//    v29 = MOVDaddr <*T> {t2} [2032] v2
   958  		//    v43 = LoweredZero <mem> v67 v29 v66
   959  		//    v68 = Zero [2056] v2 v43
   960  		//
   961  		// where that MOVDaddr can float around arbitrarily, meaning
   962  		// that we may see an upwards-exposed use to it before the
   963  		// VarDef.
   964  		//
   965  		// One avenue to restoring the check below would be to change
   966  		// the rewrite rules to something like
   967  		//
   968  		// (LocalAddr <t> {sym} base mem) && (t.Elem().HasPointers() || isMergeCandidate(t) => (MOVDaddr {sym} (SPanchored base mem))
   969  		//
   970  		// however that change will have to be carefully evaluated,
   971  		// since it would constrain the scheduler for _all_ LocalAddr
   972  		// ops for potential merge candidates, even if we don't
   973  		// actually succeed in any overlaps. This will be revisitged in
   974  		// a later CL if possible.
   975  		//
   976  		const checkLiveOnEntry = false
   977  		if checkLiveOnEntry && b == lv.f.Entry {
   978  			for j, v := range lv.vars {
   979  				if liveout.Get(int32(j)) {
   980  					lv.f.Fatalf("%v %L recorded as live on entry",
   981  						lv.fn.Nname, v)
   982  				}
   983  			}
   984  		}
   985  	}
   986  	if iidx != -1 {
   987  		panic("iidx underflow")
   988  	}
   989  
   990  	// Finish intervals construction.
   991  	ivs := make([]Intervals, len(cs.cands))
   992  	for i := range cs.cands {
   993  		var err error
   994  		ivs[i], err = ibuilders[i].Finish()
   995  		if err != nil {
   996  			cs.dumpFunc()
   997  			base.FatalfAt(cs.cands[i].Pos(), "interval construct error for var %q in func %q (%d instrs): %v", cs.cands[i].Sym().Name, ir.FuncName(cs.fn), ninstr, err)
   998  		}
   999  	}
  1000  	cs.ivs = ivs
  1001  }
  1002  
  1003  func fmtFullPos(p src.XPos) string {
  1004  	var sb strings.Builder
  1005  	sep := ""
  1006  	base.Ctxt.AllPos(p, func(pos src.Pos) {
  1007  		sb.WriteString(sep)
  1008  		sep = "|"
  1009  		file := filepath.Base(pos.Filename())
  1010  		fmt.Fprintf(&sb, "%s:%d:%d", file, pos.Line(), pos.Col())
  1011  	})
  1012  	return sb.String()
  1013  }
  1014  
  1015  func dumpCand(c *ir.Name, i int) {
  1016  	fmt.Fprintf(os.Stderr, " %d: %s %q sz=%d hp=%v align=%d t=%v\n",
  1017  		i, fmtFullPos(c.Pos()), c.Sym().Name, c.Type().Size(),
  1018  		c.Type().HasPointers(), c.Type().Alignment(), c.Type())
  1019  }
  1020  
  1021  // for unit testing only.
  1022  func MakeMergeLocalsState(partition map[*ir.Name][]int, vars []*ir.Name) (*MergeLocalsState, error) {
  1023  	mls := &MergeLocalsState{partition: partition, vars: vars}
  1024  	if err := mls.check(); err != nil {
  1025  		return nil, err
  1026  	}
  1027  	return mls, nil
  1028  }
  1029  

View as plain text