Source file src/cmd/compile/internal/inline/inlheur/scoring.go

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package inlheur
     6  
     7  import (
     8  	"cmd/compile/internal/base"
     9  	"cmd/compile/internal/ir"
    10  	"cmd/compile/internal/pgoir"
    11  	"cmd/compile/internal/types"
    12  	"fmt"
    13  	"os"
    14  	"sort"
    15  	"strconv"
    16  	"strings"
    17  )
    18  
    19  // These constants enumerate the set of possible ways/scenarios
    20  // in which we'll adjust the score of a given callsite.
    21  type scoreAdjustTyp uint
    22  
    23  // These constants capture the various ways in which the inliner's
    24  // scoring phase can adjust a callsite score based on heuristics. They
    25  // fall broadly into three categories:
    26  //
    27  // 1) adjustments based solely on the callsite context (ex: call
    28  // appears on panic path)
    29  //
    30  // 2) adjustments that take into account specific interesting values
    31  // passed at a call site (ex: passing a constant that could result in
    32  // cprop/deadcode in the caller)
    33  //
    34  // 3) adjustments that take into account values returned from the call
    35  // at a callsite (ex: call always returns the same inlinable function,
    36  // and return value flows unmodified into an indirect call)
    37  //
    38  // For categories 2 and 3 above, each adjustment can have either a
    39  // "must" version and a "may" version (but not both). Here the idea is
    40  // that in the "must" version the value flow is unconditional: if the
    41  // callsite executes, then the condition we're interested in (ex:
    42  // param feeding call) is guaranteed to happen. For the "may" version,
    43  // there may be control flow that could cause the benefit to be
    44  // bypassed.
    45  const (
    46  	// Category 1 adjustments (see above)
    47  	panicPathAdj scoreAdjustTyp = (1 << iota)
    48  	initFuncAdj
    49  	inLoopAdj
    50  
    51  	// Category 2 adjustments (see above).
    52  	passConstToIfAdj
    53  	passConstToNestedIfAdj
    54  	passConcreteToItfCallAdj
    55  	passConcreteToNestedItfCallAdj
    56  	passFuncToIndCallAdj
    57  	passFuncToNestedIndCallAdj
    58  	passInlinableFuncToIndCallAdj
    59  	passInlinableFuncToNestedIndCallAdj
    60  
    61  	// Category 3 adjustments.
    62  	returnFeedsConstToIfAdj
    63  	returnFeedsFuncToIndCallAdj
    64  	returnFeedsInlinableFuncToIndCallAdj
    65  	returnFeedsConcreteToInterfaceCallAdj
    66  
    67  	sentinelScoreAdj // sentinel; not a real adjustment
    68  )
    69  
    70  // This table records the specific values we use to adjust call
    71  // site scores in a given scenario.
    72  // NOTE: these numbers are chosen very arbitrarily; ideally
    73  // we will go through some sort of turning process to decide
    74  // what value for each one produces the best performance.
    75  
    76  var adjValues = map[scoreAdjustTyp]int{
    77  	panicPathAdj:                          40,
    78  	initFuncAdj:                           20,
    79  	inLoopAdj:                             -5,
    80  	passConstToIfAdj:                      -20,
    81  	passConstToNestedIfAdj:                -15,
    82  	passConcreteToItfCallAdj:              -30,
    83  	passConcreteToNestedItfCallAdj:        -25,
    84  	passFuncToIndCallAdj:                  -25,
    85  	passFuncToNestedIndCallAdj:            -20,
    86  	passInlinableFuncToIndCallAdj:         -45,
    87  	passInlinableFuncToNestedIndCallAdj:   -40,
    88  	returnFeedsConstToIfAdj:               -15,
    89  	returnFeedsFuncToIndCallAdj:           -25,
    90  	returnFeedsInlinableFuncToIndCallAdj:  -40,
    91  	returnFeedsConcreteToInterfaceCallAdj: -25,
    92  }
    93  
    94  // SetupScoreAdjustments interprets the value of the -d=inlscoreadj
    95  // debugging option, if set. The value of this flag is expected to be
    96  // a series of "/"-separated clauses of the form adj1:value1. Example:
    97  // -d=inlscoreadj=inLoopAdj=0/passConstToIfAdj=-99
    98  func SetupScoreAdjustments() {
    99  	if base.Debug.InlScoreAdj == "" {
   100  		return
   101  	}
   102  	if err := parseScoreAdj(base.Debug.InlScoreAdj); err != nil {
   103  		base.Fatalf("malformed -d=inlscoreadj argument %q: %v",
   104  			base.Debug.InlScoreAdj, err)
   105  	}
   106  }
   107  
   108  func adjStringToVal(s string) (scoreAdjustTyp, bool) {
   109  	for adj := scoreAdjustTyp(1); adj < sentinelScoreAdj; adj <<= 1 {
   110  		if adj.String() == s {
   111  			return adj, true
   112  		}
   113  	}
   114  	return 0, false
   115  }
   116  
   117  func parseScoreAdj(val string) error {
   118  	clauses := strings.Split(val, "/")
   119  	if len(clauses) == 0 {
   120  		return fmt.Errorf("no clauses")
   121  	}
   122  	for _, clause := range clauses {
   123  		elems := strings.Split(clause, ":")
   124  		if len(elems) < 2 {
   125  			return fmt.Errorf("clause %q: expected colon", clause)
   126  		}
   127  		if len(elems) != 2 {
   128  			return fmt.Errorf("clause %q has %d elements, wanted 2", clause,
   129  				len(elems))
   130  		}
   131  		adj, ok := adjStringToVal(elems[0])
   132  		if !ok {
   133  			return fmt.Errorf("clause %q: unknown adjustment", clause)
   134  		}
   135  		val, err := strconv.Atoi(elems[1])
   136  		if err != nil {
   137  			return fmt.Errorf("clause %q: malformed value: %v", clause, err)
   138  		}
   139  		adjValues[adj] = val
   140  	}
   141  	return nil
   142  }
   143  
   144  func adjValue(x scoreAdjustTyp) int {
   145  	if val, ok := adjValues[x]; ok {
   146  		return val
   147  	} else {
   148  		panic("internal error unregistered adjustment type")
   149  	}
   150  }
   151  
   152  var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{
   153  	{may: passConstToNestedIfAdj, must: passConstToIfAdj},
   154  	{may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj},
   155  	{may: passFuncToNestedIndCallAdj, must: passFuncToNestedIndCallAdj},
   156  	{may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj},
   157  }
   158  
   159  func isMay(x scoreAdjustTyp) bool {
   160  	return mayToMust(x) != 0
   161  }
   162  
   163  func isMust(x scoreAdjustTyp) bool {
   164  	return mustToMay(x) != 0
   165  }
   166  
   167  func mayToMust(x scoreAdjustTyp) scoreAdjustTyp {
   168  	for _, v := range mayMustAdj {
   169  		if x == v.may {
   170  			return v.must
   171  		}
   172  	}
   173  	return 0
   174  }
   175  
   176  func mustToMay(x scoreAdjustTyp) scoreAdjustTyp {
   177  	for _, v := range mayMustAdj {
   178  		if x == v.must {
   179  			return v.may
   180  		}
   181  	}
   182  	return 0
   183  }
   184  
   185  // computeCallSiteScore takes a given call site whose ir node is
   186  // 'call' and callee function is 'callee' and with previously computed
   187  // call site properties 'csflags', then computes a score for the
   188  // callsite that combines the size cost of the callee with heuristics
   189  // based on previously computed argument and function properties,
   190  // then stores the score and the adjustment mask in the appropriate
   191  // fields in 'cs'
   192  func (cs *CallSite) computeCallSiteScore(csa *callSiteAnalyzer, calleeProps *FuncProps) {
   193  	callee := cs.Callee
   194  	csflags := cs.Flags
   195  	call := cs.Call
   196  
   197  	// Start with the size-based score for the callee.
   198  	score := int(callee.Inl.Cost)
   199  	var tmask scoreAdjustTyp
   200  
   201  	if debugTrace&debugTraceScoring != 0 {
   202  		fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n",
   203  			callee.Sym().Name, fmtFullPos(call.Pos()), score)
   204  	}
   205  
   206  	// First some score adjustments to discourage inlining in selected cases.
   207  	if csflags&CallSiteOnPanicPath != 0 {
   208  		score, tmask = adjustScore(panicPathAdj, score, tmask)
   209  	}
   210  	if csflags&CallSiteInInitFunc != 0 {
   211  		score, tmask = adjustScore(initFuncAdj, score, tmask)
   212  	}
   213  
   214  	// Then adjustments to encourage inlining in selected cases.
   215  	if csflags&CallSiteInLoop != 0 {
   216  		score, tmask = adjustScore(inLoopAdj, score, tmask)
   217  	}
   218  
   219  	// Stop here if no callee props.
   220  	if calleeProps == nil {
   221  		cs.Score, cs.ScoreMask = score, tmask
   222  		return
   223  	}
   224  
   225  	// Walk through the actual expressions being passed at the call.
   226  	calleeRecvrParms := callee.Type().RecvParams()
   227  	for idx := range call.Args {
   228  		// ignore blanks
   229  		if calleeRecvrParms[idx].Sym == nil ||
   230  			calleeRecvrParms[idx].Sym.IsBlank() {
   231  			continue
   232  		}
   233  		arg := call.Args[idx]
   234  		pflag := calleeProps.ParamFlags[idx]
   235  		if debugTrace&debugTraceScoring != 0 {
   236  			fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n",
   237  				idx, len(call.Args), arg, pflag.String())
   238  		}
   239  
   240  		if len(cs.ArgProps) == 0 {
   241  			continue
   242  		}
   243  		argProps := cs.ArgProps[idx]
   244  
   245  		if debugTrace&debugTraceScoring != 0 {
   246  			fmt.Fprintf(os.Stderr, "=-= arg %d props %s value %v\n",
   247  				idx, argProps.String(), arg)
   248  		}
   249  
   250  		if argProps&ActualExprConstant != 0 {
   251  			if pflag&ParamMayFeedIfOrSwitch != 0 {
   252  				score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask)
   253  			}
   254  			if pflag&ParamFeedsIfOrSwitch != 0 {
   255  				score, tmask = adjustScore(passConstToIfAdj, score, tmask)
   256  			}
   257  		}
   258  
   259  		if argProps&ActualExprIsConcreteConvIface != 0 {
   260  			// FIXME: ideally here it would be nice to make a
   261  			// distinction between the inlinable case and the
   262  			// non-inlinable case, but this is hard to do. Example:
   263  			//
   264  			//    type I interface { Tiny() int; Giant() }
   265  			//    type Conc struct { x int }
   266  			//    func (c *Conc) Tiny() int { return 42 }
   267  			//    func (c *Conc) Giant() { <huge amounts of code> }
   268  			//
   269  			//    func passConcToItf(c *Conc) {
   270  			//        makesItfMethodCall(c)
   271  			//    }
   272  			//
   273  			// In the code above, function properties will only tell
   274  			// us that 'makesItfMethodCall' invokes a method on its
   275  			// interface parameter, but we don't know whether it calls
   276  			// "Tiny" or "Giant". If we knew if called "Tiny", then in
   277  			// theory in addition to converting the interface call to
   278  			// a direct call, we could also inline (in which case
   279  			// we'd want to decrease the score even more).
   280  			//
   281  			// One thing we could do (not yet implemented) is iterate
   282  			// through all of the methods of "*Conc" that allow it to
   283  			// satisfy I, and if all are inlinable, then exploit that.
   284  			if pflag&ParamMayFeedInterfaceMethodCall != 0 {
   285  				score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask)
   286  			}
   287  			if pflag&ParamFeedsInterfaceMethodCall != 0 {
   288  				score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask)
   289  			}
   290  		}
   291  
   292  		if argProps&(ActualExprIsFunc|ActualExprIsInlinableFunc) != 0 {
   293  			mayadj := passFuncToNestedIndCallAdj
   294  			mustadj := passFuncToIndCallAdj
   295  			if argProps&ActualExprIsInlinableFunc != 0 {
   296  				mayadj = passInlinableFuncToNestedIndCallAdj
   297  				mustadj = passInlinableFuncToIndCallAdj
   298  			}
   299  			if pflag&ParamMayFeedIndirectCall != 0 {
   300  				score, tmask = adjustScore(mayadj, score, tmask)
   301  			}
   302  			if pflag&ParamFeedsIndirectCall != 0 {
   303  				score, tmask = adjustScore(mustadj, score, tmask)
   304  			}
   305  		}
   306  	}
   307  
   308  	cs.Score, cs.ScoreMask = score, tmask
   309  }
   310  
   311  func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) {
   312  
   313  	if isMust(typ) {
   314  		if mask&typ != 0 {
   315  			return score, mask
   316  		}
   317  		may := mustToMay(typ)
   318  		if mask&may != 0 {
   319  			// promote may to must, so undo may
   320  			score -= adjValue(may)
   321  			mask &^= may
   322  		}
   323  	} else if isMay(typ) {
   324  		must := mayToMust(typ)
   325  		if mask&(must|typ) != 0 {
   326  			return score, mask
   327  		}
   328  	}
   329  	if mask&typ == 0 {
   330  		if debugTrace&debugTraceScoring != 0 {
   331  			fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n",
   332  				adjValue(typ), typ.String())
   333  		}
   334  		score += adjValue(typ)
   335  		mask |= typ
   336  	}
   337  	return score, mask
   338  }
   339  
   340  var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp
   341  var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp
   342  
   343  func setupFlagToAdjMaps() {
   344  	resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{
   345  		ResultIsAllocatedMem:     returnFeedsConcreteToInterfaceCallAdj,
   346  		ResultAlwaysSameFunc:     returnFeedsFuncToIndCallAdj,
   347  		ResultAlwaysSameConstant: returnFeedsConstToIfAdj,
   348  	}
   349  	paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{
   350  		ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj,
   351  		ParamFeedsInterfaceMethodCall:   passConcreteToItfCallAdj,
   352  		ParamMayFeedIndirectCall:        passInlinableFuncToNestedIndCallAdj,
   353  		ParamFeedsIndirectCall:          passInlinableFuncToIndCallAdj,
   354  	}
   355  }
   356  
   357  // LargestNegativeScoreAdjustment tries to estimate the largest possible
   358  // negative score adjustment that could be applied to a call of the
   359  // function with the specified props. Example:
   360  //
   361  //	func foo() {                  func bar(x int, p *int) int {
   362  //	   ...                          if x < 0 { *p = x }
   363  //	}                               return 99
   364  //	                              }
   365  //
   366  // Function 'foo' above on the left has no interesting properties,
   367  // thus as a result the most we'll adjust any call to is the value for
   368  // "call in loop". If the calculated cost of the function is 150, and
   369  // the in-loop adjustment is 5 (for example), then there is not much
   370  // point treating it as inlinable. On the other hand "bar" has a param
   371  // property (parameter "x" feeds unmodified to an "if" statement") and
   372  // a return property (always returns same constant) meaning that a
   373  // given call _could_ be rescored down as much as -35 points-- thus if
   374  // the size of "bar" is 100 (for example) then there is at least a
   375  // chance that scoring will enable inlining.
   376  func LargestNegativeScoreAdjustment(fn *ir.Func, props *FuncProps) int {
   377  	if resultFlagToPositiveAdj == nil {
   378  		setupFlagToAdjMaps()
   379  	}
   380  	var tmask scoreAdjustTyp
   381  	score := adjValues[inLoopAdj] // any call can be in a loop
   382  	for _, pf := range props.ParamFlags {
   383  		if adj, ok := paramFlagToPositiveAdj[pf]; ok {
   384  			score, tmask = adjustScore(adj, score, tmask)
   385  		}
   386  	}
   387  	for _, rf := range props.ResultFlags {
   388  		if adj, ok := resultFlagToPositiveAdj[rf]; ok {
   389  			score, tmask = adjustScore(adj, score, tmask)
   390  		}
   391  	}
   392  
   393  	if debugTrace&debugTraceScoring != 0 {
   394  		fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n",
   395  			fn, score)
   396  	}
   397  
   398  	return score
   399  }
   400  
   401  // LargestPositiveScoreAdjustment tries to estimate the largest possible
   402  // positive score adjustment that could be applied to a given callsite.
   403  // At the moment we don't have very many positive score adjustments, so
   404  // this is just hard-coded, not table-driven.
   405  func LargestPositiveScoreAdjustment(fn *ir.Func) int {
   406  	return adjValues[panicPathAdj] + adjValues[initFuncAdj]
   407  }
   408  
   409  // callSiteTab contains entries for each call in the function
   410  // currently being processed by InlineCalls; this variable will either
   411  // be set to 'cstabCache' below (for non-inlinable routines) or to the
   412  // local 'cstab' entry in the fnInlHeur object for inlinable routines.
   413  //
   414  // NOTE: this assumes that inlining operations are happening in a serial,
   415  // single-threaded fashion,f which is true today but probably won't hold
   416  // in the future (for example, we might want to score the callsites
   417  // in multiple functions in parallel); if the inliner evolves in this
   418  // direction we'll need to come up with a different approach here.
   419  var callSiteTab CallSiteTab
   420  
   421  // scoreCallsCache caches a call site table and call site list between
   422  // invocations of ScoreCalls so that we can reuse previously allocated
   423  // storage.
   424  var scoreCallsCache scoreCallsCacheType
   425  
   426  type scoreCallsCacheType struct {
   427  	tab CallSiteTab
   428  	csl []*CallSite
   429  }
   430  
   431  // ScoreCalls assigns numeric scores to each of the callsites in
   432  // function 'fn'; the lower the score, the more helpful we think it
   433  // will be to inline.
   434  //
   435  // Unlike a lot of the other inline heuristics machinery, callsite
   436  // scoring can't be done as part of the CanInline call for a function,
   437  // due to fact that we may be working on a non-trivial SCC. So for
   438  // example with this SCC:
   439  //
   440  //	func foo(x int) {           func bar(x int, f func()) {
   441  //	  if x != 0 {                  f()
   442  //	    bar(x, func(){})           foo(x-1)
   443  //	  }                         }
   444  //	}
   445  //
   446  // We don't want to perform scoring for the 'foo' call in "bar" until
   447  // after foo has been analyzed, but it's conceivable that CanInline
   448  // might visit bar before foo for this SCC.
   449  func ScoreCalls(fn *ir.Func) {
   450  	if len(fn.Body) == 0 {
   451  		return
   452  	}
   453  	enableDebugTraceIfEnv()
   454  
   455  	nameFinder := newNameFinder(fn)
   456  
   457  	if debugTrace&debugTraceScoring != 0 {
   458  		fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn))
   459  	}
   460  
   461  	// If this is an inlinable function, use the precomputed
   462  	// call site table for it. If the function wasn't an inline
   463  	// candidate, collect a callsite table for it now.
   464  	var cstab CallSiteTab
   465  	if funcInlHeur, ok := fpmap[fn]; ok {
   466  		cstab = funcInlHeur.cstab
   467  	} else {
   468  		if len(scoreCallsCache.tab) != 0 {
   469  			panic("missing call to ScoreCallsCleanup")
   470  		}
   471  		if scoreCallsCache.tab == nil {
   472  			scoreCallsCache.tab = make(CallSiteTab)
   473  		}
   474  		if debugTrace&debugTraceScoring != 0 {
   475  			fmt.Fprintf(os.Stderr, "=-= building cstab for non-inl func %s\n",
   476  				ir.FuncName(fn))
   477  		}
   478  		cstab = computeCallSiteTable(fn, fn.Body, scoreCallsCache.tab, nil, 0,
   479  			nameFinder)
   480  	}
   481  
   482  	csa := makeCallSiteAnalyzer(fn)
   483  	const doCallResults = true
   484  	csa.scoreCallsRegion(fn, fn.Body, cstab, doCallResults, nil)
   485  
   486  	disableDebugTrace()
   487  }
   488  
   489  // scoreCallsRegion assigns numeric scores to each of the callsites in
   490  // region 'region' within function 'fn'. This can be called on
   491  // an entire function, or with 'region' set to a chunk of
   492  // code corresponding to an inlined call.
   493  func (csa *callSiteAnalyzer) scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallResults bool, ic *ir.InlinedCallExpr) {
   494  	if debugTrace&debugTraceScoring != 0 {
   495  		fmt.Fprintf(os.Stderr, "=-= scoreCallsRegion(%v, %s) len(cstab)=%d\n",
   496  			ir.FuncName(fn), region[0].Op().String(), len(cstab))
   497  	}
   498  
   499  	// Sort callsites to avoid any surprises with non deterministic
   500  	// map iteration order (this is probably not needed, but here just
   501  	// in case).
   502  	csl := scoreCallsCache.csl[:0]
   503  	for _, cs := range cstab {
   504  		csl = append(csl, cs)
   505  	}
   506  	scoreCallsCache.csl = csl[:0]
   507  	sort.Slice(csl, func(i, j int) bool {
   508  		return csl[i].ID < csl[j].ID
   509  	})
   510  
   511  	// Score each call site.
   512  	var resultNameTab map[*ir.Name]resultPropAndCS
   513  	for _, cs := range csl {
   514  		var cprops *FuncProps
   515  		fihcprops := false
   516  		desercprops := false
   517  		if funcInlHeur, ok := fpmap[cs.Callee]; ok {
   518  			cprops = funcInlHeur.props
   519  			fihcprops = true
   520  		} else if cs.Callee.Inl != nil {
   521  			cprops = DeserializeFromString(cs.Callee.Inl.Properties)
   522  			desercprops = true
   523  		} else {
   524  			if base.Debug.DumpInlFuncProps != "" {
   525  				fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos()))
   526  				panic("should never happen")
   527  			} else {
   528  				continue
   529  			}
   530  		}
   531  		cs.computeCallSiteScore(csa, cprops)
   532  
   533  		if doCallResults {
   534  			if debugTrace&debugTraceScoring != 0 {
   535  				fmt.Fprintf(os.Stderr, "=-= examineCallResults at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
   536  			}
   537  			resultNameTab = csa.examineCallResults(cs, resultNameTab)
   538  		}
   539  
   540  		if debugTrace&debugTraceScoring != 0 {
   541  			fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
   542  		}
   543  	}
   544  
   545  	if resultNameTab != nil {
   546  		csa.rescoreBasedOnCallResultUses(fn, resultNameTab, cstab)
   547  	}
   548  
   549  	disableDebugTrace()
   550  
   551  	if ic != nil && callSiteTab != nil {
   552  		// Integrate the calls from this cstab into the table for the caller.
   553  		if err := callSiteTab.merge(cstab); err != nil {
   554  			base.FatalfAt(ic.Pos(), "%v", err)
   555  		}
   556  	} else {
   557  		callSiteTab = cstab
   558  	}
   559  }
   560  
   561  // ScoreCallsCleanup resets the state of the callsite cache
   562  // once ScoreCalls is done with a function.
   563  func ScoreCallsCleanup() {
   564  	if base.Debug.DumpInlCallSiteScores != 0 {
   565  		if allCallSites == nil {
   566  			allCallSites = make(CallSiteTab)
   567  		}
   568  		for call, cs := range callSiteTab {
   569  			allCallSites[call] = cs
   570  		}
   571  	}
   572  	for k := range scoreCallsCache.tab {
   573  		delete(scoreCallsCache.tab, k)
   574  	}
   575  }
   576  
   577  // GetCallSiteScore returns the previously calculated score for call
   578  // within fn.
   579  func GetCallSiteScore(fn *ir.Func, call *ir.CallExpr) (int, bool) {
   580  	if funcInlHeur, ok := fpmap[fn]; ok {
   581  		if cs, ok := funcInlHeur.cstab[call]; ok {
   582  			return cs.Score, true
   583  		}
   584  	}
   585  	if cs, ok := callSiteTab[call]; ok {
   586  		return cs.Score, true
   587  	}
   588  	return 0, false
   589  }
   590  
   591  // BudgetExpansion returns the amount to relax/expand the base
   592  // inlining budget when the new inliner is turned on; the inliner
   593  // will add the returned value to the hairiness budget.
   594  //
   595  // Background: with the new inliner, the score for a given callsite
   596  // can be adjusted down by some amount due to heuristics, however we
   597  // won't know whether this is going to happen until much later after
   598  // the CanInline call. This function returns the amount to relax the
   599  // budget initially (to allow for a large score adjustment); later on
   600  // in RevisitInlinability we'll look at each individual function to
   601  // demote it if needed.
   602  func BudgetExpansion(maxBudget int32) int32 {
   603  	if base.Debug.InlBudgetSlack != 0 {
   604  		return int32(base.Debug.InlBudgetSlack)
   605  	}
   606  	// In the default case, return maxBudget, which will effectively
   607  	// double the budget from 80 to 160; this should be good enough
   608  	// for most cases.
   609  	return maxBudget
   610  }
   611  
   612  var allCallSites CallSiteTab
   613  
   614  // DumpInlCallSiteScores is invoked by the inliner if the debug flag
   615  // "-d=dumpinlcallsitescores" is set; it dumps out a human-readable
   616  // summary of all (potentially) inlinable callsites in the package,
   617  // along with info on call site scoring and the adjustments made to a
   618  // given score. Here profile is the PGO profile in use (may be
   619  // nil), budgetCallback is a callback that can be invoked to find out
   620  // the original pre-adjustment hairiness limit for the function, and
   621  // inlineHotMaxBudget is the constant of the same name used in the
   622  // inliner. Sample output lines:
   623  //
   624  // Score  Adjustment  Status  Callee  CallerPos ScoreFlags
   625  // 115    40          DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset     expand_calls.go:1679:14|6       panicPathAdj
   626  // 76     -5n         PROMOTED runtime.persistentalloc   mcheckmark.go:48:45|3   inLoopAdj
   627  // 201    0           --- PGO  unicode.DecodeRuneInString        utf8.go:312:30|1
   628  // 7      -5          --- PGO  internal/abi.Name.DataChecked     type.go:625:22|0        inLoopAdj
   629  //
   630  // In the dump above, "Score" is the final score calculated for the
   631  // callsite, "Adjustment" is the amount added to or subtracted from
   632  // the original hairiness estimate to form the score. "Status" shows
   633  // whether anything changed with the site -- did the adjustment bump
   634  // it down just below the threshold ("PROMOTED") or instead bump it
   635  // above the threshold ("DEMOTED"); this will be blank ("---") if no
   636  // threshold was crossed as a result of the heuristics. Note that
   637  // "Status" also shows whether PGO was involved. "Callee" is the name
   638  // of the function called, "CallerPos" is the position of the
   639  // callsite, and "ScoreFlags" is a digest of the specific properties
   640  // we used to make adjustments to callsite score via heuristics.
   641  func DumpInlCallSiteScores(profile *pgoir.Profile, budgetCallback func(fn *ir.Func, profile *pgoir.Profile) (int32, bool)) {
   642  
   643  	var indirectlyDueToPromotion func(cs *CallSite) bool
   644  	indirectlyDueToPromotion = func(cs *CallSite) bool {
   645  		bud, _ := budgetCallback(cs.Callee, profile)
   646  		hairyval := cs.Callee.Inl.Cost
   647  		score := int32(cs.Score)
   648  		if hairyval > bud && score <= bud {
   649  			return true
   650  		}
   651  		if cs.parent != nil {
   652  			return indirectlyDueToPromotion(cs.parent)
   653  		}
   654  		return false
   655  	}
   656  
   657  	genstatus := func(cs *CallSite) string {
   658  		hairyval := cs.Callee.Inl.Cost
   659  		bud, isPGO := budgetCallback(cs.Callee, profile)
   660  		score := int32(cs.Score)
   661  		st := "---"
   662  		expinl := false
   663  		switch {
   664  		case hairyval <= bud && score <= bud:
   665  			// "Normal" inlined case: hairy val sufficiently low that
   666  			// it would have been inlined anyway without heuristics.
   667  			expinl = true
   668  		case hairyval > bud && score > bud:
   669  			// "Normal" not inlined case: hairy val sufficiently high
   670  			// and scoring didn't lower it.
   671  		case hairyval > bud && score <= bud:
   672  			// Promoted: we would not have inlined it before, but
   673  			// after score adjustment we decided to inline.
   674  			st = "PROMOTED"
   675  			expinl = true
   676  		case hairyval <= bud && score > bud:
   677  			// Demoted: we would have inlined it before, but after
   678  			// score adjustment we decided not to inline.
   679  			st = "DEMOTED"
   680  		}
   681  		inlined := cs.aux&csAuxInlined != 0
   682  		indprom := false
   683  		if cs.parent != nil {
   684  			indprom = indirectlyDueToPromotion(cs.parent)
   685  		}
   686  		if inlined && indprom {
   687  			st += "|INDPROM"
   688  		}
   689  		if inlined && !expinl {
   690  			st += "|[NI?]"
   691  		} else if !inlined && expinl {
   692  			st += "|[IN?]"
   693  		}
   694  		if isPGO {
   695  			st += "|PGO"
   696  		}
   697  		return st
   698  	}
   699  
   700  	if base.Debug.DumpInlCallSiteScores != 0 {
   701  		var sl []*CallSite
   702  		for _, cs := range allCallSites {
   703  			sl = append(sl, cs)
   704  		}
   705  		sort.Slice(sl, func(i, j int) bool {
   706  			if sl[i].Score != sl[j].Score {
   707  				return sl[i].Score < sl[j].Score
   708  			}
   709  			fni := ir.PkgFuncName(sl[i].Callee)
   710  			fnj := ir.PkgFuncName(sl[j].Callee)
   711  			if fni != fnj {
   712  				return fni < fnj
   713  			}
   714  			ecsi := EncodeCallSiteKey(sl[i])
   715  			ecsj := EncodeCallSiteKey(sl[j])
   716  			return ecsi < ecsj
   717  		})
   718  
   719  		mkname := func(fn *ir.Func) string {
   720  			var n string
   721  			if fn == nil || fn.Nname == nil {
   722  				return "<nil>"
   723  			}
   724  			if fn.Sym().Pkg == types.LocalPkg {
   725  				n = "ยท" + fn.Sym().Name
   726  			} else {
   727  				n = ir.PkgFuncName(fn)
   728  			}
   729  			// don't try to print super-long names
   730  			if len(n) <= 64 {
   731  				return n
   732  			}
   733  			return n[:32] + "..." + n[len(n)-32:len(n)]
   734  		}
   735  
   736  		if len(sl) != 0 {
   737  			fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path)
   738  			fmt.Fprintf(os.Stdout, "# Score  Adjustment  Status  Callee  CallerPos Flags ScoreFlags\n")
   739  		}
   740  		for _, cs := range sl {
   741  			hairyval := cs.Callee.Inl.Cost
   742  			adj := int32(cs.Score) - hairyval
   743  			nm := mkname(cs.Callee)
   744  			ecc := EncodeCallSiteKey(cs)
   745  			fmt.Fprintf(os.Stdout, "%d  %d\t%s\t%s\t%s\t%s\n",
   746  				cs.Score, adj, genstatus(cs),
   747  				nm, ecc,
   748  				cs.ScoreMask.String())
   749  		}
   750  	}
   751  }
   752  

View as plain text