mud.go

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package trace
     6  
     7  import (
     8  	"cmp"
     9  	"math"
    10  	"slices"
    11  )
    12  
    13  // mud is an updatable mutator utilization distribution.
    14  //
    15  // This is a continuous distribution of duration over mutator
    16  // utilization. For example, the integral from mutator utilization a
    17  // to b is the total duration during which the mutator utilization was
    18  // in the range [a, b].
    19  //
    20  // This distribution is *not* normalized (it is not a probability
    21  // distribution). This makes it easier to work with as it's being
    22  // updated.
    23  //
    24  // It is represented as the sum of scaled uniform distribution
    25  // functions and Dirac delta functions (which are treated as
    26  // degenerate uniform distributions).
    27  type mud struct {
    28  	sorted, unsorted []edge
    29  
    30  	// trackMass is the inverse cumulative sum to track as the
    31  	// distribution is updated.
    32  	trackMass float64
    33  	// trackBucket is the bucket in which trackMass falls. If the
    34  	// total mass of the distribution is < trackMass, this is
    35  	// len(hist).
    36  	trackBucket int
    37  	// trackSum is the cumulative sum of hist[:trackBucket]. Once
    38  	// trackSum >= trackMass, trackBucket must be recomputed.
    39  	trackSum float64
    40  
    41  	// hist is a hierarchical histogram of distribution mass.
    42  	hist [mudDegree]float64
    43  }
    44  
    45  const (
    46  	// mudDegree is the number of buckets in the MUD summary
    47  	// histogram.
    48  	mudDegree = 1024
    49  )
    50  
    51  type edge struct {
    52  	// At x, the function increases by y.
    53  	x, delta float64
    54  	// Additionally at x is a Dirac delta function with area dirac.
    55  	dirac float64
    56  }
    57  
    58  // add adds a uniform function over [l, r] scaled so the total weight
    59  // of the uniform is area. If l==r, this adds a Dirac delta function.
    60  func (d *mud) add(l, r, area float64) {
    61  	if area == 0 {
    62  		return
    63  	}
    64  
    65  	if r < l {
    66  		l, r = r, l
    67  	}
    68  
    69  	// Add the edges.
    70  	if l == r {
    71  		d.unsorted = append(d.unsorted, edge{l, 0, area})
    72  	} else {
    73  		delta := area / (r - l)
    74  		d.unsorted = append(d.unsorted, edge{l, delta, 0}, edge{r, -delta, 0})
    75  	}
    76  
    77  	// Update the histogram.
    78  	h := &d.hist
    79  	lbFloat, lf := math.Modf(l * mudDegree)
    80  	lb := int(lbFloat)
    81  	if lb >= mudDegree {
    82  		lb, lf = mudDegree-1, 1
    83  	}
    84  	if l == r {
    85  		h[lb] += area
    86  	} else {
    87  		rbFloat, rf := math.Modf(r * mudDegree)
    88  		rb := int(rbFloat)
    89  		if rb >= mudDegree {
    90  			rb, rf = mudDegree-1, 1
    91  		}
    92  		if lb == rb {
    93  			h[lb] += area
    94  		} else {
    95  			perBucket := area / (r - l) / mudDegree
    96  			h[lb] += perBucket * (1 - lf)
    97  			h[rb] += perBucket * rf
    98  			for i := lb + 1; i < rb; i++ {
    99  				h[i] += perBucket
   100  			}
   101  		}
   102  	}
   103  
   104  	// Update mass tracking.
   105  	if thresh := float64(d.trackBucket) / mudDegree; l < thresh {
   106  		if r < thresh {
   107  			d.trackSum += area
   108  		} else {
   109  			d.trackSum += area * (thresh - l) / (r - l)
   110  		}
   111  		if d.trackSum >= d.trackMass {
   112  			// The tracked mass now falls in a different
   113  			// bucket. Recompute the inverse cumulative sum.
   114  			d.setTrackMass(d.trackMass)
   115  		}
   116  	}
   117  }
   118  
   119  // setTrackMass sets the mass to track the inverse cumulative sum for.
   120  //
   121  // Specifically, mass is a cumulative duration, and the mutator
   122  // utilization bounds for this duration can be queried using
   123  // approxInvCumulativeSum.
   124  func (d *mud) setTrackMass(mass float64) {
   125  	d.trackMass = mass
   126  
   127  	// Find the bucket currently containing trackMass by computing
   128  	// the cumulative sum.
   129  	sum := 0.0
   130  	for i, val := range d.hist[:] {
   131  		newSum := sum + val
   132  		if newSum > mass {
   133  			// mass falls in bucket i.
   134  			d.trackBucket = i
   135  			d.trackSum = sum
   136  			return
   137  		}
   138  		sum = newSum
   139  	}
   140  	d.trackBucket = len(d.hist)
   141  	d.trackSum = sum
   142  }
   143  
   144  // approxInvCumulativeSum is like invCumulativeSum, but specifically
   145  // operates on the tracked mass and returns an upper and lower bound
   146  // approximation of the inverse cumulative sum.
   147  //
   148  // The true inverse cumulative sum will be in the range [lower, upper).
   149  func (d *mud) approxInvCumulativeSum() (float64, float64, bool) {
   150  	if d.trackBucket == len(d.hist) {
   151  		return math.NaN(), math.NaN(), false
   152  	}
   153  	return float64(d.trackBucket) / mudDegree, float64(d.trackBucket+1) / mudDegree, true
   154  }
   155  
   156  // invCumulativeSum returns x such that the integral of d from -∞ to x
   157  // is y. If the total weight of d is less than y, it returns the
   158  // maximum of the distribution and false.
   159  //
   160  // Specifically, y is a cumulative duration, and invCumulativeSum
   161  // returns the mutator utilization x such that at least y time has
   162  // been spent with mutator utilization <= x.
   163  func (d *mud) invCumulativeSum(y float64) (float64, bool) {
   164  	if len(d.sorted) == 0 && len(d.unsorted) == 0 {
   165  		return math.NaN(), false
   166  	}
   167  
   168  	// Sort edges.
   169  	edges := d.unsorted
   170  	slices.SortFunc(edges, func(a, b edge) int {
   171  		return cmp.Compare(a.x, b.x)
   172  	})
   173  	// Merge with sorted edges.
   174  	d.unsorted = nil
   175  	if d.sorted == nil {
   176  		d.sorted = edges
   177  	} else {
   178  		oldSorted := d.sorted
   179  		newSorted := make([]edge, len(oldSorted)+len(edges))
   180  		i, j := 0, 0
   181  		for o := range newSorted {
   182  			if i >= len(oldSorted) {
   183  				copy(newSorted[o:], edges[j:])
   184  				break
   185  			} else if j >= len(edges) {
   186  				copy(newSorted[o:], oldSorted[i:])
   187  				break
   188  			} else if oldSorted[i].x < edges[j].x {
   189  				newSorted[o] = oldSorted[i]
   190  				i++
   191  			} else {
   192  				newSorted[o] = edges[j]
   193  				j++
   194  			}
   195  		}
   196  		d.sorted = newSorted
   197  	}
   198  
   199  	// Traverse edges in order computing a cumulative sum.
   200  	csum, rate, prevX := 0.0, 0.0, 0.0
   201  	for _, e := range d.sorted {
   202  		newCsum := csum + (e.x-prevX)*rate
   203  		if newCsum >= y {
   204  			// y was exceeded between the previous edge
   205  			// and this one.
   206  			if rate == 0 {
   207  				// Anywhere between prevX and
   208  				// e.x will do. We return e.x
   209  				// because that takes care of
   210  				// the y==0 case naturally.
   211  				return e.x, true
   212  			}
   213  			return (y-csum)/rate + prevX, true
   214  		}
   215  		newCsum += e.dirac
   216  		if newCsum >= y {
   217  			// y was exceeded by the Dirac delta at e.x.
   218  			return e.x, true
   219  		}
   220  		csum, prevX = newCsum, e.x
   221  		rate += e.delta
   222  	}
   223  	return prevX, false
   224  }
   225
View as plain text