merge.go

     1  // Copyright 2014 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package profile
    16  
    17  import (
    18  	"encoding/binary"
    19  	"fmt"
    20  	"slices"
    21  	"sort"
    22  	"strconv"
    23  	"strings"
    24  )
    25  
    26  // Compact performs garbage collection on a profile to remove any
    27  // unreferenced fields. This is useful to reduce the size of a profile
    28  // after samples or locations have been removed.
    29  func (p *Profile) Compact() *Profile {
    30  	p, _ = Merge([]*Profile{p})
    31  	return p
    32  }
    33  
    34  // Merge merges all the profiles in profs into a single Profile.
    35  // Returns a new profile independent of the input profiles. The merged
    36  // profile is compacted to eliminate unused samples, locations,
    37  // functions and mappings. Profiles must have identical profile sample
    38  // and period types or the merge will fail. profile.Period of the
    39  // resulting profile will be the maximum of all profiles, and
    40  // profile.TimeNanos will be the earliest nonzero one. Merges are
    41  // associative with the caveat of the first profile having some
    42  // specialization in how headers are combined. There may be other
    43  // subtleties now or in the future regarding associativity.
    44  func Merge(srcs []*Profile) (*Profile, error) {
    45  	if len(srcs) == 0 {
    46  		return nil, fmt.Errorf("no profiles to merge")
    47  	}
    48  	p, err := combineHeaders(srcs)
    49  	if err != nil {
    50  		return nil, err
    51  	}
    52  
    53  	pm := &profileMerger{
    54  		p:         p,
    55  		samples:   make(map[sampleKey]*Sample, len(srcs[0].Sample)),
    56  		locations: make(map[locationKey]*Location, len(srcs[0].Location)),
    57  		functions: make(map[functionKey]*Function, len(srcs[0].Function)),
    58  		mappings:  make(map[mappingKey]*Mapping, len(srcs[0].Mapping)),
    59  	}
    60  
    61  	for _, src := range srcs {
    62  		// Clear the profile-specific hash tables
    63  		pm.locationsByID = makeLocationIDMap(len(src.Location))
    64  		pm.functionsByID = make(map[uint64]*Function, len(src.Function))
    65  		pm.mappingsByID = make(map[uint64]mapInfo, len(src.Mapping))
    66  
    67  		if len(pm.mappings) == 0 && len(src.Mapping) > 0 {
    68  			// The Mapping list has the property that the first mapping
    69  			// represents the main binary. Take the first Mapping we see,
    70  			// otherwise the operations below will add mappings in an
    71  			// arbitrary order.
    72  			pm.mapMapping(src.Mapping[0])
    73  		}
    74  
    75  		for _, s := range src.Sample {
    76  			if !isZeroSample(s) {
    77  				pm.mapSample(s)
    78  			}
    79  		}
    80  	}
    81  
    82  	if slices.ContainsFunc(p.Sample, isZeroSample) {
    83  		// If there are any zero samples, re-merge the profile to GC
    84  		// them.
    85  		return Merge([]*Profile{p})
    86  	}
    87  
    88  	return p, nil
    89  }
    90  
    91  // Normalize normalizes the source profile by multiplying each value in profile by the
    92  // ratio of the sum of the base profile's values of that sample type to the sum of the
    93  // source profile's value of that sample type.
    94  func (p *Profile) Normalize(pb *Profile) error {
    95  
    96  	if err := p.compatible(pb); err != nil {
    97  		return err
    98  	}
    99  
   100  	baseVals := make([]int64, len(p.SampleType))
   101  	for _, s := range pb.Sample {
   102  		for i, v := range s.Value {
   103  			baseVals[i] += v
   104  		}
   105  	}
   106  
   107  	srcVals := make([]int64, len(p.SampleType))
   108  	for _, s := range p.Sample {
   109  		for i, v := range s.Value {
   110  			srcVals[i] += v
   111  		}
   112  	}
   113  
   114  	normScale := make([]float64, len(baseVals))
   115  	for i := range baseVals {
   116  		if srcVals[i] == 0 {
   117  			normScale[i] = 0.0
   118  		} else {
   119  			normScale[i] = float64(baseVals[i]) / float64(srcVals[i])
   120  		}
   121  	}
   122  	p.ScaleN(normScale)
   123  	return nil
   124  }
   125  
   126  func isZeroSample(s *Sample) bool {
   127  	for _, v := range s.Value {
   128  		if v != 0 {
   129  			return false
   130  		}
   131  	}
   132  	return true
   133  }
   134  
   135  type profileMerger struct {
   136  	p *Profile
   137  
   138  	// Memoization tables within a profile.
   139  	locationsByID locationIDMap
   140  	functionsByID map[uint64]*Function
   141  	mappingsByID  map[uint64]mapInfo
   142  
   143  	// Memoization tables for profile entities.
   144  	samples   map[sampleKey]*Sample
   145  	locations map[locationKey]*Location
   146  	functions map[functionKey]*Function
   147  	mappings  map[mappingKey]*Mapping
   148  }
   149  
   150  type mapInfo struct {
   151  	m      *Mapping
   152  	offset int64
   153  }
   154  
   155  func (pm *profileMerger) mapSample(src *Sample) *Sample {
   156  	// Check memoization table
   157  	k := pm.sampleKey(src)
   158  	if ss, ok := pm.samples[k]; ok {
   159  		for i, v := range src.Value {
   160  			ss.Value[i] += v
   161  		}
   162  		return ss
   163  	}
   164  
   165  	// Make new sample.
   166  	s := &Sample{
   167  		Location: make([]*Location, len(src.Location)),
   168  		Value:    make([]int64, len(src.Value)),
   169  		Label:    make(map[string][]string, len(src.Label)),
   170  		NumLabel: make(map[string][]int64, len(src.NumLabel)),
   171  		NumUnit:  make(map[string][]string, len(src.NumLabel)),
   172  	}
   173  	for i, l := range src.Location {
   174  		s.Location[i] = pm.mapLocation(l)
   175  	}
   176  	for k, v := range src.Label {
   177  		vv := make([]string, len(v))
   178  		copy(vv, v)
   179  		s.Label[k] = vv
   180  	}
   181  	for k, v := range src.NumLabel {
   182  		u := src.NumUnit[k]
   183  		vv := make([]int64, len(v))
   184  		uu := make([]string, len(u))
   185  		copy(vv, v)
   186  		copy(uu, u)
   187  		s.NumLabel[k] = vv
   188  		s.NumUnit[k] = uu
   189  	}
   190  	copy(s.Value, src.Value)
   191  	pm.samples[k] = s
   192  	pm.p.Sample = append(pm.p.Sample, s)
   193  	return s
   194  }
   195  
   196  func (pm *profileMerger) sampleKey(sample *Sample) sampleKey {
   197  	// Accumulate contents into a string.
   198  	var buf strings.Builder
   199  	buf.Grow(64) // Heuristic to avoid extra allocs
   200  
   201  	// encode a number
   202  	putNumber := func(v uint64) {
   203  		var num [binary.MaxVarintLen64]byte
   204  		n := binary.PutUvarint(num[:], v)
   205  		buf.Write(num[:n])
   206  	}
   207  
   208  	// encode a string prefixed with its length.
   209  	putDelimitedString := func(s string) {
   210  		putNumber(uint64(len(s)))
   211  		buf.WriteString(s)
   212  	}
   213  
   214  	for _, l := range sample.Location {
   215  		// Get the location in the merged profile, which may have a different ID.
   216  		if loc := pm.mapLocation(l); loc != nil {
   217  			putNumber(loc.ID)
   218  		}
   219  	}
   220  	putNumber(0) // Delimiter
   221  
   222  	for _, l := range sortedKeys1(sample.Label) {
   223  		putDelimitedString(l)
   224  		values := sample.Label[l]
   225  		putNumber(uint64(len(values)))
   226  		for _, v := range values {
   227  			putDelimitedString(v)
   228  		}
   229  	}
   230  
   231  	for _, l := range sortedKeys2(sample.NumLabel) {
   232  		putDelimitedString(l)
   233  		values := sample.NumLabel[l]
   234  		putNumber(uint64(len(values)))
   235  		for _, v := range values {
   236  			putNumber(uint64(v))
   237  		}
   238  		units := sample.NumUnit[l]
   239  		putNumber(uint64(len(units)))
   240  		for _, v := range units {
   241  			putDelimitedString(v)
   242  		}
   243  	}
   244  
   245  	return sampleKey(buf.String())
   246  }
   247  
   248  type sampleKey string
   249  
   250  // sortedKeys1 returns the sorted keys found in a string->[]string map.
   251  //
   252  // Note: this is currently non-generic since github pprof runs golint,
   253  // which does not support generics. When that issue is fixed, it can
   254  // be merged with sortedKeys2 and made into a generic function.
   255  func sortedKeys1(m map[string][]string) []string {
   256  	if len(m) == 0 {
   257  		return nil
   258  	}
   259  	keys := make([]string, 0, len(m))
   260  	for k := range m {
   261  		keys = append(keys, k)
   262  	}
   263  	sort.Strings(keys)
   264  	return keys
   265  }
   266  
   267  // sortedKeys2 returns the sorted keys found in a string->[]int64 map.
   268  //
   269  // Note: this is currently non-generic since github pprof runs golint,
   270  // which does not support generics. When that issue is fixed, it can
   271  // be merged with sortedKeys1 and made into a generic function.
   272  func sortedKeys2(m map[string][]int64) []string {
   273  	if len(m) == 0 {
   274  		return nil
   275  	}
   276  	keys := make([]string, 0, len(m))
   277  	for k := range m {
   278  		keys = append(keys, k)
   279  	}
   280  	sort.Strings(keys)
   281  	return keys
   282  }
   283  
   284  func (pm *profileMerger) mapLocation(src *Location) *Location {
   285  	if src == nil {
   286  		return nil
   287  	}
   288  
   289  	if l := pm.locationsByID.get(src.ID); l != nil {
   290  		return l
   291  	}
   292  
   293  	mi := pm.mapMapping(src.Mapping)
   294  	l := &Location{
   295  		ID:       uint64(len(pm.p.Location) + 1),
   296  		Mapping:  mi.m,
   297  		Address:  uint64(int64(src.Address) + mi.offset),
   298  		Line:     make([]Line, len(src.Line)),
   299  		IsFolded: src.IsFolded,
   300  	}
   301  	for i, ln := range src.Line {
   302  		l.Line[i] = pm.mapLine(ln)
   303  	}
   304  	// Check memoization table. Must be done on the remapped location to
   305  	// account for the remapped mapping ID.
   306  	k := l.key()
   307  	if ll, ok := pm.locations[k]; ok {
   308  		pm.locationsByID.set(src.ID, ll)
   309  		return ll
   310  	}
   311  	pm.locationsByID.set(src.ID, l)
   312  	pm.locations[k] = l
   313  	pm.p.Location = append(pm.p.Location, l)
   314  	return l
   315  }
   316  
   317  // key generates locationKey to be used as a key for maps.
   318  func (l *Location) key() locationKey {
   319  	key := locationKey{
   320  		addr:     l.Address,
   321  		isFolded: l.IsFolded,
   322  	}
   323  	if l.Mapping != nil {
   324  		// Normalizes address to handle address space randomization.
   325  		key.addr -= l.Mapping.Start
   326  		key.mappingID = l.Mapping.ID
   327  	}
   328  	lines := make([]string, len(l.Line)*3)
   329  	for i, line := range l.Line {
   330  		if line.Function != nil {
   331  			lines[i*2] = strconv.FormatUint(line.Function.ID, 16)
   332  		}
   333  		lines[i*2+1] = strconv.FormatInt(line.Line, 16)
   334  		lines[i*2+2] = strconv.FormatInt(line.Column, 16)
   335  	}
   336  	key.lines = strings.Join(lines, "|")
   337  	return key
   338  }
   339  
   340  type locationKey struct {
   341  	addr, mappingID uint64
   342  	lines           string
   343  	isFolded        bool
   344  }
   345  
   346  func (pm *profileMerger) mapMapping(src *Mapping) mapInfo {
   347  	if src == nil {
   348  		return mapInfo{}
   349  	}
   350  
   351  	if mi, ok := pm.mappingsByID[src.ID]; ok {
   352  		return mi
   353  	}
   354  
   355  	// Check memoization tables.
   356  	mk := src.key()
   357  	if m, ok := pm.mappings[mk]; ok {
   358  		mi := mapInfo{m, int64(m.Start) - int64(src.Start)}
   359  		pm.mappingsByID[src.ID] = mi
   360  		return mi
   361  	}
   362  	m := &Mapping{
   363  		ID:                     uint64(len(pm.p.Mapping) + 1),
   364  		Start:                  src.Start,
   365  		Limit:                  src.Limit,
   366  		Offset:                 src.Offset,
   367  		File:                   src.File,
   368  		KernelRelocationSymbol: src.KernelRelocationSymbol,
   369  		BuildID:                src.BuildID,
   370  		HasFunctions:           src.HasFunctions,
   371  		HasFilenames:           src.HasFilenames,
   372  		HasLineNumbers:         src.HasLineNumbers,
   373  		HasInlineFrames:        src.HasInlineFrames,
   374  	}
   375  	pm.p.Mapping = append(pm.p.Mapping, m)
   376  
   377  	// Update memoization tables.
   378  	pm.mappings[mk] = m
   379  	mi := mapInfo{m, 0}
   380  	pm.mappingsByID[src.ID] = mi
   381  	return mi
   382  }
   383  
   384  // key generates encoded strings of Mapping to be used as a key for
   385  // maps.
   386  func (m *Mapping) key() mappingKey {
   387  	// Normalize addresses to handle address space randomization.
   388  	// Round up to next 4K boundary to avoid minor discrepancies.
   389  	const mapsizeRounding = 0x1000
   390  
   391  	size := m.Limit - m.Start
   392  	size = size + mapsizeRounding - 1
   393  	size = size - (size % mapsizeRounding)
   394  	key := mappingKey{
   395  		size:   size,
   396  		offset: m.Offset,
   397  	}
   398  
   399  	switch {
   400  	case m.BuildID != "":
   401  		key.buildIDOrFile = m.BuildID
   402  	case m.File != "":
   403  		key.buildIDOrFile = m.File
   404  	default:
   405  		// A mapping containing neither build ID nor file name is a fake mapping. A
   406  		// key with empty buildIDOrFile is used for fake mappings so that they are
   407  		// treated as the same mapping during merging.
   408  	}
   409  	return key
   410  }
   411  
   412  type mappingKey struct {
   413  	size, offset  uint64
   414  	buildIDOrFile string
   415  }
   416  
   417  func (pm *profileMerger) mapLine(src Line) Line {
   418  	ln := Line{
   419  		Function: pm.mapFunction(src.Function),
   420  		Line:     src.Line,
   421  		Column:   src.Column,
   422  	}
   423  	return ln
   424  }
   425  
   426  func (pm *profileMerger) mapFunction(src *Function) *Function {
   427  	if src == nil {
   428  		return nil
   429  	}
   430  	if f, ok := pm.functionsByID[src.ID]; ok {
   431  		return f
   432  	}
   433  	k := src.key()
   434  	if f, ok := pm.functions[k]; ok {
   435  		pm.functionsByID[src.ID] = f
   436  		return f
   437  	}
   438  	f := &Function{
   439  		ID:         uint64(len(pm.p.Function) + 1),
   440  		Name:       src.Name,
   441  		SystemName: src.SystemName,
   442  		Filename:   src.Filename,
   443  		StartLine:  src.StartLine,
   444  	}
   445  	pm.functions[k] = f
   446  	pm.functionsByID[src.ID] = f
   447  	pm.p.Function = append(pm.p.Function, f)
   448  	return f
   449  }
   450  
   451  // key generates a struct to be used as a key for maps.
   452  func (f *Function) key() functionKey {
   453  	return functionKey{
   454  		f.StartLine,
   455  		f.Name,
   456  		f.SystemName,
   457  		f.Filename,
   458  	}
   459  }
   460  
   461  type functionKey struct {
   462  	startLine                  int64
   463  	name, systemName, fileName string
   464  }
   465  
   466  // combineHeaders checks that all profiles can be merged and returns
   467  // their combined profile.
   468  func combineHeaders(srcs []*Profile) (*Profile, error) {
   469  	for _, s := range srcs[1:] {
   470  		if err := srcs[0].compatible(s); err != nil {
   471  			return nil, err
   472  		}
   473  	}
   474  
   475  	var timeNanos, durationNanos, period int64
   476  	var comments []string
   477  	seenComments := map[string]bool{}
   478  	var docURL string
   479  	var defaultSampleType string
   480  	for _, s := range srcs {
   481  		if timeNanos == 0 || s.TimeNanos < timeNanos {
   482  			timeNanos = s.TimeNanos
   483  		}
   484  		durationNanos += s.DurationNanos
   485  		if period == 0 || period < s.Period {
   486  			period = s.Period
   487  		}
   488  		for _, c := range s.Comments {
   489  			if seen := seenComments[c]; !seen {
   490  				comments = append(comments, c)
   491  				seenComments[c] = true
   492  			}
   493  		}
   494  		if defaultSampleType == "" {
   495  			defaultSampleType = s.DefaultSampleType
   496  		}
   497  		if docURL == "" {
   498  			docURL = s.DocURL
   499  		}
   500  	}
   501  
   502  	p := &Profile{
   503  		SampleType: make([]*ValueType, len(srcs[0].SampleType)),
   504  
   505  		DropFrames: srcs[0].DropFrames,
   506  		KeepFrames: srcs[0].KeepFrames,
   507  
   508  		TimeNanos:     timeNanos,
   509  		DurationNanos: durationNanos,
   510  		PeriodType:    srcs[0].PeriodType,
   511  		Period:        period,
   512  
   513  		Comments:          comments,
   514  		DefaultSampleType: defaultSampleType,
   515  		DocURL:            docURL,
   516  	}
   517  	copy(p.SampleType, srcs[0].SampleType)
   518  	return p, nil
   519  }
   520  
   521  // compatible determines if two profiles can be compared/merged.
   522  // returns nil if the profiles are compatible; otherwise an error with
   523  // details on the incompatibility.
   524  func (p *Profile) compatible(pb *Profile) error {
   525  	if !equalValueType(p.PeriodType, pb.PeriodType) {
   526  		return fmt.Errorf("incompatible period types %v and %v", p.PeriodType, pb.PeriodType)
   527  	}
   528  
   529  	if len(p.SampleType) != len(pb.SampleType) {
   530  		return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
   531  	}
   532  
   533  	for i := range p.SampleType {
   534  		if !equalValueType(p.SampleType[i], pb.SampleType[i]) {
   535  			return fmt.Errorf("incompatible sample types %v and %v", p.SampleType, pb.SampleType)
   536  		}
   537  	}
   538  	return nil
   539  }
   540  
   541  // equalValueType returns true if the two value types are semantically
   542  // equal. It ignores the internal fields used during encode/decode.
   543  func equalValueType(st1, st2 *ValueType) bool {
   544  	return st1.Type == st2.Type && st1.Unit == st2.Unit
   545  }
   546  
   547  // locationIDMap is like a map[uint64]*Location, but provides efficiency for
   548  // ids that are densely numbered, which is often the case.
   549  type locationIDMap struct {
   550  	dense  []*Location          // indexed by id for id < len(dense)
   551  	sparse map[uint64]*Location // indexed by id for id >= len(dense)
   552  }
   553  
   554  func makeLocationIDMap(n int) locationIDMap {
   555  	return locationIDMap{
   556  		dense:  make([]*Location, n),
   557  		sparse: map[uint64]*Location{},
   558  	}
   559  }
   560  
   561  func (lm locationIDMap) get(id uint64) *Location {
   562  	if id < uint64(len(lm.dense)) {
   563  		return lm.dense[int(id)]
   564  	}
   565  	return lm.sparse[id]
   566  }
   567  
   568  func (lm locationIDMap) set(id uint64, loc *Location) {
   569  	if id < uint64(len(lm.dense)) {
   570  		lm.dense[id] = loc
   571  		return
   572  	}
   573  	lm.sparse[id] = loc
   574  }
   575  
   576  // CompatibilizeSampleTypes makes profiles compatible to be compared/merged. It
   577  // keeps sample types that appear in all profiles only and drops/reorders the
   578  // sample types as necessary.
   579  //
   580  // In the case of sample types order is not the same for given profiles the
   581  // order is derived from the first profile.
   582  //
   583  // Profiles are modified in-place.
   584  //
   585  // It returns an error if the sample type's intersection is empty.
   586  func CompatibilizeSampleTypes(ps []*Profile) error {
   587  	sTypes := commonSampleTypes(ps)
   588  	if len(sTypes) == 0 {
   589  		return fmt.Errorf("profiles have empty common sample type list")
   590  	}
   591  	for _, p := range ps {
   592  		if err := compatibilizeSampleTypes(p, sTypes); err != nil {
   593  			return err
   594  		}
   595  	}
   596  	return nil
   597  }
   598  
   599  // commonSampleTypes returns sample types that appear in all profiles in the
   600  // order how they ordered in the first profile.
   601  func commonSampleTypes(ps []*Profile) []string {
   602  	if len(ps) == 0 {
   603  		return nil
   604  	}
   605  	sTypes := map[string]int{}
   606  	for _, p := range ps {
   607  		for _, st := range p.SampleType {
   608  			sTypes[st.Type]++
   609  		}
   610  	}
   611  	var res []string
   612  	for _, st := range ps[0].SampleType {
   613  		if sTypes[st.Type] == len(ps) {
   614  			res = append(res, st.Type)
   615  		}
   616  	}
   617  	return res
   618  }
   619  
   620  // compatibilizeSampleTypes drops sample types that are not present in sTypes
   621  // list and reorder them if needed.
   622  //
   623  // It sets DefaultSampleType to sType[0] if it is not in sType list.
   624  //
   625  // It assumes that all sample types from the sTypes list are present in the
   626  // given profile otherwise it returns an error.
   627  func compatibilizeSampleTypes(p *Profile, sTypes []string) error {
   628  	if len(sTypes) == 0 {
   629  		return fmt.Errorf("sample type list is empty")
   630  	}
   631  	defaultSampleType := sTypes[0]
   632  	reMap, needToModify := make([]int, len(sTypes)), false
   633  	for i, st := range sTypes {
   634  		if st == p.DefaultSampleType {
   635  			defaultSampleType = p.DefaultSampleType
   636  		}
   637  		idx := searchValueType(p.SampleType, st)
   638  		if idx < 0 {
   639  			return fmt.Errorf("%q sample type is not found in profile", st)
   640  		}
   641  		reMap[i] = idx
   642  		if idx != i {
   643  			needToModify = true
   644  		}
   645  	}
   646  	if !needToModify && len(sTypes) == len(p.SampleType) {
   647  		return nil
   648  	}
   649  	p.DefaultSampleType = defaultSampleType
   650  	oldSampleTypes := p.SampleType
   651  	p.SampleType = make([]*ValueType, len(sTypes))
   652  	for i, idx := range reMap {
   653  		p.SampleType[i] = oldSampleTypes[idx]
   654  	}
   655  	values := make([]int64, len(sTypes))
   656  	for _, s := range p.Sample {
   657  		for i, idx := range reMap {
   658  			values[i] = s.Value[idx]
   659  		}
   660  		s.Value = s.Value[:len(values)]
   661  		copy(s.Value, values)
   662  	}
   663  	return nil
   664  }
   665  
   666  func searchValueType(vts []*ValueType, s string) int {
   667  	for i, vt := range vts {
   668  		if vt.Type == s {
   669  			return i
   670  		}
   671  	}
   672  	return -1
   673  }
   674
View as plain text