Source file src/internal/pkgbits/decoder.go

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pkgbits
     6  
     7  import (
     8  	"encoding/binary"
     9  	"errors"
    10  	"fmt"
    11  	"go/constant"
    12  	"go/token"
    13  	"io"
    14  	"math/big"
    15  	"os"
    16  	"runtime"
    17  	"strings"
    18  )
    19  
    20  // A PkgDecoder provides methods for decoding a package's Unified IR
    21  // export data.
    22  type PkgDecoder struct {
    23  	// version is the file format version.
    24  	version Version
    25  
    26  	// sync indicates whether the file uses sync markers.
    27  	sync bool
    28  
    29  	// pkgPath is the package path for the package to be decoded.
    30  	//
    31  	// TODO(mdempsky): Remove; unneeded since CL 391014.
    32  	pkgPath string
    33  
    34  	// elemData is the full data payload of the encoded package.
    35  	// Elements are densely and contiguously packed together.
    36  	//
    37  	// The last 8 bytes of elemData are the package fingerprint.
    38  	elemData string
    39  
    40  	// elemEnds stores the byte-offset end positions of element
    41  	// bitstreams within elemData.
    42  	//
    43  	// For example, element I's bitstream data starts at elemEnds[I-1]
    44  	// (or 0, if I==0) and ends at elemEnds[I].
    45  	//
    46  	// Note: elemEnds is indexed by absolute indices, not
    47  	// section-relative indices.
    48  	elemEnds []uint32
    49  
    50  	// elemEndsEnds stores the index-offset end positions of relocation
    51  	// sections within elemEnds.
    52  	//
    53  	// For example, section K's end positions start at elemEndsEnds[K-1]
    54  	// (or 0, if K==0) and end at elemEndsEnds[K].
    55  	elemEndsEnds [numRelocs]uint32
    56  
    57  	scratchRelocEnt []RelocEnt
    58  }
    59  
    60  // PkgPath returns the package path for the package
    61  //
    62  // TODO(mdempsky): Remove; unneeded since CL 391014.
    63  func (pr *PkgDecoder) PkgPath() string { return pr.pkgPath }
    64  
    65  // SyncMarkers reports whether pr uses sync markers.
    66  func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
    67  
    68  // NewPkgDecoder returns a PkgDecoder initialized to read the Unified
    69  // IR export data from input. pkgPath is the package path for the
    70  // compilation unit that produced the export data.
    71  func NewPkgDecoder(pkgPath, input string) PkgDecoder {
    72  	pr := PkgDecoder{
    73  		pkgPath: pkgPath,
    74  	}
    75  
    76  	// TODO(mdempsky): Implement direct indexing of input string to
    77  	// avoid copying the position information.
    78  
    79  	r := strings.NewReader(input)
    80  
    81  	var ver uint32
    82  	assert(binary.Read(r, binary.LittleEndian, &ver) == nil)
    83  	pr.version = Version(ver)
    84  
    85  	if pr.version >= numVersions {
    86  		panic(fmt.Errorf("cannot decode %q, export data version %d is greater than maximum supported version %d", pkgPath, pr.version, numVersions-1))
    87  	}
    88  
    89  	if pr.version.Has(Flags) {
    90  		var flags uint32
    91  		assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
    92  		pr.sync = flags&flagSyncMarkers != 0
    93  	}
    94  
    95  	assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
    96  
    97  	pr.elemEnds = make([]uint32, pr.elemEndsEnds[len(pr.elemEndsEnds)-1])
    98  	assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
    99  
   100  	pos, err := r.Seek(0, io.SeekCurrent)
   101  	assert(err == nil)
   102  
   103  	pr.elemData = input[pos:]
   104  
   105  	const fingerprintSize = 8
   106  	assert(len(pr.elemData)-fingerprintSize == int(pr.elemEnds[len(pr.elemEnds)-1]))
   107  
   108  	return pr
   109  }
   110  
   111  // NumElems returns the number of elements in section k.
   112  func (pr *PkgDecoder) NumElems(k RelocKind) int {
   113  	count := int(pr.elemEndsEnds[k])
   114  	if k > 0 {
   115  		count -= int(pr.elemEndsEnds[k-1])
   116  	}
   117  	return count
   118  }
   119  
   120  // TotalElems returns the total number of elements across all sections.
   121  func (pr *PkgDecoder) TotalElems() int {
   122  	return len(pr.elemEnds)
   123  }
   124  
   125  // Fingerprint returns the package fingerprint.
   126  func (pr *PkgDecoder) Fingerprint() [8]byte {
   127  	var fp [8]byte
   128  	copy(fp[:], pr.elemData[len(pr.elemData)-8:])
   129  	return fp
   130  }
   131  
   132  // AbsIdx returns the absolute index for the given (section, index)
   133  // pair.
   134  func (pr *PkgDecoder) AbsIdx(k RelocKind, idx Index) int {
   135  	absIdx := int(idx)
   136  	if k > 0 {
   137  		absIdx += int(pr.elemEndsEnds[k-1])
   138  	}
   139  	if absIdx >= int(pr.elemEndsEnds[k]) {
   140  		panicf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
   141  	}
   142  	return absIdx
   143  }
   144  
   145  // DataIdx returns the raw element bitstream for the given (section,
   146  // index) pair.
   147  func (pr *PkgDecoder) DataIdx(k RelocKind, idx Index) string {
   148  	absIdx := pr.AbsIdx(k, idx)
   149  
   150  	var start uint32
   151  	if absIdx > 0 {
   152  		start = pr.elemEnds[absIdx-1]
   153  	}
   154  	end := pr.elemEnds[absIdx]
   155  
   156  	return pr.elemData[start:end]
   157  }
   158  
   159  // StringIdx returns the string value for the given string index.
   160  func (pr *PkgDecoder) StringIdx(idx Index) string {
   161  	return pr.DataIdx(RelocString, idx)
   162  }
   163  
   164  // NewDecoder returns a Decoder for the given (section, index) pair,
   165  // and decodes the given SyncMarker from the element bitstream.
   166  func (pr *PkgDecoder) NewDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
   167  	r := pr.NewDecoderRaw(k, idx)
   168  	r.Sync(marker)
   169  	return r
   170  }
   171  
   172  // TempDecoder returns a Decoder for the given (section, index) pair,
   173  // and decodes the given SyncMarker from the element bitstream.
   174  // If possible the Decoder should be RetireDecoder'd when it is no longer
   175  // needed, this will avoid heap allocations.
   176  func (pr *PkgDecoder) TempDecoder(k RelocKind, idx Index, marker SyncMarker) Decoder {
   177  	r := pr.TempDecoderRaw(k, idx)
   178  	r.Sync(marker)
   179  	return r
   180  }
   181  
   182  func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
   183  	pr.scratchRelocEnt = d.Relocs
   184  	d.Relocs = nil
   185  }
   186  
   187  // NewDecoderRaw returns a Decoder for the given (section, index) pair.
   188  //
   189  // Most callers should use NewDecoder instead.
   190  func (pr *PkgDecoder) NewDecoderRaw(k RelocKind, idx Index) Decoder {
   191  	r := Decoder{
   192  		common: pr,
   193  		k:      k,
   194  		Idx:    idx,
   195  	}
   196  
   197  	r.Data.Reset(pr.DataIdx(k, idx))
   198  	r.Sync(SyncRelocs)
   199  	r.Relocs = make([]RelocEnt, r.Len())
   200  	for i := range r.Relocs {
   201  		r.Sync(SyncReloc)
   202  		r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
   203  	}
   204  
   205  	return r
   206  }
   207  
   208  func (pr *PkgDecoder) TempDecoderRaw(k RelocKind, idx Index) Decoder {
   209  	r := Decoder{
   210  		common: pr,
   211  		k:      k,
   212  		Idx:    idx,
   213  	}
   214  
   215  	r.Data.Reset(pr.DataIdx(k, idx))
   216  	r.Sync(SyncRelocs)
   217  	l := r.Len()
   218  	if cap(pr.scratchRelocEnt) >= l {
   219  		r.Relocs = pr.scratchRelocEnt[:l]
   220  		pr.scratchRelocEnt = nil
   221  	} else {
   222  		r.Relocs = make([]RelocEnt, l)
   223  	}
   224  	for i := range r.Relocs {
   225  		r.Sync(SyncReloc)
   226  		r.Relocs[i] = RelocEnt{RelocKind(r.Len()), Index(r.Len())}
   227  	}
   228  
   229  	return r
   230  }
   231  
   232  // A Decoder provides methods for decoding an individual element's
   233  // bitstream data.
   234  type Decoder struct {
   235  	common *PkgDecoder
   236  
   237  	Relocs []RelocEnt
   238  	Data   strings.Reader
   239  
   240  	k   RelocKind
   241  	Idx Index
   242  }
   243  
   244  func (r *Decoder) checkErr(err error) {
   245  	if err != nil {
   246  		panicf("unexpected decoding error: %w", err)
   247  	}
   248  }
   249  
   250  func (r *Decoder) rawUvarint() uint64 {
   251  	x, err := readUvarint(&r.Data)
   252  	r.checkErr(err)
   253  	return x
   254  }
   255  
   256  // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint.
   257  // This avoids the interface conversion and thus has better escape properties,
   258  // which flows up the stack.
   259  func readUvarint(r *strings.Reader) (uint64, error) {
   260  	var x uint64
   261  	var s uint
   262  	for i := 0; i < binary.MaxVarintLen64; i++ {
   263  		b, err := r.ReadByte()
   264  		if err != nil {
   265  			if i > 0 && err == io.EOF {
   266  				err = io.ErrUnexpectedEOF
   267  			}
   268  			return x, err
   269  		}
   270  		if b < 0x80 {
   271  			if i == binary.MaxVarintLen64-1 && b > 1 {
   272  				return x, overflow
   273  			}
   274  			return x | uint64(b)<<s, nil
   275  		}
   276  		x |= uint64(b&0x7f) << s
   277  		s += 7
   278  	}
   279  	return x, overflow
   280  }
   281  
   282  var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
   283  
   284  func (r *Decoder) rawVarint() int64 {
   285  	ux := r.rawUvarint()
   286  
   287  	// Zig-zag decode.
   288  	x := int64(ux >> 1)
   289  	if ux&1 != 0 {
   290  		x = ^x
   291  	}
   292  	return x
   293  }
   294  
   295  func (r *Decoder) rawReloc(k RelocKind, idx int) Index {
   296  	e := r.Relocs[idx]
   297  	assert(e.Kind == k)
   298  	return e.Idx
   299  }
   300  
   301  // Sync decodes a sync marker from the element bitstream and asserts
   302  // that it matches the expected marker.
   303  //
   304  // If EnableSync is false, then Sync is a no-op.
   305  func (r *Decoder) Sync(mWant SyncMarker) {
   306  	if !r.common.sync {
   307  		return
   308  	}
   309  
   310  	pos, _ := r.Data.Seek(0, io.SeekCurrent)
   311  	mHave := SyncMarker(r.rawUvarint())
   312  	writerPCs := make([]int, r.rawUvarint())
   313  	for i := range writerPCs {
   314  		writerPCs[i] = int(r.rawUvarint())
   315  	}
   316  
   317  	if mHave == mWant {
   318  		return
   319  	}
   320  
   321  	// There's some tension here between printing:
   322  	//
   323  	// (1) full file paths that tools can recognize (e.g., so emacs
   324  	//     hyperlinks the "file:line" text for easy navigation), or
   325  	//
   326  	// (2) short file paths that are easier for humans to read (e.g., by
   327  	//     omitting redundant or irrelevant details, so it's easier to
   328  	//     focus on the useful bits that remain).
   329  	//
   330  	// The current formatting favors the former, as it seems more
   331  	// helpful in practice. But perhaps the formatting could be improved
   332  	// to better address both concerns. For example, use relative file
   333  	// paths if they would be shorter, or rewrite file paths to contain
   334  	// "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
   335  	// to reliably expand that again.
   336  
   337  	fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
   338  
   339  	fmt.Printf("\nfound %v, written at:\n", mHave)
   340  	if len(writerPCs) == 0 {
   341  		fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
   342  	}
   343  	for _, pc := range writerPCs {
   344  		fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(RelocString, pc)))
   345  	}
   346  
   347  	fmt.Printf("\nexpected %v, reading at:\n", mWant)
   348  	var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
   349  	n := runtime.Callers(2, readerPCs[:])
   350  	for _, pc := range fmtFrames(readerPCs[:n]...) {
   351  		fmt.Printf("\t%s\n", pc)
   352  	}
   353  
   354  	// We already printed a stack trace for the reader, so now we can
   355  	// simply exit. Printing a second one with panic or base.Fatalf
   356  	// would just be noise.
   357  	os.Exit(1)
   358  }
   359  
   360  // Bool decodes and returns a bool value from the element bitstream.
   361  func (r *Decoder) Bool() bool {
   362  	r.Sync(SyncBool)
   363  	x, err := r.Data.ReadByte()
   364  	r.checkErr(err)
   365  	assert(x < 2)
   366  	return x != 0
   367  }
   368  
   369  // Int64 decodes and returns an int64 value from the element bitstream.
   370  func (r *Decoder) Int64() int64 {
   371  	r.Sync(SyncInt64)
   372  	return r.rawVarint()
   373  }
   374  
   375  // Int64 decodes and returns a uint64 value from the element bitstream.
   376  func (r *Decoder) Uint64() uint64 {
   377  	r.Sync(SyncUint64)
   378  	return r.rawUvarint()
   379  }
   380  
   381  // Len decodes and returns a non-negative int value from the element bitstream.
   382  func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
   383  
   384  // Int decodes and returns an int value from the element bitstream.
   385  func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
   386  
   387  // Uint decodes and returns a uint value from the element bitstream.
   388  func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
   389  
   390  // Code decodes a Code value from the element bitstream and returns
   391  // its ordinal value. It's the caller's responsibility to convert the
   392  // result to an appropriate Code type.
   393  //
   394  // TODO(mdempsky): Ideally this method would have signature "Code[T
   395  // Code] T" instead, but we don't allow generic methods and the
   396  // compiler can't depend on generics yet anyway.
   397  func (r *Decoder) Code(mark SyncMarker) int {
   398  	r.Sync(mark)
   399  	return r.Len()
   400  }
   401  
   402  // Reloc decodes a relocation of expected section k from the element
   403  // bitstream and returns an index to the referenced element.
   404  func (r *Decoder) Reloc(k RelocKind) Index {
   405  	r.Sync(SyncUseReloc)
   406  	return r.rawReloc(k, r.Len())
   407  }
   408  
   409  // String decodes and returns a string value from the element
   410  // bitstream.
   411  func (r *Decoder) String() string {
   412  	r.Sync(SyncString)
   413  	return r.common.StringIdx(r.Reloc(RelocString))
   414  }
   415  
   416  // Strings decodes and returns a variable-length slice of strings from
   417  // the element bitstream.
   418  func (r *Decoder) Strings() []string {
   419  	res := make([]string, r.Len())
   420  	for i := range res {
   421  		res[i] = r.String()
   422  	}
   423  	return res
   424  }
   425  
   426  // Value decodes and returns a constant.Value from the element
   427  // bitstream.
   428  func (r *Decoder) Value() constant.Value {
   429  	r.Sync(SyncValue)
   430  	isComplex := r.Bool()
   431  	val := r.scalar()
   432  	if isComplex {
   433  		val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
   434  	}
   435  	return val
   436  }
   437  
   438  func (r *Decoder) scalar() constant.Value {
   439  	switch tag := CodeVal(r.Code(SyncVal)); tag {
   440  	default:
   441  		panic(fmt.Errorf("unexpected scalar tag: %v", tag))
   442  
   443  	case ValBool:
   444  		return constant.MakeBool(r.Bool())
   445  	case ValString:
   446  		return constant.MakeString(r.String())
   447  	case ValInt64:
   448  		return constant.MakeInt64(r.Int64())
   449  	case ValBigInt:
   450  		return constant.Make(r.bigInt())
   451  	case ValBigRat:
   452  		num := r.bigInt()
   453  		denom := r.bigInt()
   454  		return constant.Make(new(big.Rat).SetFrac(num, denom))
   455  	case ValBigFloat:
   456  		return constant.Make(r.bigFloat())
   457  	}
   458  }
   459  
   460  func (r *Decoder) bigInt() *big.Int {
   461  	v := new(big.Int).SetBytes([]byte(r.String()))
   462  	if r.Bool() {
   463  		v.Neg(v)
   464  	}
   465  	return v
   466  }
   467  
   468  func (r *Decoder) bigFloat() *big.Float {
   469  	v := new(big.Float).SetPrec(512)
   470  	assert(v.UnmarshalText([]byte(r.String())) == nil)
   471  	return v
   472  }
   473  
   474  // @@@ Helpers
   475  
   476  // TODO(mdempsky): These should probably be removed. I think they're a
   477  // smell that the export data format is not yet quite right.
   478  
   479  // PeekPkgPath returns the package path for the specified package
   480  // index.
   481  func (pr *PkgDecoder) PeekPkgPath(idx Index) string {
   482  	var path string
   483  	{
   484  		r := pr.TempDecoder(RelocPkg, idx, SyncPkgDef)
   485  		path = r.String()
   486  		pr.RetireDecoder(&r)
   487  	}
   488  	if path == "" {
   489  		path = pr.pkgPath
   490  	}
   491  	return path
   492  }
   493  
   494  // PeekObj returns the package path, object name, and CodeObj for the
   495  // specified object index.
   496  func (pr *PkgDecoder) PeekObj(idx Index) (string, string, CodeObj) {
   497  	var ridx Index
   498  	var name string
   499  	var rcode int
   500  	{
   501  		r := pr.TempDecoder(RelocName, idx, SyncObject1)
   502  		r.Sync(SyncSym)
   503  		r.Sync(SyncPkg)
   504  		ridx = r.Reloc(RelocPkg)
   505  		name = r.String()
   506  		rcode = r.Code(SyncCodeObj)
   507  		pr.RetireDecoder(&r)
   508  	}
   509  
   510  	path := pr.PeekPkgPath(ridx)
   511  	assert(name != "")
   512  
   513  	tag := CodeObj(rcode)
   514  
   515  	return path, name, tag
   516  }
   517  
   518  // Version reports the version of the bitstream.
   519  func (w *Decoder) Version() Version { return w.common.version }
   520  

View as plain text