Source file src/internal/pkgbits/encoder.go

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package pkgbits
     6  
     7  import (
     8  	"bytes"
     9  	"crypto/md5"
    10  	"encoding/binary"
    11  	"go/constant"
    12  	"io"
    13  	"math/big"
    14  	"runtime"
    15  	"strings"
    16  )
    17  
// A PkgEncoder provides methods for encoding a package's Unified IR
// export data.
//
// Elements are created with NewEncoder or NewEncoderRaw, filled in
// through the returned Encoder, and stored back here by
// Encoder.Flush. DumpTo then serializes every stored element.
//
// Use NewPkgEncoder to obtain a usable value: StringIdx stores into
// stringsIdx, which is nil in the zero value.
type PkgEncoder struct {
	// version of the bitstream. DumpTo writes it as the first word of
	// the output.
	version Version

	// elems holds the bitstream for previously encoded elements,
	// indexed first by section (RelocKind) and then by element index.
	// A slot holds the empty string from the time NewEncoderRaw
	// reserves it until Encoder.Flush stores the finished data.
	elems [numRelocs][]string

	// stringsIdx maps previously encoded strings to their index within
	// the RelocString section, to allow deduplication. That is,
	// elems[RelocString][stringsIdx[s]] == s (if present).
	stringsIdx map[string]Index

	// syncFrames is the number of frames to write at each sync
	// marker. A negative value means sync markers are omitted; zero
	// means markers are written without any frames.
	syncFrames int
}
    36  
    37  // SyncMarkers reports whether pw uses sync markers.
    38  func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
    39  
    40  // NewPkgEncoder returns an initialized PkgEncoder.
    41  //
    42  // syncFrames is the number of caller frames that should be serialized
    43  // at Sync points. Serializing additional frames results in larger
    44  // export data files, but can help diagnosing desync errors in
    45  // higher-level Unified IR reader/writer code. If syncFrames is
    46  // negative, then sync markers are omitted entirely.
    47  func NewPkgEncoder(version Version, syncFrames int) PkgEncoder {
    48  	return PkgEncoder{
    49  		version:    version,
    50  		stringsIdx: make(map[string]Index),
    51  		syncFrames: syncFrames,
    52  	}
    53  }
    54  
    55  // DumpTo writes the package's encoded data to out0 and returns the
    56  // package fingerprint.
    57  func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
    58  	h := md5.New()
    59  	out := io.MultiWriter(out0, h)
    60  
    61  	writeUint32 := func(x uint32) {
    62  		assert(binary.Write(out, binary.LittleEndian, x) == nil)
    63  	}
    64  
    65  	writeUint32(uint32(pw.version))
    66  
    67  	if pw.version.Has(Flags) {
    68  		var flags uint32
    69  		if pw.SyncMarkers() {
    70  			flags |= flagSyncMarkers
    71  		}
    72  		writeUint32(flags)
    73  	}
    74  
    75  	// Write elemEndsEnds.
    76  	var sum uint32
    77  	for _, elems := range &pw.elems {
    78  		sum += uint32(len(elems))
    79  		writeUint32(sum)
    80  	}
    81  
    82  	// Write elemEnds.
    83  	sum = 0
    84  	for _, elems := range &pw.elems {
    85  		for _, elem := range elems {
    86  			sum += uint32(len(elem))
    87  			writeUint32(sum)
    88  		}
    89  	}
    90  
    91  	// Write elemData.
    92  	for _, elems := range &pw.elems {
    93  		for _, elem := range elems {
    94  			_, err := io.WriteString(out, elem)
    95  			assert(err == nil)
    96  		}
    97  	}
    98  
    99  	// Write fingerprint.
   100  	copy(fingerprint[:], h.Sum(nil))
   101  	_, err := out0.Write(fingerprint[:])
   102  	assert(err == nil)
   103  
   104  	return
   105  }
   106  
   107  // StringIdx adds a string value to the strings section, if not
   108  // already present, and returns its index.
   109  func (pw *PkgEncoder) StringIdx(s string) Index {
   110  	if idx, ok := pw.stringsIdx[s]; ok {
   111  		assert(pw.elems[RelocString][idx] == s)
   112  		return idx
   113  	}
   114  
   115  	idx := Index(len(pw.elems[RelocString]))
   116  	pw.elems[RelocString] = append(pw.elems[RelocString], s)
   117  	pw.stringsIdx[s] = idx
   118  	return idx
   119  }
   120  
   121  // NewEncoder returns an Encoder for a new element within the given
   122  // section, and encodes the given SyncMarker as the start of the
   123  // element bitstream.
   124  func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
   125  	e := pw.NewEncoderRaw(k)
   126  	e.Sync(marker)
   127  	return e
   128  }
   129  
   130  // NewEncoderRaw returns an Encoder for a new element within the given
   131  // section.
   132  //
   133  // Most callers should use NewEncoder instead.
   134  func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
   135  	idx := Index(len(pw.elems[k]))
   136  	pw.elems[k] = append(pw.elems[k], "") // placeholder
   137  
   138  	return Encoder{
   139  		p:   pw,
   140  		k:   k,
   141  		Idx: idx,
   142  	}
   143  }
   144  
// An Encoder provides methods for encoding an individual element's
// bitstream data.
//
// Values are accumulated in Data through the encoding methods. Flush
// then prepends the relocation table and stores the result in the
// owning PkgEncoder.
type Encoder struct {
	// p is the PkgEncoder that created this Encoder. It supplies the
	// sync-marker settings and the strings section, and receives the
	// finished element on Flush.
	p *PkgEncoder

	// Relocs lists the distinct relocations used by this element, in
	// the order they were first added. RelocMap records each entry's
	// position in Relocs for deduplication; it is allocated on first
	// use.
	Relocs   []RelocEnt
	RelocMap map[RelocEnt]uint32
	Data     bytes.Buffer // accumulated element bitstream data

	// encodingRelocHeader is set by Flush before it writes the
	// relocation header. While it is set, Sync writes markers without
	// stack frames.
	encodingRelocHeader bool

	// k is the section this element belongs to.
	k   RelocKind
	Idx Index // index within relocation section
}
   159  
   160  // Flush finalizes the element's bitstream and returns its Index.
   161  func (w *Encoder) Flush() Index {
   162  	var sb strings.Builder
   163  
   164  	// Backup the data so we write the relocations at the front.
   165  	var tmp bytes.Buffer
   166  	io.Copy(&tmp, &w.Data)
   167  
   168  	// TODO(mdempsky): Consider writing these out separately so they're
   169  	// easier to strip, along with function bodies, so that we can prune
   170  	// down to just the data that's relevant to go/types.
   171  	if w.encodingRelocHeader {
   172  		panic("encodingRelocHeader already true; recursive flush?")
   173  	}
   174  	w.encodingRelocHeader = true
   175  	w.Sync(SyncRelocs)
   176  	w.Len(len(w.Relocs))
   177  	for _, rEnt := range w.Relocs {
   178  		w.Sync(SyncReloc)
   179  		w.Len(int(rEnt.Kind))
   180  		w.Len(int(rEnt.Idx))
   181  	}
   182  
   183  	io.Copy(&sb, &w.Data)
   184  	io.Copy(&sb, &tmp)
   185  	w.p.elems[w.k][w.Idx] = sb.String()
   186  
   187  	return w.Idx
   188  }
   189  
   190  func (w *Encoder) checkErr(err error) {
   191  	if err != nil {
   192  		panicf("unexpected encoding error: %v", err)
   193  	}
   194  }
   195  
   196  func (w *Encoder) rawUvarint(x uint64) {
   197  	var buf [binary.MaxVarintLen64]byte
   198  	n := binary.PutUvarint(buf[:], x)
   199  	_, err := w.Data.Write(buf[:n])
   200  	w.checkErr(err)
   201  }
   202  
   203  func (w *Encoder) rawVarint(x int64) {
   204  	// Zig-zag encode.
   205  	ux := uint64(x) << 1
   206  	if x < 0 {
   207  		ux = ^ux
   208  	}
   209  
   210  	w.rawUvarint(ux)
   211  }
   212  
   213  func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
   214  	e := RelocEnt{r, idx}
   215  	if w.RelocMap != nil {
   216  		if i, ok := w.RelocMap[e]; ok {
   217  			return int(i)
   218  		}
   219  	} else {
   220  		w.RelocMap = make(map[RelocEnt]uint32)
   221  	}
   222  
   223  	i := len(w.Relocs)
   224  	w.RelocMap[e] = uint32(i)
   225  	w.Relocs = append(w.Relocs, e)
   226  	return i
   227  }
   228  
   229  func (w *Encoder) Sync(m SyncMarker) {
   230  	if !w.p.SyncMarkers() {
   231  		return
   232  	}
   233  
   234  	// Writing out stack frame string references requires working
   235  	// relocations, but writing out the relocations themselves involves
   236  	// sync markers. To prevent infinite recursion, we simply trim the
   237  	// stack frame for sync markers within the relocation header.
   238  	var frames []string
   239  	if !w.encodingRelocHeader && w.p.syncFrames > 0 {
   240  		pcs := make([]uintptr, w.p.syncFrames)
   241  		n := runtime.Callers(2, pcs)
   242  		frames = fmtFrames(pcs[:n]...)
   243  	}
   244  
   245  	// TODO(mdempsky): Save space by writing out stack frames as a
   246  	// linked list so we can share common stack frames.
   247  	w.rawUvarint(uint64(m))
   248  	w.rawUvarint(uint64(len(frames)))
   249  	for _, frame := range frames {
   250  		w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
   251  	}
   252  }
   253  
   254  // Bool encodes and writes a bool value into the element bitstream,
   255  // and then returns the bool value.
   256  //
   257  // For simple, 2-alternative encodings, the idiomatic way to call Bool
   258  // is something like:
   259  //
   260  //	if w.Bool(x != 0) {
   261  //		// alternative #1
   262  //	} else {
   263  //		// alternative #2
   264  //	}
   265  //
   266  // For multi-alternative encodings, use Code instead.
   267  func (w *Encoder) Bool(b bool) bool {
   268  	w.Sync(SyncBool)
   269  	var x byte
   270  	if b {
   271  		x = 1
   272  	}
   273  	err := w.Data.WriteByte(x)
   274  	w.checkErr(err)
   275  	return b
   276  }
   277  
   278  // Int64 encodes and writes an int64 value into the element bitstream.
   279  func (w *Encoder) Int64(x int64) {
   280  	w.Sync(SyncInt64)
   281  	w.rawVarint(x)
   282  }
   283  
   284  // Uint64 encodes and writes a uint64 value into the element bitstream.
   285  func (w *Encoder) Uint64(x uint64) {
   286  	w.Sync(SyncUint64)
   287  	w.rawUvarint(x)
   288  }
   289  
   290  // Len encodes and writes a non-negative int value into the element bitstream.
   291  func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
   292  
   293  // Int encodes and writes an int value into the element bitstream.
   294  func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
   295  
   296  // Uint encodes and writes a uint value into the element bitstream.
   297  func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
   298  
   299  // Reloc encodes and writes a relocation for the given (section,
   300  // index) pair into the element bitstream.
   301  //
   302  // Note: Only the index is formally written into the element
   303  // bitstream, so bitstream decoders must know from context which
   304  // section an encoded relocation refers to.
   305  func (w *Encoder) Reloc(r RelocKind, idx Index) {
   306  	w.Sync(SyncUseReloc)
   307  	w.Len(w.rawReloc(r, idx))
   308  }
   309  
   310  // Code encodes and writes a Code value into the element bitstream.
   311  func (w *Encoder) Code(c Code) {
   312  	w.Sync(c.Marker())
   313  	w.Len(c.Value())
   314  }
   315  
   316  // String encodes and writes a string value into the element
   317  // bitstream.
   318  //
   319  // Internally, strings are deduplicated by adding them to the strings
   320  // section (if not already present), and then writing a relocation
   321  // into the element bitstream.
   322  func (w *Encoder) String(s string) {
   323  	w.StringRef(w.p.StringIdx(s))
   324  }
   325  
   326  // StringRef writes a reference to the given index, which must be a
   327  // previously encoded string value.
   328  func (w *Encoder) StringRef(idx Index) {
   329  	w.Sync(SyncString)
   330  	w.Reloc(RelocString, idx)
   331  }
   332  
   333  // Strings encodes and writes a variable-length slice of strings into
   334  // the element bitstream.
   335  func (w *Encoder) Strings(ss []string) {
   336  	w.Len(len(ss))
   337  	for _, s := range ss {
   338  		w.String(s)
   339  	}
   340  }
   341  
   342  // Value encodes and writes a constant.Value into the element
   343  // bitstream.
   344  func (w *Encoder) Value(val constant.Value) {
   345  	w.Sync(SyncValue)
   346  	if w.Bool(val.Kind() == constant.Complex) {
   347  		w.scalar(constant.Real(val))
   348  		w.scalar(constant.Imag(val))
   349  	} else {
   350  		w.scalar(val)
   351  	}
   352  }
   353  
   354  func (w *Encoder) scalar(val constant.Value) {
   355  	switch v := constant.Val(val).(type) {
   356  	default:
   357  		panicf("unhandled %v (%v)", val, val.Kind())
   358  	case bool:
   359  		w.Code(ValBool)
   360  		w.Bool(v)
   361  	case string:
   362  		w.Code(ValString)
   363  		w.String(v)
   364  	case int64:
   365  		w.Code(ValInt64)
   366  		w.Int64(v)
   367  	case *big.Int:
   368  		w.Code(ValBigInt)
   369  		w.bigInt(v)
   370  	case *big.Rat:
   371  		w.Code(ValBigRat)
   372  		w.bigInt(v.Num())
   373  		w.bigInt(v.Denom())
   374  	case *big.Float:
   375  		w.Code(ValBigFloat)
   376  		w.bigFloat(v)
   377  	}
   378  }
   379  
   380  func (w *Encoder) bigInt(v *big.Int) {
   381  	b := v.Bytes()
   382  	w.String(string(b)) // TODO: More efficient encoding.
   383  	w.Bool(v.Sign() < 0)
   384  }
   385  
   386  func (w *Encoder) bigFloat(v *big.Float) {
   387  	b := v.Append(nil, 'p', -1)
   388  	w.String(string(b)) // TODO: More efficient encoding.
   389  }
   390  
   391  // Version reports the version of the bitstream.
   392  func (w *Encoder) Version() Version { return w.p.version }
   393  

View as plain text