Source file src/cmd/internal/objfile/disasm.go

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package objfile
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"container/list"
    11  	"debug/gosym"
    12  	"encoding/binary"
    13  	"fmt"
    14  	"io"
    15  	"os"
    16  	"path/filepath"
    17  	"regexp"
    18  	"sort"
    19  	"strings"
    20  	"text/tabwriter"
    21  
    22  	"cmd/internal/src"
    23  
    24  	"golang.org/x/arch/arm/armasm"
    25  	"golang.org/x/arch/arm64/arm64asm"
    26  	"golang.org/x/arch/loong64/loong64asm"
    27  	"golang.org/x/arch/ppc64/ppc64asm"
    28  	"golang.org/x/arch/riscv64/riscv64asm"
    29  	"golang.org/x/arch/s390x/s390xasm"
    30  	"golang.org/x/arch/x86/x86asm"
    31  )
    32  
    33  // Disasm is a disassembler for a given File.
    34  type Disasm struct {
    35  	syms      []Sym            //symbols in file, sorted by address
    36  	pcln      Liner            // pcln table
    37  	text      []byte           // bytes of text segment (actual instructions)
    38  	textStart uint64           // start PC of text
    39  	textEnd   uint64           // end PC of text
    40  	goarch    string           // GOARCH string
    41  	disasm    disasmFunc       // disassembler function for goarch
    42  	byteOrder binary.ByteOrder // byte order for goarch
    43  }
    44  
    45  // Disasm returns a disassembler for the file f.
    46  func (e *Entry) Disasm() (*Disasm, error) {
    47  	syms, err := e.Symbols()
    48  	if err != nil {
    49  		return nil, err
    50  	}
    51  
    52  	pcln, err := e.PCLineTable()
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  
    57  	textStart, textBytes, err := e.Text()
    58  	if err != nil {
    59  		return nil, err
    60  	}
    61  
    62  	goarch := e.GOARCH()
    63  	disasm := disasms[goarch]
    64  	byteOrder := byteOrders[goarch]
    65  	if disasm == nil || byteOrder == nil {
    66  		return nil, fmt.Errorf("unsupported architecture %q", goarch)
    67  	}
    68  
    69  	// Filter out section symbols, overwriting syms in place.
    70  	keep := syms[:0]
    71  	for _, sym := range syms {
    72  		switch sym.Name {
    73  		case "runtime.text", "text", "_text", "runtime.etext", "etext", "_etext":
    74  			// drop
    75  		default:
    76  			keep = append(keep, sym)
    77  		}
    78  	}
    79  	syms = keep
    80  	d := &Disasm{
    81  		syms:      syms,
    82  		pcln:      pcln,
    83  		text:      textBytes,
    84  		textStart: textStart,
    85  		textEnd:   textStart + uint64(len(textBytes)),
    86  		goarch:    goarch,
    87  		disasm:    disasm,
    88  		byteOrder: byteOrder,
    89  	}
    90  
    91  	return d, nil
    92  }
    93  
    94  // lookup finds the symbol name containing addr.
    95  func (d *Disasm) lookup(addr uint64) (name string, base uint64) {
    96  	i := sort.Search(len(d.syms), func(i int) bool { return addr < d.syms[i].Addr })
    97  	if i > 0 {
    98  		s := d.syms[i-1]
    99  		if s.Addr != 0 && s.Addr <= addr && addr < s.Addr+uint64(s.Size) {
   100  			return s.Name, s.Addr
   101  		}
   102  	}
   103  	return "", 0
   104  }
   105  
   106  // base returns the final element in the path.
   107  // It works on both Windows and Unix paths,
   108  // regardless of host operating system.
   109  func base(path string) string {
   110  	path = path[strings.LastIndex(path, "/")+1:]
   111  	path = path[strings.LastIndex(path, `\`)+1:]
   112  	return path
   113  }
   114  
   115  // CachedFile contains the content of a file split into lines.
   116  type CachedFile struct {
   117  	FileName string
   118  	Lines    [][]byte
   119  }
   120  
   121  // FileCache is a simple LRU cache of file contents.
   122  type FileCache struct {
   123  	files  *list.List
   124  	maxLen int
   125  }
   126  
   127  // NewFileCache returns a FileCache which can contain up to maxLen cached file contents.
   128  func NewFileCache(maxLen int) *FileCache {
   129  	return &FileCache{
   130  		files:  list.New(),
   131  		maxLen: maxLen,
   132  	}
   133  }
   134  
   135  // Line returns the source code line for the given file and line number.
   136  // If the file is not already cached, reads it, inserts it into the cache,
   137  // and removes the least recently used file if necessary.
   138  // If the file is in cache, it is moved to the front of the list.
   139  func (fc *FileCache) Line(filename string, line int) ([]byte, error) {
   140  	if filepath.Ext(filename) != ".go" {
   141  		return nil, nil
   142  	}
   143  
   144  	// Clean filenames returned by src.Pos.SymFilename()
   145  	// or src.PosBase.SymFilename() removing
   146  	// the leading src.FileSymPrefix.
   147  	filename = strings.TrimPrefix(filename, src.FileSymPrefix)
   148  
   149  	// Expand literal "$GOROOT" rewritten by obj.AbsFile()
   150  	filename = filepath.Clean(os.ExpandEnv(filename))
   151  
   152  	var cf *CachedFile
   153  	var e *list.Element
   154  
   155  	for e = fc.files.Front(); e != nil; e = e.Next() {
   156  		cf = e.Value.(*CachedFile)
   157  		if cf.FileName == filename {
   158  			break
   159  		}
   160  	}
   161  
   162  	if e == nil {
   163  		content, err := os.ReadFile(filename)
   164  		if err != nil {
   165  			return nil, err
   166  		}
   167  
   168  		cf = &CachedFile{
   169  			FileName: filename,
   170  			Lines:    bytes.Split(content, []byte{'\n'}),
   171  		}
   172  		fc.files.PushFront(cf)
   173  
   174  		if fc.files.Len() >= fc.maxLen {
   175  			fc.files.Remove(fc.files.Back())
   176  		}
   177  	} else {
   178  		fc.files.MoveToFront(e)
   179  	}
   180  
   181  	// because //line directives can be out-of-range. (#36683)
   182  	if line-1 >= len(cf.Lines) || line-1 < 0 {
   183  		return nil, nil
   184  	}
   185  
   186  	return cf.Lines[line-1], nil
   187  }
   188  
   189  // Print prints a disassembly of the file to w.
   190  // If filter is non-nil, the disassembly only includes functions with names matching filter.
   191  // If printCode is true, the disassembly includs corresponding source lines.
   192  // The disassembly only includes functions that overlap the range [start, end).
   193  func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64, printCode bool, gnuAsm bool) {
   194  	if start < d.textStart {
   195  		start = d.textStart
   196  	}
   197  	if end > d.textEnd {
   198  		end = d.textEnd
   199  	}
   200  	printed := false
   201  	bw := bufio.NewWriter(w)
   202  
   203  	var fc *FileCache
   204  	if printCode {
   205  		fc = NewFileCache(8)
   206  	}
   207  
   208  	tw := tabwriter.NewWriter(bw, 18, 8, 1, '\t', tabwriter.StripEscape)
   209  	for _, sym := range d.syms {
   210  		symStart := sym.Addr
   211  		symEnd := sym.Addr + uint64(sym.Size)
   212  		relocs := sym.Relocs
   213  		if sym.Code != 'T' && sym.Code != 't' ||
   214  			symStart < d.textStart ||
   215  			symEnd <= start || end <= symStart ||
   216  			filter != nil && !filter.MatchString(sym.Name) {
   217  			continue
   218  		}
   219  		if printed {
   220  			fmt.Fprintf(bw, "\n")
   221  		}
   222  		printed = true
   223  
   224  		file, _, _ := d.pcln.PCToLine(sym.Addr)
   225  		fmt.Fprintf(bw, "TEXT %s(SB) %s\n", sym.Name, file)
   226  
   227  		if symEnd > end {
   228  			symEnd = end
   229  		}
   230  		code := d.text[:end-d.textStart]
   231  
   232  		var lastFile string
   233  		var lastLine int
   234  
   235  		d.Decode(symStart, symEnd, relocs, gnuAsm, func(pc, size uint64, file string, line int, text string) {
   236  			i := pc - d.textStart
   237  
   238  			if printCode {
   239  				if file != lastFile || line != lastLine {
   240  					if srcLine, err := fc.Line(file, line); err == nil {
   241  						fmt.Fprintf(tw, "%s%s%s\n", []byte{tabwriter.Escape}, srcLine, []byte{tabwriter.Escape})
   242  					}
   243  
   244  					lastFile, lastLine = file, line
   245  				}
   246  
   247  				fmt.Fprintf(tw, "  %#x\t", pc)
   248  			} else {
   249  				fmt.Fprintf(tw, "  %s:%d\t%#x\t", base(file), line, pc)
   250  			}
   251  
   252  			if size%4 != 0 || d.goarch == "386" || d.goarch == "amd64" {
   253  				// Print instruction as bytes.
   254  				fmt.Fprintf(tw, "%x", code[i:i+size])
   255  			} else {
   256  				// Print instruction as 32-bit words.
   257  				for j := uint64(0); j < size; j += 4 {
   258  					if j > 0 {
   259  						fmt.Fprintf(tw, " ")
   260  					}
   261  					fmt.Fprintf(tw, "%08x", d.byteOrder.Uint32(code[i+j:]))
   262  				}
   263  			}
   264  			fmt.Fprintf(tw, "\t%s\t\n", text)
   265  		})
   266  		tw.Flush()
   267  	}
   268  	bw.Flush()
   269  }
   270  
   271  // Decode disassembles the text segment range [start, end), calling f for each instruction.
   272  func (d *Disasm) Decode(start, end uint64, relocs []Reloc, gnuAsm bool, f func(pc, size uint64, file string, line int, text string)) {
   273  	if start < d.textStart {
   274  		start = d.textStart
   275  	}
   276  	if end > d.textEnd {
   277  		end = d.textEnd
   278  	}
   279  	code := d.text[:end-d.textStart]
   280  	lookup := d.lookup
   281  	for pc := start; pc < end; {
   282  		i := pc - d.textStart
   283  		text, size := d.disasm(code[i:], pc, lookup, d.byteOrder, gnuAsm)
   284  		file, line, _ := d.pcln.PCToLine(pc)
   285  		sep := "\t"
   286  		for len(relocs) > 0 && relocs[0].Addr < i+uint64(size) {
   287  			text += sep + relocs[0].Stringer.String(pc-start)
   288  			sep = " "
   289  			relocs = relocs[1:]
   290  		}
   291  		f(pc, uint64(size), file, line, text)
   292  		pc += uint64(size)
   293  	}
   294  }
   295  
   296  type lookupFunc = func(addr uint64) (sym string, base uint64)
   297  type disasmFunc func(code []byte, pc uint64, lookup lookupFunc, ord binary.ByteOrder, _ bool) (text string, size int)
   298  
   299  func disasm_386(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
   300  	return disasm_x86(code, pc, lookup, 32, gnuAsm)
   301  }
   302  
   303  func disasm_amd64(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
   304  	return disasm_x86(code, pc, lookup, 64, gnuAsm)
   305  }
   306  
   307  func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int, gnuAsm bool) (string, int) {
   308  	inst, err := x86asm.Decode(code, arch)
   309  	var text string
   310  	size := inst.Len
   311  	if err != nil || size == 0 || inst.Op == 0 {
   312  		size = 1
   313  		text = "?"
   314  	} else {
   315  		if gnuAsm {
   316  			text = fmt.Sprintf("%-36s // %s", x86asm.GoSyntax(inst, pc, lookup), x86asm.GNUSyntax(inst, pc, nil))
   317  		} else {
   318  			text = x86asm.GoSyntax(inst, pc, lookup)
   319  		}
   320  	}
   321  	return text, size
   322  }
   323  
   324  type textReader struct {
   325  	code []byte
   326  	pc   uint64
   327  }
   328  
   329  func (r textReader) ReadAt(data []byte, off int64) (n int, err error) {
   330  	if off < 0 || uint64(off) < r.pc {
   331  		return 0, io.EOF
   332  	}
   333  	d := uint64(off) - r.pc
   334  	if d >= uint64(len(r.code)) {
   335  		return 0, io.EOF
   336  	}
   337  	n = copy(data, r.code[d:])
   338  	if n < len(data) {
   339  		err = io.ErrUnexpectedEOF
   340  	}
   341  	return
   342  }
   343  
   344  func disasm_arm(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
   345  	inst, err := armasm.Decode(code, armasm.ModeARM)
   346  	var text string
   347  	size := inst.Len
   348  	if err != nil || size == 0 || inst.Op == 0 {
   349  		size = 4
   350  		text = "?"
   351  	} else if gnuAsm {
   352  		text = fmt.Sprintf("%-36s // %s", armasm.GoSyntax(inst, pc, lookup, textReader{code, pc}), armasm.GNUSyntax(inst))
   353  	} else {
   354  		text = armasm.GoSyntax(inst, pc, lookup, textReader{code, pc})
   355  	}
   356  	return text, size
   357  }
   358  
   359  func disasm_arm64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
   360  	inst, err := arm64asm.Decode(code)
   361  	var text string
   362  	if err != nil || inst.Op == 0 {
   363  		text = "?"
   364  	} else if gnuAsm {
   365  		text = fmt.Sprintf("%-36s // %s", arm64asm.GoSyntax(inst, pc, lookup, textReader{code, pc}), arm64asm.GNUSyntax(inst))
   366  	} else {
   367  		text = arm64asm.GoSyntax(inst, pc, lookup, textReader{code, pc})
   368  	}
   369  	return text, 4
   370  }
   371  
   372  func disasm_loong64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
   373  	inst, err := loong64asm.Decode(code)
   374  	var text string
   375  	if err != nil || inst.Op == 0 {
   376  		text = "?"
   377  	} else if gnuAsm {
   378  		text = fmt.Sprintf("%-36s // %s", loong64asm.GoSyntax(inst, pc, lookup), loong64asm.GNUSyntax(inst))
   379  	} else {
   380  		text = loong64asm.GoSyntax(inst, pc, lookup)
   381  	}
   382  	return text, 4
   383  }
   384  
   385  func disasm_ppc64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
   386  	inst, err := ppc64asm.Decode(code, byteOrder)
   387  	var text string
   388  	size := inst.Len
   389  	if err != nil || size == 0 {
   390  		size = 4
   391  		text = "?"
   392  	} else {
   393  		if gnuAsm {
   394  			text = fmt.Sprintf("%-36s // %s", ppc64asm.GoSyntax(inst, pc, lookup), ppc64asm.GNUSyntax(inst, pc))
   395  		} else {
   396  			text = ppc64asm.GoSyntax(inst, pc, lookup)
   397  		}
   398  	}
   399  	return text, size
   400  }
   401  
   402  func disasm_riscv64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
   403  	inst, err := riscv64asm.Decode(code)
   404  	var text string
   405  	if err != nil || inst.Op == 0 {
   406  		text = "?"
   407  	} else if gnuAsm {
   408  		text = fmt.Sprintf("%-36s // %s", riscv64asm.GoSyntax(inst, pc, lookup, textReader{code, pc}), riscv64asm.GNUSyntax(inst))
   409  	} else {
   410  		text = riscv64asm.GoSyntax(inst, pc, lookup, textReader{code, pc})
   411  	}
   412  	return text, 4
   413  }
   414  
   415  func disasm_s390x(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
   416  	inst, err := s390xasm.Decode(code)
   417  	var text string
   418  	size := inst.Len
   419  	if err != nil || size == 0 || inst.Op == 0 {
   420  		size = 2
   421  		text = "?"
   422  	} else {
   423  		if gnuAsm {
   424  			text = fmt.Sprintf("%-36s // %s", s390xasm.GoSyntax(inst, pc, lookup), s390xasm.GNUSyntax(inst, pc))
   425  		} else {
   426  			text = s390xasm.GoSyntax(inst, pc, lookup)
   427  		}
   428  	}
   429  	return text, size
   430  }
   431  
   432  var disasms = map[string]disasmFunc{
   433  	"386":     disasm_386,
   434  	"amd64":   disasm_amd64,
   435  	"arm":     disasm_arm,
   436  	"arm64":   disasm_arm64,
   437  	"loong64": disasm_loong64,
   438  	"ppc64":   disasm_ppc64,
   439  	"ppc64le": disasm_ppc64,
   440  	"riscv64": disasm_riscv64,
   441  	"s390x":   disasm_s390x,
   442  }
   443  
   444  var byteOrders = map[string]binary.ByteOrder{
   445  	"386":     binary.LittleEndian,
   446  	"amd64":   binary.LittleEndian,
   447  	"arm":     binary.LittleEndian,
   448  	"arm64":   binary.LittleEndian,
   449  	"loong64": binary.LittleEndian,
   450  	"ppc64":   binary.BigEndian,
   451  	"ppc64le": binary.LittleEndian,
   452  	"riscv64": binary.LittleEndian,
   453  	"s390x":   binary.BigEndian,
   454  }
   455  
   456  type Liner interface {
   457  	// Given a pc, returns the corresponding file, line, and function data.
   458  	// If unknown, returns "",0,nil.
   459  	PCToLine(uint64) (string, int, *gosym.Func)
   460  }
   461  

View as plain text