Source file src/cmd/internal/disasm/disasm.go

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package disasm provides disassembly routines.
     6  //
     7  // It is broken out from cmd/internal/objfile so tools that don't need
     8  // disassembling don't need to depend on x/arch disassembler code.
     9  package disasm
    10  
    11  import (
    12  	"bufio"
    13  	"bytes"
    14  	"container/list"
    15  	"encoding/binary"
    16  	"fmt"
    17  	"io"
    18  	"os"
    19  	"path/filepath"
    20  	"regexp"
    21  	"sort"
    22  	"strings"
    23  	"text/tabwriter"
    24  
    25  	"cmd/internal/objfile"
    26  	"cmd/internal/src"
    27  
    28  	"golang.org/x/arch/arm/armasm"
    29  	"golang.org/x/arch/arm64/arm64asm"
    30  	"golang.org/x/arch/loong64/loong64asm"
    31  	"golang.org/x/arch/ppc64/ppc64asm"
    32  	"golang.org/x/arch/riscv64/riscv64asm"
    33  	"golang.org/x/arch/s390x/s390xasm"
    34  	"golang.org/x/arch/x86/x86asm"
    35  )
    36  
    37  // Disasm is a disassembler for a given File.
    38  type Disasm struct {
    39  	syms      []objfile.Sym    // symbols in file, sorted by address
    40  	pcln      objfile.Liner    // pcln table
    41  	text      []byte           // bytes of text segment (actual instructions)
    42  	textStart uint64           // start PC of text
    43  	textEnd   uint64           // end PC of text
    44  	goarch    string           // GOARCH string
    45  	disasm    disasmFunc       // disassembler function for goarch
    46  	byteOrder binary.ByteOrder // byte order for goarch
    47  }
    48  
    49  // DisasmForFile returns a disassembler for the file f.
    50  func DisasmForFile(f *objfile.File) (*Disasm, error) {
    51  	return disasmForEntry(f.Entries()[0])
    52  }
    53  
    54  func disasmForEntry(e *objfile.Entry) (*Disasm, error) {
    55  	syms, err := e.Symbols()
    56  	if err != nil {
    57  		return nil, err
    58  	}
    59  
    60  	pcln, err := e.PCLineTable()
    61  	if err != nil {
    62  		return nil, err
    63  	}
    64  
    65  	textStart, textBytes, err := e.Text()
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  
    70  	goarch := e.GOARCH()
    71  	disasm := disasms[goarch]
    72  	byteOrder := byteOrders[goarch]
    73  	if disasm == nil || byteOrder == nil {
    74  		return nil, fmt.Errorf("unsupported architecture %q", goarch)
    75  	}
    76  
    77  	// Filter out section symbols, overwriting syms in place.
    78  	keep := syms[:0]
    79  	for _, sym := range syms {
    80  		switch sym.Name {
    81  		case "runtime.text", "text", "_text", "runtime.etext", "etext", "_etext":
    82  			// drop
    83  		default:
    84  			keep = append(keep, sym)
    85  		}
    86  	}
    87  	syms = keep
    88  	d := &Disasm{
    89  		syms:      syms,
    90  		pcln:      pcln,
    91  		text:      textBytes,
    92  		textStart: textStart,
    93  		textEnd:   textStart + uint64(len(textBytes)),
    94  		goarch:    goarch,
    95  		disasm:    disasm,
    96  		byteOrder: byteOrder,
    97  	}
    98  
    99  	return d, nil
   100  }
   101  
   102  // lookup finds the symbol name containing addr.
   103  func (d *Disasm) lookup(addr uint64) (name string, base uint64) {
   104  	i := sort.Search(len(d.syms), func(i int) bool { return addr < d.syms[i].Addr })
   105  	if i > 0 {
   106  		s := d.syms[i-1]
   107  		if s.Addr != 0 && s.Addr <= addr && addr < s.Addr+uint64(s.Size) {
   108  			return s.Name, s.Addr
   109  		}
   110  	}
   111  	return "", 0
   112  }
   113  
   114  // base returns the final element in the path.
   115  // It works on both Windows and Unix paths,
   116  // regardless of host operating system.
   117  func base(path string) string {
   118  	path = path[strings.LastIndex(path, "/")+1:]
   119  	path = path[strings.LastIndex(path, `\`)+1:]
   120  	return path
   121  }
   122  
   123  // CachedFile contains the content of a file split into lines.
   124  type CachedFile struct {
   125  	FileName string
   126  	Lines    [][]byte
   127  }
   128  
   129  // FileCache is a simple LRU cache of file contents.
   130  type FileCache struct {
   131  	files  *list.List
   132  	maxLen int
   133  }
   134  
   135  // NewFileCache returns a FileCache which can contain up to maxLen cached file contents.
   136  func NewFileCache(maxLen int) *FileCache {
   137  	return &FileCache{
   138  		files:  list.New(),
   139  		maxLen: maxLen,
   140  	}
   141  }
   142  
   143  // Line returns the source code line for the given file and line number.
   144  // If the file is not already cached, reads it, inserts it into the cache,
   145  // and removes the least recently used file if necessary.
   146  // If the file is in cache, it is moved to the front of the list.
   147  func (fc *FileCache) Line(filename string, line int) ([]byte, error) {
   148  	if filepath.Ext(filename) != ".go" {
   149  		return nil, nil
   150  	}
   151  
   152  	// Clean filenames returned by src.Pos.SymFilename()
   153  	// or src.PosBase.SymFilename() removing
   154  	// the leading src.FileSymPrefix.
   155  	filename = strings.TrimPrefix(filename, src.FileSymPrefix)
   156  
   157  	// Expand literal "$GOROOT" rewritten by obj.AbsFile()
   158  	filename = filepath.Clean(os.ExpandEnv(filename))
   159  
   160  	var cf *CachedFile
   161  	var e *list.Element
   162  
   163  	for e = fc.files.Front(); e != nil; e = e.Next() {
   164  		cf = e.Value.(*CachedFile)
   165  		if cf.FileName == filename {
   166  			break
   167  		}
   168  	}
   169  
   170  	if e == nil {
   171  		content, err := os.ReadFile(filename)
   172  		if err != nil {
   173  			return nil, err
   174  		}
   175  
   176  		cf = &CachedFile{
   177  			FileName: filename,
   178  			Lines:    bytes.Split(content, []byte{'\n'}),
   179  		}
   180  		fc.files.PushFront(cf)
   181  
   182  		if fc.files.Len() >= fc.maxLen {
   183  			fc.files.Remove(fc.files.Back())
   184  		}
   185  	} else {
   186  		fc.files.MoveToFront(e)
   187  	}
   188  
   189  	// because //line directives can be out-of-range. (#36683)
   190  	if line-1 >= len(cf.Lines) || line-1 < 0 {
   191  		return nil, nil
   192  	}
   193  
   194  	return cf.Lines[line-1], nil
   195  }
   196  
   197  // Print prints a disassembly of the file to w.
   198  // If filter is non-nil, the disassembly only includes functions with names matching filter.
   199  // If printCode is true, the disassembly includes corresponding source lines.
   200  // The disassembly only includes functions that overlap the range [start, end).
   201  func (d *Disasm) Print(w io.Writer, filter *regexp.Regexp, start, end uint64, printCode bool, gnuAsm bool) {
   202  	if start < d.textStart {
   203  		start = d.textStart
   204  	}
   205  	if end > d.textEnd {
   206  		end = d.textEnd
   207  	}
   208  	printed := false
   209  	bw := bufio.NewWriter(w)
   210  
   211  	var fc *FileCache
   212  	if printCode {
   213  		fc = NewFileCache(8)
   214  	}
   215  
   216  	tw := tabwriter.NewWriter(bw, 18, 8, 1, '\t', tabwriter.StripEscape)
   217  	for _, sym := range d.syms {
   218  		symStart := sym.Addr
   219  		symEnd := sym.Addr + uint64(sym.Size)
   220  		relocs := sym.Relocs
   221  		if sym.Code != 'T' && sym.Code != 't' ||
   222  			symStart < d.textStart ||
   223  			symEnd <= start || end <= symStart ||
   224  			filter != nil && !filter.MatchString(sym.Name) {
   225  			continue
   226  		}
   227  		if printed {
   228  			fmt.Fprintf(bw, "\n")
   229  		}
   230  		printed = true
   231  
   232  		file, _, _ := d.pcln.PCToLine(sym.Addr)
   233  		fmt.Fprintf(bw, "TEXT %s(SB) %s\n", sym.Name, file)
   234  
   235  		if symEnd > end {
   236  			symEnd = end
   237  		}
   238  		code := d.text[:end-d.textStart]
   239  
   240  		var lastFile string
   241  		var lastLine int
   242  
   243  		d.Decode(symStart, symEnd, relocs, gnuAsm, func(pc, size uint64, file string, line int, text string) {
   244  			i := pc - d.textStart
   245  
   246  			if printCode {
   247  				if file != lastFile || line != lastLine {
   248  					if srcLine, err := fc.Line(file, line); err == nil {
   249  						fmt.Fprintf(tw, "%s%s%s\n", []byte{tabwriter.Escape}, srcLine, []byte{tabwriter.Escape})
   250  					}
   251  
   252  					lastFile, lastLine = file, line
   253  				}
   254  
   255  				fmt.Fprintf(tw, "  %#x\t", pc)
   256  			} else {
   257  				fmt.Fprintf(tw, "  %s:%d\t%#x\t", base(file), line, pc)
   258  			}
   259  
   260  			if size%4 != 0 || d.goarch == "386" || d.goarch == "amd64" {
   261  				// Print instruction as bytes.
   262  				fmt.Fprintf(tw, "%x", code[i:i+size])
   263  			} else {
   264  				// Print instruction as 32-bit words.
   265  				for j := uint64(0); j < size; j += 4 {
   266  					if j > 0 {
   267  						fmt.Fprintf(tw, " ")
   268  					}
   269  					fmt.Fprintf(tw, "%08x", d.byteOrder.Uint32(code[i+j:]))
   270  				}
   271  			}
   272  			fmt.Fprintf(tw, "\t%s\t\n", text)
   273  		})
   274  		tw.Flush()
   275  	}
   276  	bw.Flush()
   277  }
   278  
   279  // Decode disassembles the text segment range [start, end), calling f for each instruction.
   280  func (d *Disasm) Decode(start, end uint64, relocs []objfile.Reloc, gnuAsm bool, f func(pc, size uint64, file string, line int, text string)) {
   281  	if start < d.textStart {
   282  		start = d.textStart
   283  	}
   284  	if end > d.textEnd {
   285  		end = d.textEnd
   286  	}
   287  	code := d.text[:end-d.textStart]
   288  	lookup := d.lookup
   289  	for pc := start; pc < end; {
   290  		i := pc - d.textStart
   291  		text, size := d.disasm(code[i:], pc, lookup, d.byteOrder, gnuAsm)
   292  		file, line, _ := d.pcln.PCToLine(pc)
   293  		sep := "\t"
   294  		for len(relocs) > 0 && relocs[0].Addr < i+uint64(size) {
   295  			text += sep + relocs[0].Stringer.String(pc-start)
   296  			sep = " "
   297  			relocs = relocs[1:]
   298  		}
   299  		f(pc, uint64(size), file, line, text)
   300  		pc += uint64(size)
   301  	}
   302  }
   303  
   304  type lookupFunc = func(addr uint64) (sym string, base uint64)
   305  type disasmFunc func(code []byte, pc uint64, lookup lookupFunc, ord binary.ByteOrder, _ bool) (text string, size int)
   306  
   307  func disasm_386(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
   308  	return disasm_x86(code, pc, lookup, 32, gnuAsm)
   309  }
   310  
   311  func disasm_amd64(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
   312  	return disasm_x86(code, pc, lookup, 64, gnuAsm)
   313  }
   314  
   315  func disasm_x86(code []byte, pc uint64, lookup lookupFunc, arch int, gnuAsm bool) (string, int) {
   316  	inst, err := x86asm.Decode(code, arch)
   317  	var text string
   318  	size := inst.Len
   319  	if err != nil || size == 0 || inst.Op == 0 {
   320  		size = 1
   321  		text = "?"
   322  	} else {
   323  		if gnuAsm {
   324  			text = fmt.Sprintf("%-36s // %s", x86asm.GoSyntax(inst, pc, lookup), x86asm.GNUSyntax(inst, pc, nil))
   325  		} else {
   326  			text = x86asm.GoSyntax(inst, pc, lookup)
   327  		}
   328  	}
   329  	return text, size
   330  }
   331  
   332  type textReader struct {
   333  	code []byte
   334  	pc   uint64
   335  }
   336  
   337  func (r textReader) ReadAt(data []byte, off int64) (n int, err error) {
   338  	if off < 0 || uint64(off) < r.pc {
   339  		return 0, io.EOF
   340  	}
   341  	d := uint64(off) - r.pc
   342  	if d >= uint64(len(r.code)) {
   343  		return 0, io.EOF
   344  	}
   345  	n = copy(data, r.code[d:])
   346  	if n < len(data) {
   347  		err = io.ErrUnexpectedEOF
   348  	}
   349  	return
   350  }
   351  
   352  func disasm_arm(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
   353  	inst, err := armasm.Decode(code, armasm.ModeARM)
   354  	var text string
   355  	size := inst.Len
   356  	if err != nil || size == 0 || inst.Op == 0 {
   357  		size = 4
   358  		text = "?"
   359  	} else if gnuAsm {
   360  		text = fmt.Sprintf("%-36s // %s", armasm.GoSyntax(inst, pc, lookup, textReader{code, pc}), armasm.GNUSyntax(inst))
   361  	} else {
   362  		text = armasm.GoSyntax(inst, pc, lookup, textReader{code, pc})
   363  	}
   364  	return text, size
   365  }
   366  
   367  func disasm_arm64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
   368  	inst, err := arm64asm.Decode(code)
   369  	var text string
   370  	if err != nil || inst.Op == 0 {
   371  		text = "?"
   372  	} else if gnuAsm {
   373  		text = fmt.Sprintf("%-36s // %s", arm64asm.GoSyntax(inst, pc, lookup, textReader{code, pc}), arm64asm.GNUSyntax(inst))
   374  	} else {
   375  		text = arm64asm.GoSyntax(inst, pc, lookup, textReader{code, pc})
   376  	}
   377  	return text, 4
   378  }
   379  
   380  func disasm_loong64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
   381  	inst, err := loong64asm.Decode(code)
   382  	var text string
   383  	if err != nil || inst.Op == 0 {
   384  		text = "?"
   385  	} else if gnuAsm {
   386  		text = fmt.Sprintf("%-36s // %s", loong64asm.GoSyntax(inst, pc, lookup), loong64asm.GNUSyntax(inst))
   387  	} else {
   388  		text = loong64asm.GoSyntax(inst, pc, lookup)
   389  	}
   390  	return text, 4
   391  }
   392  
   393  func disasm_ppc64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
   394  	inst, err := ppc64asm.Decode(code, byteOrder)
   395  	var text string
   396  	size := inst.Len
   397  	if err != nil || size == 0 {
   398  		size = 4
   399  		text = "?"
   400  	} else {
   401  		if gnuAsm {
   402  			text = fmt.Sprintf("%-36s // %s", ppc64asm.GoSyntax(inst, pc, lookup), ppc64asm.GNUSyntax(inst, pc))
   403  		} else {
   404  			text = ppc64asm.GoSyntax(inst, pc, lookup)
   405  		}
   406  	}
   407  	return text, size
   408  }
   409  
   410  func disasm_riscv64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) {
   411  	inst, err := riscv64asm.Decode(code)
   412  	var text string
   413  	size := inst.Len
   414  	if err != nil || inst.Op == 0 {
   415  		size = 2
   416  		text = "?"
   417  	} else if gnuAsm {
   418  		text = fmt.Sprintf("%-36s // %s", riscv64asm.GoSyntax(inst, pc, lookup, textReader{code, pc}), riscv64asm.GNUSyntax(inst))
   419  	} else {
   420  		text = riscv64asm.GoSyntax(inst, pc, lookup, textReader{code, pc})
   421  	}
   422  	return text, size
   423  }
   424  
   425  func disasm_s390x(code []byte, pc uint64, lookup lookupFunc, _ binary.ByteOrder, gnuAsm bool) (string, int) {
   426  	inst, err := s390xasm.Decode(code)
   427  	var text string
   428  	size := inst.Len
   429  	if err != nil || size == 0 || inst.Op == 0 {
   430  		size = 2
   431  		text = "?"
   432  	} else {
   433  		if gnuAsm {
   434  			text = fmt.Sprintf("%-36s // %s", s390xasm.GoSyntax(inst, pc, lookup), s390xasm.GNUSyntax(inst, pc))
   435  		} else {
   436  			text = s390xasm.GoSyntax(inst, pc, lookup)
   437  		}
   438  	}
   439  	return text, size
   440  }
   441  
   442  var disasms = map[string]disasmFunc{
   443  	"386":     disasm_386,
   444  	"amd64":   disasm_amd64,
   445  	"arm":     disasm_arm,
   446  	"arm64":   disasm_arm64,
   447  	"loong64": disasm_loong64,
   448  	"ppc64":   disasm_ppc64,
   449  	"ppc64le": disasm_ppc64,
   450  	"riscv64": disasm_riscv64,
   451  	"s390x":   disasm_s390x,
   452  }
   453  
   454  var byteOrders = map[string]binary.ByteOrder{
   455  	"386":     binary.LittleEndian,
   456  	"amd64":   binary.LittleEndian,
   457  	"arm":     binary.LittleEndian,
   458  	"arm64":   binary.LittleEndian,
   459  	"loong64": binary.LittleEndian,
   460  	"ppc64":   binary.BigEndian,
   461  	"ppc64le": binary.LittleEndian,
   462  	"riscv64": binary.LittleEndian,
   463  	"s390x":   binary.BigEndian,
   464  }
   465  

View as plain text