Source file src/go/token/position.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package token
     6  
     7  import (
     8  	"cmp"
     9  	"fmt"
    10  	"slices"
    11  	"strconv"
    12  	"sync"
    13  	"sync/atomic"
    14  )
    15  
    16  // If debug is set, invalid offset and position values cause a panic
    17  // (go.dev/issue/57490).
    18  const debug = false
    19  
    20  // -----------------------------------------------------------------------------
    21  // Positions
    22  
    23  // Position describes an arbitrary source position
    24  // including the file, line, and column location.
    25  // A Position is valid if the line number is > 0.
    26  type Position struct {
    27  	Filename string // filename, if any
    28  	Offset   int    // offset, starting at 0
    29  	Line     int    // line number, starting at 1
    30  	Column   int    // column number, starting at 1 (byte count)
    31  }
    32  
    33  // IsValid reports whether the position is valid.
    34  func (pos *Position) IsValid() bool { return pos.Line > 0 }
    35  
    36  // String returns a string in one of several forms:
    37  //
    38  //	file:line:column    valid position with file name
    39  //	file:line           valid position with file name but no column (column == 0)
    40  //	line:column         valid position without file name
    41  //	line                valid position without file name and no column (column == 0)
    42  //	file                invalid position with file name
    43  //	-                   invalid position without file name
    44  func (pos Position) String() string {
    45  	s := pos.Filename
    46  	if pos.IsValid() {
    47  		if s != "" {
    48  			s += ":"
    49  		}
    50  		s += strconv.Itoa(pos.Line)
    51  		if pos.Column != 0 {
    52  			s += fmt.Sprintf(":%d", pos.Column)
    53  		}
    54  	}
    55  	if s == "" {
    56  		s = "-"
    57  	}
    58  	return s
    59  }
    60  
    61  // Pos is a compact encoding of a source position within a file set.
    62  // It can be converted into a [Position] for a more convenient, but much
    63  // larger, representation.
    64  //
    65  // The Pos value for a given file is a number in the range [base, base+size],
    66  // where base and size are specified when a file is added to the file set.
    67  // The difference between a Pos value and the corresponding file base
    68  // corresponds to the byte offset of that position (represented by the Pos value)
    69  // from the beginning of the file. Thus, the file base offset is the Pos value
    70  // representing the first byte in the file.
    71  //
    72  // To create the Pos value for a specific source offset (measured in bytes),
    73  // first add the respective file to the current file set using [FileSet.AddFile]
    74  // and then call [File.Pos](offset) for that file. Given a Pos value p
    75  // for a specific file set fset, the corresponding [Position] value is
    76  // obtained by calling fset.Position(p).
    77  //
    78  // Pos values can be compared directly with the usual comparison operators:
    79  // If two Pos values p and q are in the same file, comparing p and q is
    80  // equivalent to comparing the respective source file offsets. If p and q
    81  // are in different files, p < q is true if the file implied by p was added
    82  // to the respective file set before the file implied by q.
    83  type Pos int
    84  
    85  // The zero value for [Pos] is NoPos; there is no file and line information
    86  // associated with it, and NoPos.IsValid() is false. NoPos is always
    87  // smaller than any other [Pos] value. The corresponding [Position] value
    88  // for NoPos is the zero value for [Position].
    89  const NoPos Pos = 0
    90  
    91  // IsValid reports whether the position is valid.
    92  func (p Pos) IsValid() bool {
    93  	return p != NoPos
    94  }
    95  
    96  // -----------------------------------------------------------------------------
    97  // File
    98  
    99  // A File is a handle for a file belonging to a [FileSet].
   100  // A File has a name, size, and line offset table.
   101  type File struct {
   102  	name string // file name as provided to AddFile
   103  	base int    // Pos value range for this file is [base...base+size]
   104  	size int    // file size as provided to AddFile
   105  
   106  	// lines and infos are protected by mutex
   107  	mutex sync.Mutex
   108  	lines []int // lines contains the offset of the first character for each line (the first entry is always 0)
   109  	infos []lineInfo
   110  }
   111  
   112  // Name returns the file name of file f as registered with AddFile.
   113  func (f *File) Name() string {
   114  	return f.name
   115  }
   116  
   117  // Base returns the base offset of file f as registered with AddFile.
   118  func (f *File) Base() int {
   119  	return f.base
   120  }
   121  
   122  // Size returns the size of file f as registered with AddFile.
   123  func (f *File) Size() int {
   124  	return f.size
   125  }
   126  
   127  // LineCount returns the number of lines in file f.
   128  func (f *File) LineCount() int {
   129  	f.mutex.Lock()
   130  	n := len(f.lines)
   131  	f.mutex.Unlock()
   132  	return n
   133  }
   134  
   135  // AddLine adds the line offset for a new line.
   136  // The line offset must be larger than the offset for the previous line
   137  // and smaller than the file size; otherwise the line offset is ignored.
   138  func (f *File) AddLine(offset int) {
   139  	f.mutex.Lock()
   140  	if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size {
   141  		f.lines = append(f.lines, offset)
   142  	}
   143  	f.mutex.Unlock()
   144  }
   145  
   146  // MergeLine merges a line with the following line. It is akin to replacing
   147  // the newline character at the end of the line with a space (to not change the
   148  // remaining offsets). To obtain the line number, consult e.g. [Position.Line].
   149  // MergeLine will panic if given an invalid line number.
   150  func (f *File) MergeLine(line int) {
   151  	if line < 1 {
   152  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   153  	}
   154  	f.mutex.Lock()
   155  	defer f.mutex.Unlock()
   156  	if line >= len(f.lines) {
   157  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   158  	}
   159  	// To merge the line numbered <line> with the line numbered <line+1>,
   160  	// we need to remove the entry in lines corresponding to the line
   161  	// numbered <line+1>. The entry in lines corresponding to the line
   162  	// numbered <line+1> is located at index <line>, since indices in lines
   163  	// are 0-based and line numbers are 1-based.
   164  	copy(f.lines[line:], f.lines[line+1:])
   165  	f.lines = f.lines[:len(f.lines)-1]
   166  }
   167  
   168  // Lines returns the effective line offset table of the form described by [File.SetLines].
   169  // Callers must not mutate the result.
   170  func (f *File) Lines() []int {
   171  	f.mutex.Lock()
   172  	lines := f.lines
   173  	f.mutex.Unlock()
   174  	return lines
   175  }
   176  
   177  // SetLines sets the line offsets for a file and reports whether it succeeded.
   178  // The line offsets are the offsets of the first character of each line;
   179  // for instance for the content "ab\nc\n" the line offsets are {0, 3}.
   180  // An empty file has an empty line offset table.
   181  // Each line offset must be larger than the offset for the previous line
   182  // and smaller than the file size; otherwise SetLines fails and returns
   183  // false.
   184  // Callers must not mutate the provided slice after SetLines returns.
   185  func (f *File) SetLines(lines []int) bool {
   186  	// verify validity of lines table
   187  	size := f.size
   188  	for i, offset := range lines {
   189  		if i > 0 && offset <= lines[i-1] || size <= offset {
   190  			return false
   191  		}
   192  	}
   193  
   194  	// set lines table
   195  	f.mutex.Lock()
   196  	f.lines = lines
   197  	f.mutex.Unlock()
   198  	return true
   199  }
   200  
   201  // SetLinesForContent sets the line offsets for the given file content.
   202  // It ignores position-altering //line comments.
   203  func (f *File) SetLinesForContent(content []byte) {
   204  	var lines []int
   205  	line := 0
   206  	for offset, b := range content {
   207  		if line >= 0 {
   208  			lines = append(lines, line)
   209  		}
   210  		line = -1
   211  		if b == '\n' {
   212  			line = offset + 1
   213  		}
   214  	}
   215  
   216  	// set lines table
   217  	f.mutex.Lock()
   218  	f.lines = lines
   219  	f.mutex.Unlock()
   220  }
   221  
   222  // LineStart returns the [Pos] value of the start of the specified line.
   223  // It ignores any alternative positions set using [File.AddLineColumnInfo].
   224  // LineStart panics if the 1-based line number is invalid.
   225  func (f *File) LineStart(line int) Pos {
   226  	if line < 1 {
   227  		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
   228  	}
   229  	f.mutex.Lock()
   230  	defer f.mutex.Unlock()
   231  	if line > len(f.lines) {
   232  		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
   233  	}
   234  	return Pos(f.base + f.lines[line-1])
   235  }
   236  
   237  // A lineInfo object describes alternative file, line, and column
   238  // number information (such as provided via a //line directive)
   239  // for a given file offset.
   240  type lineInfo struct {
   241  	// fields are exported to make them accessible to gob
   242  	Offset       int
   243  	Filename     string
   244  	Line, Column int
   245  }
   246  
   247  // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument.
   248  // It is here for backward-compatibility for code prior to Go 1.11.
   249  func (f *File) AddLineInfo(offset int, filename string, line int) {
   250  	f.AddLineColumnInfo(offset, filename, line, 1)
   251  }
   252  
   253  // AddLineColumnInfo adds alternative file, line, and column number
   254  // information for a given file offset. The offset must be larger
   255  // than the offset for the previously added alternative line info
   256  // and smaller than the file size; otherwise the information is
   257  // ignored.
   258  //
   259  // AddLineColumnInfo is typically used to register alternative position
   260  // information for line directives such as //line filename:line:column.
   261  func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) {
   262  	f.mutex.Lock()
   263  	if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size {
   264  		f.infos = append(f.infos, lineInfo{offset, filename, line, column})
   265  	}
   266  	f.mutex.Unlock()
   267  }
   268  
   269  // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size.
   270  func (f *File) fixOffset(offset int) int {
   271  	switch {
   272  	case offset < 0:
   273  		if !debug {
   274  			return 0
   275  		}
   276  	case offset > f.size:
   277  		if !debug {
   278  			return f.size
   279  		}
   280  	default:
   281  		return offset
   282  	}
   283  
   284  	// only generate this code if needed
   285  	if debug {
   286  		panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])",
   287  			0 /* for symmetry */, offset, f.size,
   288  			f.base+offset, f.base, f.base+f.size))
   289  	}
   290  	return 0
   291  }
   292  
   293  // Pos returns the Pos value for the given file offset.
   294  //
   295  // If offset is negative, the result is the file's start
   296  // position; if the offset is too large, the result is
   297  // the file's end position (see also go.dev/issue/57490).
   298  //
   299  // The following invariant, though not true for Pos values
   300  // in general, holds for the result p:
   301  // f.Pos(f.Offset(p)) == p.
   302  func (f *File) Pos(offset int) Pos {
   303  	return Pos(f.base + f.fixOffset(offset))
   304  }
   305  
   306  // Offset returns the offset for the given file position p.
   307  //
   308  // If p is before the file's start position (or if p is NoPos),
   309  // the result is 0; if p is past the file's end position, the
   310  // the result is the file size (see also go.dev/issue/57490).
   311  //
   312  // The following invariant, though not true for offset values
   313  // in general, holds for the result offset:
   314  // f.Offset(f.Pos(offset)) == offset
   315  func (f *File) Offset(p Pos) int {
   316  	return f.fixOffset(int(p) - f.base)
   317  }
   318  
   319  // Line returns the line number for the given file position p;
   320  // p must be a [Pos] value in that file or [NoPos].
   321  func (f *File) Line(p Pos) int {
   322  	return f.Position(p).Line
   323  }
   324  
   325  func searchLineInfos(a []lineInfo, x int) int {
   326  	i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int {
   327  		return cmp.Compare(a.Offset, x)
   328  	})
   329  	if !found {
   330  		// We want the lineInfo containing x, but if we didn't
   331  		// find x then i is the next one.
   332  		i--
   333  	}
   334  	return i
   335  }
   336  
   337  // unpack returns the filename and line and column number for a file offset.
   338  // If adjusted is set, unpack will return the filename and line information
   339  // possibly adjusted by //line comments; otherwise those comments are ignored.
   340  func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) {
   341  	f.mutex.Lock()
   342  	filename = f.name
   343  	if i := searchInts(f.lines, offset); i >= 0 {
   344  		line, column = i+1, offset-f.lines[i]+1
   345  	}
   346  	if adjusted && len(f.infos) > 0 {
   347  		// few files have extra line infos
   348  		if i := searchLineInfos(f.infos, offset); i >= 0 {
   349  			alt := &f.infos[i]
   350  			filename = alt.Filename
   351  			if i := searchInts(f.lines, alt.Offset); i >= 0 {
   352  				// i+1 is the line at which the alternative position was recorded
   353  				d := line - (i + 1) // line distance from alternative position base
   354  				line = alt.Line + d
   355  				if alt.Column == 0 {
   356  					// alternative column is unknown => relative column is unknown
   357  					// (the current specification for line directives requires
   358  					// this to apply until the next PosBase/line directive,
   359  					// not just until the new newline)
   360  					column = 0
   361  				} else if d == 0 {
   362  					// the alternative position base is on the current line
   363  					// => column is relative to alternative column
   364  					column = alt.Column + (offset - alt.Offset)
   365  				}
   366  			}
   367  		}
   368  	}
   369  	// TODO(mvdan): move Unlock back under Lock with a defer statement once
   370  	// https://go.dev/issue/38471 is fixed to remove the performance penalty.
   371  	f.mutex.Unlock()
   372  	return
   373  }
   374  
   375  func (f *File) position(p Pos, adjusted bool) (pos Position) {
   376  	offset := f.fixOffset(int(p) - f.base)
   377  	pos.Offset = offset
   378  	pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
   379  	return
   380  }
   381  
   382  // PositionFor returns the Position value for the given file position p.
   383  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
   384  // If adjusted is set, the position may be adjusted by position-altering
   385  // //line comments; otherwise those comments are ignored.
   386  // p must be a Pos value in f or NoPos.
   387  func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) {
   388  	if p != NoPos {
   389  		pos = f.position(p, adjusted)
   390  	}
   391  	return
   392  }
   393  
   394  // Position returns the Position value for the given file position p.
   395  // If p is out of bounds, it is adjusted to match the File.Offset behavior.
   396  // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true).
   397  func (f *File) Position(p Pos) (pos Position) {
   398  	return f.PositionFor(p, true)
   399  }
   400  
   401  // -----------------------------------------------------------------------------
   402  // FileSet
   403  
   404  // A FileSet represents a set of source files.
   405  // Methods of file sets are synchronized; multiple goroutines
   406  // may invoke them concurrently.
   407  //
   408  // The byte offsets for each file in a file set are mapped into
   409  // distinct (integer) intervals, one interval [base, base+size]
   410  // per file. [FileSet.Base] represents the first byte in the file, and size
   411  // is the corresponding file size. A [Pos] value is a value in such
   412  // an interval. By determining the interval a [Pos] value belongs
   413  // to, the file, its file base, and thus the byte offset (position)
   414  // the [Pos] value is representing can be computed.
   415  //
   416  // When adding a new file, a file base must be provided. That can
   417  // be any integer value that is past the end of any interval of any
   418  // file already in the file set. For convenience, [FileSet.Base] provides
   419  // such a value, which is simply the end of the Pos interval of the most
   420  // recently added file, plus one. Unless there is a need to extend an
   421  // interval later, using the [FileSet.Base] should be used as argument
   422  // for [FileSet.AddFile].
   423  //
   424  // A [File] may be removed from a FileSet when it is no longer needed.
   425  // This may reduce memory usage in a long-running application.
   426  type FileSet struct {
   427  	mutex sync.RWMutex         // protects the file set
   428  	base  int                  // base offset for the next file
   429  	files []*File              // list of files in the order added to the set
   430  	last  atomic.Pointer[File] // cache of last file looked up
   431  }
   432  
   433  // NewFileSet creates a new file set.
   434  func NewFileSet() *FileSet {
   435  	return &FileSet{
   436  		base: 1, // 0 == NoPos
   437  	}
   438  }
   439  
   440  // Base returns the minimum base offset that must be provided to
   441  // [FileSet.AddFile] when adding the next file.
   442  func (s *FileSet) Base() int {
   443  	s.mutex.RLock()
   444  	b := s.base
   445  	s.mutex.RUnlock()
   446  	return b
   447  }
   448  
   449  // AddFile adds a new file with a given filename, base offset, and file size
   450  // to the file set s and returns the file. Multiple files may have the same
   451  // name. The base offset must not be smaller than the [FileSet.Base], and
   452  // size must not be negative. As a special case, if a negative base is provided,
   453  // the current value of the [FileSet.Base] is used instead.
   454  //
   455  // Adding the file will set the file set's [FileSet.Base] value to base + size + 1
   456  // as the minimum base value for the next file. The following relationship
   457  // exists between a [Pos] value p for a given file offset offs:
   458  //
   459  //	int(p) = base + offs
   460  //
   461  // with offs in the range [0, size] and thus p in the range [base, base+size].
   462  // For convenience, [File.Pos] may be used to create file-specific position
   463  // values from a file offset.
   464  func (s *FileSet) AddFile(filename string, base, size int) *File {
   465  	// Allocate f outside the critical section.
   466  	f := &File{name: filename, size: size, lines: []int{0}}
   467  
   468  	s.mutex.Lock()
   469  	defer s.mutex.Unlock()
   470  	if base < 0 {
   471  		base = s.base
   472  	}
   473  	if base < s.base {
   474  		panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base))
   475  	}
   476  	f.base = base
   477  	if size < 0 {
   478  		panic(fmt.Sprintf("invalid size %d (should be >= 0)", size))
   479  	}
   480  	// base >= s.base && size >= 0
   481  	base += size + 1 // +1 because EOF also has a position
   482  	if base < 0 {
   483  		panic("token.Pos offset overflow (> 2G of source code in file set)")
   484  	}
   485  	// add the file to the file set
   486  	s.base = base
   487  	s.files = append(s.files, f)
   488  	s.last.Store(f)
   489  	return f
   490  }
   491  
   492  // RemoveFile removes a file from the [FileSet] so that subsequent
   493  // queries for its [Pos] interval yield a negative result.
   494  // This reduces the memory usage of a long-lived [FileSet] that
   495  // encounters an unbounded stream of files.
   496  //
   497  // Removing a file that does not belong to the set has no effect.
   498  func (s *FileSet) RemoveFile(file *File) {
   499  	s.last.CompareAndSwap(file, nil) // clear last file cache
   500  
   501  	s.mutex.Lock()
   502  	defer s.mutex.Unlock()
   503  
   504  	if i := searchFiles(s.files, file.base); i >= 0 && s.files[i] == file {
   505  		last := &s.files[len(s.files)-1]
   506  		s.files = append(s.files[:i], s.files[i+1:]...)
   507  		*last = nil // don't prolong lifetime when popping last element
   508  	}
   509  }
   510  
   511  // Iterate calls f for the files in the file set in the order they were added
   512  // until f returns false.
   513  func (s *FileSet) Iterate(f func(*File) bool) {
   514  	for i := 0; ; i++ {
   515  		var file *File
   516  		s.mutex.RLock()
   517  		if i < len(s.files) {
   518  			file = s.files[i]
   519  		}
   520  		s.mutex.RUnlock()
   521  		if file == nil || !f(file) {
   522  			break
   523  		}
   524  	}
   525  }
   526  
   527  func searchFiles(a []*File, x int) int {
   528  	i, found := slices.BinarySearchFunc(a, x, func(a *File, x int) int {
   529  		return cmp.Compare(a.base, x)
   530  	})
   531  	if !found {
   532  		// We want the File containing x, but if we didn't
   533  		// find x then i is the next one.
   534  		i--
   535  	}
   536  	return i
   537  }
   538  
   539  func (s *FileSet) file(p Pos) *File {
   540  	// common case: p is in last file.
   541  	if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size {
   542  		return f
   543  	}
   544  
   545  	s.mutex.RLock()
   546  	defer s.mutex.RUnlock()
   547  
   548  	// p is not in last file - search all files
   549  	if i := searchFiles(s.files, int(p)); i >= 0 {
   550  		f := s.files[i]
   551  		// f.base <= int(p) by definition of searchFiles
   552  		if int(p) <= f.base+f.size {
   553  			// Update cache of last file. A race is ok,
   554  			// but an exclusive lock causes heavy contention.
   555  			s.last.Store(f)
   556  			return f
   557  		}
   558  	}
   559  	return nil
   560  }
   561  
   562  // File returns the file that contains the position p.
   563  // If no such file is found (for instance for p == [NoPos]),
   564  // the result is nil.
   565  func (s *FileSet) File(p Pos) (f *File) {
   566  	if p != NoPos {
   567  		f = s.file(p)
   568  	}
   569  	return
   570  }
   571  
   572  // PositionFor converts a [Pos] p in the fileset into a [Position] value.
   573  // If adjusted is set, the position may be adjusted by position-altering
   574  // //line comments; otherwise those comments are ignored.
   575  // p must be a [Pos] value in s or [NoPos].
   576  func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) {
   577  	if p != NoPos {
   578  		if f := s.file(p); f != nil {
   579  			return f.position(p, adjusted)
   580  		}
   581  	}
   582  	return
   583  }
   584  
   585  // Position converts a [Pos] p in the fileset into a Position value.
   586  // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true).
   587  func (s *FileSet) Position(p Pos) (pos Position) {
   588  	return s.PositionFor(p, true)
   589  }
   590  
   591  // -----------------------------------------------------------------------------
   592  // Helper functions
   593  
   594  func searchInts(a []int, x int) int {
   595  	// This function body is a manually inlined version of:
   596  	//
   597  	//   return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1
   598  	//
   599  	// With better compiler optimizations, this may not be needed in the
   600  	// future, but at the moment this change improves the go/printer
   601  	// benchmark performance by ~30%. This has a direct impact on the
   602  	// speed of gofmt and thus seems worthwhile (2011-04-29).
   603  	// TODO(gri): Remove this when compilers have caught up.
   604  	i, j := 0, len(a)
   605  	for i < j {
   606  		h := int(uint(i+j) >> 1) // avoid overflow when computing h
   607  		// i ≤ h < j
   608  		if a[h] <= x {
   609  			i = h + 1
   610  		} else {
   611  			j = h
   612  		}
   613  	}
   614  	return i - 1
   615  }
   616  

View as plain text