Source file src/internal/strconv/atoi.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  // lower(c) is a lower-case letter if and only if
     8  // c is either that lower-case letter or the equivalent upper-case letter.
     9  // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'.
    10  // Note that lower of non-letters can produce other non-letters.
    11  func lower(c byte) byte {
    12  	return c | ('x' - 'X')
    13  }
    14  
    15  type Error int
    16  
    17  const (
    18  	_ Error = iota
    19  	ErrRange
    20  	ErrSyntax
    21  	ErrBase
    22  	ErrBitSize
    23  )
    24  
    25  func (e Error) Error() string {
    26  	switch e {
    27  	case ErrRange:
    28  		return "value out of range"
    29  	case ErrSyntax:
    30  		return "invalid syntax"
    31  	case ErrBase:
    32  		return "invalid base"
    33  	case ErrBitSize:
    34  		return "invalid bit size"
    35  	}
    36  	return "unknown error"
    37  }
    38  
    39  const intSize = 32 << (^uint(0) >> 63)
    40  
    41  // IntSize is the size in bits of an int or uint value.
    42  const IntSize = intSize
    43  
    44  const maxUint64 = 1<<64 - 1
    45  
    46  // ParseUint is like [ParseInt] but for unsigned numbers.
    47  //
    48  // A sign prefix is not permitted.
    49  func ParseUint(s string, base int, bitSize int) (uint64, error) {
    50  	const fnParseUint = "ParseUint"
    51  
    52  	if s == "" {
    53  		return 0, ErrSyntax
    54  	}
    55  
    56  	base0 := base == 0
    57  
    58  	s0 := s
    59  	switch {
    60  	case 2 <= base && base <= 36:
    61  		// valid base; nothing to do
    62  
    63  	case base == 0:
    64  		// Look for octal, hex prefix.
    65  		base = 10
    66  		if s[0] == '0' {
    67  			switch {
    68  			case len(s) >= 3 && lower(s[1]) == 'b':
    69  				base = 2
    70  				s = s[2:]
    71  			case len(s) >= 3 && lower(s[1]) == 'o':
    72  				base = 8
    73  				s = s[2:]
    74  			case len(s) >= 3 && lower(s[1]) == 'x':
    75  				base = 16
    76  				s = s[2:]
    77  			default:
    78  				base = 8
    79  				s = s[1:]
    80  			}
    81  		}
    82  
    83  	default:
    84  		return 0, ErrBase
    85  	}
    86  
    87  	if bitSize == 0 {
    88  		bitSize = IntSize
    89  	} else if bitSize < 0 || bitSize > 64 {
    90  		return 0, ErrBitSize
    91  	}
    92  
    93  	// Cutoff is the smallest number such that cutoff*base > maxUint64.
    94  	// Use compile-time constants for common cases.
    95  	var cutoff uint64
    96  	switch base {
    97  	case 10:
    98  		cutoff = maxUint64/10 + 1
    99  	case 16:
   100  		cutoff = maxUint64/16 + 1
   101  	default:
   102  		cutoff = maxUint64/uint64(base) + 1
   103  	}
   104  
   105  	maxVal := uint64(1)<<uint(bitSize) - 1
   106  
   107  	underscores := false
   108  	var n uint64
   109  	for _, c := range []byte(s) {
   110  		var d byte
   111  		switch {
   112  		case c == '_' && base0:
   113  			underscores = true
   114  			continue
   115  		case '0' <= c && c <= '9':
   116  			d = c - '0'
   117  		case 'a' <= lower(c) && lower(c) <= 'z':
   118  			d = lower(c) - 'a' + 10
   119  		default:
   120  			return 0, ErrSyntax
   121  		}
   122  
   123  		if d >= byte(base) {
   124  			return 0, ErrSyntax
   125  		}
   126  
   127  		if n >= cutoff {
   128  			// n*base overflows
   129  			return maxVal, ErrRange
   130  		}
   131  		n *= uint64(base)
   132  
   133  		n1 := n + uint64(d)
   134  		if n1 < n || n1 > maxVal {
   135  			// n+d overflows
   136  			return maxVal, ErrRange
   137  		}
   138  		n = n1
   139  	}
   140  
   141  	if underscores && !underscoreOK(s0) {
   142  		return 0, ErrSyntax
   143  	}
   144  
   145  	return n, nil
   146  }
   147  
   148  // ParseInt interprets a string s in the given base (0, 2 to 36) and
   149  // bit size (0 to 64) and returns the corresponding value i.
   150  //
   151  // The string may begin with a leading sign: "+" or "-".
   152  //
   153  // If the base argument is 0, the true base is implied by the string's
   154  // prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
   155  // 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
   156  // underscore characters are permitted as defined by the Go syntax for
   157  // [integer literals].
   158  //
   159  // The bitSize argument specifies the integer type
   160  // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
   161  // correspond to int, int8, int16, int32, and int64.
   162  // If bitSize is below 0 or above 64, an error is returned.
   163  //
   164  // The errors that ParseInt returns have concrete type [*NumError]
   165  // and include err.Num = s. If s is empty or contains invalid
   166  // digits, err.Err = [ErrSyntax] and the returned value is 0;
   167  // if the value corresponding to s cannot be represented by a
   168  // signed integer of the given size, err.Err = [ErrRange] and the
   169  // returned value is the maximum magnitude integer of the
   170  // appropriate bitSize and sign.
   171  //
   172  // [integer literals]: https://go.dev/ref/spec#Integer_literals
   173  func ParseInt(s string, base int, bitSize int) (i int64, err error) {
   174  	const fnParseInt = "ParseInt"
   175  
   176  	if s == "" {
   177  		return 0, ErrSyntax
   178  	}
   179  
   180  	// Pick off leading sign.
   181  	neg := false
   182  	switch s[0] {
   183  	case '+':
   184  		s = s[1:]
   185  	case '-':
   186  		s = s[1:]
   187  		neg = true
   188  	}
   189  
   190  	// Convert unsigned and check range.
   191  	var un uint64
   192  	un, err = ParseUint(s, base, bitSize)
   193  	if err != nil && err != ErrRange {
   194  		return 0, err
   195  	}
   196  
   197  	if bitSize == 0 {
   198  		bitSize = IntSize
   199  	}
   200  
   201  	cutoff := uint64(1 << uint(bitSize-1))
   202  	if !neg && un >= cutoff {
   203  		return int64(cutoff - 1), ErrRange
   204  	}
   205  	if neg && un > cutoff {
   206  		return -int64(cutoff), ErrRange
   207  	}
   208  	n := int64(un)
   209  	if neg {
   210  		n = -n
   211  	}
   212  	return n, nil
   213  }
   214  
   215  // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
   216  func Atoi(s string) (int, error) {
   217  	const fnAtoi = "Atoi"
   218  
   219  	sLen := len(s)
   220  	if intSize == 32 && (0 < sLen && sLen < 10) ||
   221  		intSize == 64 && (0 < sLen && sLen < 19) {
   222  		// Fast path for small integers that fit int type.
   223  		s0 := s
   224  		if s[0] == '-' || s[0] == '+' {
   225  			s = s[1:]
   226  			if len(s) < 1 {
   227  				return 0, ErrSyntax
   228  			}
   229  		}
   230  
   231  		n := 0
   232  		for _, ch := range []byte(s) {
   233  			ch -= '0'
   234  			if ch > 9 {
   235  				return 0, ErrSyntax
   236  			}
   237  			n = n*10 + int(ch)
   238  		}
   239  		if s0[0] == '-' {
   240  			n = -n
   241  		}
   242  		return n, nil
   243  	}
   244  
   245  	// Slow path for invalid, big, or underscored integers.
   246  	i64, err := ParseInt(s, 10, 0)
   247  	return int(i64), err
   248  }
   249  
   250  // underscoreOK reports whether the underscores in s are allowed.
   251  // Checking them in this one function lets all the parsers skip over them simply.
   252  // Underscore must appear only between digits or between a base prefix and a digit.
   253  func underscoreOK(s string) bool {
   254  	// saw tracks the last character (class) we saw:
   255  	// ^ for beginning of number,
   256  	// 0 for a digit or base prefix,
   257  	// _ for an underscore,
   258  	// ! for none of the above.
   259  	saw := '^'
   260  	i := 0
   261  
   262  	// Optional sign.
   263  	if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
   264  		s = s[1:]
   265  	}
   266  
   267  	// Optional base prefix.
   268  	hex := false
   269  	if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
   270  		i = 2
   271  		saw = '0' // base prefix counts as a digit for "underscore as digit separator"
   272  		hex = lower(s[1]) == 'x'
   273  	}
   274  
   275  	// Number proper.
   276  	for ; i < len(s); i++ {
   277  		// Digits are always okay.
   278  		if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
   279  			saw = '0'
   280  			continue
   281  		}
   282  		// Underscore must follow digit.
   283  		if s[i] == '_' {
   284  			if saw != '0' {
   285  				return false
   286  			}
   287  			saw = '_'
   288  			continue
   289  		}
   290  		// Underscore must also be followed by digit.
   291  		if saw == '_' {
   292  			return false
   293  		}
   294  		// Saw non-digit, non-underscore.
   295  		saw = '!'
   296  	}
   297  	return saw != '_'
   298  }
   299  

View as plain text