atof.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  // decimal to binary floating point conversion.
     8  // Algorithm:
     9  //   1) Store input in multiprecision decimal.
    10  //   2) Multiply/divide decimal by powers of two until in range [0.5, 1)
    11  //   3) Multiply by 2^precision and round to get mantissa.
    12  
// optimize gates the fast conversion paths tried by atof32 and atof64
// before they fall back to the multiprecision decimal conversion.
// Set to false to force slow-path conversions for testing.
var optimize = true
    14  
    15  // commonPrefixLenIgnoreCase returns the length of the common
    16  // prefix of s and prefix, with the character case of s ignored.
    17  // The prefix argument must be all lower-case.
    18  func commonPrefixLenIgnoreCase(s, prefix string) int {
    19  	n := min(len(prefix), len(s))
    20  	for i := 0; i < n; i++ {
    21  		c := s[i]
    22  		if 'A' <= c && c <= 'Z' {
    23  			c += 'a' - 'A'
    24  		}
    25  		if c != prefix[i] {
    26  			return i
    27  		}
    28  	}
    29  	return n
    30  }
    31  
    32  // special returns the floating-point value for the special,
    33  // possibly signed floating-point representations inf, infinity,
    34  // and NaN. The result is ok if a prefix of s contains one
    35  // of these representations and n is the length of that prefix.
    36  // The character case is ignored.
    37  func special(s string) (f float64, n int, ok bool) {
    38  	if len(s) == 0 {
    39  		return 0, 0, false
    40  	}
    41  	sign := 1
    42  	nsign := 0
    43  	switch s[0] {
    44  	case '+', '-':
    45  		if s[0] == '-' {
    46  			sign = -1
    47  		}
    48  		nsign = 1
    49  		s = s[1:]
    50  		fallthrough
    51  	case 'i', 'I':
    52  		n := commonPrefixLenIgnoreCase(s, "infinity")
    53  		// Anything longer than "inf" is ok, but if we
    54  		// don't have "infinity", only consume "inf".
    55  		if 3 < n && n < 8 {
    56  			n = 3
    57  		}
    58  		if n == 3 || n == 8 {
    59  			return inf(sign), nsign + n, true
    60  		}
    61  	case 'n', 'N':
    62  		if commonPrefixLenIgnoreCase(s, "nan") == 3 {
    63  			return nan(), 3, true
    64  		}
    65  	}
    66  	return 0, 0, false
    67  }
    68  
// set parses the decimal floating-point text s into b and reports
// whether all of s was consumed as a well-formed number.
//
// The accepted shape is: optional sign, decimal digits containing at
// most one '.', then an optional exponent (an 'e' marker matched via
// lower, optional sign, decimal digits). Underscores are skipped here
// without any validation.
//
// set resets b.neg and b.trunc only; b.nd and b.dp are built on top of
// whatever b already holds, and the callers in this file pass a freshly
// declared decimal. When ok is false, b may have been partially updated.
func (b *decimal) set(s string) (ok bool) {
	i := 0
	b.neg = false
	b.trunc = false

	// optional sign
	if i >= len(s) {
		// Empty input: ok is still false.
		return
	}
	switch s[i] {
	case '+':
		i++
	case '-':
		i++
		b.neg = true
	}

	// digits
	sawdot := false
	sawdigits := false
	for ; i < len(s); i++ {
		switch {
		case s[i] == '_':
			// readFloat already checked underscores
			continue
		case s[i] == '.':
			if sawdot {
				// A second '.' makes the whole input invalid here
				// (readFloat, by contrast, just stops scanning).
				return
			}
			sawdot = true
			// The decimal point sits after the digits stored so far.
			b.dp = b.nd
			continue

		case '0' <= s[i] && s[i] <= '9':
			sawdigits = true
			if s[i] == '0' && b.nd == 0 { // ignore leading zeros
				// Zeros before the first stored digit are not stored;
				// they only move the decimal point left. Any such
				// adjustment made before the '.' is overwritten when
				// the '.' (or the !sawdot case below) assigns b.dp.
				b.dp--
				continue
			}
			if b.nd < len(b.d) {
				b.d[b.nd] = s[i]
				b.nd++
			} else if s[i] != '0' {
				// Digit buffer is full: drop the digit, but remember
				// that something non-zero was lost.
				b.trunc = true
			}
			continue
		}
		// Any other byte ends the digit run.
		break
	}
	if !sawdigits {
		return
	}
	if !sawdot {
		// No '.', so the decimal point follows all stored digits.
		b.dp = b.nd
	}

	// optional exponent moves decimal point.
	// if we read a very large, very long number,
	// just be sure to move the decimal point by
	// a lot (say, 100000).  it doesn't matter if it's
	// not the exact number.
	if i < len(s) && lower(s[i]) == 'e' {
		i++
		if i >= len(s) {
			// Exponent marker with nothing after it.
			return
		}
		esign := 1
		switch s[i] {
		case '+':
			i++
		case '-':
			i++
			esign = -1
		}
		// The exponent must start with a digit (not an underscore).
		if i >= len(s) || s[i] < '0' || s[i] > '9' {
			return
		}
		e := 0
		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
			if s[i] == '_' {
				// readFloat already checked underscores
				continue
			}
			// Stop accumulating once e reaches 10000 so that very long
			// exponents cannot overflow; later digits are still consumed.
			if e < 10000 {
				e = e*10 + int(s[i]) - '0'
			}
		}
		b.dp += e * esign
	}

	// Trailing bytes after the number make the input invalid.
	if i != len(s) {
		return
	}

	ok = true
	return
}
   166  
// readFloat reads a decimal or hexadecimal mantissa and exponent from a float
// string representation in s; the number may be followed by other characters.
// readFloat reports the number of bytes consumed (i), and whether the number
// is valid (ok).
//
// The remaining results are:
//   - mantissa: the leading significant digits folded into a uint64
//     (at most 19 decimal or 16 hexadecimal digits are kept).
//   - exp: the exponent that scales mantissa, as a power of ten for
//     decimal input and a power of two for hexadecimal input; it is
//     left at 0 when mantissa is 0.
//   - neg: the input started with '-'.
//   - trunc: a non-zero digit did not fit into mantissa and was dropped.
//   - hex: a "0x" prefix (matched via lower) was recognized.
//
// All results are named and every early exit is a bare return, so on
// failure the results hold whatever had been parsed up to that point;
// in particular i is the position that was reached.
func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) {
	underscores := false

	// optional sign
	if i >= len(s) {
		return
	}
	switch s[i] {
	case '+':
		i++
	case '-':
		i++
		neg = true
	}

	// digits
	base := uint64(10)
	maxMantDigits := 19 // 10^19 fits in uint64
	expChar := byte('e')
	// A hex prefix only counts if at least one more byte follows it.
	if i+2 < len(s) && s[i] == '0' && lower(s[i+1]) == 'x' {
		base = 16
		maxMantDigits = 16 // 16^16 fits in uint64
		i += 2
		expChar = 'p'
		hex = true
	}
	sawdot := false
	sawdigits := false
	nd := 0     // digits seen after leading zeros, including dropped ones
	ndMant := 0 // digits actually folded into mantissa
	dp := 0     // position of the point, counted in digits (see nd)
loop:
	for ; i < len(s); i++ {
		switch c := s[i]; true {
		case c == '_':
			// Only note the underscore; placement is validated at the end.
			underscores = true
			continue

		case c == '.':
			if sawdot {
				// A second '.' ends the number; it is left unconsumed.
				break loop
			}
			sawdot = true
			dp = nd
			continue

		case '0' <= c && c <= '9':
			sawdigits = true
			if c == '0' && nd == 0 { // ignore leading zeros
				dp--
				continue
			}
			nd++
			if ndMant < maxMantDigits {
				mantissa *= base
				mantissa += uint64(c - '0')
				ndMant++
			} else if c != '0' {
				trunc = true
			}
			continue

		case base == 16 && 'a' <= lower(c) && lower(c) <= 'f':
			sawdigits = true
			nd++
			if ndMant < maxMantDigits {
				mantissa *= 16
				mantissa += uint64(lower(c) - 'a' + 10)
				ndMant++
			} else {
				// A hex letter is never zero, so dropping it always truncates.
				trunc = true
			}
			continue
		}
		// Any other byte ends the digit run.
		break
	}
	if !sawdigits {
		return
	}
	if !sawdot {
		dp = nd
	}

	if base == 16 {
		// Each hex digit is four bits: convert the digit counts to bit
		// counts so that they combine with the binary 'p' exponent.
		dp *= 4
		ndMant *= 4
	}

	// optional exponent moves decimal point.
	// if we read a very large, very long number,
	// just be sure to move the decimal point by
	// a lot (say, 100000).  it doesn't matter if it's
	// not the exact number.
	if i < len(s) && lower(s[i]) == expChar {
		i++
		if i >= len(s) {
			return
		}
		esign := 1
		switch s[i] {
		case '+':
			i++
		case '-':
			i++
			esign = -1
		}
		// The exponent must start with a digit (not an underscore).
		if i >= len(s) || s[i] < '0' || s[i] > '9' {
			return
		}
		e := 0
		for ; i < len(s) && ('0' <= s[i] && s[i] <= '9' || s[i] == '_'); i++ {
			if s[i] == '_' {
				underscores = true
				continue
			}
			// Stop accumulating once e reaches 10000 so that very long
			// exponents cannot overflow; later digits are still consumed.
			if e < 10000 {
				e = e*10 + int(s[i]) - '0'
			}
		}
		dp += e * esign
	} else if base == 16 {
		// Must have exponent.
		return
	}

	if mantissa != 0 {
		// mantissa holds ndMant digits (or bits, for hex) and the point
		// sits dp digits (bits) in, so the scale is the difference.
		exp = dp - ndMant
	}

	// Underscore placement is validated once, over the consumed prefix only.
	if underscores && !underscoreOK(s[:i]) {
		return
	}

	ok = true
	return
}
   307  
// decimal power of ten to binary power of two.
// For i >= 1, powtab[i] is the largest n with 2^n <= 10^i
// (10 -> 3, 100 -> 6, ..., 10^8 -> 26). floatBits indexes it by the
// magnitude of the decimal point position to pick a shift count, and
// uses 27 when the index is past the end of the table.
var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
   310  
// floatBits converts d into the bit pattern of a floating-point value
// in the format described by flt (mantissa width, exponent width, bias).
// It returns the assembled bits and overflow == true when the value is
// out of range, in which case the bits encode an infinity with d's sign.
//
// d is modified in place: the conversion repeatedly calls d.Shift to
// scale the value by powers of two.
func (d *decimal) floatBits(flt *floatInfo) (b uint64, overflow bool) {
	var exp int
	var mant uint64

	// Zero is always a special case.
	if d.nd == 0 {
		mant = 0
		exp = flt.bias
		goto out
	}

	// Obvious overflow/underflow.
	// These bounds are for 64-bit floats.
	// Will have to change if we want to support 80-bit floats in the future.
	if d.dp > 310 {
		goto overflow
	}
	if d.dp < -330 {
		// zero
		mant = 0
		exp = flt.bias
		goto out
	}

	// Scale by powers of two until in range [0.5, 1.0)
	exp = 0
	// Value is >= 1: divide by 2^n, tracking the total in exp.
	for d.dp > 0 {
		var n int
		if d.dp >= len(powtab) {
			n = 27
		} else {
			n = powtab[d.dp]
		}
		d.Shift(-n)
		exp += n
	}
	// Value is < 0.5: multiply by 2^n, tracking the total in exp.
	for d.dp < 0 || d.dp == 0 && d.d[0] < '5' {
		var n int
		if -d.dp >= len(powtab) {
			n = 27
		} else {
			n = powtab[-d.dp]
		}
		d.Shift(n)
		exp -= n
	}

	// Our range is [0.5,1) but floating point range is [1,2).
	exp--

	// Minimum representable exponent is flt.bias+1.
	// If the exponent is smaller, move it up and
	// adjust d accordingly.
	if exp < flt.bias+1 {
		n := flt.bias + 1 - exp
		d.Shift(-n)
		exp += n
	}

	// A biased exponent of all ones (1<<flt.expbits - 1) is not a finite value.
	if exp-flt.bias >= 1<<flt.expbits-1 {
		goto overflow
	}

	// Extract 1+flt.mantbits bits.
	d.Shift(int(1 + flt.mantbits))
	mant = d.RoundedInteger()

	// Rounding might have added a bit; shift down.
	if mant == 2<<flt.mantbits {
		mant >>= 1
		exp++
		if exp-flt.bias >= 1<<flt.expbits-1 {
			goto overflow
		}
	}

	// Denormalized?
	// No leading 1 bit at position flt.mantbits: use the biased exponent 0.
	if mant&(1<<flt.mantbits) == 0 {
		exp = flt.bias
	}
	goto out

overflow:
	// ±Inf
	mant = 0
	exp = 1<<flt.expbits - 1 + flt.bias
	overflow = true

out:
	// Assemble bits.
	// Low flt.mantbits bits of mant (the leading 1 is implicit), then the
	// biased exponent, then the sign bit on top.
	bits := mant & (uint64(1)<<flt.mantbits - 1)
	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
	if d.neg {
		bits |= 1 << flt.mantbits << flt.expbits
	}
	return bits, overflow
}
   408  
// Exact powers of 10.
// float64pow10[i] is 10^i for i in [0, 22]; atof64exact indexes it.
var float64pow10 = []float64{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
	1e20, 1e21, 1e22,
}

// float32pow10[i] is 10^i for i in [0, 10]; atof32exact indexes it.
var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
   416  
   417  // If possible to convert decimal representation to 64-bit float f exactly,
   418  // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
   419  // Three common cases:
   420  //
   421  //	value is exact integer
   422  //	value is exact integer * exact power of ten
   423  //	value is exact integer / exact power of ten
   424  //
   425  // These all produce potentially inexact but correctly rounded answers.
   426  func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
   427  	if mantissa>>float64info.mantbits != 0 {
   428  		return
   429  	}
   430  	f = float64(mantissa)
   431  	if neg {
   432  		f = -f
   433  	}
   434  	switch {
   435  	case exp == 0:
   436  		// an integer.
   437  		return f, true
   438  	// Exact integers are <= 10^15.
   439  	// Exact powers of ten are <= 10^22.
   440  	case exp > 0 && exp <= 15+22: // int * 10^k
   441  		// If exponent is big but number of digits is not,
   442  		// can move a few zeros into the integer part.
   443  		if exp > 22 {
   444  			f *= float64pow10[exp-22]
   445  			exp = 22
   446  		}
   447  		if f > 1e15 || f < -1e15 {
   448  			// the exponent was really too large.
   449  			return
   450  		}
   451  		return f * float64pow10[exp], true
   452  	case exp < 0 && exp >= -22: // int / 10^k
   453  		return f / float64pow10[-exp], true
   454  	}
   455  	return
   456  }
   457  
   458  // If possible to compute mantissa*10^exp to 32-bit float f exactly,
   459  // entirely in floating-point math, do so, avoiding the machinery above.
   460  func atof32exact(mantissa uint64, exp int, neg bool) (f float32, ok bool) {
   461  	if mantissa>>float32MantBits != 0 {
   462  		return
   463  	}
   464  	f = float32(mantissa)
   465  	if neg {
   466  		f = -f
   467  	}
   468  	switch {
   469  	case exp == 0:
   470  		return f, true
   471  	// Exact integers are <= 10^7.
   472  	// Exact powers of ten are <= 10^10.
   473  	case exp > 0 && exp <= 7+10: // int * 10^k
   474  		// If exponent is big but number of digits is not,
   475  		// can move a few zeros into the integer part.
   476  		if exp > 10 {
   477  			f *= float32pow10[exp-10]
   478  			exp = 10
   479  		}
   480  		if f > 1e7 || f < -1e7 {
   481  			// the exponent was really too large.
   482  			return
   483  		}
   484  		return f * float32pow10[exp], true
   485  	case exp < 0 && exp >= -10: // int / 10^k
   486  		return f / float32pow10[-exp], true
   487  	}
   488  	return
   489  }
   490  
// atofHex converts the hex floating-point string s
// to a rounded float32 or float64 value (depending on flt==&float32info or flt==&float64info)
// and returns it as a float64.
// The string s has already been parsed into a mantissa, exponent, and sign (neg==true for negative).
// If trunc is true, trailing non-zero bits have been omitted from the mantissa.
//
// The value being converted is mantissa * 2^exp. If it is too large for
// the format, atofHex returns an infinity of the given sign together with
// ErrRange. The parameter s is not referenced in the body.
func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool) (float64, error) {
	// Largest and smallest exponents of a normal number in this format.
	maxExp := 1<<flt.expbits + flt.bias - 2
	minExp := flt.bias + 1
	exp += int(flt.mantbits) // mantissa now implicitly divided by 2^mantbits.

	// Shift mantissa and exponent to bring representation into float range.
	// Eventually we want a mantissa with a leading 1-bit followed by mantbits other bits.
	// For rounding, we need two more, where the bottom bit represents
	// whether that bit or any later bit was non-zero.
	// (If the mantissa has already lost non-zero bits, trunc is true,
	// and we OR in a 1 below after shifting left appropriately.)
	for mantissa != 0 && mantissa>>(flt.mantbits+2) == 0 {
		mantissa <<= 1
		exp--
	}
	if trunc {
		mantissa |= 1
	}
	// Too wide: shift right, folding every dropped bit into the bottom
	// (sticky) bit so that rounding still sees it.
	for mantissa>>(1+flt.mantbits+2) != 0 {
		mantissa = mantissa>>1 | mantissa&1
		exp++
	}

	// If exponent is too negative,
	// denormalize in hopes of making it representable.
	// (The -2 is for the rounding bits.)
	for mantissa > 1 && exp < minExp-2 {
		mantissa = mantissa>>1 | mantissa&1
		exp++
	}

	// Round using two bottom bits.
	round := mantissa & 3
	mantissa >>= 2
	round |= mantissa & 1 // round to even (round up if mantissa is odd)
	exp += 2
	if round == 3 {
		mantissa++
		// Rounding carried into a new top bit: renormalize.
		if mantissa == 1<<(1+flt.mantbits) {
			mantissa >>= 1
			exp++
		}
	}

	if mantissa>>flt.mantbits == 0 { // Denormal or zero.
		exp = flt.bias
	}
	var err error
	if exp > maxExp { // infinity and range error
		// Only the low mantbits bits are kept below, so this yields a
		// zero fraction with the all-ones exponent.
		mantissa = 1 << flt.mantbits
		exp = maxExp + 1
		err = ErrRange
	}

	// Assemble fraction bits, biased exponent, and sign.
	bits := mantissa & (1<<flt.mantbits - 1)
	bits |= uint64((exp-flt.bias)&(1<<flt.expbits-1)) << flt.mantbits
	if neg {
		bits |= 1 << flt.mantbits << flt.expbits
	}
	if flt == &float32info {
		return float64(float32frombits(uint32(bits))), err
	}
	return float64frombits(bits), err
}
   560  
// fnParseFloat is the name "ParseFloat" as a string constant.
// NOTE(review): nothing in the visible part of this file references it;
// presumably it labels errors built elsewhere in the package — confirm.
const fnParseFloat = "ParseFloat"
   562  
   563  func atof32(s string) (f float32, n int, err error) {
   564  	if val, n, ok := special(s); ok {
   565  		return float32(val), n, nil
   566  	}
   567  
   568  	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
   569  	if !ok {
   570  		return 0, n, ErrSyntax
   571  	}
   572  
   573  	if hex {
   574  		f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc)
   575  		return float32(f), n, err
   576  	}
   577  
   578  	if optimize {
   579  		// Try pure floating-point arithmetic conversion, and if that fails,
   580  		// the Eisel-Lemire algorithm.
   581  		if !trunc {
   582  			if f, ok := atof32exact(mantissa, exp, neg); ok {
   583  				return f, n, nil
   584  			}
   585  		}
   586  		f, ok := eiselLemire32(mantissa, exp, neg)
   587  		if ok {
   588  			if !trunc {
   589  				return f, n, nil
   590  			}
   591  			// Even if the mantissa was truncated, we may
   592  			// have found the correct result. Confirm by
   593  			// converting the upper mantissa bound.
   594  			fUp, ok := eiselLemire32(mantissa+1, exp, neg)
   595  			if ok && f == fUp {
   596  				return f, n, nil
   597  			}
   598  		}
   599  	}
   600  
   601  	// Slow fallback.
   602  	var d decimal
   603  	if !d.set(s[:n]) {
   604  		return 0, n, ErrSyntax
   605  	}
   606  	b, ovf := d.floatBits(&float32info)
   607  	f = float32frombits(uint32(b))
   608  	if ovf {
   609  		err = ErrRange
   610  	}
   611  	return f, n, err
   612  }
   613  
   614  func atof64(s string) (f float64, n int, err error) {
   615  	if val, n, ok := special(s); ok {
   616  		return val, n, nil
   617  	}
   618  
   619  	mantissa, exp, neg, trunc, hex, n, ok := readFloat(s)
   620  	if !ok {
   621  		return 0, n, ErrSyntax
   622  	}
   623  
   624  	if hex {
   625  		f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc)
   626  		return f, n, err
   627  	}
   628  
   629  	if optimize {
   630  		// Try pure floating-point arithmetic conversion, and if that fails,
   631  		// the Eisel-Lemire algorithm.
   632  		if !trunc {
   633  			if f, ok := atof64exact(mantissa, exp, neg); ok {
   634  				return f, n, nil
   635  			}
   636  		}
   637  		f, ok := eiselLemire64(mantissa, exp, neg)
   638  		if ok {
   639  			if !trunc {
   640  				return f, n, nil
   641  			}
   642  			// Even if the mantissa was truncated, we may
   643  			// have found the correct result. Confirm by
   644  			// converting the upper mantissa bound.
   645  			fUp, ok := eiselLemire64(mantissa+1, exp, neg)
   646  			if ok && f == fUp {
   647  				return f, n, nil
   648  			}
   649  		}
   650  	}
   651  
   652  	// Slow fallback.
   653  	var d decimal
   654  	if !d.set(s[:n]) {
   655  		return 0, n, ErrSyntax
   656  	}
   657  	b, ovf := d.floatBits(&float64info)
   658  	f = float64frombits(b)
   659  	if ovf {
   660  		err = ErrRange
   661  	}
   662  	return f, n, err
   663  }
   664  
   665  // ParseFloat converts the string s to a floating-point number
   666  // with the precision specified by bitSize: 32 for float32, or 64 for float64.
   667  // When bitSize=32, the result still has type float64, but it will be
   668  // convertible to float32 without changing its value.
   669  //
   670  // ParseFloat accepts decimal and hexadecimal floating-point numbers
   671  // as defined by the Go syntax for [floating-point literals].
   672  // If s is well-formed and near a valid floating-point number,
   673  // ParseFloat returns the nearest floating-point number rounded
   674  // using IEEE754 unbiased rounding.
   675  // (Parsing a hexadecimal floating-point value only rounds when
   676  // there are more bits in the hexadecimal representation than
   677  // will fit in the mantissa.)
   678  //
   679  // The errors that ParseFloat returns have concrete type *NumError
   680  // and include err.Num = s.
   681  //
   682  // If s is not syntactically well-formed, ParseFloat returns err.Err = ErrSyntax.
   683  //
   684  // If s is syntactically well-formed but is more than 1/2 ULP
   685  // away from the largest floating point number of the given size,
   686  // ParseFloat returns f = ±Inf, err.Err = ErrRange.
   687  //
   688  // ParseFloat recognizes the string "NaN", and the (possibly signed) strings "Inf" and "Infinity"
   689  // as their respective special floating point values. It ignores case when matching.
   690  //
   691  // [floating-point literals]: https://go.dev/ref/spec#Floating-point_literals
   692  func ParseFloat(s string, bitSize int) (float64, error) {
   693  	f, n, err := parseFloatPrefix(s, bitSize)
   694  	if n != len(s) {
   695  		return 0, ErrSyntax
   696  	}
   697  	return f, err
   698  }
   699  
   700  func parseFloatPrefix(s string, bitSize int) (float64, int, error) {
   701  	if bitSize == 32 {
   702  		f, n, err := atof32(s)
   703  		return float64(f), n, err
   704  	}
   705  	return atof64(s)
   706  }
   707
View as plain text