Source file src/vendor/golang.org/x/net/idna/idna.go

     1  // Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
     2  
     3  // Copyright 2016 The Go Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  // Package idna implements IDNA2008 using the compatibility processing
     8  // defined by UTS (Unicode Technical Standard) #46, which defines a standard to
     9  // deal with the transition from IDNA2003.
    10  //
    11  // IDNA2008 (Internationalized Domain Names for Applications), is defined in RFC
    12  // 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
    13  // UTS #46 is defined in https://www.unicode.org/reports/tr46.
    14  // See https://unicode.org/cldr/utility/idna.jsp for a visualization of the
    15  // differences between these two standards.
    16  package idna // import "golang.org/x/net/idna"
    17  
    18  import (
    19  	"fmt"
    20  	"strings"
    21  	"unicode"
    22  	"unicode/utf8"
    23  
    24  	"golang.org/x/text/secure/bidirule"
    25  	"golang.org/x/text/unicode/bidi"
    26  	"golang.org/x/text/unicode/norm"
    27  )
    28  
// unicode16 reports whether the Unicode tables of the standard library
// (unicode.Version) are at version 16.0.0 or newer. The code in this file
// uses it to choose between the Unicode 10 and the Unicode 16 variant of the
// UTS #46 algorithm.
//
// NOTE(review): this is a lexicographic string comparison, so it only orders
// versions correctly while the major version has exactly two digits.
const unicode16 = unicode.Version >= "16.0.0"
    30  
    31  // NOTE: Unlike common practice in Go APIs, the functions will return a
    32  // sanitized domain name in case of errors. Browsers sometimes use a partially
    33  // evaluated string as lookup.
    34  // TODO: the current error handling is, in my opinion, the least opinionated.
    35  // Other strategies are also viable, though:
    36  // Option 1) Return an empty string in case of error, but allow the user to
    37  //    specify explicitly which errors to ignore.
    38  // Option 2) Return the partially evaluated string if it is itself a valid
    39  //    string, otherwise return the empty string in case of error.
    40  // Option 3) Option 1 and 2.
    41  // Option 4) Always return an empty string for now and implement Option 1 as
    42  //    needed, and document that the return string may not be empty in case of
    43  //    error in the future.
    44  // I think Option 1 is best, but it is quite opinionated.
    45  
    46  // ToASCII is a wrapper for Punycode.ToASCII.
    47  func ToASCII(s string) (string, error) {
    48  	return Punycode.process(s, true)
    49  }
    50  
    51  // ToUnicode is a wrapper for Punycode.ToUnicode.
    52  func ToUnicode(s string) (string, error) {
    53  	return Punycode.process(s, false)
    54  }
    55  
// An Option configures a Profile at creation time. It is a function that
// mutates the options of the Profile being built; New applies the Options it
// is given in argument order.
type Option func(*options)
    58  
    59  // Transitional sets a Profile to use the Transitional mapping as defined in UTS
    60  // #46. This will cause, for example, "ß" to be mapped to "ss". Using the
    61  // transitional mapping provides a compromise between IDNA2003 and IDNA2008
    62  // compatibility. It is used by some browsers when resolving domain names. This
    63  // option is only meaningful if combined with MapForLookup.
    64  func Transitional(transitional bool) Option {
    65  	return func(o *options) { o.transitional = transitional }
    66  }
    67  
    68  // VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
    69  // are longer than allowed by the RFC.
    70  //
    71  // This option corresponds to the VerifyDnsLength flag in UTS #46.
    72  func VerifyDNSLength(verify bool) Option {
    73  	return func(o *options) { o.verifyDNSLength = verify }
    74  }
    75  
    76  // RemoveLeadingDots removes leading label separators. Leading runes that map to
    77  // dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
    78  func RemoveLeadingDots(remove bool) Option {
    79  	return func(o *options) { o.removeLeadingDots = remove }
    80  }
    81  
    82  // ValidateLabels sets whether to check the mandatory label validation criteria
    83  // as defined in Section 5.4 of RFC 5891. This includes testing for correct use
    84  // of hyphens ('-'), normalization, validity of runes, and the context rules.
    85  // In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners flags
    86  // in UTS #46.
    87  func ValidateLabels(enable bool) Option {
    88  	return func(o *options) {
    89  		// Don't override existing mappings, but set one that at least checks
    90  		// normalization if it is not set.
    91  		if o.mapping == nil && enable {
    92  			o.mapping = normalize
    93  		}
    94  		o.trie = trie
    95  		o.checkJoiners = enable
    96  		o.checkHyphens = enable
    97  		if enable {
    98  			o.fromPuny = validateFromPunycode
    99  		} else {
   100  			o.fromPuny = nil
   101  		}
   102  	}
   103  }
   104  
   105  // validateLabels reports whether the ValidateLabels option is enabled.
   106  func (p *Profile) validateLabels() bool {
   107  	return p.fromPuny != nil
   108  }
   109  
   110  // CheckHyphens sets whether to check for correct use of hyphens ('-') in
   111  // labels. Most web browsers do not have this option set, since labels such as
   112  // "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
   113  //
   114  // This option corresponds to the CheckHyphens flag in UTS #46.
   115  func CheckHyphens(enable bool) Option {
   116  	return func(o *options) { o.checkHyphens = enable }
   117  }
   118  
   119  // CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
   120  // A of RFC 5892, concerning the use of joiner runes.
   121  //
   122  // This option corresponds to the CheckJoiners flag in UTS #46.
   123  func CheckJoiners(enable bool) Option {
   124  	return func(o *options) {
   125  		o.trie = trie
   126  		o.checkJoiners = enable
   127  	}
   128  }
   129  
   130  // StrictDomainName limits the set of permissible ASCII characters to those
   131  // allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
   132  // hyphen). This is set by default for MapForLookup and ValidateForRegistration,
   133  // but is only useful if ValidateLabels is set.
   134  //
   135  // This option is useful, for instance, for browsers that allow characters
   136  // outside this range, for example a '_' (U+005F LOW LINE). See
   137  // http://www.rfc-editor.org/std/std3.txt for more details.
   138  //
   139  // This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
   140  func StrictDomainName(use bool) Option {
   141  	return func(o *options) { o.useSTD3Rules = use }
   142  }
   143  
   144  // NOTE: the following options pull in tables. The tables should not be linked
   145  // in as long as the options are not used.
   146  
   147  // BidiRule enables the Bidi rule as defined in RFC 5893. Any application
   148  // that relies on proper validation of labels should include this rule.
   149  //
   150  // This option corresponds to the CheckBidi flag in UTS #46.
   151  func BidiRule() Option {
   152  	return func(o *options) { o.bidirule = bidirule.ValidString }
   153  }
   154  
   155  // ValidateForRegistration sets validation options to verify that a given IDN is
   156  // properly formatted for registration as defined by Section 4 of RFC 5891.
   157  func ValidateForRegistration() Option {
   158  	return func(o *options) {
   159  		o.mapping = validateRegistration
   160  		StrictDomainName(true)(o)
   161  		ValidateLabels(true)(o)
   162  		VerifyDNSLength(true)(o)
   163  		BidiRule()(o)
   164  	}
   165  }
   166  
   167  // MapForLookup sets validation and mapping options such that a given IDN is
   168  // transformed for domain name lookup according to the requirements set out in
   169  // Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
   170  // RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
   171  // to add this check.
   172  //
   173  // The mappings include normalization and mapping case, width and other
   174  // compatibility mappings.
   175  func MapForLookup() Option {
   176  	return func(o *options) {
   177  		o.mapping = validateAndMap
   178  		StrictDomainName(true)(o)
   179  		ValidateLabels(true)(o)
   180  	}
   181  }
   182  
// options holds the settings of a Profile. The boolean fields are toggled by
// the Option constructors of this package; the function fields are hooks that
// Profile.process invokes when they are non-nil.
type options struct {
	transitional      bool // set by Transitional
	useSTD3Rules      bool // set by StrictDomainName
	checkHyphens      bool // set by CheckHyphens and ValidateLabels
	checkJoiners      bool // set by CheckJoiners and ValidateLabels
	verifyDNSLength   bool // set by VerifyDNSLength
	removeLeadingDots bool // set by RemoveLeadingDots

	// trie is the table validateLabel uses for the joiner checks. The
	// CheckJoiners and ValidateLabels options populate it.
	trie *idnaTrie

	// fromPuny calls validation rules when converting A-labels to U-labels.
	// A non-nil value also marks the ValidateLabels option as enabled.
	fromPuny func(p *Profile, s string) error

	// mapping implements a validation and mapping step as defined in RFC 5895
	// or UTS 46, tailored to, for example, domain registration or lookup.
	mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)

	// bidirule, if specified, checks whether s conforms to the Bidi Rule
	// defined in RFC 5893.
	bidirule func(s string) bool
}
   204  
// A Profile defines the configuration of an IDNA mapper. It embeds the
// options assembled by New (or by one of the predefined profiles) and exposes
// the conversions through its ToASCII and ToUnicode methods.
type Profile struct {
	options
}
   209  
   210  func apply(o *options, opts []Option) {
   211  	for _, f := range opts {
   212  		f(o)
   213  	}
   214  }
   215  
   216  // New creates a new Profile.
   217  //
   218  // With no options, the returned Profile is the most permissive and equals the
   219  // Punycode Profile. Options can be passed to further restrict the Profile. The
   220  // MapForLookup and ValidateForRegistration options set a collection of options,
   221  // for lookup and registration purposes respectively, which can be tailored by
   222  // adding more fine-grained options, where later options override earlier
   223  // options.
   224  func New(o ...Option) *Profile {
   225  	p := &Profile{}
   226  	apply(&p.options, o)
   227  	return p
   228  }
   229  
   230  // ToASCII converts a domain or domain label to its ASCII form. For example,
   231  // ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
   232  // ToASCII("golang") is "golang". If an error is encountered it will return
   233  // an error and a (partially) processed result.
   234  func (p *Profile) ToASCII(s string) (string, error) {
   235  	return p.process(s, true)
   236  }
   237  
   238  // ToUnicode converts a domain or domain label to its Unicode form. For example,
   239  // ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
   240  // ToUnicode("golang") is "golang". If an error is encountered it will return
   241  // an error and a (partially) processed result.
   242  func (p *Profile) ToUnicode(s string) (string, error) {
   243  	pp := *p
   244  	pp.transitional = false
   245  	return pp.process(s, false)
   246  }
   247  
   248  // String reports a string with a description of the profile for debugging
   249  // purposes. The string format may change with different versions.
   250  func (p *Profile) String() string {
   251  	s := ""
   252  	if p.transitional {
   253  		s = "Transitional"
   254  	} else {
   255  		s = "NonTransitional"
   256  	}
   257  	if p.useSTD3Rules {
   258  		s += ":UseSTD3Rules"
   259  	}
   260  	if p.checkHyphens {
   261  		s += ":CheckHyphens"
   262  	}
   263  	if p.checkJoiners {
   264  		s += ":CheckJoiners"
   265  	}
   266  	if p.verifyDNSLength {
   267  		s += ":VerifyDNSLength"
   268  	}
   269  	return s
   270  }
   271  
// transitionalLookup is the value of the transitional setting used by the
// Lookup profile. Transitional processing is disabled by default as of
// Go 1.18.
// https://golang.org/issue/47510
const transitionalLookup = false
   275  
// Predefined profiles. The exported variables alias the unexported values
// declared further down in this block.
var (
	// Punycode is a Profile that does raw punycode processing with a minimum
	// of validation.
	Punycode *Profile = punycode

	// Lookup is the recommended profile for looking up domain names, according
	// to Section 5 of RFC 5891. The exact configuration of this profile may
	// change over time.
	Lookup *Profile = lookup

	// Display is the recommended profile for displaying domain names.
	// The configuration of this profile may change over time.
	Display *Profile = display

	// Registration is the recommended profile for checking whether a given
	// IDN is valid for registration, according to Section 4 of RFC 5891.
	Registration *Profile = registration

	// punycode has no options set at all.
	punycode = &Profile{}
	// lookup maps and validates; its transitional setting follows
	// transitionalLookup.
	lookup = &Profile{options{
		transitional: transitionalLookup,
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	// display has the same settings as lookup except that transitional is
	// left at its zero value (false) instead of following transitionalLookup.
	display = &Profile{options{
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	// registration validates without mapping (validateRegistration) and
	// additionally verifies DNS length limits.
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}

	// TODO: profiles
	// Register: recommended for approving domain names: don't do any mappings
	// but rather reject on invalid input. Bundle or block deviation characters.
)
   329  
   330  type labelError struct{ label, code_ string }
   331  
   332  func (e labelError) code() string { return e.code_ }
   333  func (e labelError) Error() string {
   334  	return fmt.Sprintf("idna: invalid label %q", e.label)
   335  }
   336  
   337  type runeError struct {
   338  	r     rune
   339  	code_ string
   340  }
   341  
   342  func (e runeError) code() string { return e.code_ }
   343  func (e runeError) Error() string {
   344  	return fmt.Sprintf("idna: disallowed rune %U", e.r)
   345  }
   346  
   347  // code16 returns old for Unicode < 16, new for Unicode >= 16.
   348  func code16(old, new string) string {
   349  	if unicode16 {
   350  		return new
   351  	}
   352  	return old
   353  }
   354  
// process implements the algorithm described in section 4 of UTS #46.
// It implements both the Unicode 10 algorithm
// (https://www.unicode.org/reports/tr46/tr46-19.html)
// and the Unicode 16 algorithm
// (https://www.unicode.org/reports/tr46/tr46-35.html)
// depending on unicode16, which in turn depends on unicode.Version.
//
// s is the domain name to convert. If toASCII is true, non-ASCII labels are
// punycode-encoded in the result; otherwise the result keeps the decoded
// labels. Only the first error encountered is reported, and the (partially)
// processed string is returned alongside it.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	var isBidi bool
	// Step 1: optional mapping/validation of the whole input.
	if p.mapping != nil {
		s, isBidi, err = p.mapping(p, s)
	}
	// Remove leading empty labels.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}
	// TODO: allow for a quick check of the tables data.
	// It seems like we should only create this error on ToASCII, but the
	// UTS 46 conformance tests suggests we should always check this.
	labelCode := "X4_2"
	if !unicode16 || toASCII {
		labelCode = "A4"
	}
	if err == nil && p.verifyDNSLength && s == "" {
		err = labelError{s, labelCode}
	}
	// Step 2: decode A-labels and validate every label.
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are not okay. The label iterator skips the last
			// label if it is empty.
			if err == nil && p.verifyDNSLength {
				err = labelError{s, labelCode}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			enc := label[len(acePrefix):]
			u, err2 := decode(enc)
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Spec says keep the old label.
				continue
			}
			// Under Unicode 16, an A-label that decodes to pure ASCII is
			// reported as an error.
			if unicode16 && err == nil && len(u) > 0 && isASCII(u) {
				err = punyError(enc)
			}
			isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
			labels.set(u)
			if err == nil && p.fromPuny != nil {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				// This should be called on NonTransitional, according to the
				// spec, but that currently does not have any effect. Use the
				// original profile to preserve options.
				err = p.validateLabel(u, labelCode)
			}
		} else if err == nil {
			err = p.validateLabel(label, labelCode)
		}
	}
	// Step 3: apply the Bidi rule to every label, but only if some part of
	// the name was found to be bidirectional and the rule is configured.
	if isBidi && p.bidirule != nil && err == nil {
		for labels.reset(); !labels.done(); labels.next() {
			if !p.bidirule(labels.label()) {
				err = labelError{s, "B"}
				break
			}
		}
	}
	// Step 4: for ToASCII, punycode-encode non-ASCII labels and check the
	// per-label length limit.
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				label = a
				labels.set(a)
			}
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = labelError{label, labelCode}
			}
		}
	}
	s = labels.result()
	// Step 5: check the total length of the resulting name.
	if toASCII && p.verifyDNSLength && err == nil {
		if unicode16 && strings.HasSuffix(s, ".") {
			err = labelError{s, labelCode}
		}
		// Compute the length of the domain name minus the root label and its dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = labelError{s, labelCode}
		}
	}
	return s, err
}
   462  
   463  func isASCII(s string) bool {
   464  	for _, c := range []byte(s) {
   465  		if c >= 0x80 {
   466  			return false
   467  		}
   468  	}
   469  	return true
   470  }
   471  
   472  func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
   473  	// TODO: consider first doing a quick check to see if any of these checks
   474  	// need to be done. This will make it slower in the general case, but
   475  	// faster in the common case.
   476  	mapped = norm.NFC.String(s)
   477  	isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
   478  	return mapped, isBidi, nil
   479  }
   480  
   481  func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
   482  	// TODO: filter need for normalization in loop below.
   483  	if !norm.NFC.IsNormalString(s) {
   484  		return s, false, labelError{s, "V1"}
   485  	}
   486  	for i := 0; i < len(s); {
   487  		v, sz := trie.lookupString(s[i:])
   488  		if sz == 0 {
   489  			return s, bidi, runeError{utf8.RuneError, "P1"}
   490  		}
   491  		bidi = bidi || info(v).isBidi(s[i:])
   492  		// Copy bytes not copied so far.
   493  		switch p.simplify(info(v).category()) {
   494  		// TODO: handle the NV8 defined in the Unicode idna data set to allow
   495  		// for strict conformance to IDNA2008.
   496  		case valid, deviation:
   497  			if sz == 1 && p.useSTD3Rules && !allowedSTD3(rune(s[i])) {
   498  				return s, bidi, runeError{rune(s[i]), "P1"}
   499  			}
   500  		case disallowed, mapped, unknown, ignored:
   501  			r, _ := utf8.DecodeRuneInString(s[i:])
   502  			return s, bidi, runeError{r, "P1"}
   503  		}
   504  		i += sz
   505  	}
   506  	return s, bidi, nil
   507  }
   508  
   509  func (c info) isBidi(s string) bool {
   510  	if !c.isMapped() {
   511  		return c&attributesMask == rtl
   512  	}
   513  	// TODO: also store bidi info for mapped data. This is possible, but a bit
   514  	// cumbersome and not for the common case.
   515  	p, _ := bidi.LookupString(s)
   516  	switch p.Class() {
   517  	case bidi.R, bidi.AL, bidi.AN:
   518  		return true
   519  	}
   520  	return false
   521  }
   522  
// validateAndMap is the mapping step used by the lookup and display profiles.
// It walks s rune by rune, looks each rune up in trie and, depending on its
// (simplified) category, keeps it, replaces it with its mapping, drops it, or
// substitutes U+FFFD. The result is NFC-normalized where needed.
//
// It returns the mapped string, whether a bidi rune was seen, and the first
// error encountered (nil if none).
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
	var (
		b []byte // output buffer; stays nil as long as s needs no change
		k int    // s[:k] has been handled already (copied to b or replaced)
	)
	// combinedInfoBits contains the or-ed bits of all runes. We use this
	// to derive the mayNeedNorm bit later. This may trigger normalization
	// overeagerly, but it will not do so in the common case. The end result
	// is another 10% saving on BenchmarkProfile for the common case.
	var combinedInfoBits info
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			// No rune could be read at s[i:]: keep what precedes it, emit
			// U+FFFD and discard the remainder of the input.
			b = append(b, s[k:i]...)
			b = append(b, "\ufffd"...)
			k = len(s)
			if err == nil {
				err = runeError{utf8.RuneError, "P1"}
			}
			break
		}
		combinedInfoBits |= info(v)
		bidi = bidi || info(v).isBidi(s[i:])
		start := i
		i += sz
		// Copy bytes not copied so far.
		switch p.simplify(info(v).category()) {
		case valid:
			continue
		case disallowed:
			// Unicode 16 delays the error until validateLabels.
			// Unicode 10 gave an error now.
			if !unicode16 && err == nil {
				r, _ := utf8.DecodeRuneInString(s[start:])
				err = runeError{r, "P1"}
			}
			continue
		case deviation:
			// NOTE(review): simplify turns deviation into valid unless
			// p.transitional is set, so this condition looks unreachable.
			if unicode16 && !p.transitional {
				break
			}
			fallthrough
		case mapped:
			b = append(b, s[k:start]...)
			// Unicode 16 requires a special case to handle ẞ -> ss in transitional mode.
			if unicode16 && p.transitional && s[start:start+sz] == "ẞ" {
				b = append(b, "ss"...)
			} else {
				b = info(v).appendMapping(b, s[start:i])
			}
		case ignored:
			b = append(b, s[k:start]...)
			// drop the rune
		case unknown:
			b = append(b, s[k:start]...)
			b = append(b, "\ufffd"...)
		}
		// Everything up to i is now reflected in b.
		k = i
	}
	if k == 0 {
		// No changes so far.
		if combinedInfoBits&mayNeedNorm != 0 {
			s = norm.NFC.String(s)
		}
	} else {
		// Flush the unchanged tail and normalize the buffer if required.
		b = append(b, s[k:]...)
		if norm.NFC.QuickSpan(b) != len(b) {
			b = norm.NFC.Bytes(b)
		}
		// TODO: the punycode converters require strings as input.
		s = string(b)
	}
	return s, bidi, err
}
   597  
   598  // A labelIter allows iterating over domain name labels.
   599  type labelIter struct {
   600  	orig     string
   601  	slice    []string
   602  	curStart int
   603  	curEnd   int
   604  	i        int
   605  }
   606  
   607  func (l *labelIter) reset() {
   608  	l.curStart = 0
   609  	l.curEnd = 0
   610  	l.i = 0
   611  }
   612  
   613  func (l *labelIter) done() bool {
   614  	return l.curStart >= len(l.orig)
   615  }
   616  
   617  func (l *labelIter) result() string {
   618  	if l.slice != nil {
   619  		return strings.Join(l.slice, ".")
   620  	}
   621  	return l.orig
   622  }
   623  
   624  func (l *labelIter) label() string {
   625  	if l.slice != nil {
   626  		return l.slice[l.i]
   627  	}
   628  	p := strings.IndexByte(l.orig[l.curStart:], '.')
   629  	l.curEnd = l.curStart + p
   630  	if p == -1 {
   631  		l.curEnd = len(l.orig)
   632  	}
   633  	return l.orig[l.curStart:l.curEnd]
   634  }
   635  
   636  // next sets the value to the next label. It skips the last label if it is empty.
   637  func (l *labelIter) next() {
   638  	l.i++
   639  	if l.slice != nil {
   640  		if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
   641  			l.curStart = len(l.orig)
   642  		}
   643  	} else {
   644  		l.curStart = l.curEnd + 1
   645  		if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
   646  			l.curStart = len(l.orig)
   647  		}
   648  	}
   649  }
   650  
   651  func (l *labelIter) set(s string) {
   652  	if l.slice == nil {
   653  		l.slice = strings.Split(l.orig, ".")
   654  	}
   655  	l.slice[l.i] = s
   656  }
   657  
// acePrefix is the ASCII Compatible Encoding prefix. A label starting with it
// is treated as punycode-encoded, and encoded labels are produced with it.
const acePrefix = "xn--"
   660  
   661  func (p *Profile) simplify(cat category) category {
   662  	switch cat {
   663  	case disallowedSTD3Mapped: // only happens for pre-Unicode 16
   664  		if p.useSTD3Rules {
   665  			cat = disallowed
   666  		} else {
   667  			cat = mapped
   668  		}
   669  	case disallowedSTD3Valid: // only happens for pre-Unicode 16
   670  		if p.useSTD3Rules {
   671  			cat = disallowed
   672  		} else {
   673  			cat = valid
   674  		}
   675  	case deviation:
   676  		if !p.transitional {
   677  			cat = valid
   678  		}
   679  	case validNV8, validXV8:
   680  		// TODO: handle V2008
   681  		cat = valid
   682  	}
   683  	return cat
   684  }
   685  
   686  func validateFromPunycode(p *Profile, s string) error {
   687  	if !norm.NFC.IsNormalString(s) {
   688  		return labelError{s, "V1"}
   689  	}
   690  	// TODO: detect whether string may have to be normalized in the following
   691  	// loop.
   692  	for i := 0; i < len(s); {
   693  		v, sz := trie.lookupString(s[i:])
   694  		if sz == 0 {
   695  			return runeError{utf8.RuneError, "P1"}
   696  		}
   697  		cat := info(v).category()
   698  		if c := p.simplify(cat); c != valid && c != deviation {
   699  			return labelError{s, code16("V6", "V7")}
   700  		}
   701  		i += sz
   702  	}
   703  	return nil
   704  }
   705  
// The two joiner runes that validateLabel handles specially in its ContextJ
// check.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
   710  
// joinState is a state of the automaton that validateLabel runs over the
// runes of a label to check the use of zero-width joiners.
type joinState int8

// States of the joiner automaton. stateStart is the zero value, so it is also
// the target of every transition that joinStates does not spell out.
// validateLabel rejects a label whose final state is stateFAIL or stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
   721  
// joinStates is the transition table of the joiner automaton. It is indexed
// first by the current state and then by the join type of the rune being
// consumed. Entries that are not listed are the zero value, stateStart.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter, // no-op as we can't accept joiners here
	},
	// stateFAIL is absorbing: every input leads back to stateFAIL.
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
   767  
   768  // allowedSTD3 reports whether r is a rune that can appear in a domain name
   769  // according to STD3. We allow all non-ASCII runes and then letters, digits, hyphens.
   770  // We also add dot so that this can be run against the whole name and not just
   771  // a single name element (label). The surrounding code checks dots well enough.
   772  func allowedSTD3(r rune) bool {
   773  	return r >= 0x80 || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '-' || r == '.'
   774  }
   775  
   776  // validateLabel validates the criteria from Section 4.1. Item 1, 4, and 6 are
   777  // already implicitly satisfied by the overall implementation.
   778  func (p *Profile) validateLabel(s string, labelCode string) (err error) {
   779  	if s == "" {
   780  		if p.verifyDNSLength {
   781  			return labelError{s, labelCode}
   782  		}
   783  		return nil
   784  	}
   785  	if p.checkHyphens {
   786  		if len(s) > 4 && s[2] == '-' && s[3] == '-' {
   787  			return labelError{s, "V2"}
   788  		}
   789  		if s[0] == '-' || s[len(s)-1] == '-' {
   790  			return labelError{s, "V3"}
   791  		}
   792  	}
   793  
   794  	// Unicode 16's TR 46 delays the rune validity checks until after the label is decoded.
   795  	// (validateAndMap did not reject them earlier.)
   796  	if unicode16 && p.validateLabels() {
   797  		for i := 0; i < len(s); {
   798  			v, sz := trie.lookupString(s[i:])
   799  			if sz == 0 {
   800  				return runeError{utf8.RuneError, "P1"}
   801  			}
   802  			cat := info(v).category()
   803  			if c := p.simplify(cat); c != valid && (!p.transitional || c != deviation) {
   804  				return labelError{s, "V7"}
   805  			}
   806  			if sz == 1 && p.useSTD3Rules && !allowedSTD3(rune(s[i])) {
   807  				return runeError{rune(s[i]), "U1"}
   808  			}
   809  			i += sz
   810  		}
   811  	}
   812  
   813  	if !p.checkJoiners {
   814  		return nil
   815  	}
   816  	trie := p.trie // p.checkJoiners is only set if trie is set.
   817  	// TODO: merge the use of this in the trie.
   818  	v, sz := trie.lookupString(s)
   819  	x := info(v)
   820  	if x.isModifier() {
   821  		return labelError{s, code16("V5", "V6")}
   822  	}
   823  	// Quickly return in the absence of zero-width (non) joiners.
   824  	if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
   825  		return nil
   826  	}
   827  	st := stateStart
   828  	for i := 0; ; {
   829  		jt := x.joinType()
   830  		if s[i:i+sz] == zwj {
   831  			jt = joinZWJ
   832  		} else if s[i:i+sz] == zwnj {
   833  			jt = joinZWNJ
   834  		}
   835  		st = joinStates[st][jt]
   836  		if x.isViramaModifier() {
   837  			st = joinStates[st][joinVirama]
   838  		}
   839  		if i += sz; i == len(s) {
   840  			break
   841  		}
   842  		v, sz = trie.lookupString(s[i:])
   843  		x = info(v)
   844  	}
   845  	if st == stateFAIL || st == stateAfter {
   846  		return labelError{s, "C"}
   847  	}
   848  
   849  	return nil
   850  }
   851  
   852  func ascii(s string) bool {
   853  	for i := 0; i < len(s); i++ {
   854  		if s[i] >= utf8.RuneSelf {
   855  			return false
   856  		}
   857  	}
   858  	return true
   859  }
   860  
// appendMapping appends the mapping for the respective rune. isMapped must be
// true. A mapping is a categorization of a rune as defined in UTS #46.
//
// b is the buffer to append to and s holds the encoded rune being mapped; the
// extended buffer is returned.
func (c info) appendMapping(b []byte, s string) []byte {
	index := int(c >> indexShift)
	if c&xorBit == 0 {
		// Table mapping: index selects a span of the mappings table that is
		// delimited by two consecutive entries of mappingIndex.
		p := index
		return append(b, mappings[mappingIndex[p]:mappingIndex[p+1]]...)
	}
	// XOR mapping: start from the bytes of s and patch them in place.
	b = append(b, s...)
	if c&inlineXOR == inlineXOR {
		// The mask is stored inline in index and is applied to the last byte.
		// TODO: support and handle two-byte inline masks
		b[len(b)-1] ^= byte(index)
	} else {
		// xorData[index] is the number of trailing bytes to patch; the masks
		// for those bytes follow it in xorData.
		for p := len(b) - int(xorData[index]); p < len(b); p++ {
			index++
			b[p] ^= xorData[index]
		}
	}
	return b
}
   881  

View as plain text