1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package idna
17
18 import (
19 "fmt"
20 "strings"
21 "unicode"
22 "unicode/utf8"
23
24 "golang.org/x/text/secure/bidirule"
25 "golang.org/x/text/unicode/bidi"
26 "golang.org/x/text/unicode/norm"
27 )
28
// unicode16 reports whether the standard library's Unicode tables are at
// version 16.0.0 or later. Code in this file uses it to choose between older
// and newer error codes and validation rules.
//
// NOTE(review): this is a lexicographic string comparison, so it is only
// correct while the major version has exactly two digits ("9.0.0" or
// "100.0.0" would compare incorrectly) — confirm the supported toolchains
// keep that invariant.
const unicode16 = unicode.Version >= "16.0.0"
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47 func ToASCII(s string) (string, error) {
48 return Punycode.process(s, true)
49 }
50
51
52 func ToUnicode(s string) (string, error) {
53 return Punycode.process(s, false)
54 }
55
56
// An Option mutates the options of a Profile under construction. Options are
// passed to New and are applied in the order given.
type Option func(*options)
58
59
60
61
62
63
64 func Transitional(transitional bool) Option {
65 return func(o *options) { o.transitional = transitional }
66 }
67
68
69
70
71
72 func VerifyDNSLength(verify bool) Option {
73 return func(o *options) { o.verifyDNSLength = verify }
74 }
75
76
77
78 func RemoveLeadingDots(remove bool) Option {
79 return func(o *options) { o.removeLeadingDots = remove }
80 }
81
82
83
84
85
86
87 func ValidateLabels(enable bool) Option {
88 return func(o *options) {
89
90
91 if o.mapping == nil && enable {
92 o.mapping = normalize
93 }
94 o.trie = trie
95 o.checkJoiners = enable
96 o.checkHyphens = enable
97 if enable {
98 o.fromPuny = validateFromPunycode
99 } else {
100 o.fromPuny = nil
101 }
102 }
103 }
104
105
106 func (p *Profile) validateLabels() bool {
107 return p.fromPuny != nil
108 }
109
110
111
112
113
114
115 func CheckHyphens(enable bool) Option {
116 return func(o *options) { o.checkHyphens = enable }
117 }
118
119
120
121
122
123 func CheckJoiners(enable bool) Option {
124 return func(o *options) {
125 o.trie = trie
126 o.checkJoiners = enable
127 }
128 }
129
130
131
132
133
134
135
136
137
138
139
140 func StrictDomainName(use bool) Option {
141 return func(o *options) { o.useSTD3Rules = use }
142 }
143
144
145
146
147
148
149
150
151 func BidiRule() Option {
152 return func(o *options) { o.bidirule = bidirule.ValidString }
153 }
154
155
156
157 func ValidateForRegistration() Option {
158 return func(o *options) {
159 o.mapping = validateRegistration
160 StrictDomainName(true)(o)
161 ValidateLabels(true)(o)
162 VerifyDNSLength(true)(o)
163 BidiRule()(o)
164 }
165 }
166
167
168
169
170
171
172
173
174
175 func MapForLookup() Option {
176 return func(o *options) {
177 o.mapping = validateAndMap
178 StrictDomainName(true)(o)
179 ValidateLabels(true)(o)
180 }
181 }
182
// options is the configuration embedded in every Profile. Option functions
// mutate it; the processing code in this file only reads it.
type options struct {
	transitional      bool // keep the deviation category instead of treating it as valid (see simplify)
	useSTD3Rules      bool // treat disallowedSTD3* categories as disallowed and check ASCII with allowedSTD3
	checkHyphens      bool // enable the V2/V3 hyphen checks in validateLabel
	checkJoiners      bool // enable the leading-modifier and ZWJ/ZWNJ context checks in validateLabel
	verifyDNSLength   bool // report empty labels and over-long labels/names
	removeLeadingDots bool // strip leading '.' bytes before splitting into labels

	// trie is the lookup table used by the joiner checks in validateLabel.
	trie *idnaTrie

	// fromPuny, if non-nil, validates a label that was decoded from Punycode.
	// A non-nil value also signals that label validation is enabled
	// (see Profile.validateLabels).
	fromPuny func(p *Profile, s string) error

	// mapping, if non-nil, is applied to the whole input before it is split
	// into labels. It returns the mapped string, whether bidi handling is
	// needed, and any error found.
	mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)

	// bidirule, if non-nil, is applied to every label when the input was
	// flagged as bidirectional; it reports whether the label is acceptable.
	bidirule func(s string) bool
}
204
205
// A Profile bundles a set of options and exposes the ToASCII and ToUnicode
// conversions that honour them.
type Profile struct {
	options
}
209
210 func apply(o *options, opts []Option) {
211 for _, f := range opts {
212 f(o)
213 }
214 }
215
216
217
218
219
220
221
222
223
224 func New(o ...Option) *Profile {
225 p := &Profile{}
226 apply(&p.options, o)
227 return p
228 }
229
230
231
232
233
234 func (p *Profile) ToASCII(s string) (string, error) {
235 return p.process(s, true)
236 }
237
238
239
240
241
242 func (p *Profile) ToUnicode(s string) (string, error) {
243 pp := *p
244 pp.transitional = false
245 return pp.process(s, false)
246 }
247
248
249
250 func (p *Profile) String() string {
251 s := ""
252 if p.transitional {
253 s = "Transitional"
254 } else {
255 s = "NonTransitional"
256 }
257 if p.useSTD3Rules {
258 s += ":UseSTD3Rules"
259 }
260 if p.checkHyphens {
261 s += ":CheckHyphens"
262 }
263 if p.checkJoiners {
264 s += ":CheckJoiners"
265 }
266 if p.verifyDNSLength {
267 s += ":VerifyDNSLength"
268 }
269 return s
270 }
271
272
273
// transitionalLookup is the value of the transitional flag used by the
// predefined Lookup profile.
const transitionalLookup = false
275
var (
	// Punycode is a Profile with no options set: no mapping, no validation,
	// only Punycode decoding/encoding of labels.
	Punycode *Profile = punycode

	// Lookup is a Profile that maps and validates with validateAndMap, uses
	// STD3 rules, checks hyphens and joiners, validates decoded Punycode
	// labels and applies the bidi rule. Its transitional flag comes from
	// transitionalLookup.
	Lookup *Profile = lookup

	// Display is configured like Lookup except that it does not set the
	// transitional flag explicitly (it stays at its zero value, false).
	Display *Profile = display

	// Registration is a Profile that validates with validateRegistration
	// (no mapping of characters), verifies DNS length limits, uses STD3
	// rules, checks hyphens and joiners, validates decoded Punycode labels
	// and applies the bidi rule.
	Registration *Profile = registration

	punycode = &Profile{}
	lookup   = &Profile{options{
		transitional: transitionalLookup,
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	display = &Profile{options{
		useSTD3Rules: true,
		checkHyphens: true,
		checkJoiners: true,
		trie:         trie,
		fromPuny:     validateFromPunycode,
		mapping:      validateAndMap,
		bidirule:     bidirule.ValidString,
	}}
	registration = &Profile{options{
		useSTD3Rules:    true,
		verifyDNSLength: true,
		checkHyphens:    true,
		checkJoiners:    true,
		trie:            trie,
		fromPuny:        validateFromPunycode,
		mapping:         validateRegistration,
		bidirule:        bidirule.ValidString,
	}}
)
329
// labelError reports an invalid label together with a short code that
// identifies the rule that was violated.
type labelError struct {
	label string
	code_ string
}

// code returns the rule code stored in the error.
func (le labelError) code() string {
	return le.code_
}

// Error implements the error interface; the message quotes the offending
// label and does not include the code.
func (le labelError) Error() string {
	const format = "idna: invalid label %q"
	return fmt.Sprintf(format, le.label)
}
336
// runeError reports a disallowed rune together with a short code that
// identifies the rule that was violated.
type runeError struct {
	r     rune
	code_ string
}

// code returns the rule code stored in the error.
func (re runeError) code() string {
	return re.code_
}

// Error implements the error interface; the message shows the rune in U+XXXX
// notation and does not include the code.
func (re runeError) Error() string {
	const format = "idna: disallowed rune %U"
	return fmt.Sprintf(format, re.r)
}
346
347
348 func code16(old, new string) string {
349 if unicode16 {
350 return new
351 }
352 return old
353 }
354
355
356
357
358
359
360
// process is the shared implementation behind ToASCII and ToUnicode.
//
// It applies the profile's mapping (if any) to s, splits the result into
// dot-separated labels, decodes labels carrying the ACE prefix, validates
// each label, applies the bidi rule when required and, when toASCII is true,
// Punycode-encodes every non-ASCII label and enforces the DNS length limits.
//
// The processed string is returned even when an error is reported. Once err
// is set, later checks are mostly guarded by err == nil, so the reported
// error is generally the first one found.
func (p *Profile) process(s string, toASCII bool) (string, error) {
	var err error
	var isBidi bool
	if p.mapping != nil {
		s, isBidi, err = p.mapping(p, s)
	}

	// Drop leading empty labels if requested.
	if p.removeLeadingDots {
		for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
		}
	}

	// Code used for empty-label and length errors: "X4_2" only for the
	// to-Unicode direction with Unicode 16 tables, "A4" otherwise.
	labelCode := "X4_2"
	if !unicode16 || toASCII {
		labelCode = "A4"
	}
	if err == nil && p.verifyDNSLength && s == "" {
		err = labelError{s, labelCode}
	}
	labels := labelIter{orig: s}
	for ; !labels.done(); labels.next() {
		label := labels.label()
		if label == "" {
			// Empty labels are only an error when DNS length verification
			// is on; the whole string is reported, not the label.
			if err == nil && p.verifyDNSLength {
				err = labelError{s, labelCode}
			}
			continue
		}
		if strings.HasPrefix(label, acePrefix) {
			enc := label[len(acePrefix):]
			u, err2 := decode(enc)
			if err2 != nil {
				if err == nil {
					err = err2
				}
				// Decoding failed: keep the original label untouched.
				continue
			}
			// With Unicode 16 tables, a non-empty ACE label that decodes
			// to pure ASCII is reported as an error.
			if unicode16 && err == nil && len(u) > 0 && isASCII(u) {
				err = punyError(enc)
			}
			isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
			labels.set(u)
			if err == nil && p.fromPuny != nil {
				err = p.fromPuny(p, u)
			}
			if err == nil {
				err = p.validateLabel(u, labelCode)
			}
		} else if err == nil {
			err = p.validateLabel(label, labelCode)
		}
	}
	// The bidi rule is checked per label, but only if some part of the input
	// was flagged as bidirectional and no earlier error exists.
	if isBidi && p.bidirule != nil && err == nil {
		for labels.reset(); !labels.done(); labels.next() {
			if !p.bidirule(labels.label()) {
				err = labelError{s, "B"}
				break
			}
		}
	}
	if toASCII {
		for labels.reset(); !labels.done(); labels.next() {
			label := labels.label()
			if !ascii(label) {
				a, err2 := encode(acePrefix, label)
				if err == nil {
					err = err2
				}
				label = a
				labels.set(a)
			}
			// Label length is measured on the encoded form.
			n := len(label)
			if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
				err = labelError{label, labelCode}
			}
		}
	}
	s = labels.result()
	if toASCII && p.verifyDNSLength && err == nil {
		// With Unicode 16 tables a trailing dot is itself an error.
		if unicode16 && strings.HasSuffix(s, ".") {
			err = labelError{s, labelCode}
		}

		// Total length limit, not counting a trailing dot.
		n := len(s)
		if n > 0 && s[n-1] == '.' {
			n--
		}
		if len(s) < 1 || n > 253 {
			err = labelError{s, labelCode}
		}
	}
	return s, err
}
462
// isASCII reports whether every byte of s is below 0x80. The empty string is
// considered ASCII.
func isASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i]&0x80 != 0 {
			return false
		}
	}
	return true
}
471
472 func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
473
474
475
476 mapped = norm.NFC.String(s)
477 isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
478 return mapped, isBidi, nil
479 }
480
481 func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
482
483 if !norm.NFC.IsNormalString(s) {
484 return s, false, labelError{s, "V1"}
485 }
486 for i := 0; i < len(s); {
487 v, sz := trie.lookupString(s[i:])
488 if sz == 0 {
489 return s, bidi, runeError{utf8.RuneError, "P1"}
490 }
491 bidi = bidi || info(v).isBidi(s[i:])
492
493 switch p.simplify(info(v).category()) {
494
495
496 case valid, deviation:
497 if sz == 1 && p.useSTD3Rules && !allowedSTD3(rune(s[i])) {
498 return s, bidi, runeError{rune(s[i]), "P1"}
499 }
500 case disallowed, mapped, unknown, ignored:
501 r, _ := utf8.DecodeRuneInString(s[i:])
502 return s, bidi, runeError{r, "P1"}
503 }
504 i += sz
505 }
506 return s, bidi, nil
507 }
508
509 func (c info) isBidi(s string) bool {
510 if !c.isMapped() {
511 return c&attributesMask == rtl
512 }
513
514
515 p, _ := bidi.LookupString(s)
516 switch p.Class() {
517 case bidi.R, bidi.AL, bidi.AN:
518 return true
519 }
520 return false
521 }
522
// validateAndMap is the mapping function for lookup-style profiles. It walks
// s rune by rune, replaces mapped runes with their mapping, drops ignored
// runes, substitutes U+FFFD for unknown or undecodable input and records an
// error for disallowed input. The result is normalized to NFC when needed.
//
// It returns the mapped string, whether any rune was flagged as
// bidirectional, and the first error recorded. The mapped string is returned
// even when err is non-nil.
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
	var (
		b []byte // output buffer; only used once a change has been made
		k int    // index in s of the first byte not yet copied into b
	)

	// combinedInfoBits is the OR of the info values of all runes seen; it is
	// used below to decide whether an otherwise unchanged s needs NFC.
	var combinedInfoBits info
	for i := 0; i < len(s); {
		v, sz := trie.lookupString(s[i:])
		if sz == 0 {
			// Undecodable input: emit a replacement character, discard the
			// rest of s and stop.
			b = append(b, s[k:i]...)
			b = append(b, "\ufffd"...)
			k = len(s)
			if err == nil {
				err = runeError{utf8.RuneError, "P1"}
			}
			break
		}
		combinedInfoBits |= info(v)
		bidi = bidi || info(v).isBidi(s[i:])
		start := i
		i += sz

		switch p.simplify(info(v).category()) {
		case valid:
			// Left in place; copied lazily with the next flush of s[k:...].
			continue
		case disallowed:
			// Only reported here with pre-Unicode-16 tables; the rune is
			// kept in the output either way.
			if !unicode16 && err == nil {
				r, _ := utf8.DecodeRuneInString(s[start:])
				err = runeError{r, "P1"}
			}
			continue
		case deviation:
			// NOTE(review): simplify already turns deviation into valid when
			// !p.transitional, so this guard looks unreachable. If it were
			// taken, k would advance below without s[k:i] being copied —
			// confirm before relying on it.
			if unicode16 && !p.transitional {
				break
			}
			fallthrough
		case mapped:
			b = append(b, s[k:start]...)

			// Under transitional processing with Unicode 16 tables, capital
			// sharp s is mapped to "ss" directly instead of via the tables.
			if unicode16 && p.transitional && s[start:start+sz] == "ẞ" {
				b = append(b, "ss"...)
			} else {
				b = info(v).appendMapping(b, s[start:i])
			}
		case ignored:
			// Flush what precedes the rune and drop the rune itself.
			b = append(b, s[k:start]...)

		case unknown:
			b = append(b, s[k:start]...)
			b = append(b, "\ufffd"...)
		}
		k = i
	}
	if k == 0 {
		// Nothing was rewritten; normalize only if some rune's info carries
		// the mayNeedNorm bit.
		if combinedInfoBits&mayNeedNorm != 0 {
			s = norm.NFC.String(s)
		}
	} else {
		b = append(b, s[k:]...)
		if norm.NFC.QuickSpan(b) != len(b) {
			b = norm.NFC.Bytes(b)
		}

		s = string(b)
	}
	return s, bidi, err
}
597
598
// labelIter iterates over the dot-separated labels of a string and allows
// individual labels to be replaced.
//
// As long as no label has been replaced, it scans orig in place using
// curStart/curEnd. The first call to set splits orig into slice, after which
// labels are addressed by index i. In both modes the iteration is finished
// once curStart has reached len(orig).
type labelIter struct {
	orig     string
	slice    []string
	curStart int
	curEnd   int
	i        int
}

// reset rewinds the iterator to the first label. Replacements made with set
// are kept.
func (l *labelIter) reset() {
	l.curStart, l.curEnd, l.i = 0, 0, 0
}

// done reports whether the iteration has moved past the last label.
func (l *labelIter) done() bool {
	return len(l.orig) <= l.curStart
}

// result returns the labels joined with dots, or the original string if no
// label was ever replaced.
func (l *labelIter) result() string {
	if l.slice == nil {
		return l.orig
	}
	return strings.Join(l.slice, ".")
}

// label returns the current label. In scanning mode it also records where
// the label ends so that next can step over the following dot.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	rest := l.orig[l.curStart:]
	if dot := strings.IndexByte(rest, '.'); dot >= 0 {
		l.curEnd = l.curStart + dot
	} else {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next advances to the following label. If the following position is the
// final one and holds an empty label (the string ends in a dot), the
// iteration is ended instead.
func (l *labelIter) next() {
	l.i++
	if l.slice == nil {
		l.curStart = l.curEnd + 1
		if last := len(l.orig) - 1; l.curStart == last && l.orig[last] == '.' {
			l.curStart = len(l.orig)
		}
		return
	}
	n := len(l.slice)
	switch {
	case l.i >= n:
		l.curStart = len(l.orig)
	case l.i == n-1 && l.slice[l.i] == "":
		l.curStart = len(l.orig)
	}
}

// set replaces the current label with s, switching the iterator to slice
// mode on first use.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}
657
658
// acePrefix is the prefix that marks a label as Punycode-encoded. process
// strips it before decoding and passes it to encode when encoding.
const acePrefix = "xn--"
660
661 func (p *Profile) simplify(cat category) category {
662 switch cat {
663 case disallowedSTD3Mapped:
664 if p.useSTD3Rules {
665 cat = disallowed
666 } else {
667 cat = mapped
668 }
669 case disallowedSTD3Valid:
670 if p.useSTD3Rules {
671 cat = disallowed
672 } else {
673 cat = valid
674 }
675 case deviation:
676 if !p.transitional {
677 cat = valid
678 }
679 case validNV8, validXV8:
680
681 cat = valid
682 }
683 return cat
684 }
685
686 func validateFromPunycode(p *Profile, s string) error {
687 if !norm.NFC.IsNormalString(s) {
688 return labelError{s, "V1"}
689 }
690
691
692 for i := 0; i < len(s); {
693 v, sz := trie.lookupString(s[i:])
694 if sz == 0 {
695 return runeError{utf8.RuneError, "P1"}
696 }
697 cat := info(v).category()
698 if c := p.simplify(cat); c != valid && c != deviation {
699 return labelError{s, code16("V6", "V7")}
700 }
701 i += sz
702 }
703 return nil
704 }
705
// The two joiner characters that get contextual treatment in validateLabel.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
710
// joinState is a state of the small state machine that validateLabel uses to
// check the context of zero-width joiners and non-joiners.
type joinState int8

// States of the joiner state machine; the transitions are in joinStates.
// stateStart is the zero value, so table entries that are left unspecified
// lead back to it. validateLabel rejects a label that ends in stateFAIL or
// stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
721
// joinStates is the transition table of the joiner state machine. It is
// indexed first by the current state and then by the join type of the next
// rune (or joinVirama as an extra step for virama modifiers). Entries that
// are not listed default to the zero value, stateStart.
var joinStates = [][numJoinTypes]joinState{
	stateStart: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateVirama,
	},
	stateVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
	},
	stateBefore: {
		joiningL:   stateBefore,
		joiningD:   stateBefore,
		joiningT:   stateBefore,
		joinZWNJ:   stateAfter,
		joinZWJ:    stateFAIL,
		joinVirama: stateBeforeVirama,
	},
	stateBeforeVirama: {
		joiningL: stateBefore,
		joiningD: stateBefore,
		joiningT: stateBefore,
	},
	stateAfter: {
		joiningL:   stateFAIL,
		joiningD:   stateBefore,
		joiningT:   stateAfter,
		joiningR:   stateStart,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateAfter,
	},
	// stateFAIL is absorbing: every input keeps the machine in stateFAIL.
	stateFAIL: {
		0:          stateFAIL,
		joiningL:   stateFAIL,
		joiningD:   stateFAIL,
		joiningT:   stateFAIL,
		joiningR:   stateFAIL,
		joinZWNJ:   stateFAIL,
		joinZWJ:    stateFAIL,
		joinVirama: stateFAIL,
	},
}
767
768
769
770
771
// allowedSTD3 reports whether r passes the STD3 character check used in this
// file: any rune at or above 0x80 passes, and among ASCII only the lowercase
// letters a-z, the digits 0-9, '-' and '.' do. Uppercase ASCII letters are
// not accepted.
func allowedSTD3(r rune) bool {
	switch {
	case r >= 0x80:
		return true
	case 'a' <= r && r <= 'z', '0' <= r && r <= '9':
		return true
	case r == '-', r == '.':
		return true
	}
	return false
}
775
776
777
778 func (p *Profile) validateLabel(s string, labelCode string) (err error) {
779 if s == "" {
780 if p.verifyDNSLength {
781 return labelError{s, labelCode}
782 }
783 return nil
784 }
785 if p.checkHyphens {
786 if len(s) > 4 && s[2] == '-' && s[3] == '-' {
787 return labelError{s, "V2"}
788 }
789 if s[0] == '-' || s[len(s)-1] == '-' {
790 return labelError{s, "V3"}
791 }
792 }
793
794
795
796 if unicode16 && p.validateLabels() {
797 for i := 0; i < len(s); {
798 v, sz := trie.lookupString(s[i:])
799 if sz == 0 {
800 return runeError{utf8.RuneError, "P1"}
801 }
802 cat := info(v).category()
803 if c := p.simplify(cat); c != valid && (!p.transitional || c != deviation) {
804 return labelError{s, "V7"}
805 }
806 if sz == 1 && p.useSTD3Rules && !allowedSTD3(rune(s[i])) {
807 return runeError{rune(s[i]), "U1"}
808 }
809 i += sz
810 }
811 }
812
813 if !p.checkJoiners {
814 return nil
815 }
816 trie := p.trie
817
818 v, sz := trie.lookupString(s)
819 x := info(v)
820 if x.isModifier() {
821 return labelError{s, code16("V5", "V6")}
822 }
823
824 if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
825 return nil
826 }
827 st := stateStart
828 for i := 0; ; {
829 jt := x.joinType()
830 if s[i:i+sz] == zwj {
831 jt = joinZWJ
832 } else if s[i:i+sz] == zwnj {
833 jt = joinZWNJ
834 }
835 st = joinStates[st][jt]
836 if x.isViramaModifier() {
837 st = joinStates[st][joinVirama]
838 }
839 if i += sz; i == len(s) {
840 break
841 }
842 v, sz = trie.lookupString(s[i:])
843 x = info(v)
844 }
845 if st == stateFAIL || st == stateAfter {
846 return labelError{s, "C"}
847 }
848
849 return nil
850 }
851
// ascii reports whether s consists solely of bytes below utf8.RuneSelf. The
// empty string is considered ASCII.
func ascii(s string) bool {
	for _, b := range []byte(s) {
		if b >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
860
861
862
863 func (c info) appendMapping(b []byte, s string) []byte {
864 index := int(c >> indexShift)
865 if c&xorBit == 0 {
866 p := index
867 return append(b, mappings[mappingIndex[p]:mappingIndex[p+1]]...)
868 }
869 b = append(b, s...)
870 if c&inlineXOR == inlineXOR {
871
872 b[len(b)-1] ^= byte(index)
873 } else {
874 for p := len(b) - int(xorData[index]); p < len(b); p++ {
875 index++
876 b[p] ^= xorData[index]
877 }
878 }
879 return b
880 }
881
View as plain text