1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package idna
19
20 import (
21 "fmt"
22 "strings"
23 "unicode/utf8"
24
25 "golang.org/x/text/secure/bidirule"
26 "golang.org/x/text/unicode/bidi"
27 "golang.org/x/text/unicode/norm"
28 )
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46 func ToASCII(s string) (string, error) {
47 return Punycode.process(s, true)
48 }
49
50
51 func ToUnicode(s string) (string, error) {
52 return Punycode.process(s, false)
53 }
54
55
56 type Option func(*options)
57
58
59
60
61
62
63 func Transitional(transitional bool) Option {
64 return func(o *options) { o.transitional = transitional }
65 }
66
67
68
69
70
71 func VerifyDNSLength(verify bool) Option {
72 return func(o *options) { o.verifyDNSLength = verify }
73 }
74
75
76
77 func RemoveLeadingDots(remove bool) Option {
78 return func(o *options) { o.removeLeadingDots = remove }
79 }
80
81
82
83
84
85
86 func ValidateLabels(enable bool) Option {
87 return func(o *options) {
88
89
90 if o.mapping == nil && enable {
91 o.mapping = normalize
92 }
93 o.trie = trie
94 o.checkJoiners = enable
95 o.checkHyphens = enable
96 if enable {
97 o.fromPuny = validateFromPunycode
98 } else {
99 o.fromPuny = nil
100 }
101 }
102 }
103
104
105
106
107
108
109 func CheckHyphens(enable bool) Option {
110 return func(o *options) { o.checkHyphens = enable }
111 }
112
113
114
115
116
117 func CheckJoiners(enable bool) Option {
118 return func(o *options) {
119 o.trie = trie
120 o.checkJoiners = enable
121 }
122 }
123
124
125
126
127
128
129
130
131
132
133
134 func StrictDomainName(use bool) Option {
135 return func(o *options) { o.useSTD3Rules = use }
136 }
137
138
139
140
141
142
143
144
145 func BidiRule() Option {
146 return func(o *options) { o.bidirule = bidirule.ValidString }
147 }
148
149
150
151 func ValidateForRegistration() Option {
152 return func(o *options) {
153 o.mapping = validateRegistration
154 StrictDomainName(true)(o)
155 ValidateLabels(true)(o)
156 VerifyDNSLength(true)(o)
157 BidiRule()(o)
158 }
159 }
160
161
162
163
164
165
166
167
168
169 func MapForLookup() Option {
170 return func(o *options) {
171 o.mapping = validateAndMap
172 StrictDomainName(true)(o)
173 ValidateLabels(true)(o)
174 }
175 }
176
177 type options struct {
178 transitional bool
179 useSTD3Rules bool
180 checkHyphens bool
181 checkJoiners bool
182 verifyDNSLength bool
183 removeLeadingDots bool
184
185 trie *idnaTrie
186
187
188 fromPuny func(p *Profile, s string) error
189
190
191
192 mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
193
194
195
196 bidirule func(s string) bool
197 }
198
199
200 type Profile struct {
201 options
202 }
203
204 func apply(o *options, opts []Option) {
205 for _, f := range opts {
206 f(o)
207 }
208 }
209
210
211
212
213
214
215
216
217
218 func New(o ...Option) *Profile {
219 p := &Profile{}
220 apply(&p.options, o)
221 return p
222 }
223
224
225
226
227
228 func (p *Profile) ToASCII(s string) (string, error) {
229 return p.process(s, true)
230 }
231
232
233
234
235
236 func (p *Profile) ToUnicode(s string) (string, error) {
237 pp := *p
238 pp.transitional = false
239 return pp.process(s, false)
240 }
241
242
243
244 func (p *Profile) String() string {
245 s := ""
246 if p.transitional {
247 s = "Transitional"
248 } else {
249 s = "NonTransitional"
250 }
251 if p.useSTD3Rules {
252 s += ":UseSTD3Rules"
253 }
254 if p.checkHyphens {
255 s += ":CheckHyphens"
256 }
257 if p.checkJoiners {
258 s += ":CheckJoiners"
259 }
260 if p.verifyDNSLength {
261 s += ":VerifyDNSLength"
262 }
263 return s
264 }
265
266 var (
267
268
269 Punycode *Profile = punycode
270
271
272
273
274 Lookup *Profile = lookup
275
276
277
278 Display *Profile = display
279
280
281
282 Registration *Profile = registration
283
284 punycode = &Profile{}
285 lookup = &Profile{options{
286 transitional: transitionalLookup,
287 useSTD3Rules: true,
288 checkHyphens: true,
289 checkJoiners: true,
290 trie: trie,
291 fromPuny: validateFromPunycode,
292 mapping: validateAndMap,
293 bidirule: bidirule.ValidString,
294 }}
295 display = &Profile{options{
296 useSTD3Rules: true,
297 checkHyphens: true,
298 checkJoiners: true,
299 trie: trie,
300 fromPuny: validateFromPunycode,
301 mapping: validateAndMap,
302 bidirule: bidirule.ValidString,
303 }}
304 registration = &Profile{options{
305 useSTD3Rules: true,
306 verifyDNSLength: true,
307 checkHyphens: true,
308 checkJoiners: true,
309 trie: trie,
310 fromPuny: validateFromPunycode,
311 mapping: validateRegistration,
312 bidirule: bidirule.ValidString,
313 }}
314
315
316
317
318 )
319
// labelError reports a label (or, at some call sites, a whole name) that
// failed validation. code_ is a short identifier for the failed check, such
// as "A4" or "V1".
type labelError struct{ label, code_ string }

// code returns the identifier of the check that failed.
func (e labelError) code() string {
	return e.code_
}

// Error implements the error interface; the message quotes the offending
// label and does not include the code.
func (e labelError) Error() string {
	const format = "idna: invalid label %q"
	return fmt.Sprintf(format, e.label)
}
326
// runeError reports a single code point that is not allowed; its value is
// the offending rune.
type runeError rune

// code always returns "P1".
func (e runeError) code() string {
	return "P1"
}

// Error implements the error interface, printing the rune in U+XXXX form.
func (e runeError) Error() string {
	const format = "idna: disallowed rune %U"
	return fmt.Sprintf(format, rune(e))
}
333
334
335
336 func (p *Profile) process(s string, toASCII bool) (string, error) {
337 var err error
338 var isBidi bool
339 if p.mapping != nil {
340 s, isBidi, err = p.mapping(p, s)
341 }
342
343 if p.removeLeadingDots {
344 for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
345 }
346 }
347
348
349
350 if err == nil && p.verifyDNSLength && s == "" {
351 err = &labelError{s, "A4"}
352 }
353 labels := labelIter{orig: s}
354 for ; !labels.done(); labels.next() {
355 label := labels.label()
356 if label == "" {
357
358
359 if err == nil && p.verifyDNSLength {
360 err = &labelError{s, "A4"}
361 }
362 continue
363 }
364 if strings.HasPrefix(label, acePrefix) {
365 u, err2 := decode(label[len(acePrefix):])
366 if err2 != nil {
367 if err == nil {
368 err = err2
369 }
370
371 continue
372 }
373 isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
374 labels.set(u)
375 if err == nil && p.fromPuny != nil {
376 err = p.fromPuny(p, u)
377 }
378 if err == nil {
379
380
381
382 err = p.validateLabel(u)
383 }
384 } else if err == nil {
385 err = p.validateLabel(label)
386 }
387 }
388 if isBidi && p.bidirule != nil && err == nil {
389 for labels.reset(); !labels.done(); labels.next() {
390 if !p.bidirule(labels.label()) {
391 err = &labelError{s, "B"}
392 break
393 }
394 }
395 }
396 if toASCII {
397 for labels.reset(); !labels.done(); labels.next() {
398 label := labels.label()
399 if !ascii(label) {
400 a, err2 := encode(acePrefix, label)
401 if err == nil {
402 err = err2
403 }
404 label = a
405 labels.set(a)
406 }
407 n := len(label)
408 if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
409 err = &labelError{label, "A4"}
410 }
411 }
412 }
413 s = labels.result()
414 if toASCII && p.verifyDNSLength && err == nil {
415
416 n := len(s)
417 if n > 0 && s[n-1] == '.' {
418 n--
419 }
420 if len(s) < 1 || n > 253 {
421 err = &labelError{s, "A4"}
422 }
423 }
424 return s, err
425 }
426
427 func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
428
429
430
431 mapped = norm.NFC.String(s)
432 isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
433 return mapped, isBidi, nil
434 }
435
436 func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
437
438 if !norm.NFC.IsNormalString(s) {
439 return s, false, &labelError{s, "V1"}
440 }
441 for i := 0; i < len(s); {
442 v, sz := trie.lookupString(s[i:])
443 if sz == 0 {
444 return s, bidi, runeError(utf8.RuneError)
445 }
446 bidi = bidi || info(v).isBidi(s[i:])
447
448 switch p.simplify(info(v).category()) {
449
450
451 case valid, deviation:
452 case disallowed, mapped, unknown, ignored:
453 r, _ := utf8.DecodeRuneInString(s[i:])
454 return s, bidi, runeError(r)
455 }
456 i += sz
457 }
458 return s, bidi, nil
459 }
460
461 func (c info) isBidi(s string) bool {
462 if !c.isMapped() {
463 return c&attributesMask == rtl
464 }
465
466
467 p, _ := bidi.LookupString(s)
468 switch p.Class() {
469 case bidi.R, bidi.AL, bidi.AN:
470 return true
471 }
472 return false
473 }
474
475 func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
476 var (
477 b []byte
478 k int
479 )
480
481
482
483
484 var combinedInfoBits info
485 for i := 0; i < len(s); {
486 v, sz := trie.lookupString(s[i:])
487 if sz == 0 {
488 b = append(b, s[k:i]...)
489 b = append(b, "\ufffd"...)
490 k = len(s)
491 if err == nil {
492 err = runeError(utf8.RuneError)
493 }
494 break
495 }
496 combinedInfoBits |= info(v)
497 bidi = bidi || info(v).isBidi(s[i:])
498 start := i
499 i += sz
500
501 switch p.simplify(info(v).category()) {
502 case valid:
503 continue
504 case disallowed:
505 if err == nil {
506 r, _ := utf8.DecodeRuneInString(s[start:])
507 err = runeError(r)
508 }
509 continue
510 case mapped, deviation:
511 b = append(b, s[k:start]...)
512 b = info(v).appendMapping(b, s[start:i])
513 case ignored:
514 b = append(b, s[k:start]...)
515
516 case unknown:
517 b = append(b, s[k:start]...)
518 b = append(b, "\ufffd"...)
519 }
520 k = i
521 }
522 if k == 0 {
523
524 if combinedInfoBits&mayNeedNorm != 0 {
525 s = norm.NFC.String(s)
526 }
527 } else {
528 b = append(b, s[k:]...)
529 if norm.NFC.QuickSpan(b) != len(b) {
530 b = norm.NFC.Bytes(b)
531 }
532
533 s = string(b)
534 }
535 return s, bidi, err
536 }
537
538
// labelIter iterates over the dot-separated labels of a name and allows
// individual labels to be replaced. Until the first call to set it walks the
// original string in place; afterwards it works on a split copy.
type labelIter struct {
	orig     string   // the name being iterated
	slice    []string // labels of orig; nil until set is first called
	curStart int      // start offset of the current label within orig
	curEnd   int      // end offset of the current label, computed by label
	i        int      // index of the current label
}

// reset moves the iterator back to the first label. Replacements made with
// set are kept.
func (l *labelIter) reset() {
	l.curStart, l.curEnd, l.i = 0, 0, 0
}

// done reports whether the iteration has moved past the last label.
func (l *labelIter) done() bool {
	return len(l.orig) <= l.curStart
}

// result returns the name with all replacements applied, or the original
// string if set was never called.
func (l *labelIter) result() string {
	if l.slice == nil {
		return l.orig
	}
	return strings.Join(l.slice, ".")
}

// label returns the current label. While no label has been replaced it also
// records where the label ends, which next relies on; call it before next.
func (l *labelIter) label() string {
	if l.slice != nil {
		return l.slice[l.i]
	}
	rest := l.orig[l.curStart:]
	if dot := strings.IndexByte(rest, '.'); dot >= 0 {
		l.curEnd = l.curStart + dot
	} else {
		l.curEnd = len(l.orig)
	}
	return l.orig[l.curStart:l.curEnd]
}

// next advances to the following label. An empty label at the very end of
// the name is not visited.
func (l *labelIter) next() {
	l.i++
	if l.slice == nil {
		l.curStart = l.curEnd + 1
		if last := len(l.orig) - 1; l.curStart == last && l.orig[last] == '.' {
			l.curStart = len(l.orig)
		}
		return
	}
	last := len(l.slice) - 1
	if l.i > last || (l.i == last && l.slice[last] == "") {
		// Mark the iteration as finished for done.
		l.curStart = len(l.orig)
	}
}

// set replaces the current label with s, splitting the original name into
// labels on first use.
func (l *labelIter) set(s string) {
	if l.slice == nil {
		l.slice = strings.Split(l.orig, ".")
	}
	l.slice[l.i] = s
}
597
598
// acePrefix marks a label as punycode-encoded: process decodes labels that
// start with it and passes it to encode when producing ASCII output.
const acePrefix = "xn--"
600
601 func (p *Profile) simplify(cat category) category {
602 switch cat {
603 case disallowedSTD3Mapped:
604 if p.useSTD3Rules {
605 cat = disallowed
606 } else {
607 cat = mapped
608 }
609 case disallowedSTD3Valid:
610 if p.useSTD3Rules {
611 cat = disallowed
612 } else {
613 cat = valid
614 }
615 case deviation:
616 if !p.transitional {
617 cat = valid
618 }
619 case validNV8, validXV8:
620
621 cat = valid
622 }
623 return cat
624 }
625
626 func validateFromPunycode(p *Profile, s string) error {
627 if !norm.NFC.IsNormalString(s) {
628 return &labelError{s, "V1"}
629 }
630
631
632 for i := 0; i < len(s); {
633 v, sz := trie.lookupString(s[i:])
634 if sz == 0 {
635 return runeError(utf8.RuneError)
636 }
637 if c := p.simplify(info(v).category()); c != valid && c != deviation {
638 return &labelError{s, "V6"}
639 }
640 i += sz
641 }
642 return nil
643 }
644
// UTF-8 encodings of the two joiner code points that validateLabel treats
// specially.
const (
	zwnj = "\u200c" // U+200C ZERO WIDTH NON-JOINER
	zwj  = "\u200d" // U+200D ZERO WIDTH JOINER
)
649
// joinState is a state of the state machine validateLabel runs over labels
// that contain a zero-width joiner or non-joiner. It indexes joinStates.
type joinState int8

// The states, numbered from zero in declaration order. validateLabel starts
// in stateStart and rejects a label that ends in stateFAIL or stateAfter.
const (
	stateStart joinState = iota
	stateVirama
	stateBefore
	stateBeforeVirama
	stateAfter
	stateFAIL
)
660
661 var joinStates = [][numJoinTypes]joinState{
662 stateStart: {
663 joiningL: stateBefore,
664 joiningD: stateBefore,
665 joinZWNJ: stateFAIL,
666 joinZWJ: stateFAIL,
667 joinVirama: stateVirama,
668 },
669 stateVirama: {
670 joiningL: stateBefore,
671 joiningD: stateBefore,
672 },
673 stateBefore: {
674 joiningL: stateBefore,
675 joiningD: stateBefore,
676 joiningT: stateBefore,
677 joinZWNJ: stateAfter,
678 joinZWJ: stateFAIL,
679 joinVirama: stateBeforeVirama,
680 },
681 stateBeforeVirama: {
682 joiningL: stateBefore,
683 joiningD: stateBefore,
684 joiningT: stateBefore,
685 },
686 stateAfter: {
687 joiningL: stateFAIL,
688 joiningD: stateBefore,
689 joiningT: stateAfter,
690 joiningR: stateStart,
691 joinZWNJ: stateFAIL,
692 joinZWJ: stateFAIL,
693 joinVirama: stateAfter,
694 },
695 stateFAIL: {
696 0: stateFAIL,
697 joiningL: stateFAIL,
698 joiningD: stateFAIL,
699 joiningT: stateFAIL,
700 joiningR: stateFAIL,
701 joinZWNJ: stateFAIL,
702 joinZWJ: stateFAIL,
703 joinVirama: stateFAIL,
704 },
705 }
706
707
708
709 func (p *Profile) validateLabel(s string) (err error) {
710 if s == "" {
711 if p.verifyDNSLength {
712 return &labelError{s, "A4"}
713 }
714 return nil
715 }
716 if p.checkHyphens {
717 if len(s) > 4 && s[2] == '-' && s[3] == '-' {
718 return &labelError{s, "V2"}
719 }
720 if s[0] == '-' || s[len(s)-1] == '-' {
721 return &labelError{s, "V3"}
722 }
723 }
724 if !p.checkJoiners {
725 return nil
726 }
727 trie := p.trie
728
729 v, sz := trie.lookupString(s)
730 x := info(v)
731 if x.isModifier() {
732 return &labelError{s, "V5"}
733 }
734
735 if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
736 return nil
737 }
738 st := stateStart
739 for i := 0; ; {
740 jt := x.joinType()
741 if s[i:i+sz] == zwj {
742 jt = joinZWJ
743 } else if s[i:i+sz] == zwnj {
744 jt = joinZWNJ
745 }
746 st = joinStates[st][jt]
747 if x.isViramaModifier() {
748 st = joinStates[st][joinVirama]
749 }
750 if i += sz; i == len(s) {
751 break
752 }
753 v, sz = trie.lookupString(s[i:])
754 x = info(v)
755 }
756 if st == stateFAIL || st == stateAfter {
757 return &labelError{s, "C"}
758 }
759 return nil
760 }
761
// ascii reports whether s consists solely of bytes below utf8.RuneSelf, that
// is, whether it is pure ASCII. The empty string is ASCII.
func ascii(s string) bool {
	for _, b := range []byte(s) {
		if b >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
770
View as plain text