1
2
3
4
5 package cases
6
7
8
9
10
11 import (
12 "strings"
13 "unicode"
14 "unicode/utf8"
15
16 "golang.org/x/text/internal"
17 "golang.org/x/text/language"
18 "golang.org/x/text/transform"
19 "golang.org/x/text/unicode/norm"
20 )
21
22
23
24
25
// A mapFunc operates on a context positioned at the current rune and reports
// success as a bool. The Transform loops in this file call checkpoint and
// continue only while it returns true; on false they stop and return the
// context's result. The implementations in this file write a mapped form to
// the context and may advance it past additional source runes.
type mapFunc func(*context) bool
27
28
29
30
// A spanFunc operates on a context positioned at the current rune and reports
// whether the Span loops in this file may checkpoint and continue. It may
// advance the context (see nlTitleSpan) or set c.err to end the span (see
// noSpan).
type spanFunc func(*context) bool
32
33
34
// maxIgnorable bounds the number of runes that the lookahead loops in this
// file (finalSigmaBody, elUpper, ltLower, ltUpper and aztrLower) will scan.
const maxIgnorable = 30
36
37
// supported is the space-separated list of language tags that init parses to
// build matcher and Supported. The per-language tables upperFunc, lowerFunc and
// titleInfos each have one entry per tag listed here.
const supported = "und af az el lt nl tr"
39
40 func init() {
41 tags := []language.Tag{}
42 for _, s := range strings.Split(supported, " ") {
43 tags = append(tags, language.MustParse(s))
44 }
45 matcher = internal.NewInheritanceMatcher(tags)
46 Supported = language.NewCoverage(tags)
47 }
48
var (
	// matcher is built in init from the supported tags. makeUpper, makeLower
	// and makeTitle use the index returned by matcher.Match to select an entry
	// from the per-language tables below.
	matcher *internal.InheritanceMatcher

	// Supported is the language coverage built in init from the supported tags.
	Supported language.Coverage

	// upperFunc holds, per supported language, the tailored upper-case mapping
	// and the span function that goes with it. A nil upper entry makes
	// makeUpper fall back to the shared undUpper transformer.
	upperFunc = []struct {
		upper mapFunc
		span  spanFunc
	}{
		{nil, nil},                  // und
		{nil, nil},                  // af
		{aztrUpper(upper), isUpper}, // az
		{elUpper, noSpan},           // el
		{ltUpper(upper), noSpan},    // lt
		{nil, nil},                  // nl
		{aztrUpper(upper), isUpper}, // tr
	}

	// Shared transformers for languages without a tailored mapping.
	undUpper            transform.SpanningTransformer = &undUpperCaser{}
	undLower            transform.SpanningTransformer = &undLowerCaser{}
	undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}

	// lowerFunc holds, per supported language, the tailored lower-case mapping.
	// A nil entry makes makeLower fall back to undLower or undLowerIgnoreSigma.
	lowerFunc = []mapFunc{
		nil,       // und
		nil,       // af
		aztrLower, // az
		nil,       // el
		ltLower,   // lt
		nil,       // nl
		aztrLower, // tr
	}

	// titleInfos holds, per supported language, the functions makeTitle copies
	// into a titleCaser: the mapping for the first cased rune of a word, the
	// mapping for the remaining runes, the span check for the first rune, and
	// an optional rewrite hook invoked before each rune is processed.
	titleInfos = []struct {
		title     mapFunc
		lower     mapFunc
		titleSpan spanFunc
		rewrite   func(*context)
	}{
		{title, lower, isTitle, nil},                // und
		{title, lower, isTitle, afnlRewrite},        // af
		{aztrUpper(title), aztrLower, isTitle, nil}, // az
		{title, lower, isTitle, nil},                // el
		{ltUpper(title), ltLower, noSpan, nil},      // lt
		{nlTitle, lower, nlTitleSpan, afnlRewrite},  // nl
		{aztrUpper(title), aztrLower, isTitle, nil}, // tr
	}
)
101
102 func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
103 _, i, _ := matcher.Match(t)
104 f := upperFunc[i].upper
105 if f == nil {
106 return undUpper
107 }
108 return &simpleCaser{f: f, span: upperFunc[i].span}
109 }
110
111 func makeLower(t language.Tag, o options) transform.SpanningTransformer {
112 _, i, _ := matcher.Match(t)
113 f := lowerFunc[i]
114 if f == nil {
115 if o.ignoreFinalSigma {
116 return undLowerIgnoreSigma
117 }
118 return undLower
119 }
120 if o.ignoreFinalSigma {
121 return &simpleCaser{f: f, span: isLower}
122 }
123 return &lowerCaser{
124 first: f,
125 midWord: finalSigma(f),
126 }
127 }
128
129 func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
130 _, i, _ := matcher.Match(t)
131 x := &titleInfos[i]
132 lower := x.lower
133 if o.noLower {
134 lower = (*context).copy
135 } else if !o.ignoreFinalSigma {
136 lower = finalSigma(lower)
137 }
138 return &titleCaser{
139 title: x.title,
140 lower: lower,
141 titleSpan: x.titleSpan,
142 rewrite: x.rewrite,
143 }
144 }
145
// noSpan is the spanFunc for mappings that have no span check of their own.
// It records transform.ErrEndOfSpan on the context and returns false, which
// ends the Span loop at the current rune.
func noSpan(c *context) bool {
	c.err = transform.ErrEndOfSpan
	return false
}
150
151
152
153
154
// undUpperCaser is the concrete type behind undUpper, the transformer
// makeUpper returns for languages without a tailored upper mapping. It carries
// no state; Reset comes from the embedded transform.NopResetter.
type undUpperCaser struct{ transform.NopResetter }
156
157
158
159
160 func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
161 c := context{dst: dst, src: src, atEOF: atEOF}
162 for c.next() {
163 upper(&c)
164 c.checkpoint()
165 }
166 return c.ret()
167 }
168
169 func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
170 c := context{src: src, atEOF: atEOF}
171 for c.next() && isUpper(&c) {
172 c.checkpoint()
173 }
174 return c.retSpan()
175 }
176
177
178
179
180
// undLowerIgnoreSigmaCaser is the concrete type behind undLowerIgnoreSigma,
// the transformer makeLower returns for languages without a tailored lower
// mapping when o.ignoreFinalSigma is set. It carries no state; Reset comes
// from the embedded transform.NopResetter.
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }
182
183 func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
184 c := context{dst: dst, src: src, atEOF: atEOF}
185 for c.next() && lower(&c) {
186 c.checkpoint()
187 }
188 return c.ret()
189
190 }
191
192
193
194
195
196 func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
197 c := context{src: src, atEOF: atEOF}
198 for c.next() && isLower(&c) {
199 c.checkpoint()
200 }
201 return c.retSpan()
202 }
203
// simpleCaser is a transformer that applies a single mapFunc to every rune and
// uses a single spanFunc for Span. Its Transform and Span methods build their
// own local context on each call rather than using the embedded one.
// NOTE(review): the embedded context presumably exists to supply Reset for the
// transformer interface — confirm against the context type.
type simpleCaser struct {
	context
	f    mapFunc  // mapping applied to each rune by Transform
	span spanFunc // check applied to each rune by Span
}
209
210
211
212 func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
213 c := context{dst: dst, src: src, atEOF: atEOF}
214 for c.next() && t.f(&c) {
215 c.checkpoint()
216 }
217 return c.ret()
218 }
219
220 func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
221 c := context{src: src, atEOF: atEOF}
222 for c.next() && t.span(&c) {
223 c.checkpoint()
224 }
225 return c.retSpan()
226 }
227
228
229
230
231
// undLowerCaser is the concrete type behind undLower, the transformer
// makeLower returns for languages without a tailored lower mapping when final
// sigma handling is enabled. It carries no state; Reset comes from the
// embedded transform.NopResetter.
type undLowerCaser struct{ transform.NopResetter }
233
234 func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
235 c := context{dst: dst, src: src, atEOF: atEOF}
236
237 for isInterWord := true; c.next(); {
238 if isInterWord {
239 if c.info.isCased() {
240 if !lower(&c) {
241 break
242 }
243 isInterWord = false
244 } else if !c.copy() {
245 break
246 }
247 } else {
248 if c.info.isNotCasedAndNotCaseIgnorable() {
249 if !c.copy() {
250 break
251 }
252 isInterWord = true
253 } else if !c.hasPrefix("Σ") {
254 if !lower(&c) {
255 break
256 }
257 } else if !finalSigmaBody(&c) {
258 break
259 }
260 }
261 c.checkpoint()
262 }
263 return c.ret()
264 }
265
266 func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
267 c := context{src: src, atEOF: atEOF}
268 for c.next() && isLower(&c) {
269 c.checkpoint()
270 }
271 return c.retSpan()
272 }
273
274
275
276
// lowerCaser is the lower-casing transformer makeLower builds for languages
// with a tailored mapping when final sigma handling is enabled. It applies a
// different mapping to the first cased rune of a word than to the runes that
// follow it.
type lowerCaser struct {
	// lowerCaser defines no Span method in this file; the embedded type
	// provides one based on isLower.
	undLowerIgnoreSigmaCaser

	// context is overwritten at the start of every Transform call.
	context

	// first maps the first cased rune of a word; midWord maps subsequent runes
	// (makeLower sets it to first wrapped with finalSigma).
	first, midWord mapFunc
}
284
285 func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
286 t.context = context{dst: dst, src: src, atEOF: atEOF}
287 c := &t.context
288
289 for isInterWord := true; c.next(); {
290 if isInterWord {
291 if c.info.isCased() {
292 if !t.first(c) {
293 break
294 }
295 isInterWord = false
296 } else if !c.copy() {
297 break
298 }
299 } else {
300 if c.info.isNotCasedAndNotCaseIgnorable() {
301 if !c.copy() {
302 break
303 }
304 isInterWord = true
305 } else if !t.midWord(c) {
306 break
307 }
308 }
309 c.checkpoint()
310 }
311 return c.ret()
312 }
313
314
315
316
// titleCaser is the title-casing transformer built by makeTitle. It treats the
// first cased rune of a word differently from the runes that follow. The
// embedded context is replaced on every Transform and Span call, but its
// isMidWord flag is carried over from the previous call.
type titleCaser struct {
	context

	// Rune mappings and span check used by Transform and Span.
	title     mapFunc  // applied to the first cased rune of a word
	lower     mapFunc  // applied to later cased runes of a word
	titleSpan spanFunc // Span counterpart of title

	// rewrite, when non-nil, is called on the context before each rune is
	// processed.
	rewrite func(*context)
}
327
328
329
330
331
332
333
334
// Transform title-cases src into dst. The first cased rune found while not in
// the middle of a word is mapped with t.title; cased runes after that are
// mapped with t.lower; uncased runes are copied. Whether the transformer is
// inside a word is tracked in c.isMidWord, which is seeded from the value left
// behind by the previous call.
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	// Carry isMidWord over from the previous call's context.
	t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.ret()
	}

	for {
		// Snapshot the properties of the current rune: the mapping functions
		// called below may advance c to later runes.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()

		// Every failing write breaks out of the loop before isMidWord is
		// updated and before the checkpoint below is taken.
		if p.isCased() {
			if !c.isMidWord {
				if !t.title(c) {
					break
				}
				c.isMidWord = true
			} else if !t.lower(c) {
				break
			}
		} else if !c.copy() {
			break
		} else if p.isBreak() {
			c.isMidWord = false
		}

		// Checkpoint after the write, except while in the middle of a word
		// directly after a mid rune: the check after c.next below may still
		// clear isMidWord depending on the following rune.
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two consecutive mid runes end the current word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.ret()
}
382
// Span mirrors Transform without writing any output. The first cased rune
// found while not in the middle of a word is checked with t.titleSpan, later
// cased runes with isLower, and uncased runes are always accepted. It returns
// the context's retSpan result. As in Transform, c.isMidWord is seeded from the
// value left behind by the previous call.
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
	// Carry isMidWord over from the previous call's context.
	t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.retSpan()
	}

	for {
		// Snapshot the properties of the current rune: the span functions
		// called below may advance c to later runes.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()

		// A failing check breaks out of the loop before isMidWord is updated
		// and before the checkpoint below is taken.
		if p.isCased() {
			if !c.isMidWord {
				if !t.titleSpan(c) {
					break
				}
				c.isMidWord = true
			} else if !isLower(c) {
				break
			}
		} else if p.isBreak() {
			c.isMidWord = false
		}

		// Checkpoint unless in the middle of a word directly after a mid rune:
		// the check after c.next below may still clear isMidWord depending on
		// the following rune.
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two consecutive mid runes end the current word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.retSpan()
}
427
428
429
430
431 func finalSigma(f mapFunc) mapFunc {
432 return func(c *context) bool {
433 if !c.hasPrefix("Σ") {
434 return f(c)
435 }
436 return finalSigmaBody(c)
437 }
438 }
439
// finalSigmaBody lower-cases a capital sigma, choosing between the final form
// ς and the regular form σ by looking ahead. Its callers in this file invoke it
// only when the current rune is "Σ".
//
// It first writes ς and then scans forward: case-ignorable runes are copied to
// the output, and if the first rune that is not case-ignorable is cased, the
// already written ς is patched into σ. It returns false when the lookahead
// runs out of input, and true otherwise.
func finalSigmaBody(c *context) bool {
	// Remember where the sigma is written so it can be patched later.
	p := c.pDst
	c.writeString("ς")

	// One more iteration than maxIgnorable is allowed so that the rune
	// following the last permitted ignorable can still be inspected.
	wasMid := false
	for i := 0; i < maxIgnorable+1; i++ {
		if !c.next() {
			return false
		}
		if !c.info.isCaseIgnorable() {
			if c.info.isCased() {
				// ς (U+03C2) is encoded as CF 82 and σ (U+03C3) as CF 83, so
				// incrementing the second byte turns ς into σ.
				// NOTE(review): this relies on the write of ς above having
				// succeeded; presumably c.next returns false otherwise —
				// confirm against context.
				c.dst[p+1]++
			}
			// Leave the rune for the caller to process.
			c.unreadRune()
			return true
		}

		// A case-ignorable rune can still end the word: either as the second
		// of two consecutive mid runes or as a break.
		isMid := c.info.isMid()
		if (wasMid && isMid) || c.info.isBreak() {
			c.isMidWord = false
		}
		wasMid = isMid
		c.copy()
	}
	return true
}
493
494
495
496
497
498
499
// elUpper is the upper-case mapFunc for Greek (the "el" entry of upperFunc).
// It upper-cases the current rune and, when that rune is in the Greek script,
// drops a fixed set of combining marks attached to it: both marks that are
// part of the upper-cased rune's decomposition and marks that follow it in the
// source.
func elUpper(c *context) bool {
	r, _ := utf8.DecodeRune(c.src[c.pSrc:])
	oldPDst := c.pDst
	if !upper(c) {
		return false
	}
	if !unicode.Is(unicode.Greek, r) {
		return true
	}
	i := 0

	// Decompose the upper-cased rune that was just written to dst, rather than
	// the source rune.
	if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
		r, sz := utf8.DecodeRune(b)
		// Leave the output as written when the decomposition starts with a
		// rune at or below U+00FF.
		if r <= 0xFF {
			return true
		}
		// Rewind dst and keep only the first rune of the decomposition,
		// dropping the marks that followed it.
		c.pDst = oldPDst

		c.writeBytes(b[:sz])
		// Count the dropped marks against the lookahead budget.
		// NOTE(review): the division by 2 assumes each remaining mark is two
		// bytes long in UTF-8 — confirm.
		i = len(b[sz:]) / 2
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {

		case 0x0300, // U+0300 COMBINING GRAVE ACCENT
			0x0301, // U+0301 COMBINING ACUTE ACCENT
			0x0304, // U+0304 COMBINING MACRON
			0x0306, // U+0306 COMBINING BREVE
			0x0308, // U+0308 COMBINING DIAERESIS
			0x0313, // U+0313 COMBINING COMMA ABOVE
			0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
			0x0342, // U+0342 COMBINING GREEK PERISPOMENI
			0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
			// Nothing is written: the mark is dropped from the output.

		default:
			switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
			case cccZero:
				// Not a combining mark of this sequence: put it back for the
				// caller and finish.
				c.unreadRune()
				return true

			case cccAbove:
				// Copy this mark and stop scanning.
				return c.copy()
			default:
				// Copy any other mark and keep scanning.
				c.copy()
			}
		}
	}
	return i == maxIgnorable
}
564
565
566
// ltLower is the lower-case mapFunc for Lithuanian (the "lt" entry of
// lowerFunc). It lower-cases the current rune and, when the rune is or
// decomposes to an 'I' or 'J' that is followed by a mark of class cccAbove,
// inserts U+0307 (COMBINING DOT ABOVE) in front of that mark.
func ltLower(c *context) bool {
	i := 0
	if r := c.src[c.pSrc]; r < utf8.RuneSelf {
		// ASCII: only 'I' and 'J' need the lookahead below.
		lower(c)
		if r != 'I' && r != 'J' {
			return true
		}
	} else {
		p := norm.NFD.Properties(c.src[c.pSrc:])
		if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
			// The decomposition is an ASCII I or J followed by at least one
			// mark. A second rune encoded as 0xCC followed by a byte of at
			// most 0x91 lies in U+0300..U+0311; in that case the dot is
			// written directly after the base letter.
			if d[1] == 0xCC && d[2] <= 0x91 {
				if !c.writeBytes(d[:1]) {
					return false
				}
				// Lower-case the ASCII base letter in place.
				c.dst[c.pDst-1] += 'a' - 'A'

				// Emit the dot, then the marks from the decomposition
				// unchanged.
				return c.writeString("\u0307") && c.writeBytes(d[1:])
			}

			// Otherwise lower-case the rune as a whole and let the loop below
			// decide whether a dot is needed after it; the decomposition's
			// mark is counted as one rune of lookahead.
			lower(c)
			i = 1
		} else {
			return lower(c)
		}
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccZero:
			// No mark of class cccAbove follows: put the rune back, no dot
			// needed.
			c.unreadRune()
			return true
		case cccAbove:
			// Insert the dot in front of the first mark of class cccAbove.
			return c.writeString("\u0307") && c.copy()
		default:
			// Copy any other mark and keep looking.
			c.copy()
		}
	}
	return i == maxIgnorable
}
635
636
637
// ltUpper wraps f with the Lithuanian handling of U+0307 (COMBINING DOT
// ABOVE): after mapping a Soft_Dotted rune with f, the returned mapFunc scans
// the marks that follow and drops a U+0307 found as the first mark of class
// cccAbove. For a few common cases it also recombines 'I' plus the following
// accent into a single precomposed letter.
func ltUpper(f mapFunc) mapFunc {
	return func(c *context) bool {
		r, _ := utf8.DecodeRune(c.src[c.pSrc:])
		oldPDst := c.pDst
		if !f(c) {
			return false
		}
		if !unicode.Is(unicode.Soft_Dotted, r) {
			return true
		}

		i := 0
		for ; i < maxIgnorable && c.next(); i++ {
			switch c.info.cccType() {
			case cccZero:
				// No mark of class cccAbove followed: put the rune back.
				c.unreadRune()
				return true
			case cccAbove:
				if c.hasPrefix("\u0307") {
					// Skip the dot by advancing without copying it.
					if !c.next() {
						return false
					}
					// Only recombine when the output so far is exactly a
					// single 'I' and the next source rune starts with 0xCC,
					// the lead byte of U+0300..U+033F.
					if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc {
						s := ""
						switch c.src[c.pSrc+1] {
						case 0x80: // U+0300 COMBINING GRAVE ACCENT
							s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
						case 0x81: // U+0301 COMBINING ACUTE ACCENT
							s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
						case 0x83: // U+0303 COMBINING TILDE
							s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
						case 0x88: // U+0308 COMBINING DIAERESIS
							s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
						default:
						}
						if s != "" {
							// Replace the already written 'I' with the
							// precomposed letter.
							c.pDst = oldPDst
							return c.writeString(s)
						}
					}
				}
				// Copy the current rune: either the mark of class cccAbove
				// itself or the rune that followed the dropped dot.
				return c.copy()
			default:
				// Copy any other mark and keep looking.
				c.copy()
			}
		}
		return i == maxIgnorable
	}
}
708
709
710
711 func aztrUpper(f mapFunc) mapFunc {
712 return func(c *context) bool {
713
714 if c.src[c.pSrc] == 'i' {
715 return c.writeString("İ")
716 }
717 return f(c)
718 }
719 }
720
// aztrLower is the lower-case mapFunc for the az and tr entries of the casing
// tables:
//
//   - U+0130 (capital I with dot above) becomes "i";
//   - 'I' followed, possibly after other marks, by U+0307 (COMBINING DOT
//     ABOVE) as the first mark of class cccAbove becomes "i" with the U+0307
//     dropped;
//   - any other 'I' becomes "ı" (dotless i);
//   - every other rune is handled by lower.
//
// The result done is the bool the mapFunc contract expects; in the dotless
// case it is true only if the lookahead reached a definite conclusion and all
// writes succeeded.
func aztrLower(c *context) (done bool) {
	if c.hasPrefix("\u0130") {
		return c.writeString("i")
	}
	if c.src[c.pSrc] != 'I' {
		return lower(c)
	}

	// Nothing is written for the 'I' yet; start marks the first byte after it
	// so the skipped marks can be written once the right letter is known.
	start := c.pSrc + c.sz

	i := 0
Loop:
	// Look ahead over at most maxIgnorable runes for a U+0307.
	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccAbove:
			if c.hasPrefix("\u0307") {
				// Dotted form: write "i" plus the marks scanned so far,
				// leaving out the U+0307 itself.
				return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc])
			}
			// Some other mark of class cccAbove: the dotless form applies.
			done = true
			break Loop
		case cccZero:
			// End of the mark sequence: put the rune back; the dotless form
			// applies.
			c.unreadRune()
			done = true
			break Loop
		default:
			// Other marks are written below, after the letter.
		}
	}
	if i == maxIgnorable {
		done = true
	}
	// Dotless form: write "ı" followed by everything scanned up to and
	// including the current rune.
	return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}
770
771
772
773 func nlTitle(c *context) bool {
774
775
776
777
778
779 if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' {
780 return title(c)
781 }
782
783 if !c.writeString("I") || !c.next() {
784 return false
785 }
786 if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' {
787 return c.writeString("J")
788 }
789 c.unreadRune()
790 return true
791 }
792
793 func nlTitleSpan(c *context) bool {
794
795
796
797
798
799 if c.src[c.pSrc] != 'I' {
800 return isTitle(c)
801 }
802 if !c.next() || c.src[c.pSrc] == 'j' {
803 return false
804 }
805 if c.src[c.pSrc] != 'J' {
806 c.unreadRune()
807 }
808 return true
809 }
810
811
812 func afnlRewrite(c *context) {
813 if c.hasPrefix("'") || c.hasPrefix("’") {
814 c.isMidWord = true
815 }
816 }
817
View as plain text