1
2
3
4
5 package language
6
7 import (
8 "bytes"
9 "errors"
10 "fmt"
11 "sort"
12
13 "golang.org/x/text/internal/tag"
14 )
15
16
17
// isAlpha reports whether b sorts after the ASCII digit '9'.
// It only separates letters from digits when b is already known to be an
// ASCII letter or digit; any other byte above '9' also yields true.
func isAlpha(b byte) bool {
	return '9' < b
}
21
22
// isAlphaNum reports whether every byte of s is an ASCII letter or an ASCII
// digit. An empty (or nil) slice yields true.
func isAlphaNum(s []byte) bool {
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case 'a' <= c && c <= 'z':
		case 'A' <= c && c <= 'Z':
		case '0' <= c && c <= '9':
		default:
			return false
		}
	}
	return true
}
31
32
33
34
var (
	// ErrSyntax is the error reported when the input is not a well-formed
	// language tag.
	ErrSyntax = errors.New("language: tag is not well-formed")

	// ErrDuplicateKey is the error reported when a -u extension holds the
	// same key more than once with different values.
	ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
)
40
41
42
43
// ValueError is the error reported for a subtag that is well-formed but not
// recognized. It stores up to 8 bytes of the offending subtag inline.
type ValueError struct {
	v [8]byte
}

// NewValueError returns a ValueError for the given subtag. Only the first
// 8 bytes of tag are retained.
func NewValueError(tag []byte) ValueError {
	e := ValueError{}
	copy(e.v[:], tag)
	return e
}

// tag returns the stored subtag bytes up to, but excluding, the first zero
// byte, or all 8 bytes when none is zero.
func (e ValueError) tag() []byte {
	if i := bytes.IndexByte(e.v[:], 0); i >= 0 {
		return e.v[:i]
	}
	return e.v[:]
}

// Error implements the error interface.
func (e ValueError) Error() string {
	subtag := e.tag()
	return fmt.Sprintf("language: subtag %q is well-formed but unknown", subtag)
}

// Subtag returns the subtag for which the error was created.
func (e ValueError) Subtag() string {
	subtag := e.tag()
	return string(subtag)
}
72
73
// scanner splits a byte buffer into tokens separated by '-' (init rewrites
// every '_' to '-' first) and lets the parsing routines edit that buffer in
// place while they walk it.
type scanner struct {
	b     []byte                      // buffer being scanned; modified in place by the scanner methods
	bytes [max99thPercentileSize]byte // inline storage used by makeScannerString for inputs that fit
	token []byte                      // current token as a slice of b, or nil when scan found none
	start int                         // start offset of the current token in b
	end   int                         // end offset (exclusive) of the current token in b
	next  int                         // offset at which the next call to scan starts
	err   error                       // error recorded through setError
	done  bool                        // set by scan once no further token could be produced
}
84
85 func makeScannerString(s string) scanner {
86 scan := scanner{}
87 if len(s) <= len(scan.bytes) {
88 scan.b = scan.bytes[:copy(scan.bytes[:], s)]
89 } else {
90 scan.b = []byte(s)
91 }
92 scan.init()
93 return scan
94 }
95
96
97
98 func makeScanner(b []byte) scanner {
99 scan := scanner{b: b}
100 scan.init()
101 return scan
102 }
103
104 func (s *scanner) init() {
105 for i, c := range s.b {
106 if c == '_' {
107 s.b[i] = '-'
108 }
109 }
110 s.scan()
111 }
112
113
114 func (s *scanner) toLower(start, end int) {
115 for i := start; i < end; i++ {
116 c := s.b[i]
117 if 'A' <= c && c <= 'Z' {
118 s.b[i] += 'a' - 'A'
119 }
120 }
121 }
122
123 func (s *scanner) setError(e error) {
124 if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
125 s.err = e
126 }
127 }
128
129
130
131
132 func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
133 s.start = oldStart
134 if end := oldStart + newSize; end != oldEnd {
135 diff := end - oldEnd
136 var b []byte
137 if n := len(s.b) + diff; n > cap(s.b) {
138 b = make([]byte, n)
139 copy(b, s.b[:oldStart])
140 } else {
141 b = s.b[:n]
142 }
143 copy(b[end:], s.b[oldEnd:])
144 s.b = b
145 s.next = end + (s.next - s.end)
146 s.end = end
147 }
148 }
149
150
151 func (s *scanner) replace(repl string) {
152 s.resizeRange(s.start, s.end, len(repl))
153 copy(s.b[s.start:], repl)
154 }
155
156
157
158 func (s *scanner) gobble(e error) {
159 s.setError(e)
160 if s.start == 0 {
161 s.b = s.b[:+copy(s.b, s.b[s.next:])]
162 s.end = 0
163 } else {
164 s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
165 s.end = s.start - 1
166 }
167 s.next = s.start
168 }
169
170
171 func (s *scanner) deleteRange(start, end int) {
172 s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
173 diff := end - start
174 s.next -= diff
175 s.start -= diff
176 s.end -= diff
177 }
178
179
180
181
182
183 func (s *scanner) scan() (end int) {
184 end = s.end
185 s.token = nil
186 for s.start = s.next; s.next < len(s.b); {
187 i := bytes.IndexByte(s.b[s.next:], '-')
188 if i == -1 {
189 s.end = len(s.b)
190 s.next = len(s.b)
191 i = s.end - s.start
192 } else {
193 s.end = s.next + i
194 s.next = s.end + 1
195 }
196 token := s.b[s.start:s.end]
197 if i < 1 || i > 8 || !isAlphaNum(token) {
198 s.gobble(ErrSyntax)
199 continue
200 }
201 s.token = token
202 return end
203 }
204 if n := len(s.b); n > 0 && s.b[n-1] == '-' {
205 s.setError(ErrSyntax)
206 s.b = s.b[:len(s.b)-1]
207 }
208 s.done = true
209 return end
210 }
211
212
213
214 func (s *scanner) acceptMinSize(min int) (end int) {
215 end = s.end
216 s.scan()
217 for ; len(s.token) >= min; s.scan() {
218 end = s.end
219 }
220 return end
221 }
222
223
224
225
226
227
228
229
230 func Parse(s string) (t Tag, err error) {
231
232 if s == "" {
233 return Und, ErrSyntax
234 }
235 defer func() {
236 if recover() != nil {
237 t = Und
238 err = ErrSyntax
239 return
240 }
241 }()
242 if len(s) <= maxAltTaglen {
243 b := [maxAltTaglen]byte{}
244 for i, c := range s {
245
246 if 'A' <= c && c <= 'Z' {
247 c += 'a' - 'A'
248 } else if c == '_' {
249 c = '-'
250 }
251 b[i] = byte(c)
252 }
253 if t, ok := grandfathered(b); ok {
254 return t, nil
255 }
256 }
257 scan := makeScannerString(s)
258 return parse(&scan, s)
259 }
260
261 func parse(scan *scanner, s string) (t Tag, err error) {
262 t = Und
263 var end int
264 if n := len(scan.token); n <= 1 {
265 scan.toLower(0, len(scan.b))
266 if n == 0 || scan.token[0] != 'x' {
267 return t, ErrSyntax
268 }
269 end = parseExtensions(scan)
270 } else if n >= 4 {
271 return Und, ErrSyntax
272 } else {
273 t, end = parseTag(scan, true)
274 if n := len(scan.token); n == 1 {
275 t.pExt = uint16(end)
276 end = parseExtensions(scan)
277 } else if end < len(scan.b) {
278 scan.setError(ErrSyntax)
279 scan.b = scan.b[:end]
280 }
281 }
282 if int(t.pVariant) < len(scan.b) {
283 if end < len(s) {
284 s = s[:end]
285 }
286 if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
287 t.str = s
288 } else {
289 t.str = string(scan.b)
290 }
291 } else {
292 t.pVariant, t.pExt = 0, 0
293 }
294 return t, scan.err
295 }
296
297
298
299
// parseTag parses, starting at the scanner's current token, a language subtag
// followed by optional three-letter subtags, an optional script, an optional
// region and any variants. It returns the resulting Tag and the end position
// in scan.b of the part that was parsed.
//
// If doNorm is true, each three-letter subtag that follows the language is
// removed from the buffer, and one that resolves to a non-zero language ID
// replaces the language both in the buffer and in the returned Tag.
func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
	var e error

	// The current token is always taken as the language and is rewritten in
	// place with the string form of the resolved ID.
	t.LangID, e = getLangID(scan.token)
	scan.setError(e)
	scan.replace(t.LangID.String())
	langStart := scan.start
	end = scan.scan()
	// Three-byte tokens that do not start with a digit.
	for len(scan.token) == 3 && isAlpha(scan.token[0]) {
		if doNorm {
			lang, e := getLangID(scan.token)
			if lang != 0 {
				// Overwrite the language at langStart, restore the separator
				// after it and move start just past that separator so that the
				// gobble below removes everything up to the end of this token.
				t.LangID = lang
				langStr := lang.String()
				copy(scan.b[langStart:], langStr)
				scan.b[langStart+len(langStr)] = '-'
				scan.start = langStart + len(langStr) + 1
			}
			// The token is removed whether or not it resolved; e is recorded.
			scan.gobble(e)
		}
		end = scan.scan()
	}
	// A four-byte token that does not start with a digit is taken as the
	// script; it is removed from the buffer when it resolves to ID 0.
	if len(scan.token) == 4 && isAlpha(scan.token[0]) {
		t.ScriptID, e = getScriptID(script, scan.token)
		if t.ScriptID == 0 {
			scan.gobble(e)
		}
		end = scan.scan()
	}
	// A token of two or three bytes is taken as the region; it is removed when
	// it resolves to ID 0 and otherwise rewritten with the ID's string form.
	if n := len(scan.token); n >= 2 && n <= 3 {
		t.RegionID, e = getRegionID(scan.token)
		if t.RegionID == 0 {
			scan.gobble(e)
		} else {
			scan.replace(t.RegionID.String())
		}
		end = scan.scan()
	}
	// Lower-case everything from the current token to the end of the buffer.
	scan.toLower(scan.start, len(scan.b))
	// NOTE(review): end is narrowed to a byte here — presumably the prefix
	// before the variants is always short enough; confirm.
	t.pVariant = byte(end)
	end = parseVariants(scan, end, t)
	t.pExt = uint16(end)
	return t, end
}
346
// separator is the single-byte "-" delimiter used when re-joining subtags.
var separator = []byte("-")
348
349
350
// parseVariants consumes consecutive tokens of four or more bytes, starting at
// the scanner's current token, and looks each one up in variantIndex. Unknown
// tokens are removed from the buffer and recorded as a ValueError. If the
// known variants do not appear in strictly increasing index order, they are
// sorted by index, de-duplicated and written back to the buffer.
//
// end is the value returned when no variant is accepted; otherwise the end
// position of the last accepted variant is returned. t is not read here.
func parseVariants(scan *scanner, end int, t Tag) int {
	start := scan.start
	// Small fixed-size backing arrays; append moves to the heap only when more
	// than four variants are collected.
	varIDBuf := [4]uint8{}
	variantBuf := [4][]byte{}
	varID := varIDBuf[:0]
	variant := variantBuf[:0]
	last := -1
	needSort := false
	for ; len(scan.token) >= 4; scan.scan() {
		v, ok := variantIndex[string(scan.token)]
		if !ok {
			// Unknown variant: drop it from the buffer and keep scanning.
			scan.gobble(NewValueError(scan.token))
			continue
		}
		varID = append(varID, v)
		variant = append(variant, scan.token)
		if !needSort {
			if last < int(v) {
				last = int(v)
			} else {
				// First variant whose index is not greater than the previous
				// one: the list must be sorted afterwards.
				needSort = true

				// The size limit is evaluated only at this point, when the
				// order violation is first detected.
				const maxVariants = 8
				if len(varID) > maxVariants {
					break
				}
			}
		}
		end = scan.end
	}
	if needSort {
		sort.Sort(variantsSort{varID, variant})
		// Compact in place, keeping one entry per distinct index.
		k, l := 0, -1
		for i, v := range varID {
			w := int(v)
			if l == w {
				// Same index as the previously kept entry: duplicate.
				continue
			}
			varID[k] = varID[i]
			variant[k] = variant[i]
			k++
			l = w
		}
		if str := bytes.Join(variant[:k], separator); len(str) == 0 {
			// NOTE(review): looks unreachable, since needSort implies at least
			// one variant of four or more bytes was collected — confirm.
			end = start - 1
		} else {
			// Resize the variant region to the joined length and overwrite it
			// with the sorted, de-duplicated variants.
			scan.resizeRange(start, end, len(str))
			copy(scan.b[scan.start:], str)
			end = scan.end
		}
	}
	return end
}
410
// variantsSort implements sort.Interface over two parallel slices: i holds
// the sort keys and v the values that travel with them.
type variantsSort struct {
	i []uint8
	v [][]byte
}

// Len returns the number of keys.
func (vs variantsSort) Len() int {
	return len(vs.i)
}

// Swap exchanges the entries at positions a and b in both slices.
func (vs variantsSort) Swap(a, b int) {
	vs.v[a], vs.v[b] = vs.v[b], vs.v[a]
	vs.i[a], vs.i[b] = vs.i[b], vs.i[a]
}

// Less reports whether the key at position a is smaller than the key at b.
func (vs variantsSort) Less(a, b int) bool {
	return vs.i[a] < vs.i[b]
}
428
// bytesSort implements sort.Interface for byte slices, ordering them by their
// first n bytes only. Every element must hold at least n bytes.
type bytesSort struct {
	b [][]byte
	n int
}

// Len returns the number of elements.
func (bs bytesSort) Len() int {
	return len(bs.b)
}

// Swap exchanges the elements at positions i and j.
func (bs bytesSort) Swap(i, j int) {
	bs.b[j], bs.b[i] = bs.b[i], bs.b[j]
}

// Less reports whether the first n bytes of element i sort before the first
// n bytes of element j.
func (bs bytesSort) Less(i, j int) bool {
	x, y := bs.b[i], bs.b[j]
	for k := 0; k < bs.n; k++ {
		if x[k] != y[k] {
			return x[k] < y[k]
		}
	}
	return false
}
451
452
453
454
455 func parseExtensions(scan *scanner) int {
456 start := scan.start
457 exts := [][]byte{}
458 private := []byte{}
459 end := scan.end
460 for len(scan.token) == 1 {
461 extStart := scan.start
462 ext := scan.token[0]
463 end = parseExtension(scan)
464 extension := scan.b[extStart:end]
465 if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
466 scan.setError(ErrSyntax)
467 end = extStart
468 continue
469 } else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
470 scan.b = scan.b[:end]
471 return end
472 } else if ext == 'x' {
473 private = extension
474 break
475 }
476 exts = append(exts, extension)
477 }
478 sort.Sort(bytesSort{exts, 1})
479 if len(private) > 0 {
480 exts = append(exts, private)
481 }
482 scan.b = scan.b[:start]
483 if len(exts) > 0 {
484 scan.b = append(scan.b, bytes.Join(exts, separator)...)
485 } else if start > 0 {
486
487 scan.b = scan.b[:start-1]
488 }
489 return end
490 }
491
492
493
// parseExtension parses the single extension whose one-byte singleton is the
// scanner's current token and returns the end position of that extension in
// scan.b. The -u and -t extensions get dedicated handling; -x accepts any
// following tokens and all other singletons accept tokens of two or more
// bytes. The -u branch may reorder and shrink the buffer in place.
func parseExtension(scan *scanner) int {
	start, end := scan.start, scan.end
	switch scan.token[0] {
	case 'u':
		// Attributes: the tokens longer than two bytes that directly follow
		// the singleton.
		attrStart := end
		scan.scan()
		for last := []byte{}; len(scan.token) > 2; scan.scan() {
			if bytes.Compare(scan.token, last) != -1 {
				// This attribute does not sort before the previous one:
				// rescan all attributes from the start, sort them on their
				// first three bytes and write them back over the same region.
				// NOTE(review): last starts out empty, so the first attribute
				// always takes this path — confirm that is intended.
				p := attrStart + 1
				scan.next = p
				attrs := [][]byte{}
				for scan.scan(); len(scan.token) > 2; scan.scan() {
					attrs = append(attrs, scan.token)
					end = scan.end
				}
				sort.Sort(bytesSort{attrs, 3})
				copy(scan.b[p:], bytes.Join(attrs, separator))
				break
			}
			last = scan.token
			end = scan.end
		}

		// Keys: each key is a two-byte token followed by zero or more tokens
		// longer than two bytes. attrEnd marks where the key section begins.
		var last, key []byte
		for attrEnd := end; len(scan.token) == 2; last = key {
			key = scan.token
			end = scan.end
			// Consume the tokens belonging to this key. The end < scan.end
			// test stops the loop once scan no longer advances.
			for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
				end = scan.end
			}

			if bytes.Compare(key, last) != 1 || scan.err != nil {
				// Either the keys are not strictly increasing or an error has
				// been recorded: rescan the whole key section, collecting each
				// key together with the tokens that follow it.
				p := attrEnd + 1
				scan.next = p
				keys := [][]byte{}
				for scan.scan(); len(scan.token) == 2; {
					keyStart := scan.start
					end = scan.end
					for scan.scan(); end < scan.end && len(scan.token) > 2; scan.scan() {
						end = scan.end
					}
					keys = append(keys, scan.b[keyStart:end])
				}
				// Stable sort on the two-byte key keeps the first occurrence
				// of each key ahead of later ones.
				sort.Stable(bytesSort{keys, 2})
				if n := len(keys); n > 0 {
					// Keep only the first entry per key; a later entry with
					// the same key but different content is reported.
					k := 0
					for i := 1; i < n; i++ {
						if !bytes.Equal(keys[k][:2], keys[i][:2]) {
							k++
							keys[k] = keys[i]
						} else if !bytes.Equal(keys[k], keys[i]) {
							scan.setError(ErrDuplicateKey)
						}
					}
					keys = keys[:k+1]
				}
				reordered := bytes.Join(keys, separator)
				if e := p + len(reordered); e < end {
					// Dropping duplicates made the section shorter: remove
					// the now unused tail from the buffer.
					scan.deleteRange(e, end)
					end = e
				}
				copy(scan.b[p:], reordered)
				break
			}
		}
	case 't':
		scan.scan()
		// A token of two or three bytes whose second byte is not a digit
		// starts an embedded tag, parsed without normalization and then
		// lower-cased together with the singleton.
		if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
			_, end = parseTag(scan, false)
			scan.toLower(start, end)
		}
		// Two-byte tokens whose second byte is a digit are each followed by
		// tokens of three or more bytes.
		for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
			end = scan.acceptMinSize(3)
		}
	case 'x':
		end = scan.acceptMinSize(1)
	default:
		end = scan.acceptMinSize(2)
	}
	return end
}
579
580
581 func getExtension(s string, p int) (end int, ext string) {
582 if s[p] == '-' {
583 p++
584 }
585 if s[p] == 'x' {
586 return len(s), s[p:]
587 }
588 end = nextExtension(s, p)
589 return end, s[p:end]
590 }
591
592
593
594
595
// nextExtension searches s from position p for the next "-<byte>-" pattern
// and returns the index of its leading '-'. It returns len(s) when there is
// no such pattern. A '-' that is not followed two bytes later by another '-'
// is skipped together with the two bytes after it.
func nextExtension(s string, p int) int {
	limit := len(s) - 3
	for p < limit {
		switch {
		case s[p] != '-':
			p++
		case s[p+2] == '-':
			return p
		default:
			p += 3
		}
	}
	return len(s)
}
609
View as plain text