1
2
3
4
5 package language
6
7 import (
8 "bytes"
9 "fmt"
10 "sort"
11 "strconv"
12
13 "golang.org/x/text/internal/tag"
14 )
15
16
17
18 func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
19 if !tag.FixCase(form, key) {
20 return 0, ErrSyntax
21 }
22 i := idx.Index(key)
23 if i == -1 {
24 return 0, NewValueError(key)
25 }
26 return i, nil
27 }
28
// searchUint returns the smallest index i for which imap[i] >= key, or
// len(imap) when no element satisfies that. The result is only meaningful
// when imap is sorted in ascending order, as required by sort.Search.
func searchUint(imap []uint16, key uint16) int {
	atLeastKey := func(pos int) bool { return key <= imap[pos] }
	return sort.Search(len(imap), atLeastKey)
}
34
// Language is the numeric identifier this package uses for a language code.
// The zero value is rendered as "und" by String and StringToBuf.
type Language uint16
36
37
38
39 func getLangID(s []byte) (Language, error) {
40 if len(s) == 2 {
41 return getLangISO2(s)
42 }
43 return getLangISO3(s)
44 }
45
46
47
48
// Canonicalize returns the result of normLang for id: the Language that id is
// mapped to and the AliasType of that mapping. When normLang finds no mapping
// it returns id itself together with AliasTypeUnknown.
func (id Language) Canonicalize() (Language, AliasType) {
	return normLang(id)
}
52
53
54 func normLang(id Language) (Language, AliasType) {
55 k := sort.Search(len(AliasMap), func(i int) bool {
56 return AliasMap[i].From >= uint16(id)
57 })
58 if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
59 return Language(AliasMap[k].To), AliasTypes[k]
60 }
61 return id, AliasTypeUnknown
62 }
63
64
65
66 func getLangISO2(s []byte) (Language, error) {
67 if !tag.FixCase("zz", s) {
68 return 0, ErrSyntax
69 }
70 if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
71 return Language(i), nil
72 }
73 return 0, NewValueError(s)
74 }
75
// base is the number of letters from 'a' through 'z'; it is the radix used by
// strToInt and intToStr.
const base = 'z' - 'a' + 1

// strToInt interprets s as a number in radix base, most significant digit
// first, where 'a' is digit 0. It does not validate that the bytes of s are
// lowercase letters.
func strToInt(s []byte) uint {
	var v uint
	for _, c := range s {
		v = v*base + uint(c-'a')
	}
	return v
}
86
87
88
89 func intToStr(v uint, s []byte) {
90 for i := len(s) - 1; i >= 0; i-- {
91 s[i] = byte(v%base) + 'a'
92 v /= base
93 }
94 }
95
96
97
98 func getLangISO3(s []byte) (Language, error) {
99 if tag.FixCase("und", s) {
100
101 for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
102 if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {
103
104
105
106 id := Language(i)
107 if id == nonCanonicalUnd {
108 return 0, nil
109 }
110 return id, nil
111 }
112 }
113 if i := altLangISO3.Index(s); i != -1 {
114 return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
115 }
116 n := strToInt(s)
117 if langNoIndex[n/8]&(1<<(n%8)) != 0 {
118 return Language(n) + langNoIndexOffset, nil
119 }
120
121 for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
122 if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
123 return Language(i), nil
124 }
125 }
126 return 0, NewValueError(s)
127 }
128 return 0, ErrSyntax
129 }
130
131
132
133 func (id Language) StringToBuf(b []byte) int {
134 if id >= langNoIndexOffset {
135 intToStr(uint(id)-langNoIndexOffset, b[:3])
136 return 3
137 } else if id == 0 {
138 return copy(b, "und")
139 }
140 l := lang[id<<2:]
141 if l[3] == 0 {
142 return copy(b, l[:3])
143 }
144 return copy(b, l[:2])
145 }
146
147
148
149
150 func (b Language) String() string {
151 if b == 0 {
152 return "und"
153 } else if b >= langNoIndexOffset {
154 b -= langNoIndexOffset
155 buf := [3]byte{}
156 intToStr(uint(b), buf[:])
157 return string(buf[:])
158 }
159 l := lang.Elem(int(b))
160 if l[3] == 0 {
161 return l[:3]
162 }
163 return l[:2]
164 }
165
166
167 func (b Language) ISO3() string {
168 if b == 0 || b >= langNoIndexOffset {
169 return b.String()
170 }
171 l := lang.Elem(int(b))
172 if l[3] == 0 {
173 return l[:3]
174 } else if l[2] == 0 {
175 return altLangISO3.Elem(int(l[3]))[:3]
176 }
177
178
179 return l[0:1] + l[2:4]
180 }
181
182
183 func (b Language) IsPrivateUse() bool {
184 return langPrivateStart <= b && b <= langPrivateEnd
185 }
186
187
188
189 func (b Language) SuppressScript() Script {
190 if b < langNoIndexOffset {
191 return Script(suppressScript[b])
192 }
193 return 0
194 }
195
// Region is the numeric identifier this package uses for a region. IDs below
// isoRegionOffset are rendered by String as a three-digit number (or "ZZ" for
// 0); IDs from isoRegionOffset upwards index the regionISO table.
type Region uint16
197
198
199
200 func getRegionID(s []byte) (Region, error) {
201 if len(s) == 3 {
202 if isAlpha(s[0]) {
203 return getRegionISO3(s)
204 }
205 if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
206 return getRegionM49(int(i))
207 }
208 }
209 return getRegionISO2(s)
210 }
211
212
213
214 func getRegionISO2(s []byte) (Region, error) {
215 i, err := findIndex(regionISO, s, "ZZ")
216 if err != nil {
217 return 0, err
218 }
219 return Region(i) + isoRegionOffset, nil
220 }
221
222
223
224 func getRegionISO3(s []byte) (Region, error) {
225 if tag.FixCase("ZZZ", s) {
226 for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
227 if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
228 return Region(i) + isoRegionOffset, nil
229 }
230 }
231 for i := 0; i < len(altRegionISO3); i += 3 {
232 if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
233 return Region(altRegionIDs[i/3]), nil
234 }
235 }
236 return 0, NewValueError(s)
237 }
238 return 0, ErrSyntax
239 }
240
241 func getRegionM49(n int) (Region, error) {
242 if 0 < n && n <= 999 {
243 const (
244 searchBits = 7
245 regionBits = 9
246 regionMask = 1<<regionBits - 1
247 )
248 idx := n >> searchBits
249 buf := fromM49[m49Index[idx]:m49Index[idx+1]]
250 val := uint16(n) << regionBits
251 i := sort.Search(len(buf), func(i int) bool {
252 return buf[i] >= val
253 })
254 if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
255 return Region(r & regionMask), nil
256 }
257 }
258 var e ValueError
259 fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n)
260 return 0, e
261 }
262
263
264
265
266 func normRegion(r Region) Region {
267 m := regionOldMap
268 k := sort.Search(len(m), func(i int) bool {
269 return m[i].From >= uint16(r)
270 })
271 if k < len(m) && m[k].From == uint16(r) {
272 return Region(m[k].To)
273 }
274 return 0
275 }
276
// Bit flags for region types. Region.IsPrivateUse tests the value returned by
// Region.typ against iso3166UserAssigned; the flag names suggest the other
// two mark ccTLD and BCP 47 regions (their use is not visible in this file).
const (
	iso3166UserAssigned = 1 << iota // 1
	ccTLD                           // 2
	bcp47Region                     // 4
)
282
// typ returns the regionTypes entry for r. IsPrivateUse tests the result
// against the iso3166UserAssigned flag.
func (r Region) typ() byte {
	return regionTypes[r]
}
286
287
288
289 func (r Region) String() string {
290 if r < isoRegionOffset {
291 if r == 0 {
292 return "ZZ"
293 }
294 return fmt.Sprintf("%03d", r.M49())
295 }
296 r -= isoRegionOffset
297 return regionISO.Elem(int(r))[:2]
298 }
299
300
301
302
303 func (r Region) ISO3() string {
304 if r < isoRegionOffset {
305 return "ZZZ"
306 }
307 r -= isoRegionOffset
308 reg := regionISO.Elem(int(r))
309 switch reg[2] {
310 case 0:
311 return altRegionISO3[reg[3]:][:3]
312 case ' ':
313 return "ZZZ"
314 }
315 return reg[0:1] + reg[2:4]
316 }
317
318
319
// M49 returns the m49 table entry for r as an int (presumably the region's
// UN M.49 numeric code — confirm against the table generator).
func (r Region) M49() int {
	return int(m49[r])
}
323
324
325
326
// IsPrivateUse reports whether the iso3166UserAssigned flag is set in the
// type bits of r.
func (r Region) IsPrivateUse() bool {
	return r.typ()&iso3166UserAssigned != 0
}
330
// Script is the numeric identifier this package uses for a script. The zero
// value is rendered as "Zzzz" by String.
type Script uint16
332
333
334
// getScriptID returns the Script for the script code s by looking it up in
// idx with the form "Zzzz". The index and error from findIndex are passed
// through, so the Script is 0 whenever err is non-nil.
func getScriptID(idx tag.Index, s []byte) (Script, error) {
	i, err := findIndex(idx, s, "Zzzz")
	return Script(i), err
}
339
340
341
342 func (s Script) String() string {
343 if s == 0 {
344 return "Zzzz"
345 }
346 return script.Elem(int(s))
347 }
348
349
350 func (s Script) IsPrivateUse() bool {
351 return _Qaaa <= s && s <= _Qabx
352 }
353
const (
	// maxAltTaglen is the length of "en-US-POSIX", the longest key stored in
	// grandfatheredMap.
	maxAltTaglen = len("en-US-POSIX")
	// maxLen is the size of the byte-array keys of grandfatheredMap.
	maxLen = maxAltTaglen
)
358
var (
	// grandfatheredMap maps the lowercase spelling of a tag, stored in a
	// zero-padded byte array, to either a language ID (values >= 0) or the
	// negated, 1-based position of a replacement tag in altTags (values < 0);
	// see grandfathered for how the value is interpreted.
	grandfatheredMap = map[[maxLen]byte]int16{
		// Tags that are replaced by a single language ID.
		[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo,
		[maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami,
		[maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn,
		[maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak,
		[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh,
		[maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb,
		[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv,
		[maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn,
		[maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao,
		[maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay,
		[maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu,
		[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb,
		[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn,
		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb,
		[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt,
		[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg,
		[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn,
		[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak,
		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan,
		[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn,

		// Tags that are replaced by a full tag string. For a value v the
		// replacement is altTags[altTagIndex[-v-1]:altTagIndex[-v]], e.g. -1
		// selects "xtg-x-cel-gaulish" and -2 selects "en-GB-oxendict".
		[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1,
		[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2,
		[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3,
		[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4,
		[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5,
		[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6,

		// Further special spellings: "root" maps to language ID 0 and
		// "en-us-posix" to the replacement "en-US-u-va-posix".
		[maxLen]byte{'r', 'o', 'o', 't'}: 0,
		[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7,
	}

	// altTagIndex holds the byte offsets that delimit the replacement tags
	// packed into altTags; tag k (0-based) spans
	// altTags[altTagIndex[k]:altTagIndex[k+1]].
	altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}

	// altTags is the concatenation of all replacement tag strings.
	altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
)
402
403 func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
404 if v, ok := grandfatheredMap[s]; ok {
405 if v < 0 {
406 return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
407 }
408 t.LangID = Language(v)
409 return t, true
410 }
411 return t, false
412 }
413
View as plain text