1
2
3
4
5
6
7
8
9
10 package norm
11
12 import (
13 "unicode/utf8"
14
15 "golang.org/x/text/transform"
16 )
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// A Form selects one of the normalization forms handled by this package.
// Its integer value is used as an index into formTable.
type Form int

// The supported forms, numbered consecutively starting at zero.
const (
	// NFC is form 0.
	NFC Form = iota
	// NFD is form 1.
	NFD
	// NFKC is form 2.
	NFKC
	// NFKD is form 3.
	NFKD
)
43
44
45 func (f Form) Bytes(b []byte) []byte {
46 src := inputBytes(b)
47 ft := formTable[f]
48 n, ok := ft.quickSpan(src, 0, len(b), true)
49 if ok {
50 return b
51 }
52 out := make([]byte, n, len(b))
53 copy(out, b[0:n])
54 rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
55 return doAppendInner(&rb, n)
56 }
57
58
59 func (f Form) String(s string) string {
60 src := inputString(s)
61 ft := formTable[f]
62 n, ok := ft.quickSpan(src, 0, len(s), true)
63 if ok {
64 return s
65 }
66 out := make([]byte, n, len(s))
67 copy(out, s[0:n])
68 rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
69 return string(doAppendInner(&rb, n))
70 }
71
72
73 func (f Form) IsNormal(b []byte) bool {
74 src := inputBytes(b)
75 ft := formTable[f]
76 bp, ok := ft.quickSpan(src, 0, len(b), true)
77 if ok {
78 return true
79 }
80 rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
81 rb.setFlusher(nil, cmpNormalBytes)
82 for bp < len(b) {
83 rb.out = b[bp:]
84 if bp = decomposeSegment(&rb, bp, true); bp < 0 {
85 return false
86 }
87 bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
88 }
89 return true
90 }
91
92 func cmpNormalBytes(rb *reorderBuffer) bool {
93 b := rb.out
94 for i := 0; i < rb.nrune; i++ {
95 info := rb.rune[i]
96 if int(info.size) > len(b) {
97 return false
98 }
99 p := info.pos
100 pe := p + info.size
101 for ; p < pe; p++ {
102 if b[0] != rb.byte[p] {
103 return false
104 }
105 b = b[1:]
106 }
107 }
108 return true
109 }
110
111
112 func (f Form) IsNormalString(s string) bool {
113 src := inputString(s)
114 ft := formTable[f]
115 bp, ok := ft.quickSpan(src, 0, len(s), true)
116 if ok {
117 return true
118 }
119 rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
120 rb.setFlusher(nil, func(rb *reorderBuffer) bool {
121 for i := 0; i < rb.nrune; i++ {
122 info := rb.rune[i]
123 if bp+int(info.size) > len(s) {
124 return false
125 }
126 p := info.pos
127 pe := p + info.size
128 for ; p < pe; p++ {
129 if s[bp] != rb.byte[p] {
130 return false
131 }
132 bp++
133 }
134 }
135 return true
136 })
137 for bp < len(s) {
138 if bp = decomposeSegment(&rb, bp, true); bp < 0 {
139 return false
140 }
141 bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
142 }
143 return true
144 }
145
146
147
148
149 func patchTail(rb *reorderBuffer) bool {
150 info, p := lastRuneStart(&rb.f, rb.out)
151 if p == -1 || info.size == 0 {
152 return true
153 }
154 end := p + int(info.size)
155 extra := len(rb.out) - end
156 if extra > 0 {
157
158
159 x := make([]byte, 0)
160 x = append(x, rb.out[len(rb.out)-extra:]...)
161 rb.out = rb.out[:end]
162 decomposeToLastBoundary(rb)
163 rb.doFlush()
164 rb.out = append(rb.out, x...)
165 return false
166 }
167 buf := rb.out[p:]
168 rb.out = rb.out[:p]
169 decomposeToLastBoundary(rb)
170 if s := rb.ss.next(info); s == ssStarter {
171 rb.doFlush()
172 rb.ss.first(info)
173 } else if s == ssOverflow {
174 rb.doFlush()
175 rb.insertCGJ()
176 rb.ss = 0
177 }
178 rb.insertUnsafe(inputBytes(buf), 0, info)
179 return true
180 }
181
182 func appendQuick(rb *reorderBuffer, i int) int {
183 if rb.nsrc == i {
184 return i
185 }
186 end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
187 rb.out = rb.src.appendSlice(rb.out, i, end)
188 return end
189 }
190
191
192
193 func (f Form) Append(out []byte, src ...byte) []byte {
194 return f.doAppend(out, inputBytes(src), len(src))
195 }
196
197 func (f Form) doAppend(out []byte, src input, n int) []byte {
198 if n == 0 {
199 return out
200 }
201 ft := formTable[f]
202
203 if len(out) == 0 {
204 p, _ := ft.quickSpan(src, 0, n, true)
205 out = src.appendSlice(out, 0, p)
206 if p == n {
207 return out
208 }
209 rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
210 return doAppendInner(&rb, p)
211 }
212 rb := reorderBuffer{f: *ft, src: src, nsrc: n}
213 return doAppend(&rb, out, 0)
214 }
215
216 func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
217 rb.setFlusher(out, appendFlush)
218 src, n := rb.src, rb.nsrc
219 doMerge := len(out) > 0
220 if q := src.skipContinuationBytes(p); q > p {
221
222 rb.out = src.appendSlice(rb.out, p, q)
223 p = q
224 doMerge = patchTail(rb)
225 }
226 fd := &rb.f
227 if doMerge {
228 var info Properties
229 if p < n {
230 info = fd.info(src, p)
231 if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
232 if p == 0 {
233 decomposeToLastBoundary(rb)
234 }
235 p = decomposeSegment(rb, p, true)
236 }
237 }
238 if info.size == 0 {
239 rb.doFlush()
240
241 return src.appendSlice(rb.out, p, n)
242 }
243 if rb.nrune > 0 {
244 return doAppendInner(rb, p)
245 }
246 }
247 p = appendQuick(rb, p)
248 return doAppendInner(rb, p)
249 }
250
251 func doAppendInner(rb *reorderBuffer, p int) []byte {
252 for n := rb.nsrc; p < n; {
253 p = decomposeSegment(rb, p, true)
254 p = appendQuick(rb, p)
255 }
256 return rb.out
257 }
258
259
260
261 func (f Form) AppendString(out []byte, src string) []byte {
262 return f.doAppend(out, inputString(src), len(src))
263 }
264
265
266
267 func (f Form) QuickSpan(b []byte) int {
268 n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
269 return n
270 }
271
272
273
274 func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
275 n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
276 if n < len(b) {
277 if !ok {
278 err = transform.ErrEndOfSpan
279 } else {
280 err = transform.ErrShortSrc
281 }
282 }
283 return n, err
284 }
285
286
287
288 func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
289 n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
290 if n < len(s) {
291 if !ok {
292 err = transform.ErrEndOfSpan
293 } else {
294 err = transform.ErrShortSrc
295 }
296 }
297 return n, err
298 }
299
300
301
302
303
304 func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
305 var lastCC uint8
306 ss := streamSafe(0)
307 lastSegStart := i
308 for n = end; i < n; {
309 if j := src.skipASCII(i, n); i != j {
310 i = j
311 lastSegStart = i - 1
312 lastCC = 0
313 ss = 0
314 continue
315 }
316 info := f.info(src, i)
317 if info.size == 0 {
318 if atEOF {
319
320 return n, true
321 }
322 return lastSegStart, true
323 }
324
325
326 switch ss.next(info) {
327 case ssStarter:
328 lastSegStart = i
329 case ssOverflow:
330 return lastSegStart, false
331 case ssSuccess:
332 if lastCC > info.ccc {
333 return lastSegStart, false
334 }
335 }
336 if f.composing {
337 if !info.isYesC() {
338 break
339 }
340 } else {
341 if !info.isYesD() {
342 break
343 }
344 }
345 lastCC = info.ccc
346 i += int(info.size)
347 }
348 if i == n {
349 if !atEOF {
350 n = lastSegStart
351 }
352 return n, true
353 }
354 return lastSegStart, false
355 }
356
357
358
359 func (f Form) QuickSpanString(s string) int {
360 n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
361 return n
362 }
363
364
365
366 func (f Form) FirstBoundary(b []byte) int {
367 return f.firstBoundary(inputBytes(b), len(b))
368 }
369
370 func (f Form) firstBoundary(src input, nsrc int) int {
371 i := src.skipContinuationBytes(0)
372 if i >= nsrc {
373 return -1
374 }
375 fd := formTable[f]
376 ss := streamSafe(0)
377
378
379
380 for {
381 info := fd.info(src, i)
382 if info.size == 0 {
383 return -1
384 }
385 if s := ss.next(info); s != ssSuccess {
386 return i
387 }
388 i += int(info.size)
389 if i >= nsrc {
390 if !info.BoundaryAfter() && !ss.isMax() {
391 return -1
392 }
393 return nsrc
394 }
395 }
396 }
397
398
399
400 func (f Form) FirstBoundaryInString(s string) int {
401 return f.firstBoundary(inputString(s), len(s))
402 }
403
404
405
406
407 func (f Form) NextBoundary(b []byte, atEOF bool) int {
408 return f.nextBoundary(inputBytes(b), len(b), atEOF)
409 }
410
411
412
413
414 func (f Form) NextBoundaryInString(s string, atEOF bool) int {
415 return f.nextBoundary(inputString(s), len(s), atEOF)
416 }
417
418 func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
419 if nsrc == 0 {
420 if atEOF {
421 return 0
422 }
423 return -1
424 }
425 fd := formTable[f]
426 info := fd.info(src, 0)
427 if info.size == 0 {
428 if atEOF {
429 return 1
430 }
431 return -1
432 }
433 ss := streamSafe(0)
434 ss.first(info)
435
436 for i := int(info.size); i < nsrc; i += int(info.size) {
437 info = fd.info(src, i)
438 if info.size == 0 {
439 if atEOF {
440 return i
441 }
442 return -1
443 }
444
445
446 if s := ss.next(info); s != ssSuccess {
447 return i
448 }
449 }
450 if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
451 return -1
452 }
453 return nsrc
454 }
455
456
457
458 func (f Form) LastBoundary(b []byte) int {
459 return lastBoundary(formTable[f], b)
460 }
461
462 func lastBoundary(fd *formInfo, b []byte) int {
463 i := len(b)
464 info, p := lastRuneStart(fd, b)
465 if p == -1 {
466 return -1
467 }
468 if info.size == 0 {
469 if p == 0 {
470 return -1
471 }
472 i = p
473 info, p = lastRuneStart(fd, b[:i])
474 if p == -1 {
475 return i
476 }
477 }
478 if p+int(info.size) != i {
479 return i
480 }
481 if info.BoundaryAfter() {
482 return i
483 }
484 ss := streamSafe(0)
485 v := ss.backwards(info)
486 for i = p; i >= 0 && v != ssStarter; i = p {
487 info, p = lastRuneStart(fd, b[:i])
488 if v = ss.backwards(info); v == ssOverflow {
489 break
490 }
491 if p+int(info.size) != i {
492 if p == -1 {
493 return -1
494 }
495 return i
496 }
497 }
498 return i
499 }
500
501
502
503
504 func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
505
506 info := rb.f.info(rb.src, sp)
507 if info.size == 0 {
508 return 0
509 }
510 if s := rb.ss.next(info); s == ssStarter {
511
512 if rb.nrune > 0 {
513 goto end
514 }
515 } else if s == ssOverflow {
516 rb.insertCGJ()
517 goto end
518 }
519 if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
520 return int(err)
521 }
522 for {
523 sp += int(info.size)
524 if sp >= rb.nsrc {
525 if !atEOF && !info.BoundaryAfter() {
526 return int(iShortSrc)
527 }
528 break
529 }
530 info = rb.f.info(rb.src, sp)
531 if info.size == 0 {
532 if !atEOF {
533 return int(iShortSrc)
534 }
535 break
536 }
537 if s := rb.ss.next(info); s == ssStarter {
538 break
539 } else if s == ssOverflow {
540 rb.insertCGJ()
541 break
542 }
543 if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
544 return int(err)
545 }
546 }
547 end:
548 if !rb.doFlush() {
549 return int(iShortDst)
550 }
551 return sp
552 }
553
554
555
556 func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
557 p := len(buf) - 1
558 for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
559 }
560 if p < 0 {
561 return Properties{}, -1
562 }
563 return fd.info(inputBytes(buf), p), p
564 }
565
566
567
568 func decomposeToLastBoundary(rb *reorderBuffer) {
569 fd := &rb.f
570 info, i := lastRuneStart(fd, rb.out)
571 if int(info.size) != len(rb.out)-i {
572
573 return
574 }
575 if info.BoundaryAfter() {
576 return
577 }
578 var add [maxNonStarters + 1]Properties
579 padd := 0
580 ss := streamSafe(0)
581 p := len(rb.out)
582 for {
583 add[padd] = info
584 v := ss.backwards(info)
585 if v == ssOverflow {
586
587
588 break
589 }
590 padd++
591 p -= int(info.size)
592 if v == ssStarter || p < 0 {
593 break
594 }
595 info, i = lastRuneStart(fd, rb.out[:p])
596 if int(info.size) != p-i {
597 break
598 }
599 }
600 rb.ss = ss
601
602 var buf [maxBufferSize * utf8.UTFMax]byte
603 cp := buf[:copy(buf[:], rb.out[p:])]
604 rb.out = rb.out[:p]
605 for padd--; padd >= 0; padd-- {
606 info = add[padd]
607 rb.insertUnsafe(inputBytes(cp), 0, info)
608 cp = cp[info.size:]
609 }
610 }
611
View as plain text