Source file
src/strconv/quote.go
1
2
3
4
5
6
7 package strconv
8
9 import (
10 "unicode/utf8"
11 )
12
const (
	// lowerhex lists the sixteen hexadecimal digits in lowercase; indexing it
	// with a 4-bit value yields that value's digit.
	lowerhex = "0123456789abcdef"
	// upperhex is the uppercase counterpart of lowerhex. It is not referenced
	// in the visible part of this file.
	upperhex = "0123456789ABCDEF"
)
17
18
19 func contains(s string, c byte) bool {
20 return index(s, c) != -1
21 }
22
23 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
24 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
25 }
26
27 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string {
28 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly))
29 }
30
31 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
32
33
34 if cap(buf)-len(buf) < len(s) {
35 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
36 copy(nBuf, buf)
37 buf = nBuf
38 }
39 buf = append(buf, quote)
40 for width := 0; len(s) > 0; s = s[width:] {
41 r := rune(s[0])
42 width = 1
43 if r >= utf8.RuneSelf {
44 r, width = utf8.DecodeRuneInString(s)
45 }
46 if width == 1 && r == utf8.RuneError {
47 buf = append(buf, `\x`...)
48 buf = append(buf, lowerhex[s[0]>>4])
49 buf = append(buf, lowerhex[s[0]&0xF])
50 continue
51 }
52 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
53 }
54 buf = append(buf, quote)
55 return buf
56 }
57
58 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
59 buf = append(buf, quote)
60 if !utf8.ValidRune(r) {
61 r = utf8.RuneError
62 }
63 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
64 buf = append(buf, quote)
65 return buf
66 }
67
68 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
69 if r == rune(quote) || r == '\\' {
70 buf = append(buf, '\\')
71 buf = append(buf, byte(r))
72 return buf
73 }
74 if ASCIIonly {
75 if r < utf8.RuneSelf && IsPrint(r) {
76 buf = append(buf, byte(r))
77 return buf
78 }
79 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
80 return utf8.AppendRune(buf, r)
81 }
82 switch r {
83 case '\a':
84 buf = append(buf, `\a`...)
85 case '\b':
86 buf = append(buf, `\b`...)
87 case '\f':
88 buf = append(buf, `\f`...)
89 case '\n':
90 buf = append(buf, `\n`...)
91 case '\r':
92 buf = append(buf, `\r`...)
93 case '\t':
94 buf = append(buf, `\t`...)
95 case '\v':
96 buf = append(buf, `\v`...)
97 default:
98 switch {
99 case r < ' ' || r == 0x7f:
100 buf = append(buf, `\x`...)
101 buf = append(buf, lowerhex[byte(r)>>4])
102 buf = append(buf, lowerhex[byte(r)&0xF])
103 case !utf8.ValidRune(r):
104 r = 0xFFFD
105 fallthrough
106 case r < 0x10000:
107 buf = append(buf, `\u`...)
108 for s := 12; s >= 0; s -= 4 {
109 buf = append(buf, lowerhex[r>>uint(s)&0xF])
110 }
111 default:
112 buf = append(buf, `\U`...)
113 for s := 28; s >= 0; s -= 4 {
114 buf = append(buf, lowerhex[r>>uint(s)&0xF])
115 }
116 }
117 }
118 return buf
119 }
120
121
122
123
124
125 func Quote(s string) string {
126 return quoteWith(s, '"', false, false)
127 }
128
129
130
131 func AppendQuote(dst []byte, s string) []byte {
132 return appendQuotedWith(dst, s, '"', false, false)
133 }
134
135
136
137
138 func QuoteToASCII(s string) string {
139 return quoteWith(s, '"', true, false)
140 }
141
142
143
144 func AppendQuoteToASCII(dst []byte, s string) []byte {
145 return appendQuotedWith(dst, s, '"', true, false)
146 }
147
148
149
150
151
152 func QuoteToGraphic(s string) string {
153 return quoteWith(s, '"', false, true)
154 }
155
156
157
158 func AppendQuoteToGraphic(dst []byte, s string) []byte {
159 return appendQuotedWith(dst, s, '"', false, true)
160 }
161
162
163
164
165
166
167 func QuoteRune(r rune) string {
168 return quoteRuneWith(r, '\'', false, false)
169 }
170
171
172
173 func AppendQuoteRune(dst []byte, r rune) []byte {
174 return appendQuotedRuneWith(dst, r, '\'', false, false)
175 }
176
177
178
179
180
181
182
183 func QuoteRuneToASCII(r rune) string {
184 return quoteRuneWith(r, '\'', true, false)
185 }
186
187
188
189 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
190 return appendQuotedRuneWith(dst, r, '\'', true, false)
191 }
192
193
194
195
196
197
198
199 func QuoteRuneToGraphic(r rune) string {
200 return quoteRuneWith(r, '\'', false, true)
201 }
202
203
204
205 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
206 return appendQuotedRuneWith(dst, r, '\'', false, true)
207 }
208
209
210
211
// CanBackquote reports whether s can be written unchanged between
// backquotes. It returns false if s contains a byte that is not valid UTF-8,
// a byte order mark (U+FEFF), a backquote, DEL (U+007F), or any control
// character below space other than tab.
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, size := utf8.DecodeRuneInString(s)
		s = s[size:]
		switch {
		case size > 1:
			// Among multibyte runes only the byte order mark is rejected.
			if r == '\ufeff' {
				return false
			}
		case r == utf8.RuneError:
			// A one-byte RuneError marks an invalid UTF-8 byte.
			return false
		case r == '`', r == '\u007F', r < ' ' && r != '\t':
			return false
		}
	}
	return true
}
231
// unhex converts the hexadecimal digit b (0-9, a-f, or A-F) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	switch {
	case b >= '0' && b <= '9':
		return rune(b - '0'), true
	case b >= 'a' && b <= 'f':
		return rune(b-'a') + 10, true
	case b >= 'A' && b <= 'F':
		return rune(b-'A') + 10, true
	}
	return 0, false
}
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
260
261 if len(s) == 0 {
262 err = ErrSyntax
263 return
264 }
265 switch c := s[0]; {
266 case c == quote && (quote == '\'' || quote == '"'):
267 err = ErrSyntax
268 return
269 case c >= utf8.RuneSelf:
270 r, size := utf8.DecodeRuneInString(s)
271 return r, true, s[size:], nil
272 case c != '\\':
273 return rune(s[0]), false, s[1:], nil
274 }
275
276
277 if len(s) <= 1 {
278 err = ErrSyntax
279 return
280 }
281 c := s[1]
282 s = s[2:]
283
284 switch c {
285 case 'a':
286 value = '\a'
287 case 'b':
288 value = '\b'
289 case 'f':
290 value = '\f'
291 case 'n':
292 value = '\n'
293 case 'r':
294 value = '\r'
295 case 't':
296 value = '\t'
297 case 'v':
298 value = '\v'
299 case 'x', 'u', 'U':
300 n := 0
301 switch c {
302 case 'x':
303 n = 2
304 case 'u':
305 n = 4
306 case 'U':
307 n = 8
308 }
309 var v rune
310 if len(s) < n {
311 err = ErrSyntax
312 return
313 }
314 for j := 0; j < n; j++ {
315 x, ok := unhex(s[j])
316 if !ok {
317 err = ErrSyntax
318 return
319 }
320 v = v<<4 | x
321 }
322 s = s[n:]
323 if c == 'x' {
324
325 value = v
326 break
327 }
328 if !utf8.ValidRune(v) {
329 err = ErrSyntax
330 return
331 }
332 value = v
333 multibyte = true
334 case '0', '1', '2', '3', '4', '5', '6', '7':
335 v := rune(c) - '0'
336 if len(s) < 2 {
337 err = ErrSyntax
338 return
339 }
340 for j := 0; j < 2; j++ {
341 x := rune(s[j]) - '0'
342 if x < 0 || x > 7 {
343 err = ErrSyntax
344 return
345 }
346 v = (v << 3) | x
347 }
348 s = s[2:]
349 if v > 255 {
350 err = ErrSyntax
351 return
352 }
353 value = v
354 case '\\':
355 value = '\\'
356 case '\'', '"':
357 if c != quote {
358 err = ErrSyntax
359 return
360 }
361 value = rune(c)
362 default:
363 err = ErrSyntax
364 return
365 }
366 tail = s
367 return
368 }
369
370
371
372 func QuotedPrefix(s string) (string, error) {
373 out, _, err := unquote(s, false)
374 return out, err
375 }
376
377
378
379
380
381
382 func Unquote(s string) (string, error) {
383 out, rem, err := unquote(s, true)
384 if len(rem) > 0 {
385 return "", ErrSyntax
386 }
387 return out, err
388 }
389
390
391
392
393
394 func unquote(in string, unescape bool) (out, rem string, err error) {
395
396 if len(in) < 2 {
397 return "", in, ErrSyntax
398 }
399 quote := in[0]
400 end := index(in[1:], quote)
401 if end < 0 {
402 return "", in, ErrSyntax
403 }
404 end += 2
405
406 switch quote {
407 case '`':
408 switch {
409 case !unescape:
410 out = in[:end]
411 case !contains(in[:end], '\r'):
412 out = in[len("`") : end-len("`")]
413 default:
414
415
416 buf := make([]byte, 0, end-len("`")-len("\r")-len("`"))
417 for i := len("`"); i < end-len("`"); i++ {
418 if in[i] != '\r' {
419 buf = append(buf, in[i])
420 }
421 }
422 out = string(buf)
423 }
424
425
426
427
428
429 return out, in[end:], nil
430 case '"', '\'':
431
432 if !contains(in[:end], '\\') && !contains(in[:end], '\n') {
433 var valid bool
434 switch quote {
435 case '"':
436 valid = utf8.ValidString(in[len(`"`) : end-len(`"`)])
437 case '\'':
438 r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")])
439 valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1)
440 }
441 if valid {
442 out = in[:end]
443 if unescape {
444 out = out[1 : end-1]
445 }
446 return out, in[end:], nil
447 }
448 }
449
450
451 var buf []byte
452 in0 := in
453 in = in[1:]
454 if unescape {
455 buf = make([]byte, 0, 3*end/2)
456 }
457 for len(in) > 0 && in[0] != quote {
458
459
460 r, multibyte, rem, err := UnquoteChar(in, quote)
461 if in[0] == '\n' || err != nil {
462 return "", in0, ErrSyntax
463 }
464 in = rem
465
466
467 if unescape {
468 if r < utf8.RuneSelf || !multibyte {
469 buf = append(buf, byte(r))
470 } else {
471 buf = utf8.AppendRune(buf, r)
472 }
473 }
474
475
476 if quote == '\'' {
477 break
478 }
479 }
480
481
482 if !(len(in) > 0 && in[0] == quote) {
483 return "", in0, ErrSyntax
484 }
485 in = in[1:]
486
487 if unescape {
488 return string(buf), in, nil
489 }
490 return in0[:len(in0)-len(in)], in, nil
491 default:
492 return "", in, ErrSyntax
493 }
494 }
495
496
497
// bsearch performs a binary search for v in s, which must be sorted in
// ascending order. It returns the smallest index i such that s[i] >= v
// (len(s) if there is none) and whether s[i] equals v.
func bsearch[S ~[]E, E ~uint16 | ~uint32](s S, v E) (int, bool) {
	lo, hi := 0, len(s)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if s[mid] >= v {
			hi = mid
			continue
		}
		lo = mid + 1
	}
	found := lo < len(s) && s[lo] == v
	return lo, found
}
511
512
513
514
515
516
517
518
519
520
521 func IsPrint(r rune) bool {
522
523 if r <= 0xFF {
524 if 0x20 <= r && r <= 0x7E {
525
526 return true
527 }
528 if 0xA1 <= r && r <= 0xFF {
529
530 return r != 0xAD
531 }
532 return false
533 }
534
535
536
537
538
539
540
541 if 0 <= r && r < 1<<16 {
542 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
543 i, _ := bsearch(isPrint, rr)
544 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
545 return false
546 }
547 _, found := bsearch(isNotPrint, rr)
548 return !found
549 }
550
551 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
552 i, _ := bsearch(isPrint, rr)
553 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
554 return false
555 }
556 if r >= 0x20000 {
557 return true
558 }
559 r -= 0x10000
560 _, found := bsearch(isNotPrint, uint16(r))
561 return !found
562 }
563
564
565
566
567 func IsGraphic(r rune) bool {
568 if IsPrint(r) {
569 return true
570 }
571 return isInGraphicList(r)
572 }
573
574
575
576
577 func isInGraphicList(r rune) bool {
578
579 if r > 0xFFFF {
580 return false
581 }
582 _, found := bsearch(isGraphic, uint16(r))
583 return found
584 }
585
View as plain text