Source file
src/text/scanner/scanner_test.go
1
2
3
4
5 package scanner
6
7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "strings"
12 "testing"
13 "unicode/utf8"
14 )
15
16
// StringReader is an io.Reader that delivers the strings in data one
// segment at a time, so tests can control exactly where read boundaries
// fall (for example in the middle of a multi-byte UTF-8 sequence).
type StringReader struct {
	data []string // segments to deliver, in order
	step int      // index of the segment the next Read draws from
	off  int      // bytes of data[step] already delivered by earlier short reads
}

// Read copies the undelivered part of the current segment into p.
//
// Each call delivers bytes from at most one segment. An empty segment
// yields a (0, nil) read. If p is too small to hold the rest of the
// segment, the remainder is kept and handed out by subsequent calls
// instead of being dropped. Once every segment has been consumed, Read
// returns (0, io.EOF).
func (r *StringReader) Read(p []byte) (n int, err error) {
	if r.step >= len(r.data) {
		return 0, io.EOF
	}
	rest := r.data[r.step][r.off:]
	n = copy(p, rest)
	if n < len(rest) {
		// Short read: remember how far we got so no bytes are lost.
		r.off += n
		return n, nil
	}
	// Segment fully delivered (this also covers the empty segment).
	r.step++
	r.off = 0
	return n, nil
}
32
33 func readRuneSegments(t *testing.T, segments []string) {
34 got := ""
35 want := strings.Join(segments, "")
36 s := new(Scanner).Init(&StringReader{data: segments})
37 for {
38 ch := s.Next()
39 if ch == EOF {
40 break
41 }
42 got += string(ch)
43 }
44 if got != want {
45 t.Errorf("segments=%v got=%s want=%s", segments, got, want)
46 }
47 }
48
// segmentList holds the inputs for TestNext. Each entry is a list of
// segments that readRuneSegments hands to a StringReader.
var segmentList = [][]string{
	// No segments at all.
	{},
	// A single empty segment.
	{""},
	// Multi-byte runes, each wholly inside one segment.
	{
		"日",
		"本語",
	},
	{
		"\u65e5",
		"\u672c",
		"\u8a9e",
	},
	{
		"\U000065e5",
		" ",
		"\U0000672c",
		"\U00008a9e",
	},
	// Segment boundaries that fall inside a UTF-8 byte sequence.
	{
		"\xe6",
		"\x97\xa5\xe6",
		"\x9c\xac\xe8\xaa\x9e",
	},
	// Plain ASCII, without and with an empty segment in the middle.
	{
		"Hello",
		", ",
		"World",
		"!",
	},
	{
		"Hello",
		", ",
		"",
		"World",
		"!",
	},
}
59
60 func TestNext(t *testing.T) {
61 for _, s := range segmentList {
62 readRuneSegments(t, s)
63 }
64 }
65
// token pairs an expected scan result with the source text that should
// produce it.
type token struct {
	tok  rune   // expected token class (Comment, Ident, ...) or a literal rune
	text string // source text of the token
}
70
// f100 is a run of 100 'f' characters, used to build long identifiers,
// comments, hexadecimal literals and strings in tokenList.
var f100 = "ffffffffff" + // 10
	"ffffffffff" + // 20
	"ffffffffff" + // 30
	"ffffffffff" + // 40
	"ffffffffff" + // 50
	"ffffffffff" + // 60
	"ffffffffff" + // 70
	"ffffffffff" + // 80
	"ffffffffff" + // 90
	"ffffffffff" // 100
72
// tokenList is the shared token table: makeSource renders every entry's
// text into a source buffer, and the scan tests compare what the scanner
// returns against tok and text. Each group opens with a Comment entry
// whose text names the category that follows.
var tokenList = []token{
	{Comment, "// line comments"},
	{Comment, "//"},
	{Comment, "////"},
	{Comment, "// comment"},
	{Comment, "// /* comment */"},
	{Comment, "// // comment //"},
	{Comment, "//" + f100},

	{Comment, "// general comments"},
	{Comment, "/**/"},
	{Comment, "/***/"},
	{Comment, "/* comment */"},
	{Comment, "/* // comment */"},
	{Comment, "/* /* comment */"},
	{Comment, "/*\n comment\n*/"},
	{Comment, "/*" + f100 + "*/"},

	{Comment, "// identifiers"},
	{Ident, "a"},
	{Ident, "a0"},
	{Ident, "foobar"},
	{Ident, "abc123"},
	{Ident, "LGTM"},
	{Ident, "_"},
	{Ident, "_abc123"},
	{Ident, "abc123_"},
	{Ident, "_abc_123_"},
	{Ident, "_äöü"},
	{Ident, "_本"},
	{Ident, "äöü"},
	{Ident, "本"},
	{Ident, "a۰۱۸"},
	{Ident, "foo६४"},
	{Ident, "bar9876"},
	{Ident, f100},

	{Comment, "// decimal ints"},
	{Int, "0"},
	{Int, "1"},
	{Int, "9"},
	{Int, "42"},
	{Int, "1234567890"},

	{Comment, "// octal ints"},
	{Int, "00"},
	{Int, "01"},
	{Int, "07"},
	{Int, "042"},
	{Int, "01234567"},

	{Comment, "// hexadecimal ints"},
	{Int, "0x0"},
	{Int, "0x1"},
	{Int, "0xf"},
	{Int, "0x42"},
	{Int, "0x123456789abcDEF"},
	{Int, "0x" + f100},
	{Int, "0X0"},
	{Int, "0X1"},
	{Int, "0XF"},
	{Int, "0X42"},
	{Int, "0X123456789abcDEF"},
	{Int, "0X" + f100},

	{Comment, "// floats"},
	{Float, "0."},
	{Float, "1."},
	{Float, "42."},
	{Float, "01234567890."},
	{Float, ".0"},
	{Float, ".1"},
	{Float, ".42"},
	{Float, ".0123456789"},
	{Float, "0.0"},
	{Float, "1.0"},
	{Float, "42.0"},
	{Float, "01234567890.0"},
	{Float, "0e0"},
	{Float, "1e0"},
	{Float, "42e0"},
	{Float, "01234567890e0"},
	{Float, "0E0"},
	{Float, "1E0"},
	{Float, "42E0"},
	{Float, "01234567890E0"},
	{Float, "0e+10"},
	{Float, "1e-10"},
	{Float, "42e+10"},
	{Float, "01234567890e-10"},
	{Float, "0E+10"},
	{Float, "1E-10"},
	{Float, "42E+10"},
	{Float, "01234567890E-10"},

	{Comment, "// chars"},
	{Char, `' '`},
	{Char, `'a'`},
	{Char, `'本'`},
	{Char, `'\a'`},
	{Char, `'\b'`},
	{Char, `'\f'`},
	{Char, `'\n'`},
	{Char, `'\r'`},
	{Char, `'\t'`},
	{Char, `'\v'`},
	{Char, `'\''`},
	{Char, `'\000'`},
	{Char, `'\777'`},
	{Char, `'\x00'`},
	{Char, `'\xff'`},
	{Char, `'\u0000'`},
	{Char, `'\ufA16'`},
	{Char, `'\U00000000'`},
	{Char, `'\U0000ffAB'`},

	{Comment, "// strings"},
	{String, `" "`},
	{String, `"a"`},
	{String, `"本"`},
	{String, `"\a"`},
	{String, `"\b"`},
	{String, `"\f"`},
	{String, `"\n"`},
	{String, `"\r"`},
	{String, `"\t"`},
	{String, `"\v"`},
	{String, `"\""`},
	{String, `"\000"`},
	{String, `"\777"`},
	{String, `"\x00"`},
	{String, `"\xff"`},
	{String, `"\u0000"`},
	{String, `"\ufA16"`},
	{String, `"\U00000000"`},
	{String, `"\U0000ffAB"`},
	{String, `"` + f100 + `"`},

	{Comment, "// raw strings"},
	{RawString, "``"},
	{RawString, "`\\`"},
	{RawString, "`" + "\n\n/* foobar */\n\n" + "`"},
	{RawString, "`" + f100 + "`"},

	{Comment, "// individual characters"},
	// Starts at \x01 rather than \x00: TestError expects a NUL byte to be
	// reported as "invalid character NUL", and the tests driven by this
	// table require an error count of zero.
	{'\x01', "\x01"},
	{' ' - 1, string(' ' - 1)},
	{'+', "+"},
	{'/', "/"},
	{'.', "."},
	{'~', "~"},
	{'(', "("},
}
227
228 func makeSource(pattern string) *bytes.Buffer {
229 var buf bytes.Buffer
230 for _, k := range tokenList {
231 fmt.Fprintf(&buf, pattern, k.text)
232 }
233 return &buf
234 }
235
236 func checkTok(t *testing.T, s *Scanner, line int, got, want rune, text string) {
237 if got != want {
238 t.Fatalf("tok = %s, want %s for %q", TokenString(got), TokenString(want), text)
239 }
240 if s.Line != line {
241 t.Errorf("line = %d, want %d for %q", s.Line, line, text)
242 }
243 stext := s.TokenText()
244 if stext != text {
245 t.Errorf("text = %q, want %q", stext, text)
246 } else {
247
248 stext = s.TokenText()
249 if stext != text {
250 t.Errorf("text = %q, want %q (idempotency check)", stext, text)
251 }
252 }
253 }
254
255 func checkTokErr(t *testing.T, s *Scanner, line int, want rune, text string) {
256 prevCount := s.ErrorCount
257 checkTok(t, s, line, s.Scan(), want, text)
258 if s.ErrorCount != prevCount+1 {
259 t.Fatalf("want error for %q", text)
260 }
261 }
262
// countNewlines returns the number of '\n' characters in s.
func countNewlines(s string) int {
	total := 0
	// Scanning bytes is enough: 0x0A never occurs inside a multi-byte
	// UTF-8 sequence.
	for i := 0; i < len(s); i++ {
		if s[i] == '\n' {
			total++
		}
	}
	return total
}
272
273 func testScan(t *testing.T, mode uint) {
274 s := new(Scanner).Init(makeSource(" \t%s\n"))
275 s.Mode = mode
276 tok := s.Scan()
277 line := 1
278 for _, k := range tokenList {
279 if mode&SkipComments == 0 || k.tok != Comment {
280 checkTok(t, s, line, tok, k.tok, k.text)
281 tok = s.Scan()
282 }
283 line += countNewlines(k.text) + 1
284 }
285 checkTok(t, s, line, tok, EOF, "")
286 }
287
288 func TestScan(t *testing.T) {
289 testScan(t, GoTokens)
290 testScan(t, GoTokens&^SkipComments)
291 }
292
293 func TestInvalidExponent(t *testing.T) {
294 const src = "1.5e 1.5E 1e+ 1e- 1.5z"
295 s := new(Scanner).Init(strings.NewReader(src))
296 s.Error = func(s *Scanner, msg string) {
297 const want = "exponent has no digits"
298 if msg != want {
299 t.Errorf("%s: got error %q; want %q", s.TokenText(), msg, want)
300 }
301 }
302 checkTokErr(t, s, 1, Float, "1.5e")
303 checkTokErr(t, s, 1, Float, "1.5E")
304 checkTokErr(t, s, 1, Float, "1e+")
305 checkTokErr(t, s, 1, Float, "1e-")
306 checkTok(t, s, 1, s.Scan(), Float, "1.5")
307 checkTok(t, s, 1, s.Scan(), Ident, "z")
308 checkTok(t, s, 1, s.Scan(), EOF, "")
309 if s.ErrorCount != 4 {
310 t.Errorf("%d errors, want 4", s.ErrorCount)
311 }
312 }
313
314 func TestPosition(t *testing.T) {
315 src := makeSource("\t\t\t\t%s\n")
316 s := new(Scanner).Init(src)
317 s.Mode = GoTokens &^ SkipComments
318 s.Scan()
319 pos := Position{"", 4, 1, 5}
320 for _, k := range tokenList {
321 if s.Offset != pos.Offset {
322 t.Errorf("offset = %d, want %d for %q", s.Offset, pos.Offset, k.text)
323 }
324 if s.Line != pos.Line {
325 t.Errorf("line = %d, want %d for %q", s.Line, pos.Line, k.text)
326 }
327 if s.Column != pos.Column {
328 t.Errorf("column = %d, want %d for %q", s.Column, pos.Column, k.text)
329 }
330 pos.Offset += 4 + len(k.text) + 1
331 pos.Line += countNewlines(k.text) + 1
332 s.Scan()
333 }
334
335 if s.ErrorCount != 0 {
336 t.Errorf("%d errors", s.ErrorCount)
337 }
338 }
339
340 func TestScanZeroMode(t *testing.T) {
341 src := makeSource("%s\n")
342 str := src.String()
343 s := new(Scanner).Init(src)
344 s.Mode = 0
345 s.Whitespace = 0
346 tok := s.Scan()
347 for i, ch := range str {
348 if tok != ch {
349 t.Fatalf("%d. tok = %s, want %s", i, TokenString(tok), TokenString(ch))
350 }
351 tok = s.Scan()
352 }
353 if tok != EOF {
354 t.Fatalf("tok = %s, want EOF", TokenString(tok))
355 }
356 if s.ErrorCount != 0 {
357 t.Errorf("%d errors", s.ErrorCount)
358 }
359 }
360
361 func testScanSelectedMode(t *testing.T, mode uint, class rune) {
362 src := makeSource("%s\n")
363 s := new(Scanner).Init(src)
364 s.Mode = mode
365 tok := s.Scan()
366 for tok != EOF {
367 if tok < 0 && tok != class {
368 t.Fatalf("tok = %s, want %s", TokenString(tok), TokenString(class))
369 }
370 tok = s.Scan()
371 }
372 if s.ErrorCount != 0 {
373 t.Errorf("%d errors", s.ErrorCount)
374 }
375 }
376
377 func TestScanSelectedMask(t *testing.T) {
378 testScanSelectedMode(t, 0, 0)
379 testScanSelectedMode(t, ScanIdents, Ident)
380
381
382
383 testScanSelectedMode(t, ScanChars, Char)
384 testScanSelectedMode(t, ScanStrings, String)
385 testScanSelectedMode(t, SkipComments, 0)
386 testScanSelectedMode(t, ScanComments, Comment)
387 }
388
389 func TestScanCustomIdent(t *testing.T) {
390 const src = "faab12345 a12b123 a12 3b"
391 s := new(Scanner).Init(strings.NewReader(src))
392
393
394
395 s.IsIdentRune = func(ch rune, i int) bool {
396 return i == 0 && (ch == 'a' || ch == 'b') || 0 < i && i < 4 && '0' <= ch && ch <= '3'
397 }
398 checkTok(t, s, 1, s.Scan(), 'f', "f")
399 checkTok(t, s, 1, s.Scan(), Ident, "a")
400 checkTok(t, s, 1, s.Scan(), Ident, "a")
401 checkTok(t, s, 1, s.Scan(), Ident, "b123")
402 checkTok(t, s, 1, s.Scan(), Int, "45")
403 checkTok(t, s, 1, s.Scan(), Ident, "a12")
404 checkTok(t, s, 1, s.Scan(), Ident, "b123")
405 checkTok(t, s, 1, s.Scan(), Ident, "a12")
406 checkTok(t, s, 1, s.Scan(), Int, "3")
407 checkTok(t, s, 1, s.Scan(), Ident, "b")
408 checkTok(t, s, 1, s.Scan(), EOF, "")
409 }
410
411 func TestScanNext(t *testing.T) {
412 const BOM = '\uFEFF'
413 BOMs := string(BOM)
414 s := new(Scanner).Init(strings.NewReader(BOMs + "if a == bcd /* com" + BOMs + "ment */ {\n\ta += c\n}" + BOMs + "// line comment ending in eof"))
415 checkTok(t, s, 1, s.Scan(), Ident, "if")
416 checkTok(t, s, 1, s.Scan(), Ident, "a")
417 checkTok(t, s, 1, s.Scan(), '=', "=")
418 checkTok(t, s, 0, s.Next(), '=', "")
419 checkTok(t, s, 0, s.Next(), ' ', "")
420 checkTok(t, s, 0, s.Next(), 'b', "")
421 checkTok(t, s, 1, s.Scan(), Ident, "cd")
422 checkTok(t, s, 1, s.Scan(), '{', "{")
423 checkTok(t, s, 2, s.Scan(), Ident, "a")
424 checkTok(t, s, 2, s.Scan(), '+', "+")
425 checkTok(t, s, 0, s.Next(), '=', "")
426 checkTok(t, s, 2, s.Scan(), Ident, "c")
427 checkTok(t, s, 3, s.Scan(), '}', "}")
428 checkTok(t, s, 3, s.Scan(), BOM, BOMs)
429 checkTok(t, s, 3, s.Scan(), -1, "")
430 if s.ErrorCount != 0 {
431 t.Errorf("%d errors", s.ErrorCount)
432 }
433 }
434
435 func TestScanWhitespace(t *testing.T) {
436 var buf bytes.Buffer
437 var ws uint64
438
439 for ch := byte(1); ch < ' '; ch++ {
440 buf.WriteByte(ch)
441 ws |= 1 << ch
442 }
443 const orig = 'x'
444 buf.WriteByte(orig)
445
446 s := new(Scanner).Init(&buf)
447 s.Mode = 0
448 s.Whitespace = ws
449 tok := s.Scan()
450 if tok != orig {
451 t.Errorf("tok = %s, want %s", TokenString(tok), TokenString(orig))
452 }
453 }
454
455 func testError(t *testing.T, src, pos, msg string, tok rune) {
456 s := new(Scanner).Init(strings.NewReader(src))
457 errorCalled := false
458 s.Error = func(s *Scanner, m string) {
459 if !errorCalled {
460
461 if p := s.Pos().String(); p != pos {
462 t.Errorf("pos = %q, want %q for %q", p, pos, src)
463 }
464 if m != msg {
465 t.Errorf("msg = %q, want %q for %q", m, msg, src)
466 }
467 errorCalled = true
468 }
469 }
470 tk := s.Scan()
471 if tk != tok {
472 t.Errorf("tok = %s, want %s for %q", TokenString(tk), TokenString(tok), src)
473 }
474 if !errorCalled {
475 t.Errorf("error handler not called for %q", src)
476 }
477 if s.ErrorCount == 0 {
478 t.Errorf("count = %d, want > 0 for %q", s.ErrorCount, src)
479 }
480 }
481
482 func TestError(t *testing.T) {
483 testError(t, "\x00", "<input>:1:1", "invalid character NUL", 0)
484 testError(t, "\x80", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError)
485 testError(t, "\xff", "<input>:1:1", "invalid UTF-8 encoding", utf8.RuneError)
486
487 testError(t, "a\x00", "<input>:1:2", "invalid character NUL", Ident)
488 testError(t, "ab\x80", "<input>:1:3", "invalid UTF-8 encoding", Ident)
489 testError(t, "abc\xff", "<input>:1:4", "invalid UTF-8 encoding", Ident)
490
491 testError(t, `"a`+"\x00", "<input>:1:3", "invalid character NUL", String)
492 testError(t, `"ab`+"\x80", "<input>:1:4", "invalid UTF-8 encoding", String)
493 testError(t, `"abc`+"\xff", "<input>:1:5", "invalid UTF-8 encoding", String)
494
495 testError(t, "`a"+"\x00", "<input>:1:3", "invalid character NUL", RawString)
496 testError(t, "`ab"+"\x80", "<input>:1:4", "invalid UTF-8 encoding", RawString)
497 testError(t, "`abc"+"\xff", "<input>:1:5", "invalid UTF-8 encoding", RawString)
498
499 testError(t, `'\"'`, "<input>:1:3", "invalid char escape", Char)
500 testError(t, `"\'"`, "<input>:1:3", "invalid char escape", String)
501
502 testError(t, `01238`, "<input>:1:6", "invalid digit '8' in octal literal", Int)
503 testError(t, `01238123`, "<input>:1:9", "invalid digit '8' in octal literal", Int)
504 testError(t, `0x`, "<input>:1:3", "hexadecimal literal has no digits", Int)
505 testError(t, `0xg`, "<input>:1:3", "hexadecimal literal has no digits", Int)
506 testError(t, `'aa'`, "<input>:1:4", "invalid char literal", Char)
507 testError(t, `1.5e`, "<input>:1:5", "exponent has no digits", Float)
508 testError(t, `1.5E`, "<input>:1:5", "exponent has no digits", Float)
509 testError(t, `1.5e+`, "<input>:1:6", "exponent has no digits", Float)
510 testError(t, `1.5e-`, "<input>:1:6", "exponent has no digits", Float)
511
512 testError(t, `'`, "<input>:1:2", "literal not terminated", Char)
513 testError(t, `'`+"\n", "<input>:1:2", "literal not terminated", Char)
514 testError(t, `"abc`, "<input>:1:5", "literal not terminated", String)
515 testError(t, `"abc`+"\n", "<input>:1:5", "literal not terminated", String)
516 testError(t, "`abc\n", "<input>:2:1", "literal not terminated", RawString)
517 testError(t, `/*/`, "<input>:1:4", "comment not terminated", EOF)
518 }
519
520
// errReader is a reader whose Read always fails with io.ErrNoProgress and
// never produces data.
type errReader struct{}

// Read ignores its buffer and returns (0, io.ErrNoProgress).
func (errReader) Read([]byte) (n int, err error) {
	err = io.ErrNoProgress
	return n, err
}
526
527 func TestIOError(t *testing.T) {
528 s := new(Scanner).Init(errReader{})
529 errorCalled := false
530 s.Error = func(s *Scanner, msg string) {
531 if !errorCalled {
532 if want := io.ErrNoProgress.Error(); msg != want {
533 t.Errorf("msg = %q, want %q", msg, want)
534 }
535 errorCalled = true
536 }
537 }
538 tok := s.Scan()
539 if tok != EOF {
540 t.Errorf("tok = %s, want EOF", TokenString(tok))
541 }
542 if !errorCalled {
543 t.Errorf("error handler not called")
544 }
545 }
546
547 func checkPos(t *testing.T, got, want Position) {
548 if got.Offset != want.Offset || got.Line != want.Line || got.Column != want.Column {
549 t.Errorf("got offset, line, column = %d, %d, %d; want %d, %d, %d",
550 got.Offset, got.Line, got.Column, want.Offset, want.Line, want.Column)
551 }
552 }
553
554 func checkNextPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
555 if ch := s.Next(); ch != char {
556 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
557 }
558 want := Position{Offset: offset, Line: line, Column: column}
559 checkPos(t, s.Pos(), want)
560 }
561
562 func checkScanPos(t *testing.T, s *Scanner, offset, line, column int, char rune) {
563 want := Position{Offset: offset, Line: line, Column: column}
564 checkPos(t, s.Pos(), want)
565 if ch := s.Scan(); ch != char {
566 t.Errorf("ch = %s, want %s", TokenString(ch), TokenString(char))
567 if string(ch) != s.TokenText() {
568 t.Errorf("tok = %q, want %q", s.TokenText(), string(ch))
569 }
570 }
571 checkPos(t, s.Position, want)
572 }
573
574 func TestPos(t *testing.T) {
575
576 s := new(Scanner).Init(strings.NewReader(""))
577 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
578 s.Peek()
579 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
580
581
582 s = new(Scanner).Init(strings.NewReader("\n"))
583 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
584 checkNextPos(t, s, 1, 2, 1, '\n')
585
586 for i := 10; i > 0; i-- {
587 checkScanPos(t, s, 1, 2, 1, EOF)
588 }
589 if s.ErrorCount != 0 {
590 t.Errorf("%d errors", s.ErrorCount)
591 }
592
593
594 s = new(Scanner).Init(strings.NewReader("本"))
595 checkPos(t, s.Pos(), Position{Offset: 0, Line: 1, Column: 1})
596 checkNextPos(t, s, 3, 1, 2, '本')
597
598 for i := 10; i > 0; i-- {
599 checkScanPos(t, s, 3, 1, 2, EOF)
600 }
601 if s.ErrorCount != 0 {
602 t.Errorf("%d errors", s.ErrorCount)
603 }
604
605
606 s = new(Scanner).Init(strings.NewReader(" foo६४ \n\n本語\n"))
607 checkNextPos(t, s, 1, 1, 2, ' ')
608 s.Peek()
609 checkNextPos(t, s, 2, 1, 3, ' ')
610 checkNextPos(t, s, 3, 1, 4, 'f')
611 checkNextPos(t, s, 4, 1, 5, 'o')
612 checkNextPos(t, s, 5, 1, 6, 'o')
613 checkNextPos(t, s, 8, 1, 7, '६')
614 checkNextPos(t, s, 11, 1, 8, '४')
615 checkNextPos(t, s, 12, 1, 9, ' ')
616 checkNextPos(t, s, 13, 1, 10, ' ')
617 checkNextPos(t, s, 14, 2, 1, '\n')
618 checkNextPos(t, s, 15, 3, 1, '\n')
619 checkNextPos(t, s, 18, 3, 2, '本')
620 checkNextPos(t, s, 21, 3, 3, '語')
621 checkNextPos(t, s, 22, 4, 1, '\n')
622
623 for i := 10; i > 0; i-- {
624 checkScanPos(t, s, 22, 4, 1, EOF)
625 }
626 if s.ErrorCount != 0 {
627 t.Errorf("%d errors", s.ErrorCount)
628 }
629
630
631 s = new(Scanner).Init(strings.NewReader("abc\n本語\n\nx"))
632 s.Mode = 0
633 s.Whitespace = 0
634 checkScanPos(t, s, 0, 1, 1, 'a')
635 s.Peek()
636 checkScanPos(t, s, 1, 1, 2, 'b')
637 checkScanPos(t, s, 2, 1, 3, 'c')
638 checkScanPos(t, s, 3, 1, 4, '\n')
639 checkScanPos(t, s, 4, 2, 1, '本')
640 checkScanPos(t, s, 7, 2, 2, '語')
641 checkScanPos(t, s, 10, 2, 3, '\n')
642 checkScanPos(t, s, 11, 3, 1, '\n')
643 checkScanPos(t, s, 12, 4, 1, 'x')
644
645 for i := 10; i > 0; i-- {
646 checkScanPos(t, s, 13, 4, 2, EOF)
647 }
648 if s.ErrorCount != 0 {
649 t.Errorf("%d errors", s.ErrorCount)
650 }
651 }
652
// countReader is a reader that produces no data; its integer value is the
// number of times Read has been called.
type countReader int

// Read increments the call counter and returns (0, io.EOF).
func (r *countReader) Read([]byte) (int, error) {
	*r = *r + 1
	return 0, io.EOF
}
659
660 func TestNextEOFHandling(t *testing.T) {
661 var r countReader
662
663
664 s := new(Scanner).Init(&r)
665
666 tok := s.Next()
667 if tok != EOF {
668 t.Error("1) EOF not reported")
669 }
670
671 tok = s.Peek()
672 if tok != EOF {
673 t.Error("2) EOF not reported")
674 }
675
676 if r != 1 {
677 t.Errorf("scanner called Read %d times, not once", r)
678 }
679 }
680
681 func TestScanEOFHandling(t *testing.T) {
682 var r countReader
683
684
685 s := new(Scanner).Init(&r)
686
687 tok := s.Scan()
688 if tok != EOF {
689 t.Error("1) EOF not reported")
690 }
691
692 tok = s.Peek()
693 if tok != EOF {
694 t.Error("2) EOF not reported")
695 }
696
697 if r != 1 {
698 t.Errorf("scanner called Read %d times, not once", r)
699 }
700 }
701
702 func TestIssue29723(t *testing.T) {
703 s := new(Scanner).Init(strings.NewReader(`x "`))
704 s.Error = func(s *Scanner, _ string) {
705 got := s.TokenText()
706 const want = `"`
707 if got != want {
708 t.Errorf("got %q; want %q", got, want)
709 }
710 }
711 for r := s.Scan(); r != EOF; r = s.Scan() {
712 }
713 }
714
// TestNumbers scans number literals, valid and malformed. Each table row
// holds the token expected for the first scan (tok), the source text
// (src), the space-separated token texts the source should split into
// (tokens), and the first error message expected while scanning the first
// token (err; empty if none).
func TestNumbers(t *testing.T) {
	for _, test := range []struct {
		tok              rune
		src, tokens, err string
	}{
		// binary integers
		{Int, "0b0", "0b0", ""},
		{Int, "0b1010", "0b1010", ""},
		{Int, "0B1110", "0B1110", ""},

		{Int, "0b", "0b", "binary literal has no digits"},
		{Int, "0b0190", "0b0190", "invalid digit '9' in binary literal"},
		{Int, "0b01a0", "0b01 a0", ""},

		// binary literals with a radix point or exponent; every row
		// expects an error
		{Float, "0b.", "0b.", "invalid radix point in binary literal"},
		{Float, "0b.1", "0b.1", "invalid radix point in binary literal"},
		{Float, "0b1.0", "0b1.0", "invalid radix point in binary literal"},
		{Float, "0b1e10", "0b1e10", "'e' exponent requires decimal mantissa"},
		{Float, "0b1P-1", "0b1P-1", "'P' exponent requires hexadecimal mantissa"},

		// octal integers with the 0o/0O prefix
		{Int, "0o0", "0o0", ""},
		{Int, "0o1234", "0o1234", ""},
		{Int, "0O1234", "0O1234", ""},

		{Int, "0o", "0o", "octal literal has no digits"},
		{Int, "0o8123", "0o8123", "invalid digit '8' in octal literal"},
		{Int, "0o1293", "0o1293", "invalid digit '9' in octal literal"},
		{Int, "0o12a3", "0o12 a3", ""},

		// 0o literals with a radix point or exponent; every row expects
		// an error
		{Float, "0o.", "0o.", "invalid radix point in octal literal"},
		{Float, "0o.2", "0o.2", "invalid radix point in octal literal"},
		{Float, "0o1.2", "0o1.2", "invalid radix point in octal literal"},
		{Float, "0o1E+2", "0o1E+2", "'E' exponent requires decimal mantissa"},
		{Float, "0o1p10", "0o1p10", "'p' exponent requires hexadecimal mantissa"},

		// octal integers written with a bare leading 0
		{Int, "0", "0", ""},
		{Int, "0123", "0123", ""},

		{Int, "08123", "08123", "invalid digit '8' in octal literal"},
		{Int, "01293", "01293", "invalid digit '9' in octal literal"},
		{Int, "0F.", "0 F .", ""},
		{Int, "0123F.", "0123 F .", ""},
		{Int, "0123456x", "0123456 x", ""},

		// decimal integers
		{Int, "1", "1", ""},
		{Int, "1234", "1234", ""},

		{Int, "1f", "1 f", ""},

		// decimal floats
		{Float, "0.", "0.", ""},
		{Float, "123.", "123.", ""},
		{Float, "0123.", "0123.", ""},

		{Float, ".0", ".0", ""},
		{Float, ".123", ".123", ""},
		{Float, ".0123", ".0123", ""},

		{Float, "0.0", "0.0", ""},
		{Float, "123.123", "123.123", ""},
		{Float, "0123.0123", "0123.0123", ""},

		{Float, "0e0", "0e0", ""},
		{Float, "123e+0", "123e+0", ""},
		{Float, "0123E-1", "0123E-1", ""},

		{Float, "0.e+1", "0.e+1", ""},
		{Float, "123.E-10", "123.E-10", ""},
		{Float, "0123.e123", "0123.e123", ""},

		{Float, ".0e-1", ".0e-1", ""},
		{Float, ".123E+10", ".123E+10", ""},
		{Float, ".0123E123", ".0123E123", ""},

		{Float, "0.0e1", "0.0e1", ""},
		{Float, "123.123E-10", "123.123E-10", ""},
		{Float, "0123.0123e+456", "0123.0123e+456", ""},

		{Float, "0e", "0e", "exponent has no digits"},
		{Float, "0E+", "0E+", "exponent has no digits"},
		{Float, "1e+f", "1e+ f", "exponent has no digits"},
		{Float, "0p0", "0p0", "'p' exponent requires hexadecimal mantissa"},
		{Float, "1.0P-1", "1.0P-1", "'P' exponent requires hexadecimal mantissa"},

		// hexadecimal integers
		{Int, "0x0", "0x0", ""},
		{Int, "0x1234", "0x1234", ""},
		{Int, "0xcafef00d", "0xcafef00d", ""},
		{Int, "0XCAFEF00D", "0XCAFEF00D", ""},

		{Int, "0x", "0x", "hexadecimal literal has no digits"},
		{Int, "0x1g", "0x1 g", ""},

		// hexadecimal floats
		{Float, "0x0p0", "0x0p0", ""},
		{Float, "0x12efp-123", "0x12efp-123", ""},
		{Float, "0xABCD.p+0", "0xABCD.p+0", ""},
		{Float, "0x.0189P-0", "0x.0189P-0", ""},
		{Float, "0x1.ffffp+1023", "0x1.ffffp+1023", ""},

		{Float, "0x.", "0x.", "hexadecimal literal has no digits"},
		{Float, "0x0.", "0x0.", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x.0", "0x.0", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x1.1", "0x1.1", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x1.1e0", "0x1.1e0", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x1.2gp1a", "0x1.2 gp1a", "hexadecimal mantissa requires a 'p' exponent"},
		{Float, "0x0p", "0x0p", "exponent has no digits"},
		{Float, "0xeP-", "0xeP-", "exponent has no digits"},
		{Float, "0x1234PAB", "0x1234P AB", "exponent has no digits"},
		{Float, "0x1.2p1a", "0x1.2p1 a", ""},

		// '_' digit separators
		{Int, "0b_1000_0001", "0b_1000_0001", ""},
		{Int, "0o_600", "0o_600", ""},
		{Int, "0_466", "0_466", ""},
		{Int, "1_000", "1_000", ""},
		{Float, "1_000.000_1", "1_000.000_1", ""},
		{Int, "0x_f00d", "0x_f00d", ""},
		{Float, "0x_f00d.0p1_2", "0x_f00d.0p1_2", ""},

		{Int, "0b__1000", "0b__1000", "'_' must separate successive digits"},
		{Int, "0o60___0", "0o60___0", "'_' must separate successive digits"},
		{Int, "0466_", "0466_", "'_' must separate successive digits"},
		{Float, "1_.", "1_.", "'_' must separate successive digits"},
		{Float, "0._1", "0._1", "'_' must separate successive digits"},
		{Float, "2.7_e0", "2.7_e0", "'_' must separate successive digits"},
		{Int, "0x___0", "0x___0", "'_' must separate successive digits"},
		{Float, "0x1.0_p0", "0x1.0_p0", "'_' must separate successive digits"},
	} {
		s := new(Scanner).Init(strings.NewReader(test.src))
		// err records only the first message reported since it was last
		// reset to "".
		var err string
		s.Error = func(s *Scanner, msg string) {
			if err == "" {
				err = msg
			}
		}

		for i, want := range strings.Split(test.tokens, " ") {
			// Reset before each scan so err reflects this token only.
			err = ""
			tok := s.Scan()
			lit := s.TokenText()
			// The token kind and error message are checked for the first
			// token only; the literal text is checked for every token.
			if i == 0 {
				if tok != test.tok {
					t.Errorf("%q: got token %s; want %s", test.src, TokenString(tok), TokenString(test.tok))
				}
				if err != test.err {
					t.Errorf("%q: got error %q; want %q", test.src, err, test.err)
				}
			}
			if lit != want {
				t.Errorf("%q: got literal %q (%s); want %s", test.src, lit, TokenString(tok), want)
			}
		}

		// The expected tokens must account for the entire source.
		if tok := s.Scan(); tok != EOF {
			t.Errorf("%q: got %s; want EOF", test.src, TokenString(tok))
		}
	}
}
880
881 func TestIssue30320(t *testing.T) {
882 for _, test := range []struct {
883 in, want string
884 mode uint
885 }{
886 {"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts},
887 {"foo0/12/0/5.67", "0 12 0 5 67", ScanInts},
888 {"xxx1e0yyy", "1 0", ScanInts},
889 {"1_2", "1_2", ScanInts},
890 {"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts},
891 {"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats},
892 } {
893 got := extractInts(test.in, test.mode)
894 if got != test.want {
895 t.Errorf("%q: got %q; want %q", test.in, got, test.want)
896 }
897 }
898 }
899
900 func extractInts(t string, mode uint) (res string) {
901 var s Scanner
902 s.Init(strings.NewReader(t))
903 s.Mode = mode
904 for {
905 switch tok := s.Scan(); tok {
906 case Int, Float:
907 if len(res) > 0 {
908 res += " "
909 }
910 res += s.TokenText()
911 case EOF:
912 return
913 }
914 }
915 }
916
917 func TestIssue50909(t *testing.T) {
918 var s Scanner
919 s.Init(strings.NewReader("hello \n\nworld\n!\n"))
920 s.IsIdentRune = func(ch rune, _ int) bool { return ch != '\n' }
921
922 r := ""
923 n := 0
924 for s.Scan() != EOF && n < 10 {
925 r += s.TokenText()
926 n++
927 }
928
929 const R = "hello world!"
930 const N = 3
931 if r != R || n != N {
932 t.Errorf("got %q (n = %d); want %q (n = %d)", r, n, R, N)
933 }
934 }
935
View as plain text