1
2
3
4
5 package xml
6
7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "reflect"
12 "strings"
13 "testing"
14 "unicode/utf8"
15 )
16
17 type toks struct {
18 earlyEOF bool
19 t []Token
20 }
21
22 func (t *toks) Token() (Token, error) {
23 if len(t.t) == 0 {
24 return nil, io.EOF
25 }
26 var tok Token
27 tok, t.t = t.t[0], t.t[1:]
28 if t.earlyEOF && len(t.t) == 0 {
29 return tok, io.EOF
30 }
31 return tok, nil
32 }
33
34 func TestDecodeEOF(t *testing.T) {
35 start := StartElement{Name: Name{Local: "test"}}
36 tests := []struct {
37 name string
38 tokens []Token
39 ok bool
40 }{
41 {
42 name: "OK",
43 tokens: []Token{
44 start,
45 start.End(),
46 },
47 ok: true,
48 },
49 {
50 name: "Malformed",
51 tokens: []Token{
52 start,
53 StartElement{Name: Name{Local: "bad"}},
54 start.End(),
55 },
56 ok: false,
57 },
58 }
59 for _, tc := range tests {
60 for _, eof := range []bool{true, false} {
61 name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
62 t.Run(name, func(t *testing.T) {
63 d := NewTokenDecoder(&toks{
64 earlyEOF: eof,
65 t: tc.tokens,
66 })
67 err := d.Decode(&struct {
68 XMLName Name `xml:"test"`
69 }{})
70 if tc.ok && err != nil {
71 t.Fatalf("d.Decode: expected nil error, got %v", err)
72 }
73 if _, ok := err.(*SyntaxError); !tc.ok && !ok {
74 t.Errorf("d.Decode: expected syntax error, got %v", err)
75 }
76 })
77 }
78 }
79 }
80
81 type toksNil struct {
82 returnEOF bool
83 t []Token
84 }
85
86 func (t *toksNil) Token() (Token, error) {
87 if len(t.t) == 0 {
88 if !t.returnEOF {
89
90
91 t.returnEOF = true
92 return nil, nil
93 }
94 return nil, io.EOF
95 }
96 var tok Token
97 tok, t.t = t.t[0], t.t[1:]
98 return tok, nil
99 }
100
101 func TestDecodeNilToken(t *testing.T) {
102 for _, strict := range []bool{true, false} {
103 name := fmt.Sprintf("Strict=%v", strict)
104 t.Run(name, func(t *testing.T) {
105 start := StartElement{Name: Name{Local: "test"}}
106 bad := StartElement{Name: Name{Local: "bad"}}
107 d := NewTokenDecoder(&toksNil{
108
109 t: []Token{start, bad, start.End()},
110 })
111 d.Strict = strict
112 err := d.Decode(&struct {
113 XMLName Name `xml:"test"`
114 }{})
115 if _, ok := err.(*SyntaxError); !ok {
116 t.Errorf("d.Decode: expected syntax error, got %v", err)
117 }
118 })
119 }
120 }
121
// testInput is the sample document tokenized by TestRawToken and TestToken.
// Its <query> element references the custom entities defined in testEntity.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + ` >
<hello lang="en">World <>'" 白鵬翔</hello>
<query>&何; &is-it;</query>
<goodbye />
<outer foo:attr="value" xmlns:tag="ns4">
<inner/>
</outer>
<tag:name>
<![CDATA[Some text here.]]>
</tag:name>
</body><!-- missing final newline -->`

// testEntity maps the custom entity names used in testInput to their
// replacement text; the tests install it as the decoder's Entity map.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
140
// rawTokens is the token sequence TestRawToken expects RawToken to produce
// for testInput. Names keep their literal prefixes here (for example
// Name{"tag", "name"} and Name{"foo", "attr"}).
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n "),
	EndElement{Name{"", "outer"}},
	CharData("\n "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n "),
	CharData("Some text here."),
	CharData("\n "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
177
// cookedTokens is the token sequence TestToken expects Token to produce for
// testInput. Compared with rawTokens, element and prefixed-attribute names
// carry namespace values ("ns1", "ns2", "ns3") in Name.Space instead of the
// literal prefixes.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n "),
	CharData("Some text here."),
	CharData("\n "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
214
// testInputAltEncoding is a document that declares the made-up encoding
// "x-testing-uppercase"; it is used by the TestRawTokenAltEncoding* tests.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`

// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding expects
// for testInputAltEncoding once the content after the XML declaration has
// been lower-cased by that test's CharsetReader.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
227
// xmlInput lists inputs for TestSyntax: for each entry, reading tokens until
// the first error must end in a *SyntaxError.
//
// NOTE(review): the blank lines appear to group the entries into truncated
// inputs, unexpected characters, bad names/attributes/entities, and
// mismatched or malformed end tags — confirm.
var xmlInput = []string{

	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",

	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t> c;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",

	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
274
275 func TestRawToken(t *testing.T) {
276 d := NewDecoder(strings.NewReader(testInput))
277 d.Entity = testEntity
278 testRawToken(t, d, testInput, rawTokens)
279 }
280
// nonStrictInput is the document tokenized by TestNonStrictRawToken with
// Strict disabled.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>{</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag><-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`

// nonStrictTokens is the token sequence TestNonStrictRawToken expects for
// nonStrictInput: each <tag> body is expected back as the CharData shown.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("{"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("<-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
327
328 func TestNonStrictRawToken(t *testing.T) {
329 d := NewDecoder(strings.NewReader(nonStrictInput))
330 d.Strict = false
331 testRawToken(t, d, nonStrictInput, nonStrictTokens)
332 }
333
// downCaser wraps an io.ByteReader and lower-cases ASCII letters as they are
// read. Only ReadByte is expected to be used; Read fails the test.
type downCaser struct {
	t *testing.T
	r io.ByteReader
}

// ReadByte reads one byte from the wrapped reader, mapping 'A'..'Z' to
// 'a'..'z' and passing every other byte (and the error) through unchanged.
func (d *downCaser) ReadByte() (c byte, err error) {
	c, err = d.r.ReadByte()
	if 'A' <= c && c <= 'Z' {
		c = c - 'A' + 'a'
	}
	return c, err
}

// Read exists so that downCaser satisfies io.Reader; calling it is a test
// failure.
func (d *downCaser) Read(p []byte) (int, error) {
	d.t.Fatalf("unexpected Read call on downCaser reader")
	panic("unreachable")
}
351
352 func TestRawTokenAltEncoding(t *testing.T) {
353 d := NewDecoder(strings.NewReader(testInputAltEncoding))
354 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
355 if charset != "x-testing-uppercase" {
356 t.Fatalf("unexpected charset %q", charset)
357 }
358 return &downCaser{t, input.(io.ByteReader)}, nil
359 }
360 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
361 }
362
363 func TestRawTokenAltEncodingNoConverter(t *testing.T) {
364 d := NewDecoder(strings.NewReader(testInputAltEncoding))
365 token, err := d.RawToken()
366 if token == nil {
367 t.Fatalf("expected a token on first RawToken call")
368 }
369 if err != nil {
370 t.Fatal(err)
371 }
372 token, err = d.RawToken()
373 if token != nil {
374 t.Errorf("expected a nil token; got %#v", token)
375 }
376 if err == nil {
377 t.Fatalf("expected an error on second RawToken call")
378 }
379 const encoding = "x-testing-uppercase"
380 if !strings.Contains(err.Error(), encoding) {
381 t.Errorf("expected error to contain %q; got error: %v",
382 encoding, err)
383 }
384 }
385
386 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
387 lastEnd := int64(0)
388 for i, want := range rawTokens {
389 start := d.InputOffset()
390 have, err := d.RawToken()
391 end := d.InputOffset()
392 if err != nil {
393 t.Fatalf("token %d: unexpected error: %s", i, err)
394 }
395 if !reflect.DeepEqual(have, want) {
396 var shave, swant string
397 if _, ok := have.(CharData); ok {
398 shave = fmt.Sprintf("CharData(%q)", have)
399 } else {
400 shave = fmt.Sprintf("%#v", have)
401 }
402 if _, ok := want.(CharData); ok {
403 swant = fmt.Sprintf("CharData(%q)", want)
404 } else {
405 swant = fmt.Sprintf("%#v", want)
406 }
407 t.Errorf("token %d = %s, want %s", i, shave, swant)
408 }
409
410
411 switch {
412 case start < lastEnd:
413 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
414 case start >= end:
415
416 if start == end && end == lastEnd {
417 break
418 }
419 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
420 case end > int64(len(raw)):
421 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
422 default:
423 text := raw[start:end]
424 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
425 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
426 }
427 }
428 lastEnd = end
429 }
430 }
431
432
433
434
435
// nestedDirectivesInput holds DOCTYPE directives that each contain a nested
// <!ENTITY ...> declaration, several with '<' or '>' inside a quoted value.
// It is the input for TestNestedDirectives.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`

// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// for nestedDirectivesInput: one Directive per line, separated by newline
// CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
463
464 func TestNestedDirectives(t *testing.T) {
465 d := NewDecoder(strings.NewReader(nestedDirectivesInput))
466
467 for i, want := range nestedDirectivesTokens {
468 have, err := d.Token()
469 if err != nil {
470 t.Fatalf("token %d: unexpected error: %s", i, err)
471 }
472 if !reflect.DeepEqual(have, want) {
473 t.Errorf("token %d = %#v want %#v", i, have, want)
474 }
475 }
476 }
477
478 func TestToken(t *testing.T) {
479 d := NewDecoder(strings.NewReader(testInput))
480 d.Entity = testEntity
481
482 for i, want := range cookedTokens {
483 have, err := d.Token()
484 if err != nil {
485 t.Fatalf("token %d: unexpected error: %s", i, err)
486 }
487 if !reflect.DeepEqual(have, want) {
488 t.Errorf("token %d = %#v want %#v", i, have, want)
489 }
490 }
491 }
492
493 func TestSyntax(t *testing.T) {
494 for i := range xmlInput {
495 d := NewDecoder(strings.NewReader(xmlInput[i]))
496 var err error
497 for _, err = d.Token(); err == nil; _, err = d.Token() {
498 }
499 if _, ok := err.(*SyntaxError); !ok {
500 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
501 }
502 }
503 }
504
505 func TestInputLinePos(t *testing.T) {
506 testInput := `<root>
507 <?pi
508 ?> <elt
509 att
510 =
511 "val">
512 <![CDATA[
513 ]]><!--
514
515 --></elt>
516 </root>`
517 linePos := [][]int{
518 {1, 7},
519 {2, 1},
520 {3, 4},
521 {3, 6},
522 {6, 7},
523 {7, 1},
524 {8, 4},
525 {10, 4},
526 {10, 10},
527 {11, 1},
528 {11, 8},
529 }
530 dec := NewDecoder(strings.NewReader(testInput))
531 for _, want := range linePos {
532 if _, err := dec.Token(); err != nil {
533 t.Errorf("Unexpected error: %v", err)
534 continue
535 }
536
537 gotLine, gotCol := dec.InputPos()
538 if gotLine != want[0] || gotCol != want[1] {
539 t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1])
540 }
541 }
542 }
543
// allScalars has one field per scalar kind exercised by TestAllScalars:
// booleans, signed and unsigned integers of each width, floats, a string and
// a pointer to string.
type allScalars struct {
	True1  bool
	True2  bool
	False1 bool
	False2 bool
	Int    int
	Int8   int8
	Int16  int16
	Int32  int32
	Int64  int64
	// Uint is unsigned like the other UintN fields, so the plain unsigned
	// kind is exercised as well.
	Uint      uint
	Uint8     uint8
	Uint16    uint16
	Uint32    uint32
	Uint64    uint64
	Uintptr   uintptr
	Float32   float32
	Float64   float64
	String    string
	PtrString *string
}
565
// all is the value TestAllScalars expects after unmarshaling
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen backs all.PtrString.
var sixteen = "16"
589
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars. The <Float> element has no field of that name in allScalars.
const testScalarsInput = `<allscalars>
<True1>true</True1>
<True2>1</True2>
<False1>false</False1>
<False2>0</False2>
<Int>1</Int>
<Int8>-2</Int8>
<Int16>3</Int16>
<Int32>-4</Int32>
<Int64>5</Int64>
<Uint>6</Uint>
<Uint8>7</Uint8>
<Uint16>8</Uint16>
<Uint32>9</Uint32>
<Uint64>10</Uint64>
<Uintptr>11</Uintptr>
<Float>12.0</Float>
<Float32>13.0</Float32>
<Float64>14.0</Float64>
<String>15</String>
<PtrString>16</PtrString>
</allscalars>`
612
613 func TestAllScalars(t *testing.T) {
614 var a allScalars
615 err := Unmarshal([]byte(testScalarsInput), &a)
616
617 if err != nil {
618 t.Fatal(err)
619 }
620 if !reflect.DeepEqual(a, all) {
621 t.Errorf("have %+v want %+v", a, all)
622 }
623 }
624
// item is the unmarshal target used by TestIssue569.
type item struct {
	FieldA string
}
628
629 func TestIssue68387(t *testing.T) {
630 data := `<item b=']]>'/>`
631 dec := NewDecoder(strings.NewReader(data))
632 var tok1, tok2, tok3 Token
633 var err error
634 if tok1, err = dec.RawToken(); err != nil {
635 t.Fatalf("RawToken() failed: %v", err)
636 }
637 if tok2, err = dec.RawToken(); err != nil {
638 t.Fatalf("RawToken() failed: %v", err)
639 }
640 if tok3, err = dec.RawToken(); err != io.EOF || tok3 != nil {
641 t.Fatalf("Missed EOF")
642 }
643 s := StartElement{Name{"", "item"}, []Attr{Attr{Name{"","b"}, "]]>"}}}
644 if !reflect.DeepEqual(tok1.(StartElement), s) {
645 t.Error("Wrong start element")
646 }
647 e := EndElement{Name{"","item"}}
648 if tok2.(EndElement) != e {
649 t.Error("Wrong end element")
650 }
651 }
652
653 func TestIssue569(t *testing.T) {
654 data := `<item><FieldA>abcd</FieldA></item>`
655 var i item
656 err := Unmarshal([]byte(data), &i)
657
658 if err != nil || i.FieldA != "abcd" {
659 t.Fatal("Expecting abcd")
660 }
661 }
662
663 func TestUnquotedAttrs(t *testing.T) {
664 data := "<tag attr=azAZ09:-_\t>"
665 d := NewDecoder(strings.NewReader(data))
666 d.Strict = false
667 token, err := d.Token()
668 if _, ok := err.(*SyntaxError); ok {
669 t.Errorf("Unexpected error: %v", err)
670 }
671 if token.(StartElement).Name.Local != "tag" {
672 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
673 }
674 attr := token.(StartElement).Attr[0]
675 if attr.Value != "azAZ09:-_" {
676 t.Errorf("Unexpected attribute value: %v", attr.Value)
677 }
678 if attr.Name.Local != "attr" {
679 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
680 }
681 }
682
683 func TestValuelessAttrs(t *testing.T) {
684 tests := [][3]string{
685 {"<p nowrap>", "p", "nowrap"},
686 {"<p nowrap >", "p", "nowrap"},
687 {"<input checked/>", "input", "checked"},
688 {"<input checked />", "input", "checked"},
689 }
690 for _, test := range tests {
691 d := NewDecoder(strings.NewReader(test[0]))
692 d.Strict = false
693 token, err := d.Token()
694 if _, ok := err.(*SyntaxError); ok {
695 t.Errorf("Unexpected error: %v", err)
696 }
697 if token.(StartElement).Name.Local != test[1] {
698 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
699 }
700 attr := token.(StartElement).Attr[0]
701 if attr.Value != test[2] {
702 t.Errorf("Unexpected attribute value: %v", attr.Value)
703 }
704 if attr.Name.Local != test[2] {
705 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
706 }
707 }
708 }
709
710 func TestCopyTokenCharData(t *testing.T) {
711 data := []byte("same data")
712 var tok1 Token = CharData(data)
713 tok2 := CopyToken(tok1)
714 if !reflect.DeepEqual(tok1, tok2) {
715 t.Error("CopyToken(CharData) != CharData")
716 }
717 data[1] = 'o'
718 if reflect.DeepEqual(tok1, tok2) {
719 t.Error("CopyToken(CharData) uses same buffer.")
720 }
721 }
722
723 func TestCopyTokenStartElement(t *testing.T) {
724 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
725 var tok1 Token = elt
726 tok2 := CopyToken(tok1)
727 if tok1.(StartElement).Attr[0].Value != "en" {
728 t.Error("CopyToken overwrote Attr[0]")
729 }
730 if !reflect.DeepEqual(tok1, tok2) {
731 t.Error("CopyToken(StartElement) != StartElement")
732 }
733 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
734 if reflect.DeepEqual(tok1, tok2) {
735 t.Error("CopyToken(CharData) uses same buffer.")
736 }
737 }
738
739 func TestCopyTokenComment(t *testing.T) {
740 data := []byte("<!-- some comment -->")
741 var tok1 Token = Comment(data)
742 tok2 := CopyToken(tok1)
743 if !reflect.DeepEqual(tok1, tok2) {
744 t.Error("CopyToken(Comment) != Comment")
745 }
746 data[1] = 'o'
747 if reflect.DeepEqual(tok1, tok2) {
748 t.Error("CopyToken(Comment) uses same buffer.")
749 }
750 }
751
752 func TestSyntaxErrorLineNum(t *testing.T) {
753 testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
754 d := NewDecoder(strings.NewReader(testInput))
755 var err error
756 for _, err = d.Token(); err == nil; _, err = d.Token() {
757 }
758 synerr, ok := err.(*SyntaxError)
759 if !ok {
760 t.Error("Expected SyntaxError.")
761 }
762 if synerr.Line != 3 {
763 t.Error("SyntaxError didn't have correct line number.")
764 }
765 }
766
767 func TestTrailingRawToken(t *testing.T) {
768 input := `<FOO></FOO> `
769 d := NewDecoder(strings.NewReader(input))
770 var err error
771 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
772 }
773 if err != io.EOF {
774 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
775 }
776 }
777
778 func TestTrailingToken(t *testing.T) {
779 input := `<FOO></FOO> `
780 d := NewDecoder(strings.NewReader(input))
781 var err error
782 for _, err = d.Token(); err == nil; _, err = d.Token() {
783 }
784 if err != io.EOF {
785 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
786 }
787 }
788
789 func TestEntityInsideCDATA(t *testing.T) {
790 input := `<test><![CDATA[ &val=foo ]]></test>`
791 d := NewDecoder(strings.NewReader(input))
792 var err error
793 for _, err = d.Token(); err == nil; _, err = d.Token() {
794 }
795 if err != io.EOF {
796 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
797 }
798 }
799
// characterTests pairs an input document with the SyntaxError message
// (SyntaxError.Msg) that TestDisallowedCharacters expects when reading it.
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
814
815 func TestDisallowedCharacters(t *testing.T) {
816
817 for i, tt := range characterTests {
818 d := NewDecoder(strings.NewReader(tt.in))
819 var err error
820
821 for err == nil {
822 _, err = d.Token()
823 }
824 synerr, ok := err.(*SyntaxError)
825 if !ok {
826 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
827 }
828 if synerr.Msg != tt.err {
829 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
830 }
831 }
832 }
833
834 func TestIsInCharacterRange(t *testing.T) {
835 invalid := []rune{
836 utf8.MaxRune + 1,
837 0xD800,
838 0xDFFF,
839 -1,
840 }
841 for _, r := range invalid {
842 if isInCharacterRange(r) {
843 t.Errorf("rune %U considered valid", r)
844 }
845 }
846 }
847
// procInstTests pairs the body of a processing instruction with the values
// TestProcInstEncoding expects procInst to extract from it: expect[0] for
// "version" and expect[1] for "encoding".
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
	{`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version= encoding=`, [2]string{"", ""}},
	{`encoding="version=1.0"`, [2]string{"", "version=1.0"}},
	{``, [2]string{"", ""}},

	// In the two cases below one parameter's name appears, with its own
	// quoted value, inside the other parameter's quoted value.
	{`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}},
	{`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}},
}
865
866 func TestProcInstEncoding(t *testing.T) {
867 for _, test := range procInstTests {
868 if got := procInst("version", test.input); got != test.expect[0] {
869 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
870 }
871 if got := procInst("encoding", test.input); got != test.expect[1] {
872 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
873 }
874 }
875 }
876
877
878
879
// directivesWithCommentsInput holds DOCTYPE directives that contain comments
// and comment-like sequences. It is the input for TestDirectivesWithComments.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`

// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput; the
// expected Directive text does not include the comments' content.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
}
895
896 func TestDirectivesWithComments(t *testing.T) {
897 d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
898
899 for i, want := range directivesWithCommentsTokens {
900 have, err := d.Token()
901 if err != nil {
902 t.Fatalf("token %d: unexpected error: %s", i, err)
903 }
904 if !reflect.DeepEqual(have, want) {
905 t.Errorf("token %d = %#v want %#v", i, have, want)
906 }
907 }
908 }
909
910
// errWriter is a writer whose Write always fails.
type errWriter struct{}

// Write writes nothing and returns the error "unwritable".
func (errWriter) Write(p []byte) (n int, err error) {
	err = fmt.Errorf("unwritable")
	return 0, err
}
914
915 func TestEscapeTextIOErrors(t *testing.T) {
916 expectErr := "unwritable"
917 err := EscapeText(errWriter{}, []byte{'A'})
918
919 if err == nil || err.Error() != expectErr {
920 t.Errorf("have %v, want %v", err, expectErr)
921 }
922 }
923
924 func TestEscapeTextInvalidChar(t *testing.T) {
925 input := []byte("A \x00 terminated string.")
926 expected := "A \uFFFD terminated string."
927
928 buff := new(strings.Builder)
929 if err := EscapeText(buff, input); err != nil {
930 t.Fatalf("have %v, want nil", err)
931 }
932 text := buff.String()
933
934 if text != expected {
935 t.Errorf("have %v, want %v", text, expected)
936 }
937 }
938
939 func TestIssue5880(t *testing.T) {
940 type T []byte
941 data, err := Marshal(T{192, 168, 0, 1})
942 if err != nil {
943 t.Errorf("Marshal error: %v", err)
944 }
945 if !utf8.Valid(data) {
946 t.Errorf("Marshal generated invalid UTF-8: %x", data)
947 }
948 }
949
950 func TestIssue8535(t *testing.T) {
951
952 type ExampleConflict struct {
953 XMLName Name `xml:"example"`
954 Link string `xml:"link"`
955 AtomLink string `xml:"http://www.w3.org/2005/Atom link"`
956 }
957 testCase := `<example>
958 <title>Example</title>
959 <link>http://example.com/default</link> <!-- not assigned -->
960 <link>http://example.com/home</link> <!-- not assigned -->
961 <ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link>
962 </example>`
963
964 var dest ExampleConflict
965 d := NewDecoder(strings.NewReader(testCase))
966 if err := d.Decode(&dest); err != nil {
967 t.Fatal(err)
968 }
969 }
970
971 func TestEncodeXMLNS(t *testing.T) {
972 testCases := []struct {
973 f func() ([]byte, error)
974 want string
975 ok bool
976 }{
977 {encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
978 {encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true},
979 {encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
980 {encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false},
981 }
982
983 for i, tc := range testCases {
984 if b, err := tc.f(); err == nil {
985 if got, want := string(b), tc.want; got != want {
986 t.Errorf("%d: got %s, want %s \n", i, got, want)
987 }
988 } else {
989 t.Errorf("%d: marshal failed with %s", i, err)
990 }
991 }
992 }
993
994 func encodeXMLNS1() ([]byte, error) {
995
996 type T struct {
997 XMLName Name `xml:"Test"`
998 Ns string `xml:"xmlns,attr"`
999 Body string
1000 }
1001
1002 s := &T{Ns: "http://example.com/ns", Body: "hello world"}
1003 return Marshal(s)
1004 }
1005
1006 func encodeXMLNS2() ([]byte, error) {
1007
1008 type Test struct {
1009 Body string `xml:"http://example.com/ns body"`
1010 }
1011
1012 s := &Test{Body: "hello world"}
1013 return Marshal(s)
1014 }
1015
1016 func encodeXMLNS3() ([]byte, error) {
1017
1018 type Test struct {
1019 XMLName Name `xml:"http://example.com/ns Test"`
1020 Body string
1021 }
1022
1023
1024
1025 s := &Test{Body: "hello world"}
1026 return Marshal(s)
1027 }
1028
1029 func encodeXMLNS4() ([]byte, error) {
1030
1031 type Test struct {
1032 Ns string `xml:"xmlns,attr"`
1033 Body string
1034 }
1035
1036 s := &Test{Ns: "http://example.com/ns", Body: "hello world"}
1037 return Marshal(s)
1038 }
1039
1040 func TestIssue11405(t *testing.T) {
1041 testCases := []string{
1042 "<root>",
1043 "<root><foo>",
1044 "<root><foo></foo>",
1045 }
1046 for _, tc := range testCases {
1047 d := NewDecoder(strings.NewReader(tc))
1048 var err error
1049 for {
1050 _, err = d.Token()
1051 if err != nil {
1052 break
1053 }
1054 }
1055 if _, ok := err.(*SyntaxError); !ok {
1056 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
1057 }
1058 }
1059 }
1060
1061 func TestIssue12417(t *testing.T) {
1062 testCases := []struct {
1063 s string
1064 ok bool
1065 }{
1066 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
1067 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
1068 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
1069 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
1070 }
1071 for _, tc := range testCases {
1072 d := NewDecoder(strings.NewReader(tc.s))
1073 var err error
1074 for {
1075 _, err = d.Token()
1076 if err != nil {
1077 if err == io.EOF {
1078 err = nil
1079 }
1080 break
1081 }
1082 }
1083 if err != nil && tc.ok {
1084 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
1085 continue
1086 }
1087 if err == nil && !tc.ok {
1088 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
1089 }
1090 }
1091 }
1092
1093 func TestIssue7113(t *testing.T) {
1094 type C struct {
1095 XMLName Name `xml:""`
1096 }
1097
1098 type D struct {
1099 XMLName Name `xml:"d"`
1100 }
1101
1102 type A struct {
1103 XMLName Name `xml:""`
1104 C C `xml:""`
1105 D D
1106 }
1107
1108 var a A
1109 structSpace := "b"
1110 xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>`
1111 t.Log(xmlTest)
1112 err := Unmarshal([]byte(xmlTest), &a)
1113 if err != nil {
1114 t.Fatal(err)
1115 }
1116
1117 if a.XMLName.Space != structSpace {
1118 t.Errorf("overidding with empty namespace: unmarshaling, got %s, want %s\n", a.XMLName.Space, structSpace)
1119 }
1120 if len(a.C.XMLName.Space) != 0 {
1121 t.Fatalf("overidding with empty namespace: unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
1122 }
1123
1124 var b []byte
1125 b, err = Marshal(&a)
1126 if err != nil {
1127 t.Fatal(err)
1128 }
1129 if len(a.C.XMLName.Space) != 0 {
1130 t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
1131 }
1132 if string(b) != xmlTest {
1133 t.Fatalf("overidding with empty namespace: marshaling, got %s, want %s\n", b, xmlTest)
1134 }
1135 var c A
1136 err = Unmarshal(b, &c)
1137 if err != nil {
1138 t.Fatalf("second Unmarshal failed: %s", err)
1139 }
1140 if c.XMLName.Space != "b" {
1141 t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
1142 }
1143 if len(c.C.XMLName.Space) != 0 {
1144 t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
1145 }
1146 }
1147
1148 func TestIssue20396(t *testing.T) {
1149
1150 var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")
1151
1152 testCases := []struct {
1153 s string
1154 wantErr error
1155 }{
1156 {`<a:te:st xmlns:a="abcd"/>`,
1157 UnmarshalError("XML syntax error on line 1: expected element name after <")},
1158 {`<a:te=st xmlns:a="abcd"/>`, attrError},
1159 {`<a:te&st xmlns:a="abcd"/>`, attrError},
1160 {`<a:test xmlns:a="abcd"/>`, nil},
1161 {`<a:te:st xmlns:a="abcd">1</a:te:st>`,
1162 UnmarshalError("XML syntax error on line 1: expected element name after <")},
1163 {`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
1164 {`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
1165 {`<a:test xmlns:a="abcd">1</a:test>`, nil},
1166 }
1167
1168 var dest string
1169 for _, tc := range testCases {
1170 if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
1171 if got == nil {
1172 t.Errorf("%s: Unexpected success, want %v", tc.s, want)
1173 } else if want == nil {
1174 t.Errorf("%s: Unexpected error, got %v", tc.s, got)
1175 } else if got.Error() != want.Error() {
1176 t.Errorf("%s: got %v, want %v", tc.s, got, want)
1177 }
1178 }
1179 }
1180 }
1181
1182 func TestIssue20685(t *testing.T) {
1183 testCases := []struct {
1184 s string
1185 ok bool
1186 }{
1187 {`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
1188 {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
1189 {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
1190 {`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
1191 {`<x:book xmlns:x="abcd">one</y:book>`, false},
1192 {`<x:book>one</y:book>`, false},
1193 {`<xbook>one</ybook>`, false},
1194 }
1195 for _, tc := range testCases {
1196 d := NewDecoder(strings.NewReader(tc.s))
1197 var err error
1198 for {
1199 _, err = d.Token()
1200 if err != nil {
1201 if err == io.EOF {
1202 err = nil
1203 }
1204 break
1205 }
1206 }
1207 if err != nil && tc.ok {
1208 t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
1209 continue
1210 }
1211 if err == nil && !tc.ok {
1212 t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
1213 }
1214 }
1215 }
1216
1217 func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
1218 return func(src TokenReader) TokenReader {
1219 return mapper{
1220 t: src,
1221 f: mapping,
1222 }
1223 }
1224 }
1225
1226 type mapper struct {
1227 t TokenReader
1228 f func(Token) Token
1229 }
1230
1231 func (m mapper) Token() (Token, error) {
1232 tok, err := m.t.Token()
1233 if err != nil {
1234 return nil, err
1235 }
1236 return m.f(tok), nil
1237 }
1238
1239 func TestNewTokenDecoderIdempotent(t *testing.T) {
1240 d := NewDecoder(strings.NewReader(`<br>`))
1241 d2 := NewTokenDecoder(d)
1242 if d != d2 {
1243 t.Error("NewTokenDecoder did not detect underlying Decoder")
1244 }
1245 }
1246
1247 func TestWrapDecoder(t *testing.T) {
1248 d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
1249 m := tokenMap(func(t Token) Token {
1250 switch tok := t.(type) {
1251 case StartElement:
1252 if tok.Name.Local == "quote" {
1253 tok.Name.Local = "blocking"
1254 return tok
1255 }
1256 case EndElement:
1257 if tok.Name.Local == "quote" {
1258 tok.Name.Local = "blocking"
1259 return tok
1260 }
1261 }
1262 return t
1263 })
1264
1265 d = NewTokenDecoder(m(d))
1266
1267 o := struct {
1268 XMLName Name `xml:"blocking"`
1269 Chardata string `xml:",chardata"`
1270 }{}
1271
1272 if err := d.Decode(&o); err != nil {
1273 t.Fatal("Got unexpected error while decoding:", err)
1274 }
1275
1276 if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
1277 t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
1278 }
1279 }
1280
1281 type tokReader struct{}
1282
1283 func (tokReader) Token() (Token, error) {
1284 return StartElement{}, nil
1285 }
1286
// Failure is an empty type with a custom UnmarshalXML method; it is the
// decode target in TestTokenUnmarshaler.
type Failure struct{}

// UnmarshalXML ignores both arguments, reads nothing from the decoder, and
// reports success.
func (Failure) UnmarshalXML(*Decoder, StartElement) error {
	return nil
}
1292
1293 func TestTokenUnmarshaler(t *testing.T) {
1294 defer func() {
1295 if r := recover(); r != nil {
1296 t.Error("Unexpected panic using custom token unmarshaler")
1297 }
1298 }()
1299
1300 d := NewTokenDecoder(tokReader{})
1301 d.Decode(&Failure{})
1302 }
1303
1304 func testRoundTrip(t *testing.T, input string) {
1305 d := NewDecoder(strings.NewReader(input))
1306 var tokens []Token
1307 var buf bytes.Buffer
1308 e := NewEncoder(&buf)
1309 for {
1310 tok, err := d.Token()
1311 if err == io.EOF {
1312 break
1313 }
1314 if err != nil {
1315 t.Fatalf("invalid input: %v", err)
1316 }
1317 if err := e.EncodeToken(tok); err != nil {
1318 t.Fatalf("failed to re-encode input: %v", err)
1319 }
1320 tokens = append(tokens, CopyToken(tok))
1321 }
1322 if err := e.Flush(); err != nil {
1323 t.Fatal(err)
1324 }
1325
1326 d = NewDecoder(&buf)
1327 for {
1328 tok, err := d.Token()
1329 if err == io.EOF {
1330 break
1331 }
1332 if err != nil {
1333 t.Fatalf("failed to decode output: %v", err)
1334 }
1335 if len(tokens) == 0 {
1336 t.Fatalf("unexpected token: %#v", tok)
1337 }
1338 a, b := tokens[0], tok
1339 if !reflect.DeepEqual(a, b) {
1340 t.Fatalf("token mismatch: %#v vs %#v", a, b)
1341 }
1342 tokens = tokens[1:]
1343 }
1344 if len(tokens) > 0 {
1345 t.Fatalf("lost tokens: %#v", tokens)
1346 }
1347 }
1348
1349 func TestRoundTrip(t *testing.T) {
1350 tests := map[string]string{
1351 "trailing colon": `<foo abc:="x"></foo>`,
1352 "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
1353 }
1354 for name, input := range tests {
1355 t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
1356 }
1357 }
1358
1359 func TestParseErrors(t *testing.T) {
1360 withDefaultHeader := func(s string) string {
1361 return `<?xml version="1.0" encoding="UTF-8"?>` + s
1362 }
1363 tests := []struct {
1364 src string
1365 err string
1366 }{
1367 {withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
1368 {withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
1369 {withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
1370 {withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
1371 {withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
1372 {withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
1373 {withDefaultHeader(`<zzz:foo xmlns:zzz="http://example.com"><bar>baz</bar></foo>`),
1374 `element <foo> in space zzz closed by </foo> in space ""`},
1375 {withDefaultHeader("\xf1"), `invalid UTF-8`},
1376
1377
1378 {`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
1379
1380
1381 {withDefaultHeader(`<?ok?>`), ``},
1382 {withDefaultHeader(`<?ok version="ok"?>`), ``},
1383 }
1384
1385 for _, test := range tests {
1386 d := NewDecoder(strings.NewReader(test.src))
1387 var err error
1388 for {
1389 _, err = d.Token()
1390 if err != nil {
1391 break
1392 }
1393 }
1394 if test.err == "" {
1395 if err != io.EOF {
1396 t.Errorf("parse %s: have %q error, expected none", test.src, err)
1397 }
1398 continue
1399 }
1400
1401 if err == io.EOF {
1402 t.Errorf("parse %s: unexpected EOF", test.src)
1403 continue
1404 }
1405 if !strings.Contains(err.Error(), test.err) {
1406 t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err)
1407 continue
1408 }
1409 }
1410 }
1411
// testInputHTMLAutoClose is the shared input for BenchmarkHTMLAutoClose and
// TestHTMLAutoClose: an XML declaration followed by <br> elements written in
// several spellings (bare, self-closed, explicitly closed, mixed case) and a
// <span> with an attribute. Lines are separated by a single "\n" and there
// is no trailing newline.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>` + "\n" +
	`<br>` + "\n" +
	`<br/><br/>` + "\n" +
	`<br><br>` + "\n" +
	`<br></br>` + "\n" +
	`<BR>` + "\n" +
	`<BR/><BR/>` + "\n" +
	`<Br></Br>` + "\n" +
	`<BR><span id="test">abc</span><br/><br/>`
1421
1422 func BenchmarkHTMLAutoClose(b *testing.B) {
1423 b.RunParallel(func(p *testing.PB) {
1424 for p.Next() {
1425 d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
1426 d.Strict = false
1427 d.AutoClose = HTMLAutoClose
1428 d.Entity = HTMLEntity
1429 for {
1430 _, err := d.Token()
1431 if err != nil {
1432 if err == io.EOF {
1433 break
1434 }
1435 b.Fatalf("unexpected error: %v", err)
1436 }
1437 }
1438 }
1439 })
1440 }
1441
1442 func TestHTMLAutoClose(t *testing.T) {
1443 wantTokens := []Token{
1444 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
1445 CharData("\n"),
1446 StartElement{Name{"", "br"}, []Attr{}},
1447 EndElement{Name{"", "br"}},
1448 CharData("\n"),
1449 StartElement{Name{"", "br"}, []Attr{}},
1450 EndElement{Name{"", "br"}},
1451 StartElement{Name{"", "br"}, []Attr{}},
1452 EndElement{Name{"", "br"}},
1453 CharData("\n"),
1454 StartElement{Name{"", "br"}, []Attr{}},
1455 EndElement{Name{"", "br"}},
1456 StartElement{Name{"", "br"}, []Attr{}},
1457 EndElement{Name{"", "br"}},
1458 CharData("\n"),
1459 StartElement{Name{"", "br"}, []Attr{}},
1460 EndElement{Name{"", "br"}},
1461 CharData("\n"),
1462 StartElement{Name{"", "BR"}, []Attr{}},
1463 EndElement{Name{"", "BR"}},
1464 CharData("\n"),
1465 StartElement{Name{"", "BR"}, []Attr{}},
1466 EndElement{Name{"", "BR"}},
1467 StartElement{Name{"", "BR"}, []Attr{}},
1468 EndElement{Name{"", "BR"}},
1469 CharData("\n"),
1470 StartElement{Name{"", "Br"}, []Attr{}},
1471 EndElement{Name{"", "Br"}},
1472 CharData("\n"),
1473 StartElement{Name{"", "BR"}, []Attr{}},
1474 EndElement{Name{"", "BR"}},
1475 StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
1476 CharData("abc"),
1477 EndElement{Name{"", "span"}},
1478 StartElement{Name{"", "br"}, []Attr{}},
1479 EndElement{Name{"", "br"}},
1480 StartElement{Name{"", "br"}, []Attr{}},
1481 EndElement{Name{"", "br"}},
1482 }
1483
1484 d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
1485 d.Strict = false
1486 d.AutoClose = HTMLAutoClose
1487 d.Entity = HTMLEntity
1488 var haveTokens []Token
1489 for {
1490 tok, err := d.Token()
1491 if err != nil {
1492 if err == io.EOF {
1493 break
1494 }
1495 t.Fatalf("unexpected error: %v", err)
1496 }
1497 haveTokens = append(haveTokens, CopyToken(tok))
1498 }
1499 if len(haveTokens) != len(wantTokens) {
1500 t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
1501 }
1502 for i, want := range wantTokens {
1503 if i >= len(haveTokens) {
1504 t.Errorf("token[%d] expected %#v, have no token", i, want)
1505 } else {
1506 have := haveTokens[i]
1507 if !reflect.DeepEqual(have, want) {
1508 t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
1509 }
1510 }
1511 }
1512 }
1513
View as plain text