1
2
3
4
5 package xml
6
7 import (
8 "bytes"
9 "fmt"
10 "io"
11 "reflect"
12 "strings"
13 "testing"
14 "unicode/utf8"
15 )
16
17 type toks struct {
18 earlyEOF bool
19 t []Token
20 }
21
22 func (t *toks) Token() (Token, error) {
23 if len(t.t) == 0 {
24 return nil, io.EOF
25 }
26 var tok Token
27 tok, t.t = t.t[0], t.t[1:]
28 if t.earlyEOF && len(t.t) == 0 {
29 return tok, io.EOF
30 }
31 return tok, nil
32 }
33
34 func TestDecodeEOF(t *testing.T) {
35 start := StartElement{Name: Name{Local: "test"}}
36 tests := []struct {
37 name string
38 tokens []Token
39 ok bool
40 }{
41 {
42 name: "OK",
43 tokens: []Token{
44 start,
45 start.End(),
46 },
47 ok: true,
48 },
49 {
50 name: "Malformed",
51 tokens: []Token{
52 start,
53 StartElement{Name: Name{Local: "bad"}},
54 start.End(),
55 },
56 ok: false,
57 },
58 }
59 for _, tc := range tests {
60 for _, eof := range []bool{true, false} {
61 name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
62 t.Run(name, func(t *testing.T) {
63 d := NewTokenDecoder(&toks{
64 earlyEOF: eof,
65 t: tc.tokens,
66 })
67 err := d.Decode(&struct {
68 XMLName Name `xml:"test"`
69 }{})
70 if tc.ok && err != nil {
71 t.Fatalf("d.Decode: expected nil error, got %v", err)
72 }
73 if _, ok := err.(*SyntaxError); !tc.ok && !ok {
74 t.Errorf("d.Decode: expected syntax error, got %v", err)
75 }
76 })
77 }
78 }
79 }
80
81 type toksNil struct {
82 returnEOF bool
83 t []Token
84 }
85
86 func (t *toksNil) Token() (Token, error) {
87 if len(t.t) == 0 {
88 if !t.returnEOF {
89
90
91 t.returnEOF = true
92 return nil, nil
93 }
94 return nil, io.EOF
95 }
96 var tok Token
97 tok, t.t = t.t[0], t.t[1:]
98 return tok, nil
99 }
100
101 func TestDecodeNilToken(t *testing.T) {
102 for _, strict := range []bool{true, false} {
103 name := fmt.Sprintf("Strict=%v", strict)
104 t.Run(name, func(t *testing.T) {
105 start := StartElement{Name: Name{Local: "test"}}
106 bad := StartElement{Name: Name{Local: "bad"}}
107 d := NewTokenDecoder(&toksNil{
108
109 t: []Token{start, bad, start.End()},
110 })
111 d.Strict = strict
112 err := d.Decode(&struct {
113 XMLName Name `xml:"test"`
114 }{})
115 if _, ok := err.(*SyntaxError); !ok {
116 t.Errorf("d.Decode: expected syntax error, got %v", err)
117 }
118 })
119 }
120 }
121
122 const testInput = `
123 <?xml version="1.0" encoding="UTF-8"?>
124 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
125 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
126 <body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
127 "\r\n\t" + ` >
128 <hello lang="en">World <>'" 白鵬翔</hello>
129 <query>&何; &is-it;</query>
130 <goodbye />
131 <outer foo:attr="value" xmlns:tag="ns4">
132 <inner/>
133 </outer>
134 <tag:name>
135 <![CDATA[Some text here.]]>
136 </tag:name>
137 </body><!-- missing final newline -->`
138
139 var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
140
141 var rawTokens = []Token{
142 CharData("\n"),
143 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
144 CharData("\n"),
145 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
146 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
147 CharData("\n"),
148 StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
149 CharData("\n "),
150 StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
151 CharData("World <>'\" 白鵬翔"),
152 EndElement{Name{"", "hello"}},
153 CharData("\n "),
154 StartElement{Name{"", "query"}, []Attr{}},
155 CharData("What is it?"),
156 EndElement{Name{"", "query"}},
157 CharData("\n "),
158 StartElement{Name{"", "goodbye"}, []Attr{}},
159 EndElement{Name{"", "goodbye"}},
160 CharData("\n "),
161 StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
162 CharData("\n "),
163 StartElement{Name{"", "inner"}, []Attr{}},
164 EndElement{Name{"", "inner"}},
165 CharData("\n "),
166 EndElement{Name{"", "outer"}},
167 CharData("\n "),
168 StartElement{Name{"tag", "name"}, []Attr{}},
169 CharData("\n "),
170 CharData("Some text here."),
171 CharData("\n "),
172 EndElement{Name{"tag", "name"}},
173 CharData("\n"),
174 EndElement{Name{"", "body"}},
175 Comment(" missing final newline "),
176 }
177
178 var cookedTokens = []Token{
179 CharData("\n"),
180 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
181 CharData("\n"),
182 Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
183 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
184 CharData("\n"),
185 StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
186 CharData("\n "),
187 StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
188 CharData("World <>'\" 白鵬翔"),
189 EndElement{Name{"ns2", "hello"}},
190 CharData("\n "),
191 StartElement{Name{"ns2", "query"}, []Attr{}},
192 CharData("What is it?"),
193 EndElement{Name{"ns2", "query"}},
194 CharData("\n "),
195 StartElement{Name{"ns2", "goodbye"}, []Attr{}},
196 EndElement{Name{"ns2", "goodbye"}},
197 CharData("\n "),
198 StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
199 CharData("\n "),
200 StartElement{Name{"ns2", "inner"}, []Attr{}},
201 EndElement{Name{"ns2", "inner"}},
202 CharData("\n "),
203 EndElement{Name{"ns2", "outer"}},
204 CharData("\n "),
205 StartElement{Name{"ns3", "name"}, []Attr{}},
206 CharData("\n "),
207 CharData("Some text here."),
208 CharData("\n "),
209 EndElement{Name{"ns3", "name"}},
210 CharData("\n"),
211 EndElement{Name{"ns2", "body"}},
212 Comment(" missing final newline "),
213 }
214
// testInputAltEncoding is a document that declares the made-up encoding
// "x-testing-uppercase" and uses upper-case element names and text.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`

// rawTokensAltEncoding lists the raw tokens expected for
// testInputAltEncoding: the element name and character data are expected in
// lower case, while the processing instruction is unchanged.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
227
// xmlInput lists ill-formed documents. TestSyntax tokenizes each one until
// the first error and requires that error to be a *SyntaxError, so every
// entry must be rejected by the decoder in its default (strict) mode.
var xmlInput = []string{
	// Input that ends in the middle of a construct.
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// Invalid names and unexpected characters.
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",

	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	// Reference to an undefined entity. The reference has to be spelled out
	// here: plain text such as "<t> c;</t>" is well-formed and would only
	// ever produce io.EOF.
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",

	// Mismatched or malformed end tags, and "]]>" in character data.
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
274
275 func TestRawToken(t *testing.T) {
276 d := NewDecoder(strings.NewReader(testInput))
277 d.Entity = testEntity
278 testRawToken(t, d, testInput, rawTokens)
279 }
280
281 const nonStrictInput = `
282 <tag>non&entity</tag>
283 <tag>&unknown;entity</tag>
284 <tag>{</tag>
285 <tag>&#zzz;</tag>
286 <tag>&なまえ3;</tag>
287 <tag><-gt;</tag>
288 <tag>&;</tag>
289 <tag>&0a;</tag>
290 `
291
292 var nonStrictTokens = []Token{
293 CharData("\n"),
294 StartElement{Name{"", "tag"}, []Attr{}},
295 CharData("non&entity"),
296 EndElement{Name{"", "tag"}},
297 CharData("\n"),
298 StartElement{Name{"", "tag"}, []Attr{}},
299 CharData("&unknown;entity"),
300 EndElement{Name{"", "tag"}},
301 CharData("\n"),
302 StartElement{Name{"", "tag"}, []Attr{}},
303 CharData("{"),
304 EndElement{Name{"", "tag"}},
305 CharData("\n"),
306 StartElement{Name{"", "tag"}, []Attr{}},
307 CharData("&#zzz;"),
308 EndElement{Name{"", "tag"}},
309 CharData("\n"),
310 StartElement{Name{"", "tag"}, []Attr{}},
311 CharData("&なまえ3;"),
312 EndElement{Name{"", "tag"}},
313 CharData("\n"),
314 StartElement{Name{"", "tag"}, []Attr{}},
315 CharData("<-gt;"),
316 EndElement{Name{"", "tag"}},
317 CharData("\n"),
318 StartElement{Name{"", "tag"}, []Attr{}},
319 CharData("&;"),
320 EndElement{Name{"", "tag"}},
321 CharData("\n"),
322 StartElement{Name{"", "tag"}, []Attr{}},
323 CharData("&0a;"),
324 EndElement{Name{"", "tag"}},
325 CharData("\n"),
326 }
327
328 func TestNonStrictRawToken(t *testing.T) {
329 d := NewDecoder(strings.NewReader(nonStrictInput))
330 d.Strict = false
331 testRawToken(t, d, nonStrictInput, nonStrictTokens)
332 }
333
// downCaser wraps an io.ByteReader and lower-cases ASCII letters as they
// are read. Its Read method exists only to fail the test if it is called.
type downCaser struct {
	t *testing.T
	r io.ByteReader
}

// ReadByte reads one byte from the wrapped reader, mapping 'A'..'Z' to
// 'a'..'z'. The error from the wrapped reader is passed through unchanged.
func (d *downCaser) ReadByte() (c byte, err error) {
	c, err = d.r.ReadByte()
	if 'A' <= c && c <= 'Z' {
		c = c - 'A' + 'a'
	}
	return c, err
}

// Read must never be called; it aborts the owning test.
func (d *downCaser) Read(p []byte) (int, error) {
	d.t.Fatalf("unexpected Read call on downCaser reader")
	panic("unreachable")
}
351
352 func TestRawTokenAltEncoding(t *testing.T) {
353 d := NewDecoder(strings.NewReader(testInputAltEncoding))
354 d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
355 if charset != "x-testing-uppercase" {
356 t.Fatalf("unexpected charset %q", charset)
357 }
358 return &downCaser{t, input.(io.ByteReader)}, nil
359 }
360 testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
361 }
362
363 func TestRawTokenAltEncodingNoConverter(t *testing.T) {
364 d := NewDecoder(strings.NewReader(testInputAltEncoding))
365 token, err := d.RawToken()
366 if token == nil {
367 t.Fatalf("expected a token on first RawToken call")
368 }
369 if err != nil {
370 t.Fatal(err)
371 }
372 token, err = d.RawToken()
373 if token != nil {
374 t.Errorf("expected a nil token; got %#v", token)
375 }
376 if err == nil {
377 t.Fatalf("expected an error on second RawToken call")
378 }
379 const encoding = "x-testing-uppercase"
380 if !strings.Contains(err.Error(), encoding) {
381 t.Errorf("expected error to contain %q; got error: %v",
382 encoding, err)
383 }
384 }
385
386 func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
387 lastEnd := int64(0)
388 for i, want := range rawTokens {
389 start := d.InputOffset()
390 have, err := d.RawToken()
391 end := d.InputOffset()
392 if err != nil {
393 t.Fatalf("token %d: unexpected error: %s", i, err)
394 }
395 if !reflect.DeepEqual(have, want) {
396 var shave, swant string
397 if _, ok := have.(CharData); ok {
398 shave = fmt.Sprintf("CharData(%q)", have)
399 } else {
400 shave = fmt.Sprintf("%#v", have)
401 }
402 if _, ok := want.(CharData); ok {
403 swant = fmt.Sprintf("CharData(%q)", want)
404 } else {
405 swant = fmt.Sprintf("%#v", want)
406 }
407 t.Errorf("token %d = %s, want %s", i, shave, swant)
408 }
409
410
411 switch {
412 case start < lastEnd:
413 t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
414 case start >= end:
415
416 if start == end && end == lastEnd {
417 break
418 }
419 t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
420 case end > int64(len(raw)):
421 t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
422 default:
423 text := raw[start:end]
424 if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
425 t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
426 }
427 }
428 lastEnd = end
429 }
430 }
431
432
433
434
435
// nestedDirectivesInput contains DOCTYPE directives whose internal subset
// nests a further <!ENTITY ...> declaration, with '<' and '>' appearing
// inside single- and double-quoted strings.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`

// nestedDirectivesTokens is the token sequence expected for
// nestedDirectivesInput: each line yields one Directive holding the text
// between "<!" and the final ">", separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
463
464 func TestNestedDirectives(t *testing.T) {
465 d := NewDecoder(strings.NewReader(nestedDirectivesInput))
466
467 for i, want := range nestedDirectivesTokens {
468 have, err := d.Token()
469 if err != nil {
470 t.Fatalf("token %d: unexpected error: %s", i, err)
471 }
472 if !reflect.DeepEqual(have, want) {
473 t.Errorf("token %d = %#v want %#v", i, have, want)
474 }
475 }
476 }
477
478 func TestToken(t *testing.T) {
479 d := NewDecoder(strings.NewReader(testInput))
480 d.Entity = testEntity
481
482 for i, want := range cookedTokens {
483 have, err := d.Token()
484 if err != nil {
485 t.Fatalf("token %d: unexpected error: %s", i, err)
486 }
487 if !reflect.DeepEqual(have, want) {
488 t.Errorf("token %d = %#v want %#v", i, have, want)
489 }
490 }
491 }
492
493 func TestSyntax(t *testing.T) {
494 for i := range xmlInput {
495 d := NewDecoder(strings.NewReader(xmlInput[i]))
496 var err error
497 for _, err = d.Token(); err == nil; _, err = d.Token() {
498 }
499 if _, ok := err.(*SyntaxError); !ok {
500 t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
501 }
502 }
503 }
504
505 func TestInputLinePos(t *testing.T) {
506 testInput := `<root>
507 <?pi
508 ?> <elt
509 att
510 =
511 "val">
512 <![CDATA[
513 ]]><!--
514
515 --></elt>
516 </root>`
517 linePos := [][]int{
518 {1, 7},
519 {2, 1},
520 {3, 4},
521 {3, 6},
522 {6, 7},
523 {7, 1},
524 {8, 4},
525 {10, 4},
526 {10, 10},
527 {11, 1},
528 {11, 8},
529 }
530 dec := NewDecoder(strings.NewReader(testInput))
531 for _, want := range linePos {
532 if _, err := dec.Token(); err != nil {
533 t.Errorf("Unexpected error: %v", err)
534 continue
535 }
536
537 gotLine, gotCol := dec.InputPos()
538 if gotLine != want[0] || gotCol != want[1] {
539 t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1])
540 }
541 }
542 }
543
// allScalars has one field for every scalar kind exercised by
// TestAllScalars, plus a string and a pointer to string. Field names match
// the element names in testScalarsInput.
type allScalars struct {
	True1     bool
	True2     bool
	False1    bool
	False2    bool
	Int       int
	Int8      int8
	Int16     int16
	Int32     int32
	Int64     int64
	Uint      uint // unsigned, like its siblings; declaring it int would leave uint decoding untested
	Uint8     uint8
	Uint16    uint16
	Uint32    uint32
	Uint64    uint64
	Uintptr   uintptr
	Float32   float32
	Float64   float64
	String    string
	PtrString *string
}
565
566 var all = allScalars{
567 True1: true,
568 True2: true,
569 False1: false,
570 False2: false,
571 Int: 1,
572 Int8: -2,
573 Int16: 3,
574 Int32: -4,
575 Int64: 5,
576 Uint: 6,
577 Uint8: 7,
578 Uint16: 8,
579 Uint32: 9,
580 Uint64: 10,
581 Uintptr: 11,
582 Float32: 13.0,
583 Float64: 14.0,
584 String: "15",
585 PtrString: &sixteen,
586 }
587
588 var sixteen = "16"
589
// testScalarsInput is the document unmarshaled by TestAllScalars. Its
// <Float> element has no counterpart among the fields of allScalars.
const testScalarsInput = `<allscalars>
<True1>true</True1>
<True2>1</True2>
<False1>false</False1>
<False2>0</False2>
<Int>1</Int>
<Int8>-2</Int8>
<Int16>3</Int16>
<Int32>-4</Int32>
<Int64>5</Int64>
<Uint>6</Uint>
<Uint8>7</Uint8>
<Uint16>8</Uint16>
<Uint32>9</Uint32>
<Uint64>10</Uint64>
<Uintptr>11</Uintptr>
<Float>12.0</Float>
<Float32>13.0</Float32>
<Float64>14.0</Float64>
<String>15</String>
<PtrString>16</PtrString>
</allscalars>`
612
613 func TestAllScalars(t *testing.T) {
614 var a allScalars
615 err := Unmarshal([]byte(testScalarsInput), &a)
616
617 if err != nil {
618 t.Fatal(err)
619 }
620 if !reflect.DeepEqual(a, all) {
621 t.Errorf("have %+v want %+v", a, all)
622 }
623 }
624
625 type item struct {
626 FieldA string
627 }
628
629 func TestIssue569(t *testing.T) {
630 data := `<item><FieldA>abcd</FieldA></item>`
631 var i item
632 err := Unmarshal([]byte(data), &i)
633
634 if err != nil || i.FieldA != "abcd" {
635 t.Fatal("Expecting abcd")
636 }
637 }
638
639 func TestUnquotedAttrs(t *testing.T) {
640 data := "<tag attr=azAZ09:-_\t>"
641 d := NewDecoder(strings.NewReader(data))
642 d.Strict = false
643 token, err := d.Token()
644 if _, ok := err.(*SyntaxError); ok {
645 t.Errorf("Unexpected error: %v", err)
646 }
647 if token.(StartElement).Name.Local != "tag" {
648 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
649 }
650 attr := token.(StartElement).Attr[0]
651 if attr.Value != "azAZ09:-_" {
652 t.Errorf("Unexpected attribute value: %v", attr.Value)
653 }
654 if attr.Name.Local != "attr" {
655 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
656 }
657 }
658
659 func TestValuelessAttrs(t *testing.T) {
660 tests := [][3]string{
661 {"<p nowrap>", "p", "nowrap"},
662 {"<p nowrap >", "p", "nowrap"},
663 {"<input checked/>", "input", "checked"},
664 {"<input checked />", "input", "checked"},
665 }
666 for _, test := range tests {
667 d := NewDecoder(strings.NewReader(test[0]))
668 d.Strict = false
669 token, err := d.Token()
670 if _, ok := err.(*SyntaxError); ok {
671 t.Errorf("Unexpected error: %v", err)
672 }
673 if token.(StartElement).Name.Local != test[1] {
674 t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
675 }
676 attr := token.(StartElement).Attr[0]
677 if attr.Value != test[2] {
678 t.Errorf("Unexpected attribute value: %v", attr.Value)
679 }
680 if attr.Name.Local != test[2] {
681 t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
682 }
683 }
684 }
685
686 func TestCopyTokenCharData(t *testing.T) {
687 data := []byte("same data")
688 var tok1 Token = CharData(data)
689 tok2 := CopyToken(tok1)
690 if !reflect.DeepEqual(tok1, tok2) {
691 t.Error("CopyToken(CharData) != CharData")
692 }
693 data[1] = 'o'
694 if reflect.DeepEqual(tok1, tok2) {
695 t.Error("CopyToken(CharData) uses same buffer.")
696 }
697 }
698
699 func TestCopyTokenStartElement(t *testing.T) {
700 elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
701 var tok1 Token = elt
702 tok2 := CopyToken(tok1)
703 if tok1.(StartElement).Attr[0].Value != "en" {
704 t.Error("CopyToken overwrote Attr[0]")
705 }
706 if !reflect.DeepEqual(tok1, tok2) {
707 t.Error("CopyToken(StartElement) != StartElement")
708 }
709 tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
710 if reflect.DeepEqual(tok1, tok2) {
711 t.Error("CopyToken(CharData) uses same buffer.")
712 }
713 }
714
715 func TestCopyTokenComment(t *testing.T) {
716 data := []byte("<!-- some comment -->")
717 var tok1 Token = Comment(data)
718 tok2 := CopyToken(tok1)
719 if !reflect.DeepEqual(tok1, tok2) {
720 t.Error("CopyToken(Comment) != Comment")
721 }
722 data[1] = 'o'
723 if reflect.DeepEqual(tok1, tok2) {
724 t.Error("CopyToken(Comment) uses same buffer.")
725 }
726 }
727
728 func TestSyntaxErrorLineNum(t *testing.T) {
729 testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
730 d := NewDecoder(strings.NewReader(testInput))
731 var err error
732 for _, err = d.Token(); err == nil; _, err = d.Token() {
733 }
734 synerr, ok := err.(*SyntaxError)
735 if !ok {
736 t.Error("Expected SyntaxError.")
737 }
738 if synerr.Line != 3 {
739 t.Error("SyntaxError didn't have correct line number.")
740 }
741 }
742
743 func TestTrailingRawToken(t *testing.T) {
744 input := `<FOO></FOO> `
745 d := NewDecoder(strings.NewReader(input))
746 var err error
747 for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
748 }
749 if err != io.EOF {
750 t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
751 }
752 }
753
754 func TestTrailingToken(t *testing.T) {
755 input := `<FOO></FOO> `
756 d := NewDecoder(strings.NewReader(input))
757 var err error
758 for _, err = d.Token(); err == nil; _, err = d.Token() {
759 }
760 if err != io.EOF {
761 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
762 }
763 }
764
765 func TestEntityInsideCDATA(t *testing.T) {
766 input := `<test><![CDATA[ &val=foo ]]></test>`
767 d := NewDecoder(strings.NewReader(input))
768 var err error
769 for _, err = d.Token(); err == nil; _, err = d.Token() {
770 }
771 if err != io.EOF {
772 t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
773 }
774 }
775
// characterTests pairs inputs containing disallowed characters or invalid
// entity references with the SyntaxError message expected from the decoder
// (compared against SyntaxError.Msg by TestDisallowedCharacters).
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
790
791 func TestDisallowedCharacters(t *testing.T) {
792
793 for i, tt := range characterTests {
794 d := NewDecoder(strings.NewReader(tt.in))
795 var err error
796
797 for err == nil {
798 _, err = d.Token()
799 }
800 synerr, ok := err.(*SyntaxError)
801 if !ok {
802 t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
803 }
804 if synerr.Msg != tt.err {
805 t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
806 }
807 }
808 }
809
810 func TestIsInCharacterRange(t *testing.T) {
811 invalid := []rune{
812 utf8.MaxRune + 1,
813 0xD800,
814 0xDFFF,
815 -1,
816 }
817 for _, r := range invalid {
818 if isInCharacterRange(r) {
819 t.Errorf("rune %U considered valid", r)
820 }
821 }
822 }
823
// procInstTests pairs the content of an XML declaration with the values
// procInst is expected to return for it: expect[0] for "version" and
// expect[1] for "encoding" (see TestProcInstEncoding).
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
	{`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version= encoding=`, [2]string{"", ""}},
	{`encoding="version=1.0"`, [2]string{"", "version=1.0"}},
	{``, [2]string{"", ""}},

	// In the last two rows the expected value for one parameter is taken
	// from inside the quoted value of the other.
	{`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}},
	{`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}},
}
841
842 func TestProcInstEncoding(t *testing.T) {
843 for _, test := range procInstTests {
844 if got := procInst("version", test.input); got != test.expect[0] {
845 t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
846 }
847 if got := procInst("encoding", test.input); got != test.expect[1] {
848 t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
849 }
850 }
851 }
852
853
854
855
856 var directivesWithCommentsInput = `
857 <!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
858 <!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
859 <!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
860 `
861
862 var directivesWithCommentsTokens = []Token{
863 CharData("\n"),
864 Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
865 CharData("\n"),
866 Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
867 CharData("\n"),
868 Directive(`DOCTYPE <!-> <!> [<!ENTITY go "Golang"> ]`),
869 CharData("\n"),
870 }
871
872 func TestDirectivesWithComments(t *testing.T) {
873 d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
874
875 for i, want := range directivesWithCommentsTokens {
876 have, err := d.Token()
877 if err != nil {
878 t.Fatalf("token %d: unexpected error: %s", i, err)
879 }
880 if !reflect.DeepEqual(have, want) {
881 t.Errorf("token %d = %#v want %#v", i, have, want)
882 }
883 }
884 }
885
886
// errWriter is a writer whose Write always fails.
type errWriter struct{}

// Write consumes nothing and returns the error "unwritable".
func (errWriter) Write(p []byte) (int, error) {
	failure := fmt.Errorf("unwritable")
	return 0, failure
}
890
891 func TestEscapeTextIOErrors(t *testing.T) {
892 expectErr := "unwritable"
893 err := EscapeText(errWriter{}, []byte{'A'})
894
895 if err == nil || err.Error() != expectErr {
896 t.Errorf("have %v, want %v", err, expectErr)
897 }
898 }
899
900 func TestEscapeTextInvalidChar(t *testing.T) {
901 input := []byte("A \x00 terminated string.")
902 expected := "A \uFFFD terminated string."
903
904 buff := new(strings.Builder)
905 if err := EscapeText(buff, input); err != nil {
906 t.Fatalf("have %v, want nil", err)
907 }
908 text := buff.String()
909
910 if text != expected {
911 t.Errorf("have %v, want %v", text, expected)
912 }
913 }
914
915 func TestIssue5880(t *testing.T) {
916 type T []byte
917 data, err := Marshal(T{192, 168, 0, 1})
918 if err != nil {
919 t.Errorf("Marshal error: %v", err)
920 }
921 if !utf8.Valid(data) {
922 t.Errorf("Marshal generated invalid UTF-8: %x", data)
923 }
924 }
925
926 func TestIssue8535(t *testing.T) {
927
928 type ExampleConflict struct {
929 XMLName Name `xml:"example"`
930 Link string `xml:"link"`
931 AtomLink string `xml:"http://www.w3.org/2005/Atom link"`
932 }
933 testCase := `<example>
934 <title>Example</title>
935 <link>http://example.com/default</link> <!-- not assigned -->
936 <link>http://example.com/home</link> <!-- not assigned -->
937 <ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link>
938 </example>`
939
940 var dest ExampleConflict
941 d := NewDecoder(strings.NewReader(testCase))
942 if err := d.Decode(&dest); err != nil {
943 t.Fatal(err)
944 }
945 }
946
947 func TestEncodeXMLNS(t *testing.T) {
948 testCases := []struct {
949 f func() ([]byte, error)
950 want string
951 ok bool
952 }{
953 {encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
954 {encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true},
955 {encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
956 {encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false},
957 }
958
959 for i, tc := range testCases {
960 if b, err := tc.f(); err == nil {
961 if got, want := string(b), tc.want; got != want {
962 t.Errorf("%d: got %s, want %s \n", i, got, want)
963 }
964 } else {
965 t.Errorf("%d: marshal failed with %s", i, err)
966 }
967 }
968 }
969
970 func encodeXMLNS1() ([]byte, error) {
971
972 type T struct {
973 XMLName Name `xml:"Test"`
974 Ns string `xml:"xmlns,attr"`
975 Body string
976 }
977
978 s := &T{Ns: "http://example.com/ns", Body: "hello world"}
979 return Marshal(s)
980 }
981
982 func encodeXMLNS2() ([]byte, error) {
983
984 type Test struct {
985 Body string `xml:"http://example.com/ns body"`
986 }
987
988 s := &Test{Body: "hello world"}
989 return Marshal(s)
990 }
991
992 func encodeXMLNS3() ([]byte, error) {
993
994 type Test struct {
995 XMLName Name `xml:"http://example.com/ns Test"`
996 Body string
997 }
998
999
1000
1001 s := &Test{Body: "hello world"}
1002 return Marshal(s)
1003 }
1004
1005 func encodeXMLNS4() ([]byte, error) {
1006
1007 type Test struct {
1008 Ns string `xml:"xmlns,attr"`
1009 Body string
1010 }
1011
1012 s := &Test{Ns: "http://example.com/ns", Body: "hello world"}
1013 return Marshal(s)
1014 }
1015
1016 func TestIssue11405(t *testing.T) {
1017 testCases := []string{
1018 "<root>",
1019 "<root><foo>",
1020 "<root><foo></foo>",
1021 }
1022 for _, tc := range testCases {
1023 d := NewDecoder(strings.NewReader(tc))
1024 var err error
1025 for {
1026 _, err = d.Token()
1027 if err != nil {
1028 break
1029 }
1030 }
1031 if _, ok := err.(*SyntaxError); !ok {
1032 t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
1033 }
1034 }
1035 }
1036
1037 func TestIssue12417(t *testing.T) {
1038 testCases := []struct {
1039 s string
1040 ok bool
1041 }{
1042 {`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
1043 {`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
1044 {`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
1045 {`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
1046 }
1047 for _, tc := range testCases {
1048 d := NewDecoder(strings.NewReader(tc.s))
1049 var err error
1050 for {
1051 _, err = d.Token()
1052 if err != nil {
1053 if err == io.EOF {
1054 err = nil
1055 }
1056 break
1057 }
1058 }
1059 if err != nil && tc.ok {
1060 t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
1061 continue
1062 }
1063 if err == nil && !tc.ok {
1064 t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
1065 }
1066 }
1067 }
1068
1069 func TestIssue7113(t *testing.T) {
1070 type C struct {
1071 XMLName Name `xml:""`
1072 }
1073
1074 type D struct {
1075 XMLName Name `xml:"d"`
1076 }
1077
1078 type A struct {
1079 XMLName Name `xml:""`
1080 C C `xml:""`
1081 D D
1082 }
1083
1084 var a A
1085 structSpace := "b"
1086 xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>`
1087 t.Log(xmlTest)
1088 err := Unmarshal([]byte(xmlTest), &a)
1089 if err != nil {
1090 t.Fatal(err)
1091 }
1092
1093 if a.XMLName.Space != structSpace {
1094 t.Errorf("overidding with empty namespace: unmarshaling, got %s, want %s\n", a.XMLName.Space, structSpace)
1095 }
1096 if len(a.C.XMLName.Space) != 0 {
1097 t.Fatalf("overidding with empty namespace: unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
1098 }
1099
1100 var b []byte
1101 b, err = Marshal(&a)
1102 if err != nil {
1103 t.Fatal(err)
1104 }
1105 if len(a.C.XMLName.Space) != 0 {
1106 t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
1107 }
1108 if string(b) != xmlTest {
1109 t.Fatalf("overidding with empty namespace: marshaling, got %s, want %s\n", b, xmlTest)
1110 }
1111 var c A
1112 err = Unmarshal(b, &c)
1113 if err != nil {
1114 t.Fatalf("second Unmarshal failed: %s", err)
1115 }
1116 if c.XMLName.Space != "b" {
1117 t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
1118 }
1119 if len(c.C.XMLName.Space) != 0 {
1120 t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
1121 }
1122 }
1123
1124 func TestIssue20396(t *testing.T) {
1125
1126 var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")
1127
1128 testCases := []struct {
1129 s string
1130 wantErr error
1131 }{
1132 {`<a:te:st xmlns:a="abcd"/>`,
1133 UnmarshalError("XML syntax error on line 1: expected element name after <")},
1134 {`<a:te=st xmlns:a="abcd"/>`, attrError},
1135 {`<a:te&st xmlns:a="abcd"/>`, attrError},
1136 {`<a:test xmlns:a="abcd"/>`, nil},
1137 {`<a:te:st xmlns:a="abcd">1</a:te:st>`,
1138 UnmarshalError("XML syntax error on line 1: expected element name after <")},
1139 {`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
1140 {`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
1141 {`<a:test xmlns:a="abcd">1</a:test>`, nil},
1142 }
1143
1144 var dest string
1145 for _, tc := range testCases {
1146 if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
1147 if got == nil {
1148 t.Errorf("%s: Unexpected success, want %v", tc.s, want)
1149 } else if want == nil {
1150 t.Errorf("%s: Unexpected error, got %v", tc.s, got)
1151 } else if got.Error() != want.Error() {
1152 t.Errorf("%s: got %v, want %v", tc.s, got, want)
1153 }
1154 }
1155 }
1156 }
1157
1158 func TestIssue20685(t *testing.T) {
1159 testCases := []struct {
1160 s string
1161 ok bool
1162 }{
1163 {`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
1164 {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
1165 {`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
1166 {`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
1167 {`<x:book xmlns:x="abcd">one</y:book>`, false},
1168 {`<x:book>one</y:book>`, false},
1169 {`<xbook>one</ybook>`, false},
1170 }
1171 for _, tc := range testCases {
1172 d := NewDecoder(strings.NewReader(tc.s))
1173 var err error
1174 for {
1175 _, err = d.Token()
1176 if err != nil {
1177 if err == io.EOF {
1178 err = nil
1179 }
1180 break
1181 }
1182 }
1183 if err != nil && tc.ok {
1184 t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
1185 continue
1186 }
1187 if err == nil && !tc.ok {
1188 t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
1189 }
1190 }
1191 }
1192
1193 func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
1194 return func(src TokenReader) TokenReader {
1195 return mapper{
1196 t: src,
1197 f: mapping,
1198 }
1199 }
1200 }
1201
1202 type mapper struct {
1203 t TokenReader
1204 f func(Token) Token
1205 }
1206
1207 func (m mapper) Token() (Token, error) {
1208 tok, err := m.t.Token()
1209 if err != nil {
1210 return nil, err
1211 }
1212 return m.f(tok), nil
1213 }
1214
1215 func TestNewTokenDecoderIdempotent(t *testing.T) {
1216 d := NewDecoder(strings.NewReader(`<br>`))
1217 d2 := NewTokenDecoder(d)
1218 if d != d2 {
1219 t.Error("NewTokenDecoder did not detect underlying Decoder")
1220 }
1221 }
1222
1223 func TestWrapDecoder(t *testing.T) {
1224 d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
1225 m := tokenMap(func(t Token) Token {
1226 switch tok := t.(type) {
1227 case StartElement:
1228 if tok.Name.Local == "quote" {
1229 tok.Name.Local = "blocking"
1230 return tok
1231 }
1232 case EndElement:
1233 if tok.Name.Local == "quote" {
1234 tok.Name.Local = "blocking"
1235 return tok
1236 }
1237 }
1238 return t
1239 })
1240
1241 d = NewTokenDecoder(m(d))
1242
1243 o := struct {
1244 XMLName Name `xml:"blocking"`
1245 Chardata string `xml:",chardata"`
1246 }{}
1247
1248 if err := d.Decode(&o); err != nil {
1249 t.Fatal("Got unexpected error while decoding:", err)
1250 }
1251
1252 if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
1253 t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
1254 }
1255 }
1256
1257 type tokReader struct{}
1258
1259 func (tokReader) Token() (Token, error) {
1260 return StartElement{}, nil
1261 }
1262
1263 type Failure struct{}
1264
1265 func (Failure) UnmarshalXML(*Decoder, StartElement) error {
1266 return nil
1267 }
1268
1269 func TestTokenUnmarshaler(t *testing.T) {
1270 defer func() {
1271 if r := recover(); r != nil {
1272 t.Error("Unexpected panic using custom token unmarshaler")
1273 }
1274 }()
1275
1276 d := NewTokenDecoder(tokReader{})
1277 d.Decode(&Failure{})
1278 }
1279
1280 func testRoundTrip(t *testing.T, input string) {
1281 d := NewDecoder(strings.NewReader(input))
1282 var tokens []Token
1283 var buf bytes.Buffer
1284 e := NewEncoder(&buf)
1285 for {
1286 tok, err := d.Token()
1287 if err == io.EOF {
1288 break
1289 }
1290 if err != nil {
1291 t.Fatalf("invalid input: %v", err)
1292 }
1293 if err := e.EncodeToken(tok); err != nil {
1294 t.Fatalf("failed to re-encode input: %v", err)
1295 }
1296 tokens = append(tokens, CopyToken(tok))
1297 }
1298 if err := e.Flush(); err != nil {
1299 t.Fatal(err)
1300 }
1301
1302 d = NewDecoder(&buf)
1303 for {
1304 tok, err := d.Token()
1305 if err == io.EOF {
1306 break
1307 }
1308 if err != nil {
1309 t.Fatalf("failed to decode output: %v", err)
1310 }
1311 if len(tokens) == 0 {
1312 t.Fatalf("unexpected token: %#v", tok)
1313 }
1314 a, b := tokens[0], tok
1315 if !reflect.DeepEqual(a, b) {
1316 t.Fatalf("token mismatch: %#v vs %#v", a, b)
1317 }
1318 tokens = tokens[1:]
1319 }
1320 if len(tokens) > 0 {
1321 t.Fatalf("lost tokens: %#v", tokens)
1322 }
1323 }
1324
1325 func TestRoundTrip(t *testing.T) {
1326 tests := map[string]string{
1327 "trailing colon": `<foo abc:="x"></foo>`,
1328 "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
1329 }
1330 for name, input := range tests {
1331 t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
1332 }
1333 }
1334
1335 func TestParseErrors(t *testing.T) {
1336 withDefaultHeader := func(s string) string {
1337 return `<?xml version="1.0" encoding="UTF-8"?>` + s
1338 }
1339 tests := []struct {
1340 src string
1341 err string
1342 }{
1343 {withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
1344 {withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
1345 {withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
1346 {withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
1347 {withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
1348 {withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
1349 {withDefaultHeader(`<zzz:foo xmlns:zzz="http://example.com"><bar>baz</bar></foo>`),
1350 `element <foo> in space zzz closed by </foo> in space ""`},
1351 {withDefaultHeader("\xf1"), `invalid UTF-8`},
1352
1353
1354 {`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
1355
1356
1357 {withDefaultHeader(`<?ok?>`), ``},
1358 {withDefaultHeader(`<?ok version="ok"?>`), ``},
1359 }
1360
1361 for _, test := range tests {
1362 d := NewDecoder(strings.NewReader(test.src))
1363 var err error
1364 for {
1365 _, err = d.Token()
1366 if err != nil {
1367 break
1368 }
1369 }
1370 if test.err == "" {
1371 if err != io.EOF {
1372 t.Errorf("parse %s: have %q error, expected none", test.src, err)
1373 }
1374 continue
1375 }
1376
1377 if err == io.EOF {
1378 t.Errorf("parse %s: unexpected EOF", test.src)
1379 continue
1380 }
1381 if !strings.Contains(err.Error(), test.err) {
1382 t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err)
1383 continue
1384 }
1385 }
1386 }
1387
// testInputHTMLAutoClose is the document tokenized by BenchmarkHTMLAutoClose
// and TestHTMLAutoClose: an XML declaration followed by lines of <br>
// elements in several spellings (unclosed, self-closed, explicitly closed,
// mixed case) and a <span>. Lines are joined by a single "\n" with no
// trailing newline.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>` + "\n" +
	`<br>` + "\n" +
	`<br/><br/>` + "\n" +
	`<br><br>` + "\n" +
	`<br></br>` + "\n" +
	`<BR>` + "\n" +
	`<BR/><BR/>` + "\n" +
	`<Br></Br>` + "\n" +
	`<BR><span id="test">abc</span><br/><br/>`
1397
1398 func BenchmarkHTMLAutoClose(b *testing.B) {
1399 b.RunParallel(func(p *testing.PB) {
1400 for p.Next() {
1401 d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
1402 d.Strict = false
1403 d.AutoClose = HTMLAutoClose
1404 d.Entity = HTMLEntity
1405 for {
1406 _, err := d.Token()
1407 if err != nil {
1408 if err == io.EOF {
1409 break
1410 }
1411 b.Fatalf("unexpected error: %v", err)
1412 }
1413 }
1414 }
1415 })
1416 }
1417
1418 func TestHTMLAutoClose(t *testing.T) {
1419 wantTokens := []Token{
1420 ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
1421 CharData("\n"),
1422 StartElement{Name{"", "br"}, []Attr{}},
1423 EndElement{Name{"", "br"}},
1424 CharData("\n"),
1425 StartElement{Name{"", "br"}, []Attr{}},
1426 EndElement{Name{"", "br"}},
1427 StartElement{Name{"", "br"}, []Attr{}},
1428 EndElement{Name{"", "br"}},
1429 CharData("\n"),
1430 StartElement{Name{"", "br"}, []Attr{}},
1431 EndElement{Name{"", "br"}},
1432 StartElement{Name{"", "br"}, []Attr{}},
1433 EndElement{Name{"", "br"}},
1434 CharData("\n"),
1435 StartElement{Name{"", "br"}, []Attr{}},
1436 EndElement{Name{"", "br"}},
1437 CharData("\n"),
1438 StartElement{Name{"", "BR"}, []Attr{}},
1439 EndElement{Name{"", "BR"}},
1440 CharData("\n"),
1441 StartElement{Name{"", "BR"}, []Attr{}},
1442 EndElement{Name{"", "BR"}},
1443 StartElement{Name{"", "BR"}, []Attr{}},
1444 EndElement{Name{"", "BR"}},
1445 CharData("\n"),
1446 StartElement{Name{"", "Br"}, []Attr{}},
1447 EndElement{Name{"", "Br"}},
1448 CharData("\n"),
1449 StartElement{Name{"", "BR"}, []Attr{}},
1450 EndElement{Name{"", "BR"}},
1451 StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
1452 CharData("abc"),
1453 EndElement{Name{"", "span"}},
1454 StartElement{Name{"", "br"}, []Attr{}},
1455 EndElement{Name{"", "br"}},
1456 StartElement{Name{"", "br"}, []Attr{}},
1457 EndElement{Name{"", "br"}},
1458 }
1459
1460 d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
1461 d.Strict = false
1462 d.AutoClose = HTMLAutoClose
1463 d.Entity = HTMLEntity
1464 var haveTokens []Token
1465 for {
1466 tok, err := d.Token()
1467 if err != nil {
1468 if err == io.EOF {
1469 break
1470 }
1471 t.Fatalf("unexpected error: %v", err)
1472 }
1473 haveTokens = append(haveTokens, CopyToken(tok))
1474 }
1475 if len(haveTokens) != len(wantTokens) {
1476 t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
1477 }
1478 for i, want := range wantTokens {
1479 if i >= len(haveTokens) {
1480 t.Errorf("token[%d] expected %#v, have no token", i, want)
1481 } else {
1482 have := haveTokens[i]
1483 if !reflect.DeepEqual(have, want) {
1484 t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
1485 }
1486 }
1487 }
1488 }
1489
View as plain text