Source file src/encoding/xml/xml_test.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    17  type toks struct {
    18  	earlyEOF bool
    19  	t        []Token
    20  }
    21  
    22  func (t *toks) Token() (Token, error) {
    23  	if len(t.t) == 0 {
    24  		return nil, io.EOF
    25  	}
    26  	var tok Token
    27  	tok, t.t = t.t[0], t.t[1:]
    28  	if t.earlyEOF && len(t.t) == 0 {
    29  		return tok, io.EOF
    30  	}
    31  	return tok, nil
    32  }
    33  
    34  func TestDecodeEOF(t *testing.T) {
    35  	start := StartElement{Name: Name{Local: "test"}}
    36  	tests := []struct {
    37  		name   string
    38  		tokens []Token
    39  		ok     bool
    40  	}{
    41  		{
    42  			name: "OK",
    43  			tokens: []Token{
    44  				start,
    45  				start.End(),
    46  			},
    47  			ok: true,
    48  		},
    49  		{
    50  			name: "Malformed",
    51  			tokens: []Token{
    52  				start,
    53  				StartElement{Name: Name{Local: "bad"}},
    54  				start.End(),
    55  			},
    56  			ok: false,
    57  		},
    58  	}
    59  	for _, tc := range tests {
    60  		for _, eof := range []bool{true, false} {
    61  			name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
    62  			t.Run(name, func(t *testing.T) {
    63  				d := NewTokenDecoder(&toks{
    64  					earlyEOF: eof,
    65  					t:        tc.tokens,
    66  				})
    67  				err := d.Decode(&struct {
    68  					XMLName Name `xml:"test"`
    69  				}{})
    70  				if tc.ok && err != nil {
    71  					t.Fatalf("d.Decode: expected nil error, got %v", err)
    72  				}
    73  				if _, ok := err.(*SyntaxError); !tc.ok && !ok {
    74  					t.Errorf("d.Decode: expected syntax error, got %v", err)
    75  				}
    76  			})
    77  		}
    78  	}
    79  }
    80  
    81  type toksNil struct {
    82  	returnEOF bool
    83  	t         []Token
    84  }
    85  
    86  func (t *toksNil) Token() (Token, error) {
    87  	if len(t.t) == 0 {
    88  		if !t.returnEOF {
    89  			// Return nil, nil before returning an EOF. It's legal, but
    90  			// discouraged.
    91  			t.returnEOF = true
    92  			return nil, nil
    93  		}
    94  		return nil, io.EOF
    95  	}
    96  	var tok Token
    97  	tok, t.t = t.t[0], t.t[1:]
    98  	return tok, nil
    99  }
   100  
   101  func TestDecodeNilToken(t *testing.T) {
   102  	for _, strict := range []bool{true, false} {
   103  		name := fmt.Sprintf("Strict=%v", strict)
   104  		t.Run(name, func(t *testing.T) {
   105  			start := StartElement{Name: Name{Local: "test"}}
   106  			bad := StartElement{Name: Name{Local: "bad"}}
   107  			d := NewTokenDecoder(&toksNil{
   108  				// Malformed
   109  				t: []Token{start, bad, start.End()},
   110  			})
   111  			d.Strict = strict
   112  			err := d.Decode(&struct {
   113  				XMLName Name `xml:"test"`
   114  			}{})
   115  			if _, ok := err.(*SyntaxError); !ok {
   116  				t.Errorf("d.Decode: expected syntax error, got %v", err)
   117  			}
   118  		})
   119  	}
   120  }
   121  
// testInput is the sample document tokenized by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, namespace
// declarations, predefined, numeric and custom entity references (the custom
// ones are resolved through testEntity), a CDATA section and a trailing
// comment with no final newline. The "\r\n\t" spliced into the <body> start
// tag places CR, LF and TAB inside the tag's whitespace.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`
   138  
   139  var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
   140  
// rawTokens is the token sequence TestRawToken expects RawToken to produce
// for testInput (with Decoder.Entity set to testEntity). Element and
// attribute names keep their literal prefixes in Name.Space ("foo", "tag",
// "xmlns").
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
   177  
// cookedTokens is the token sequence TestToken expects Token to produce for
// testInput (with Decoder.Entity set to testEntity). Compared with rawTokens,
// element and prefixed-attribute names carry the namespace values declared in
// the document ("ns1", "ns2", "ns3") in Name.Space; the xmlns declaration
// attributes themselves are unchanged.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   214  
// testInputAltEncoding is a document whose XML declaration names the
// made-up encoding "x-testing-uppercase". It is used by
// TestRawTokenAltEncoding (with a CharsetReader installed) and by
// TestRawTokenAltEncodingNoConverter (without one).
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`
   218  
// rawTokensAltEncoding is the token sequence TestRawTokenAltEncoding expects
// for testInputAltEncoding. The element name and text appear in lower case
// because that test installs a CharsetReader that lower-cases every byte
// read after the declaration.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   227  
// xmlInput lists malformed documents. TestSyntax reads tokens from each one
// until an error occurs and requires that error to be a *SyntaxError.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   274  
   275  func TestRawToken(t *testing.T) {
   276  	d := NewDecoder(strings.NewReader(testInput))
   277  	d.Entity = testEntity
   278  	testRawToken(t, d, testInput, rawTokens)
   279  }
   280  
// nonStrictInput is the document tokenized by TestNonStrictRawToken with
// Decoder.Strict disabled. Each <tag> holds a malformed or unknown entity
// reference; nonStrictTokens records the text expected for each one.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`
   291  
// nonStrictTokens is the token sequence TestNonStrictRawToken expects for
// nonStrictInput. Every CharData between the tags equals the source text
// verbatim, i.e. the bad entity references are expected to pass through
// unchanged.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   327  
   328  func TestNonStrictRawToken(t *testing.T) {
   329  	d := NewDecoder(strings.NewReader(nonStrictInput))
   330  	d.Strict = false
   331  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
   332  }
   333  
   334  type downCaser struct {
   335  	t *testing.T
   336  	r io.ByteReader
   337  }
   338  
   339  func (d *downCaser) ReadByte() (c byte, err error) {
   340  	c, err = d.r.ReadByte()
   341  	if c >= 'A' && c <= 'Z' {
   342  		c += 'a' - 'A'
   343  	}
   344  	return
   345  }
   346  
   347  func (d *downCaser) Read(p []byte) (int, error) {
   348  	d.t.Fatalf("unexpected Read call on downCaser reader")
   349  	panic("unreachable")
   350  }
   351  
   352  func TestRawTokenAltEncoding(t *testing.T) {
   353  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   354  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   355  		if charset != "x-testing-uppercase" {
   356  			t.Fatalf("unexpected charset %q", charset)
   357  		}
   358  		return &downCaser{t, input.(io.ByteReader)}, nil
   359  	}
   360  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   361  }
   362  
   363  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   364  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   365  	token, err := d.RawToken()
   366  	if token == nil {
   367  		t.Fatalf("expected a token on first RawToken call")
   368  	}
   369  	if err != nil {
   370  		t.Fatal(err)
   371  	}
   372  	token, err = d.RawToken()
   373  	if token != nil {
   374  		t.Errorf("expected a nil token; got %#v", token)
   375  	}
   376  	if err == nil {
   377  		t.Fatalf("expected an error on second RawToken call")
   378  	}
   379  	const encoding = "x-testing-uppercase"
   380  	if !strings.Contains(err.Error(), encoding) {
   381  		t.Errorf("expected error to contain %q; got error: %v",
   382  			encoding, err)
   383  	}
   384  }
   385  
   386  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   387  	lastEnd := int64(0)
   388  	for i, want := range rawTokens {
   389  		start := d.InputOffset()
   390  		have, err := d.RawToken()
   391  		end := d.InputOffset()
   392  		if err != nil {
   393  			t.Fatalf("token %d: unexpected error: %s", i, err)
   394  		}
   395  		if !reflect.DeepEqual(have, want) {
   396  			var shave, swant string
   397  			if _, ok := have.(CharData); ok {
   398  				shave = fmt.Sprintf("CharData(%q)", have)
   399  			} else {
   400  				shave = fmt.Sprintf("%#v", have)
   401  			}
   402  			if _, ok := want.(CharData); ok {
   403  				swant = fmt.Sprintf("CharData(%q)", want)
   404  			} else {
   405  				swant = fmt.Sprintf("%#v", want)
   406  			}
   407  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   408  		}
   409  
   410  		// Check that InputOffset returned actual token.
   411  		switch {
   412  		case start < lastEnd:
   413  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   414  		case start >= end:
   415  			// Special case: EndElement can be synthesized.
   416  			if start == end && end == lastEnd {
   417  				break
   418  			}
   419  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   420  		case end > int64(len(raw)):
   421  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   422  		default:
   423  			text := raw[start:end]
   424  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   425  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   426  			}
   427  		}
   428  		lastEnd = end
   429  	}
   430  }
   431  
   432  // Ensure that directives (specifically !DOCTYPE) include the complete
   433  // text of any nested directives, noting that < and > do not change
   434  // nesting depth if they are in single or double quotes.
   435  
// nestedDirectivesInput holds DOCTYPE directives that each nest an ENTITY
// directive, with '<' and '>' appearing inside single- and double-quoted
// strings. TestNestedDirectives tokenizes it.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`
   445  
// nestedDirectivesTokens is the token sequence TestNestedDirectives expects
// for nestedDirectivesInput: each Directive carries the full text between
// the outer "<!" and ">", nested directive included.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   463  
   464  func TestNestedDirectives(t *testing.T) {
   465  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   466  
   467  	for i, want := range nestedDirectivesTokens {
   468  		have, err := d.Token()
   469  		if err != nil {
   470  			t.Fatalf("token %d: unexpected error: %s", i, err)
   471  		}
   472  		if !reflect.DeepEqual(have, want) {
   473  			t.Errorf("token %d = %#v want %#v", i, have, want)
   474  		}
   475  	}
   476  }
   477  
   478  func TestToken(t *testing.T) {
   479  	d := NewDecoder(strings.NewReader(testInput))
   480  	d.Entity = testEntity
   481  
   482  	for i, want := range cookedTokens {
   483  		have, err := d.Token()
   484  		if err != nil {
   485  			t.Fatalf("token %d: unexpected error: %s", i, err)
   486  		}
   487  		if !reflect.DeepEqual(have, want) {
   488  			t.Errorf("token %d = %#v want %#v", i, have, want)
   489  		}
   490  	}
   491  }
   492  
   493  func TestSyntax(t *testing.T) {
   494  	for i := range xmlInput {
   495  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   496  		var err error
   497  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   498  		}
   499  		if _, ok := err.(*SyntaxError); !ok {
   500  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   501  		}
   502  	}
   503  }
   504  
   505  func TestInputLinePos(t *testing.T) {
   506  	testInput := `<root>
   507  <?pi
   508   ?>  <elt
   509  att
   510  =
   511  "val">
   512  <![CDATA[
   513  ]]><!--
   514  
   515  --></elt>
   516  </root>`
   517  	linePos := [][]int{
   518  		{1, 7},
   519  		{2, 1},
   520  		{3, 4},
   521  		{3, 6},
   522  		{6, 7},
   523  		{7, 1},
   524  		{8, 4},
   525  		{10, 4},
   526  		{10, 10},
   527  		{11, 1},
   528  		{11, 8},
   529  	}
   530  	dec := NewDecoder(strings.NewReader(testInput))
   531  	for _, want := range linePos {
   532  		if _, err := dec.Token(); err != nil {
   533  			t.Errorf("Unexpected error: %v", err)
   534  			continue
   535  		}
   536  
   537  		gotLine, gotCol := dec.InputPos()
   538  		if gotLine != want[0] || gotCol != want[1] {
   539  			t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1])
   540  		}
   541  	}
   542  }
   543  
   544  type allScalars struct {
   545  	True1     bool
   546  	True2     bool
   547  	False1    bool
   548  	False2    bool
   549  	Int       int
   550  	Int8      int8
   551  	Int16     int16
   552  	Int32     int32
   553  	Int64     int64
   554  	Uint      int
   555  	Uint8     uint8
   556  	Uint16    uint16
   557  	Uint32    uint32
   558  	Uint64    uint64
   559  	Uintptr   uintptr
   560  	Float32   float32
   561  	Float64   float64
   562  	String    string
   563  	PtrString *string
   564  }
   565  
// all is the value TestAllScalars expects after unmarshaling
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen is the string that all.PtrString points to.
var sixteen = "16"
   589  
// testScalarsInput is the document TestAllScalars unmarshals into an
// allScalars value. It has one child element per allScalars field, plus a
// <Float> element for which allScalars declares no field.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   612  
   613  func TestAllScalars(t *testing.T) {
   614  	var a allScalars
   615  	err := Unmarshal([]byte(testScalarsInput), &a)
   616  
   617  	if err != nil {
   618  		t.Fatal(err)
   619  	}
   620  	if !reflect.DeepEqual(a, all) {
   621  		t.Errorf("have %+v want %+v", a, all)
   622  	}
   623  }
   624  
// item is the Unmarshal target used by TestIssue569; FieldA receives the
// text of the <FieldA> child element.
type item struct {
	FieldA string
}
   628  
   629  func TestIssue68387(t *testing.T) {
   630  	data := `<item b=']]>'/>`
   631  	dec := NewDecoder(strings.NewReader(data))
   632  	var tok1, tok2, tok3 Token
   633  	var err error
   634  	if tok1, err = dec.RawToken(); err != nil {
   635  		t.Fatalf("RawToken() failed: %v", err)
   636  	}
   637  	if tok2, err = dec.RawToken(); err != nil {
   638  		t.Fatalf("RawToken() failed: %v", err)
   639  	}
   640  	if tok3, err = dec.RawToken(); err != io.EOF || tok3 != nil {
   641  		t.Fatalf("Missed EOF")
   642  	}
   643  	s := StartElement{Name{"", "item"}, []Attr{Attr{Name{"","b"}, "]]>"}}}
   644  	if !reflect.DeepEqual(tok1.(StartElement), s) {
   645  		t.Error("Wrong start element")
   646  	}
   647  	e := EndElement{Name{"","item"}}
   648  	if tok2.(EndElement) != e {
   649  		t.Error("Wrong end element")
   650  	}
   651  }
   652  
   653  func TestIssue569(t *testing.T) {
   654  	data := `<item><FieldA>abcd</FieldA></item>`
   655  	var i item
   656  	err := Unmarshal([]byte(data), &i)
   657  
   658  	if err != nil || i.FieldA != "abcd" {
   659  		t.Fatal("Expecting abcd")
   660  	}
   661  }
   662  
   663  func TestUnquotedAttrs(t *testing.T) {
   664  	data := "<tag attr=azAZ09:-_\t>"
   665  	d := NewDecoder(strings.NewReader(data))
   666  	d.Strict = false
   667  	token, err := d.Token()
   668  	if _, ok := err.(*SyntaxError); ok {
   669  		t.Errorf("Unexpected error: %v", err)
   670  	}
   671  	if token.(StartElement).Name.Local != "tag" {
   672  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   673  	}
   674  	attr := token.(StartElement).Attr[0]
   675  	if attr.Value != "azAZ09:-_" {
   676  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   677  	}
   678  	if attr.Name.Local != "attr" {
   679  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   680  	}
   681  }
   682  
   683  func TestValuelessAttrs(t *testing.T) {
   684  	tests := [][3]string{
   685  		{"<p nowrap>", "p", "nowrap"},
   686  		{"<p nowrap >", "p", "nowrap"},
   687  		{"<input checked/>", "input", "checked"},
   688  		{"<input checked />", "input", "checked"},
   689  	}
   690  	for _, test := range tests {
   691  		d := NewDecoder(strings.NewReader(test[0]))
   692  		d.Strict = false
   693  		token, err := d.Token()
   694  		if _, ok := err.(*SyntaxError); ok {
   695  			t.Errorf("Unexpected error: %v", err)
   696  		}
   697  		if token.(StartElement).Name.Local != test[1] {
   698  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   699  		}
   700  		attr := token.(StartElement).Attr[0]
   701  		if attr.Value != test[2] {
   702  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   703  		}
   704  		if attr.Name.Local != test[2] {
   705  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   706  		}
   707  	}
   708  }
   709  
   710  func TestCopyTokenCharData(t *testing.T) {
   711  	data := []byte("same data")
   712  	var tok1 Token = CharData(data)
   713  	tok2 := CopyToken(tok1)
   714  	if !reflect.DeepEqual(tok1, tok2) {
   715  		t.Error("CopyToken(CharData) != CharData")
   716  	}
   717  	data[1] = 'o'
   718  	if reflect.DeepEqual(tok1, tok2) {
   719  		t.Error("CopyToken(CharData) uses same buffer.")
   720  	}
   721  }
   722  
   723  func TestCopyTokenStartElement(t *testing.T) {
   724  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   725  	var tok1 Token = elt
   726  	tok2 := CopyToken(tok1)
   727  	if tok1.(StartElement).Attr[0].Value != "en" {
   728  		t.Error("CopyToken overwrote Attr[0]")
   729  	}
   730  	if !reflect.DeepEqual(tok1, tok2) {
   731  		t.Error("CopyToken(StartElement) != StartElement")
   732  	}
   733  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   734  	if reflect.DeepEqual(tok1, tok2) {
   735  		t.Error("CopyToken(CharData) uses same buffer.")
   736  	}
   737  }
   738  
   739  func TestCopyTokenComment(t *testing.T) {
   740  	data := []byte("<!-- some comment -->")
   741  	var tok1 Token = Comment(data)
   742  	tok2 := CopyToken(tok1)
   743  	if !reflect.DeepEqual(tok1, tok2) {
   744  		t.Error("CopyToken(Comment) != Comment")
   745  	}
   746  	data[1] = 'o'
   747  	if reflect.DeepEqual(tok1, tok2) {
   748  		t.Error("CopyToken(Comment) uses same buffer.")
   749  	}
   750  }
   751  
   752  func TestSyntaxErrorLineNum(t *testing.T) {
   753  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   754  	d := NewDecoder(strings.NewReader(testInput))
   755  	var err error
   756  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   757  	}
   758  	synerr, ok := err.(*SyntaxError)
   759  	if !ok {
   760  		t.Error("Expected SyntaxError.")
   761  	}
   762  	if synerr.Line != 3 {
   763  		t.Error("SyntaxError didn't have correct line number.")
   764  	}
   765  }
   766  
   767  func TestTrailingRawToken(t *testing.T) {
   768  	input := `<FOO></FOO>  `
   769  	d := NewDecoder(strings.NewReader(input))
   770  	var err error
   771  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   772  	}
   773  	if err != io.EOF {
   774  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   775  	}
   776  }
   777  
   778  func TestTrailingToken(t *testing.T) {
   779  	input := `<FOO></FOO>  `
   780  	d := NewDecoder(strings.NewReader(input))
   781  	var err error
   782  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   783  	}
   784  	if err != io.EOF {
   785  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   786  	}
   787  }
   788  
   789  func TestEntityInsideCDATA(t *testing.T) {
   790  	input := `<test><![CDATA[ &val=foo ]]></test>`
   791  	d := NewDecoder(strings.NewReader(input))
   792  	var err error
   793  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   794  	}
   795  	if err != io.EOF {
   796  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   797  	}
   798  }
   799  
// characterTests drives TestDisallowedCharacters. Each entry pairs an input
// document (in) with the exact SyntaxError.Msg (err) that reading it is
// expected to produce.
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   814  
   815  func TestDisallowedCharacters(t *testing.T) {
   816  
   817  	for i, tt := range characterTests {
   818  		d := NewDecoder(strings.NewReader(tt.in))
   819  		var err error
   820  
   821  		for err == nil {
   822  			_, err = d.Token()
   823  		}
   824  		synerr, ok := err.(*SyntaxError)
   825  		if !ok {
   826  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   827  		}
   828  		if synerr.Msg != tt.err {
   829  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   830  		}
   831  	}
   832  }
   833  
   834  func TestIsInCharacterRange(t *testing.T) {
   835  	invalid := []rune{
   836  		utf8.MaxRune + 1,
   837  		0xD800, // surrogate min
   838  		0xDFFF, // surrogate max
   839  		-1,
   840  	}
   841  	for _, r := range invalid {
   842  		if isInCharacterRange(r) {
   843  			t.Errorf("rune %U considered valid", r)
   844  		}
   845  	}
   846  }
   847  
// procInstTests drives TestProcInstEncoding. input is the text handed to
// procInst; expect[0] is the value expected for the "version" parameter and
// expect[1] the value expected for "encoding".
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
	{`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version= encoding=`, [2]string{"", ""}},
	{`encoding="version=1.0"`, [2]string{"", "version=1.0"}},
	{``, [2]string{"", ""}},
	// TODO: what's the right approach to handle these nested cases?
	{`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}},
	{`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}},
}
   865  
   866  func TestProcInstEncoding(t *testing.T) {
   867  	for _, test := range procInstTests {
   868  		if got := procInst("version", test.input); got != test.expect[0] {
   869  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   870  		}
   871  		if got := procInst("encoding", test.input); got != test.expect[1] {
   872  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   873  		}
   874  	}
   875  }
   876  
   877  // Ensure that directives with comments include the complete
   878  // text of any nested directives.
   879  
// directivesWithCommentsInput holds DOCTYPE directives with comments placed
// inside them, including comment-like fragments such as "<!->" and "<!>".
// TestDirectivesWithComments tokenizes it.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`
   885  
// directivesWithCommentsTokens is the token sequence
// TestDirectivesWithComments expects for directivesWithCommentsInput. In the
// expected Directive text each complete comment from the input appears as a
// single space, while "<!->" and "<!>" are kept verbatim.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>       [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
}
   895  
   896  func TestDirectivesWithComments(t *testing.T) {
   897  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   898  
   899  	for i, want := range directivesWithCommentsTokens {
   900  		have, err := d.Token()
   901  		if err != nil {
   902  			t.Fatalf("token %d: unexpected error: %s", i, err)
   903  		}
   904  		if !reflect.DeepEqual(have, want) {
   905  			t.Errorf("token %d = %#v want %#v", i, have, want)
   906  		}
   907  	}
   908  }
   909  
   910  // Writer whose Write method always returns an error.
   911  type errWriter struct{}
   912  
   913  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   914  
   915  func TestEscapeTextIOErrors(t *testing.T) {
   916  	expectErr := "unwritable"
   917  	err := EscapeText(errWriter{}, []byte{'A'})
   918  
   919  	if err == nil || err.Error() != expectErr {
   920  		t.Errorf("have %v, want %v", err, expectErr)
   921  	}
   922  }
   923  
   924  func TestEscapeTextInvalidChar(t *testing.T) {
   925  	input := []byte("A \x00 terminated string.")
   926  	expected := "A \uFFFD terminated string."
   927  
   928  	buff := new(strings.Builder)
   929  	if err := EscapeText(buff, input); err != nil {
   930  		t.Fatalf("have %v, want nil", err)
   931  	}
   932  	text := buff.String()
   933  
   934  	if text != expected {
   935  		t.Errorf("have %v, want %v", text, expected)
   936  	}
   937  }
   938  
   939  func TestIssue5880(t *testing.T) {
   940  	type T []byte
   941  	data, err := Marshal(T{192, 168, 0, 1})
   942  	if err != nil {
   943  		t.Errorf("Marshal error: %v", err)
   944  	}
   945  	if !utf8.Valid(data) {
   946  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   947  	}
   948  }
   949  
   950  func TestIssue8535(t *testing.T) {
   951  
   952  	type ExampleConflict struct {
   953  		XMLName  Name   `xml:"example"`
   954  		Link     string `xml:"link"`
   955  		AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space
   956  	}
   957  	testCase := `<example>
   958  			<title>Example</title>
   959  			<link>http://example.com/default</link> <!-- not assigned -->
   960  			<link>http://example.com/home</link> <!-- not assigned -->
   961  			<ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link>
   962  		</example>`
   963  
   964  	var dest ExampleConflict
   965  	d := NewDecoder(strings.NewReader(testCase))
   966  	if err := d.Decode(&dest); err != nil {
   967  		t.Fatal(err)
   968  	}
   969  }
   970  
   971  func TestEncodeXMLNS(t *testing.T) {
   972  	testCases := []struct {
   973  		f    func() ([]byte, error)
   974  		want string
   975  		ok   bool
   976  	}{
   977  		{encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
   978  		{encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true},
   979  		{encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
   980  		{encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false},
   981  	}
   982  
   983  	for i, tc := range testCases {
   984  		if b, err := tc.f(); err == nil {
   985  			if got, want := string(b), tc.want; got != want {
   986  				t.Errorf("%d: got %s, want %s \n", i, got, want)
   987  			}
   988  		} else {
   989  			t.Errorf("%d: marshal failed with %s", i, err)
   990  		}
   991  	}
   992  }
   993  
   994  func encodeXMLNS1() ([]byte, error) {
   995  
   996  	type T struct {
   997  		XMLName Name   `xml:"Test"`
   998  		Ns      string `xml:"xmlns,attr"`
   999  		Body    string
  1000  	}
  1001  
  1002  	s := &T{Ns: "http://example.com/ns", Body: "hello world"}
  1003  	return Marshal(s)
  1004  }
  1005  
  1006  func encodeXMLNS2() ([]byte, error) {
  1007  
  1008  	type Test struct {
  1009  		Body string `xml:"http://example.com/ns body"`
  1010  	}
  1011  
  1012  	s := &Test{Body: "hello world"}
  1013  	return Marshal(s)
  1014  }
  1015  
  1016  func encodeXMLNS3() ([]byte, error) {
  1017  
  1018  	type Test struct {
  1019  		XMLName Name `xml:"http://example.com/ns Test"`
  1020  		Body    string
  1021  	}
  1022  
  1023  	//s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing
  1024  	// as documentation states
  1025  	s := &Test{Body: "hello world"}
  1026  	return Marshal(s)
  1027  }
  1028  
  1029  func encodeXMLNS4() ([]byte, error) {
  1030  
  1031  	type Test struct {
  1032  		Ns   string `xml:"xmlns,attr"`
  1033  		Body string
  1034  	}
  1035  
  1036  	s := &Test{Ns: "http://example.com/ns", Body: "hello world"}
  1037  	return Marshal(s)
  1038  }
  1039  
  1040  func TestIssue11405(t *testing.T) {
  1041  	testCases := []string{
  1042  		"<root>",
  1043  		"<root><foo>",
  1044  		"<root><foo></foo>",
  1045  	}
  1046  	for _, tc := range testCases {
  1047  		d := NewDecoder(strings.NewReader(tc))
  1048  		var err error
  1049  		for {
  1050  			_, err = d.Token()
  1051  			if err != nil {
  1052  				break
  1053  			}
  1054  		}
  1055  		if _, ok := err.(*SyntaxError); !ok {
  1056  			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
  1057  		}
  1058  	}
  1059  }
  1060  
  1061  func TestIssue12417(t *testing.T) {
  1062  	testCases := []struct {
  1063  		s  string
  1064  		ok bool
  1065  	}{
  1066  		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
  1067  		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
  1068  		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
  1069  		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
  1070  	}
  1071  	for _, tc := range testCases {
  1072  		d := NewDecoder(strings.NewReader(tc.s))
  1073  		var err error
  1074  		for {
  1075  			_, err = d.Token()
  1076  			if err != nil {
  1077  				if err == io.EOF {
  1078  					err = nil
  1079  				}
  1080  				break
  1081  			}
  1082  		}
  1083  		if err != nil && tc.ok {
  1084  			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
  1085  			continue
  1086  		}
  1087  		if err == nil && !tc.ok {
  1088  			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
  1089  		}
  1090  	}
  1091  }
  1092  
  1093  func TestIssue7113(t *testing.T) {
  1094  	type C struct {
  1095  		XMLName Name `xml:""` // Sets empty namespace
  1096  	}
  1097  
  1098  	type D struct {
  1099  		XMLName Name `xml:"d"`
  1100  	}
  1101  
  1102  	type A struct {
  1103  		XMLName Name `xml:""`
  1104  		C       C    `xml:""`
  1105  		D       D
  1106  	}
  1107  
  1108  	var a A
  1109  	structSpace := "b"
  1110  	xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>`
  1111  	t.Log(xmlTest)
  1112  	err := Unmarshal([]byte(xmlTest), &a)
  1113  	if err != nil {
  1114  		t.Fatal(err)
  1115  	}
  1116  
  1117  	if a.XMLName.Space != structSpace {
  1118  		t.Errorf("overidding with empty namespace: unmarshaling, got %s, want %s\n", a.XMLName.Space, structSpace)
  1119  	}
  1120  	if len(a.C.XMLName.Space) != 0 {
  1121  		t.Fatalf("overidding with empty namespace: unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
  1122  	}
  1123  
  1124  	var b []byte
  1125  	b, err = Marshal(&a)
  1126  	if err != nil {
  1127  		t.Fatal(err)
  1128  	}
  1129  	if len(a.C.XMLName.Space) != 0 {
  1130  		t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
  1131  	}
  1132  	if string(b) != xmlTest {
  1133  		t.Fatalf("overidding with empty namespace: marshaling, got %s, want %s\n", b, xmlTest)
  1134  	}
  1135  	var c A
  1136  	err = Unmarshal(b, &c)
  1137  	if err != nil {
  1138  		t.Fatalf("second Unmarshal failed: %s", err)
  1139  	}
  1140  	if c.XMLName.Space != "b" {
  1141  		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
  1142  	}
  1143  	if len(c.C.XMLName.Space) != 0 {
  1144  		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
  1145  	}
  1146  }
  1147  
  1148  func TestIssue20396(t *testing.T) {
  1149  
  1150  	var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")
  1151  
  1152  	testCases := []struct {
  1153  		s       string
  1154  		wantErr error
  1155  	}{
  1156  		{`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
  1157  			UnmarshalError("XML syntax error on line 1: expected element name after <")},
  1158  		{`<a:te=st xmlns:a="abcd"/>`, attrError},
  1159  		{`<a:te&st xmlns:a="abcd"/>`, attrError},
  1160  		{`<a:test xmlns:a="abcd"/>`, nil},
  1161  		{`<a:te:st xmlns:a="abcd">1</a:te:st>`,
  1162  			UnmarshalError("XML syntax error on line 1: expected element name after <")},
  1163  		{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
  1164  		{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
  1165  		{`<a:test xmlns:a="abcd">1</a:test>`, nil},
  1166  	}
  1167  
  1168  	var dest string
  1169  	for _, tc := range testCases {
  1170  		if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
  1171  			if got == nil {
  1172  				t.Errorf("%s: Unexpected success, want %v", tc.s, want)
  1173  			} else if want == nil {
  1174  				t.Errorf("%s: Unexpected error, got %v", tc.s, got)
  1175  			} else if got.Error() != want.Error() {
  1176  				t.Errorf("%s: got %v, want %v", tc.s, got, want)
  1177  			}
  1178  		}
  1179  	}
  1180  }
  1181  
  1182  func TestIssue20685(t *testing.T) {
  1183  	testCases := []struct {
  1184  		s  string
  1185  		ok bool
  1186  	}{
  1187  		{`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
  1188  		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
  1189  		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
  1190  		{`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
  1191  		{`<x:book xmlns:x="abcd">one</y:book>`, false},
  1192  		{`<x:book>one</y:book>`, false},
  1193  		{`<xbook>one</ybook>`, false},
  1194  	}
  1195  	for _, tc := range testCases {
  1196  		d := NewDecoder(strings.NewReader(tc.s))
  1197  		var err error
  1198  		for {
  1199  			_, err = d.Token()
  1200  			if err != nil {
  1201  				if err == io.EOF {
  1202  					err = nil
  1203  				}
  1204  				break
  1205  			}
  1206  		}
  1207  		if err != nil && tc.ok {
  1208  			t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
  1209  			continue
  1210  		}
  1211  		if err == nil && !tc.ok {
  1212  			t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
  1213  		}
  1214  	}
  1215  }
  1216  
  1217  func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
  1218  	return func(src TokenReader) TokenReader {
  1219  		return mapper{
  1220  			t: src,
  1221  			f: mapping,
  1222  		}
  1223  	}
  1224  }
  1225  
  1226  type mapper struct {
  1227  	t TokenReader
  1228  	f func(Token) Token
  1229  }
  1230  
  1231  func (m mapper) Token() (Token, error) {
  1232  	tok, err := m.t.Token()
  1233  	if err != nil {
  1234  		return nil, err
  1235  	}
  1236  	return m.f(tok), nil
  1237  }
  1238  
  1239  func TestNewTokenDecoderIdempotent(t *testing.T) {
  1240  	d := NewDecoder(strings.NewReader(`<br>`))
  1241  	d2 := NewTokenDecoder(d)
  1242  	if d != d2 {
  1243  		t.Error("NewTokenDecoder did not detect underlying Decoder")
  1244  	}
  1245  }
  1246  
  1247  func TestWrapDecoder(t *testing.T) {
  1248  	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
  1249  	m := tokenMap(func(t Token) Token {
  1250  		switch tok := t.(type) {
  1251  		case StartElement:
  1252  			if tok.Name.Local == "quote" {
  1253  				tok.Name.Local = "blocking"
  1254  				return tok
  1255  			}
  1256  		case EndElement:
  1257  			if tok.Name.Local == "quote" {
  1258  				tok.Name.Local = "blocking"
  1259  				return tok
  1260  			}
  1261  		}
  1262  		return t
  1263  	})
  1264  
  1265  	d = NewTokenDecoder(m(d))
  1266  
  1267  	o := struct {
  1268  		XMLName  Name   `xml:"blocking"`
  1269  		Chardata string `xml:",chardata"`
  1270  	}{}
  1271  
  1272  	if err := d.Decode(&o); err != nil {
  1273  		t.Fatal("Got unexpected error while decoding:", err)
  1274  	}
  1275  
  1276  	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
  1277  		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
  1278  	}
  1279  }
  1280  
  1281  type tokReader struct{}
  1282  
  1283  func (tokReader) Token() (Token, error) {
  1284  	return StartElement{}, nil
  1285  }
  1286  
  1287  type Failure struct{}
  1288  
  1289  func (Failure) UnmarshalXML(*Decoder, StartElement) error {
  1290  	return nil
  1291  }
  1292  
  1293  func TestTokenUnmarshaler(t *testing.T) {
  1294  	defer func() {
  1295  		if r := recover(); r != nil {
  1296  			t.Error("Unexpected panic using custom token unmarshaler")
  1297  		}
  1298  	}()
  1299  
  1300  	d := NewTokenDecoder(tokReader{})
  1301  	d.Decode(&Failure{})
  1302  }
  1303  
  1304  func testRoundTrip(t *testing.T, input string) {
  1305  	d := NewDecoder(strings.NewReader(input))
  1306  	var tokens []Token
  1307  	var buf bytes.Buffer
  1308  	e := NewEncoder(&buf)
  1309  	for {
  1310  		tok, err := d.Token()
  1311  		if err == io.EOF {
  1312  			break
  1313  		}
  1314  		if err != nil {
  1315  			t.Fatalf("invalid input: %v", err)
  1316  		}
  1317  		if err := e.EncodeToken(tok); err != nil {
  1318  			t.Fatalf("failed to re-encode input: %v", err)
  1319  		}
  1320  		tokens = append(tokens, CopyToken(tok))
  1321  	}
  1322  	if err := e.Flush(); err != nil {
  1323  		t.Fatal(err)
  1324  	}
  1325  
  1326  	d = NewDecoder(&buf)
  1327  	for {
  1328  		tok, err := d.Token()
  1329  		if err == io.EOF {
  1330  			break
  1331  		}
  1332  		if err != nil {
  1333  			t.Fatalf("failed to decode output: %v", err)
  1334  		}
  1335  		if len(tokens) == 0 {
  1336  			t.Fatalf("unexpected token: %#v", tok)
  1337  		}
  1338  		a, b := tokens[0], tok
  1339  		if !reflect.DeepEqual(a, b) {
  1340  			t.Fatalf("token mismatch: %#v vs %#v", a, b)
  1341  		}
  1342  		tokens = tokens[1:]
  1343  	}
  1344  	if len(tokens) > 0 {
  1345  		t.Fatalf("lost tokens: %#v", tokens)
  1346  	}
  1347  }
  1348  
  1349  func TestRoundTrip(t *testing.T) {
  1350  	tests := map[string]string{
  1351  		"trailing colon":         `<foo abc:="x"></foo>`,
  1352  		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
  1353  	}
  1354  	for name, input := range tests {
  1355  		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
  1356  	}
  1357  }
  1358  
  1359  func TestParseErrors(t *testing.T) {
  1360  	withDefaultHeader := func(s string) string {
  1361  		return `<?xml version="1.0" encoding="UTF-8"?>` + s
  1362  	}
  1363  	tests := []struct {
  1364  		src string
  1365  		err string
  1366  	}{
  1367  		{withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
  1368  		{withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
  1369  		{withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
  1370  		{withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
  1371  		{withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
  1372  		{withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
  1373  		{withDefaultHeader(`<zzz:foo xmlns:zzz="http://example.com"><bar>baz</bar></foo>`),
  1374  			`element <foo> in space zzz closed by </foo> in space ""`},
  1375  		{withDefaultHeader("\xf1"), `invalid UTF-8`},
  1376  
  1377  		// Header-related errors.
  1378  		{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
  1379  
  1380  		// Cases below are for "no errors".
  1381  		{withDefaultHeader(`<?ok?>`), ``},
  1382  		{withDefaultHeader(`<?ok version="ok"?>`), ``},
  1383  	}
  1384  
  1385  	for _, test := range tests {
  1386  		d := NewDecoder(strings.NewReader(test.src))
  1387  		var err error
  1388  		for {
  1389  			_, err = d.Token()
  1390  			if err != nil {
  1391  				break
  1392  			}
  1393  		}
  1394  		if test.err == "" {
  1395  			if err != io.EOF {
  1396  				t.Errorf("parse %s: have %q error, expected none", test.src, err)
  1397  			}
  1398  			continue
  1399  		}
  1400  		// Inv: err != nil
  1401  		if err == io.EOF {
  1402  			t.Errorf("parse %s: unexpected EOF", test.src)
  1403  			continue
  1404  		}
  1405  		if !strings.Contains(err.Error(), test.err) {
  1406  			t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err)
  1407  			continue
  1408  		}
  1409  	}
  1410  }
  1411  
// testInputHTMLAutoClose is the input document shared by
// BenchmarkHTMLAutoClose and TestHTMLAutoClose. After the XML declaration it
// contains <br> elements spelled in several letter cases ("br", "BR", "Br"),
// some left unclosed, some self-closed and some explicitly closed, plus one
// <span> element with an attribute and character data.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
<br>
<br/><br/>
<br><br>
<br></br>
<BR>
<BR/><BR/>
<Br></Br>
<BR><span id="test">abc</span><br/><br/>`
  1421  
  1422  func BenchmarkHTMLAutoClose(b *testing.B) {
  1423  	b.RunParallel(func(p *testing.PB) {
  1424  		for p.Next() {
  1425  			d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
  1426  			d.Strict = false
  1427  			d.AutoClose = HTMLAutoClose
  1428  			d.Entity = HTMLEntity
  1429  			for {
  1430  				_, err := d.Token()
  1431  				if err != nil {
  1432  					if err == io.EOF {
  1433  						break
  1434  					}
  1435  					b.Fatalf("unexpected error: %v", err)
  1436  				}
  1437  			}
  1438  		}
  1439  	})
  1440  }
  1441  
  1442  func TestHTMLAutoClose(t *testing.T) {
  1443  	wantTokens := []Token{
  1444  		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
  1445  		CharData("\n"),
  1446  		StartElement{Name{"", "br"}, []Attr{}},
  1447  		EndElement{Name{"", "br"}},
  1448  		CharData("\n"),
  1449  		StartElement{Name{"", "br"}, []Attr{}},
  1450  		EndElement{Name{"", "br"}},
  1451  		StartElement{Name{"", "br"}, []Attr{}},
  1452  		EndElement{Name{"", "br"}},
  1453  		CharData("\n"),
  1454  		StartElement{Name{"", "br"}, []Attr{}},
  1455  		EndElement{Name{"", "br"}},
  1456  		StartElement{Name{"", "br"}, []Attr{}},
  1457  		EndElement{Name{"", "br"}},
  1458  		CharData("\n"),
  1459  		StartElement{Name{"", "br"}, []Attr{}},
  1460  		EndElement{Name{"", "br"}},
  1461  		CharData("\n"),
  1462  		StartElement{Name{"", "BR"}, []Attr{}},
  1463  		EndElement{Name{"", "BR"}},
  1464  		CharData("\n"),
  1465  		StartElement{Name{"", "BR"}, []Attr{}},
  1466  		EndElement{Name{"", "BR"}},
  1467  		StartElement{Name{"", "BR"}, []Attr{}},
  1468  		EndElement{Name{"", "BR"}},
  1469  		CharData("\n"),
  1470  		StartElement{Name{"", "Br"}, []Attr{}},
  1471  		EndElement{Name{"", "Br"}},
  1472  		CharData("\n"),
  1473  		StartElement{Name{"", "BR"}, []Attr{}},
  1474  		EndElement{Name{"", "BR"}},
  1475  		StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
  1476  		CharData("abc"),
  1477  		EndElement{Name{"", "span"}},
  1478  		StartElement{Name{"", "br"}, []Attr{}},
  1479  		EndElement{Name{"", "br"}},
  1480  		StartElement{Name{"", "br"}, []Attr{}},
  1481  		EndElement{Name{"", "br"}},
  1482  	}
  1483  
  1484  	d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
  1485  	d.Strict = false
  1486  	d.AutoClose = HTMLAutoClose
  1487  	d.Entity = HTMLEntity
  1488  	var haveTokens []Token
  1489  	for {
  1490  		tok, err := d.Token()
  1491  		if err != nil {
  1492  			if err == io.EOF {
  1493  				break
  1494  			}
  1495  			t.Fatalf("unexpected error: %v", err)
  1496  		}
  1497  		haveTokens = append(haveTokens, CopyToken(tok))
  1498  	}
  1499  	if len(haveTokens) != len(wantTokens) {
  1500  		t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
  1501  	}
  1502  	for i, want := range wantTokens {
  1503  		if i >= len(haveTokens) {
  1504  			t.Errorf("token[%d] expected %#v, have no token", i, want)
  1505  		} else {
  1506  			have := haveTokens[i]
  1507  			if !reflect.DeepEqual(have, want) {
  1508  				t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
  1509  			}
  1510  		}
  1511  	}
  1512  }
  1513  

View as plain text