Source file src/encoding/xml/xml_test.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package xml
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"strings"
    13  	"testing"
    14  	"unicode/utf8"
    15  )
    16  
    17  type toks struct {
    18  	earlyEOF bool
    19  	t        []Token
    20  }
    21  
    22  func (t *toks) Token() (Token, error) {
    23  	if len(t.t) == 0 {
    24  		return nil, io.EOF
    25  	}
    26  	var tok Token
    27  	tok, t.t = t.t[0], t.t[1:]
    28  	if t.earlyEOF && len(t.t) == 0 {
    29  		return tok, io.EOF
    30  	}
    31  	return tok, nil
    32  }
    33  
    34  func TestDecodeEOF(t *testing.T) {
    35  	start := StartElement{Name: Name{Local: "test"}}
    36  	tests := []struct {
    37  		name   string
    38  		tokens []Token
    39  		ok     bool
    40  	}{
    41  		{
    42  			name: "OK",
    43  			tokens: []Token{
    44  				start,
    45  				start.End(),
    46  			},
    47  			ok: true,
    48  		},
    49  		{
    50  			name: "Malformed",
    51  			tokens: []Token{
    52  				start,
    53  				StartElement{Name: Name{Local: "bad"}},
    54  				start.End(),
    55  			},
    56  			ok: false,
    57  		},
    58  	}
    59  	for _, tc := range tests {
    60  		for _, eof := range []bool{true, false} {
    61  			name := fmt.Sprintf("%s/earlyEOF=%v", tc.name, eof)
    62  			t.Run(name, func(t *testing.T) {
    63  				d := NewTokenDecoder(&toks{
    64  					earlyEOF: eof,
    65  					t:        tc.tokens,
    66  				})
    67  				err := d.Decode(&struct {
    68  					XMLName Name `xml:"test"`
    69  				}{})
    70  				if tc.ok && err != nil {
    71  					t.Fatalf("d.Decode: expected nil error, got %v", err)
    72  				}
    73  				if _, ok := err.(*SyntaxError); !tc.ok && !ok {
    74  					t.Errorf("d.Decode: expected syntax error, got %v", err)
    75  				}
    76  			})
    77  		}
    78  	}
    79  }
    80  
    81  type toksNil struct {
    82  	returnEOF bool
    83  	t         []Token
    84  }
    85  
    86  func (t *toksNil) Token() (Token, error) {
    87  	if len(t.t) == 0 {
    88  		if !t.returnEOF {
    89  			// Return nil, nil before returning an EOF. It's legal, but
    90  			// discouraged.
    91  			t.returnEOF = true
    92  			return nil, nil
    93  		}
    94  		return nil, io.EOF
    95  	}
    96  	var tok Token
    97  	tok, t.t = t.t[0], t.t[1:]
    98  	return tok, nil
    99  }
   100  
   101  func TestDecodeNilToken(t *testing.T) {
   102  	for _, strict := range []bool{true, false} {
   103  		name := fmt.Sprintf("Strict=%v", strict)
   104  		t.Run(name, func(t *testing.T) {
   105  			start := StartElement{Name: Name{Local: "test"}}
   106  			bad := StartElement{Name: Name{Local: "bad"}}
   107  			d := NewTokenDecoder(&toksNil{
   108  				// Malformed
   109  				t: []Token{start, bad, start.End()},
   110  			})
   111  			d.Strict = strict
   112  			err := d.Decode(&struct {
   113  				XMLName Name `xml:"test"`
   114  			}{})
   115  			if _, ok := err.(*SyntaxError); !ok {
   116  				t.Errorf("d.Decode: expected syntax error, got %v", err)
   117  			}
   118  		})
   119  	}
   120  }
   121  
// testInput is the XML document tokenized by TestRawToken and TestToken.
// It contains a processing instruction, a DOCTYPE directive, namespace
// declarations, predefined and numeric character references, two custom
// entities (see testEntity), a CDATA section, and a trailing comment with
// no final newline.
const testInput = `
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<body xmlns:foo="ns1" xmlns="ns2" xmlns:tag="ns3" ` +
	"\r\n\t" + `  >
  <hello lang="en">World &lt;&gt;&apos;&quot; &#x767d;&#40300;翔</hello>
  <query>&何; &is-it;</query>
  <goodbye />
  <outer foo:attr="value" xmlns:tag="ns4">
    <inner/>
  </outer>
  <tag:name>
    <![CDATA[Some text here.]]>
  </tag:name>
</body><!-- missing final newline -->`

// testEntity is the entity map assigned to Decoder.Entity by the tests
// that read testInput; it supplies replacements for the two custom
// entities referenced in the <query> element.
var testEntity = map[string]string{"何": "What", "is-it": "is it?"}
   140  
// rawTokens is the token sequence TestRawToken expects RawToken to
// produce for testInput. Element and attribute names keep the prefix
// written in the document (e.g. "tag", "foo") in their Space field.
var rawTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"", "hello"}},
	CharData("\n  "),
	StartElement{Name{"", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"", "query"}},
	CharData("\n  "),
	StartElement{Name{"", "goodbye"}, []Attr{}},
	EndElement{Name{"", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"", "outer"}, []Attr{{Name{"foo", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"", "inner"}, []Attr{}},
	EndElement{Name{"", "inner"}},
	CharData("\n  "),
	EndElement{Name{"", "outer"}},
	CharData("\n  "),
	StartElement{Name{"tag", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"tag", "name"}},
	CharData("\n"),
	EndElement{Name{"", "body"}},
	Comment(" missing final newline "),
}
   177  
// cookedTokens is the token sequence TestToken expects Token to produce
// for testInput. Unlike rawTokens, element names and the foo:attr
// attribute carry the declared namespace value (ns1, ns2, ns3) in their
// Space field rather than the literal prefix.
var cookedTokens = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	CharData("\n"),
	Directive(`DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"`),
	CharData("\n"),
	StartElement{Name{"ns2", "body"}, []Attr{{Name{"xmlns", "foo"}, "ns1"}, {Name{"", "xmlns"}, "ns2"}, {Name{"xmlns", "tag"}, "ns3"}}},
	CharData("\n  "),
	StartElement{Name{"ns2", "hello"}, []Attr{{Name{"", "lang"}, "en"}}},
	CharData("World <>'\" 白鵬翔"),
	EndElement{Name{"ns2", "hello"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "query"}, []Attr{}},
	CharData("What is it?"),
	EndElement{Name{"ns2", "query"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "goodbye"}, []Attr{}},
	EndElement{Name{"ns2", "goodbye"}},
	CharData("\n  "),
	StartElement{Name{"ns2", "outer"}, []Attr{{Name{"ns1", "attr"}, "value"}, {Name{"xmlns", "tag"}, "ns4"}}},
	CharData("\n    "),
	StartElement{Name{"ns2", "inner"}, []Attr{}},
	EndElement{Name{"ns2", "inner"}},
	CharData("\n  "),
	EndElement{Name{"ns2", "outer"}},
	CharData("\n  "),
	StartElement{Name{"ns3", "name"}, []Attr{}},
	CharData("\n    "),
	CharData("Some text here."),
	CharData("\n  "),
	EndElement{Name{"ns3", "name"}},
	CharData("\n"),
	EndElement{Name{"ns2", "body"}},
	Comment(" missing final newline "),
}
   214  
// testInputAltEncoding is a document whose XML declaration names the
// made-up encoding "x-testing-uppercase"; it is used by the alternate
// encoding tests.
const testInputAltEncoding = `
<?xml version="1.0" encoding="x-testing-uppercase"?>
<TAG>VALUE</TAG>`

// rawTokensAltEncoding is the RawToken sequence expected by
// TestRawTokenAltEncoding for testInputAltEncoding: the element name and
// text that follow the declaration appear in lower case.
var rawTokensAltEncoding = []Token{
	CharData("\n"),
	ProcInst{"xml", []byte(`version="1.0" encoding="x-testing-uppercase"`)},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("value"),
	EndElement{Name{"", "tag"}},
}
   227  
// xmlInput lists documents for which TestSyntax expects repeated Token
// calls to end with a *SyntaxError.
var xmlInput = []string{
	// unexpected EOF cases
	"<",
	"<t",
	"<t ",
	"<t/",
	"<!",
	"<!-",
	"<!--",
	"<!--c-",
	"<!--c--",
	"<!d",
	"<t></",
	"<t></t",
	"<?",
	"<?p",
	"<t a",
	"<t a=",
	"<t a='",
	"<t a=''",
	"<t/><![",
	"<t/><![C",
	"<t/><![CDATA[d",
	"<t/><![CDATA[d]",
	"<t/><![CDATA[d]]",

	// other Syntax errors
	"<>",
	"<t/a",
	"<0 />",
	"<?0 >",
	//	"<!0 >",	// let the Token() caller handle
	"</0>",
	"<t 0=''>",
	"<t a='&'>",
	"<t a='<'>",
	"<t>&nbspc;</t>",
	"<t a>",
	"<t a=>",
	"<t a=v>",
	//	"<![CDATA[d]]>",	// let the Token() caller handle
	"<t></e>",
	"<t></>",
	"<t></t!",
	"<t>cdata]]></t>",
}
   274  
   275  func TestRawToken(t *testing.T) {
   276  	d := NewDecoder(strings.NewReader(testInput))
   277  	d.Entity = testEntity
   278  	testRawToken(t, d, testInput, rawTokens)
   279  }
   280  
// nonStrictInput holds elements whose text contains ampersand sequences
// that are not valid entity references (unterminated, unknown, malformed
// numeric, and so on). It is read by TestNonStrictRawToken with
// Decoder.Strict set to false.
const nonStrictInput = `
<tag>non&entity</tag>
<tag>&unknown;entity</tag>
<tag>&#123</tag>
<tag>&#zzz;</tag>
<tag>&なまえ3;</tag>
<tag>&lt-gt;</tag>
<tag>&;</tag>
<tag>&0a;</tag>
`

// nonStrictTokens is the RawToken sequence expected for nonStrictInput:
// each ampersand sequence is expected back as literal character data.
var nonStrictTokens = []Token{
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("non&entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&unknown;entity"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#123"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&#zzz;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&なまえ3;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&lt-gt;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
	StartElement{Name{"", "tag"}, []Attr{}},
	CharData("&0a;"),
	EndElement{Name{"", "tag"}},
	CharData("\n"),
}
   327  
   328  func TestNonStrictRawToken(t *testing.T) {
   329  	d := NewDecoder(strings.NewReader(nonStrictInput))
   330  	d.Strict = false
   331  	testRawToken(t, d, nonStrictInput, nonStrictTokens)
   332  }
   333  
   334  type downCaser struct {
   335  	t *testing.T
   336  	r io.ByteReader
   337  }
   338  
   339  func (d *downCaser) ReadByte() (c byte, err error) {
   340  	c, err = d.r.ReadByte()
   341  	if c >= 'A' && c <= 'Z' {
   342  		c += 'a' - 'A'
   343  	}
   344  	return
   345  }
   346  
   347  func (d *downCaser) Read(p []byte) (int, error) {
   348  	d.t.Fatalf("unexpected Read call on downCaser reader")
   349  	panic("unreachable")
   350  }
   351  
   352  func TestRawTokenAltEncoding(t *testing.T) {
   353  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   354  	d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
   355  		if charset != "x-testing-uppercase" {
   356  			t.Fatalf("unexpected charset %q", charset)
   357  		}
   358  		return &downCaser{t, input.(io.ByteReader)}, nil
   359  	}
   360  	testRawToken(t, d, testInputAltEncoding, rawTokensAltEncoding)
   361  }
   362  
   363  func TestRawTokenAltEncodingNoConverter(t *testing.T) {
   364  	d := NewDecoder(strings.NewReader(testInputAltEncoding))
   365  	token, err := d.RawToken()
   366  	if token == nil {
   367  		t.Fatalf("expected a token on first RawToken call")
   368  	}
   369  	if err != nil {
   370  		t.Fatal(err)
   371  	}
   372  	token, err = d.RawToken()
   373  	if token != nil {
   374  		t.Errorf("expected a nil token; got %#v", token)
   375  	}
   376  	if err == nil {
   377  		t.Fatalf("expected an error on second RawToken call")
   378  	}
   379  	const encoding = "x-testing-uppercase"
   380  	if !strings.Contains(err.Error(), encoding) {
   381  		t.Errorf("expected error to contain %q; got error: %v",
   382  			encoding, err)
   383  	}
   384  }
   385  
   386  func testRawToken(t *testing.T, d *Decoder, raw string, rawTokens []Token) {
   387  	lastEnd := int64(0)
   388  	for i, want := range rawTokens {
   389  		start := d.InputOffset()
   390  		have, err := d.RawToken()
   391  		end := d.InputOffset()
   392  		if err != nil {
   393  			t.Fatalf("token %d: unexpected error: %s", i, err)
   394  		}
   395  		if !reflect.DeepEqual(have, want) {
   396  			var shave, swant string
   397  			if _, ok := have.(CharData); ok {
   398  				shave = fmt.Sprintf("CharData(%q)", have)
   399  			} else {
   400  				shave = fmt.Sprintf("%#v", have)
   401  			}
   402  			if _, ok := want.(CharData); ok {
   403  				swant = fmt.Sprintf("CharData(%q)", want)
   404  			} else {
   405  				swant = fmt.Sprintf("%#v", want)
   406  			}
   407  			t.Errorf("token %d = %s, want %s", i, shave, swant)
   408  		}
   409  
   410  		// Check that InputOffset returned actual token.
   411  		switch {
   412  		case start < lastEnd:
   413  			t.Errorf("token %d: position [%d,%d) for %T is before previous token", i, start, end, have)
   414  		case start >= end:
   415  			// Special case: EndElement can be synthesized.
   416  			if start == end && end == lastEnd {
   417  				break
   418  			}
   419  			t.Errorf("token %d: position [%d,%d) for %T is empty", i, start, end, have)
   420  		case end > int64(len(raw)):
   421  			t.Errorf("token %d: position [%d,%d) for %T extends beyond input", i, start, end, have)
   422  		default:
   423  			text := raw[start:end]
   424  			if strings.ContainsAny(text, "<>") && (!strings.HasPrefix(text, "<") || !strings.HasSuffix(text, ">")) {
   425  				t.Errorf("token %d: misaligned raw token %#q for %T", i, text, have)
   426  			}
   427  		}
   428  		lastEnd = end
   429  	}
   430  }
   431  
// Ensure that directives (specifically !DOCTYPE) include the complete
// text of any nested directives, noting that < and > do not change
// nesting depth if they are in single or double quotes.

// nestedDirectivesInput is the document read by TestNestedDirectives.
// Each line is a DOCTYPE whose internal subset nests an ENTITY
// declaration, with '<' and '>' placed inside single- and double-quoted
// values.
var nestedDirectivesInput = `
<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY xlt ">">]>
<!DOCTYPE [<!ENTITY xlt "<">]>
<!DOCTYPE [<!ENTITY xlt '>'>]>
<!DOCTYPE [<!ENTITY xlt '<'>]>
<!DOCTYPE [<!ENTITY xlt '">'>]>
<!DOCTYPE [<!ENTITY xlt "'<">]>
`

// nestedDirectivesTokens is the Token sequence expected for
// nestedDirectivesInput: one Directive per line, holding the full text
// between "<!" and the final ">", separated by newline CharData.
var nestedDirectivesTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt ">">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "<">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '>'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '<'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt '">'>]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY xlt "'<">]`),
	CharData("\n"),
}
   463  
   464  func TestNestedDirectives(t *testing.T) {
   465  	d := NewDecoder(strings.NewReader(nestedDirectivesInput))
   466  
   467  	for i, want := range nestedDirectivesTokens {
   468  		have, err := d.Token()
   469  		if err != nil {
   470  			t.Fatalf("token %d: unexpected error: %s", i, err)
   471  		}
   472  		if !reflect.DeepEqual(have, want) {
   473  			t.Errorf("token %d = %#v want %#v", i, have, want)
   474  		}
   475  	}
   476  }
   477  
   478  func TestToken(t *testing.T) {
   479  	d := NewDecoder(strings.NewReader(testInput))
   480  	d.Entity = testEntity
   481  
   482  	for i, want := range cookedTokens {
   483  		have, err := d.Token()
   484  		if err != nil {
   485  			t.Fatalf("token %d: unexpected error: %s", i, err)
   486  		}
   487  		if !reflect.DeepEqual(have, want) {
   488  			t.Errorf("token %d = %#v want %#v", i, have, want)
   489  		}
   490  	}
   491  }
   492  
   493  func TestSyntax(t *testing.T) {
   494  	for i := range xmlInput {
   495  		d := NewDecoder(strings.NewReader(xmlInput[i]))
   496  		var err error
   497  		for _, err = d.Token(); err == nil; _, err = d.Token() {
   498  		}
   499  		if _, ok := err.(*SyntaxError); !ok {
   500  			t.Fatalf(`xmlInput "%s": expected SyntaxError not received`, xmlInput[i])
   501  		}
   502  	}
   503  }
   504  
   505  func TestInputLinePos(t *testing.T) {
   506  	testInput := `<root>
   507  <?pi
   508   ?>  <elt
   509  att
   510  =
   511  "val">
   512  <![CDATA[
   513  ]]><!--
   514  
   515  --></elt>
   516  </root>`
   517  	linePos := [][]int{
   518  		{1, 7},
   519  		{2, 1},
   520  		{3, 4},
   521  		{3, 6},
   522  		{6, 7},
   523  		{7, 1},
   524  		{8, 4},
   525  		{10, 4},
   526  		{10, 10},
   527  		{11, 1},
   528  		{11, 8},
   529  	}
   530  	dec := NewDecoder(strings.NewReader(testInput))
   531  	for _, want := range linePos {
   532  		if _, err := dec.Token(); err != nil {
   533  			t.Errorf("Unexpected error: %v", err)
   534  			continue
   535  		}
   536  
   537  		gotLine, gotCol := dec.InputPos()
   538  		if gotLine != want[0] || gotCol != want[1] {
   539  			t.Errorf("dec.InputPos() = %d,%d, want %d,%d", gotLine, gotCol, want[0], want[1])
   540  		}
   541  	}
   542  }
   543  
   544  type allScalars struct {
   545  	True1     bool
   546  	True2     bool
   547  	False1    bool
   548  	False2    bool
   549  	Int       int
   550  	Int8      int8
   551  	Int16     int16
   552  	Int32     int32
   553  	Int64     int64
   554  	Uint      int
   555  	Uint8     uint8
   556  	Uint16    uint16
   557  	Uint32    uint32
   558  	Uint64    uint64
   559  	Uintptr   uintptr
   560  	Float32   float32
   561  	Float64   float64
   562  	String    string
   563  	PtrString *string
   564  }
   565  
// all is the value TestAllScalars expects after unmarshaling
// testScalarsInput into an allScalars.
var all = allScalars{
	True1:     true,
	True2:     true,
	False1:    false,
	False2:    false,
	Int:       1,
	Int8:      -2,
	Int16:     3,
	Int32:     -4,
	Int64:     5,
	Uint:      6,
	Uint8:     7,
	Uint16:    8,
	Uint32:    9,
	Uint64:    10,
	Uintptr:   11,
	Float32:   13.0,
	Float64:   14.0,
	String:    "15",
	PtrString: &sixteen,
}

// sixteen backs the PtrString field of all.
var sixteen = "16"
   589  
// testScalarsInput is the document unmarshaled by TestAllScalars. It
// includes a <Float> element for which allScalars declares no field.
const testScalarsInput = `<allscalars>
	<True1>true</True1>
	<True2>1</True2>
	<False1>false</False1>
	<False2>0</False2>
	<Int>1</Int>
	<Int8>-2</Int8>
	<Int16>3</Int16>
	<Int32>-4</Int32>
	<Int64>5</Int64>
	<Uint>6</Uint>
	<Uint8>7</Uint8>
	<Uint16>8</Uint16>
	<Uint32>9</Uint32>
	<Uint64>10</Uint64>
	<Uintptr>11</Uintptr>
	<Float>12.0</Float>
	<Float32>13.0</Float32>
	<Float64>14.0</Float64>
	<String>15</String>
	<PtrString>16</PtrString>
</allscalars>`
   612  
   613  func TestAllScalars(t *testing.T) {
   614  	var a allScalars
   615  	err := Unmarshal([]byte(testScalarsInput), &a)
   616  
   617  	if err != nil {
   618  		t.Fatal(err)
   619  	}
   620  	if !reflect.DeepEqual(a, all) {
   621  		t.Errorf("have %+v want %+v", a, all)
   622  	}
   623  }
   624  
// item is the Unmarshal target used by TestIssue569.
type item struct {
	FieldA string
}
   628  
   629  func TestIssue569(t *testing.T) {
   630  	data := `<item><FieldA>abcd</FieldA></item>`
   631  	var i item
   632  	err := Unmarshal([]byte(data), &i)
   633  
   634  	if err != nil || i.FieldA != "abcd" {
   635  		t.Fatal("Expecting abcd")
   636  	}
   637  }
   638  
   639  func TestUnquotedAttrs(t *testing.T) {
   640  	data := "<tag attr=azAZ09:-_\t>"
   641  	d := NewDecoder(strings.NewReader(data))
   642  	d.Strict = false
   643  	token, err := d.Token()
   644  	if _, ok := err.(*SyntaxError); ok {
   645  		t.Errorf("Unexpected error: %v", err)
   646  	}
   647  	if token.(StartElement).Name.Local != "tag" {
   648  		t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   649  	}
   650  	attr := token.(StartElement).Attr[0]
   651  	if attr.Value != "azAZ09:-_" {
   652  		t.Errorf("Unexpected attribute value: %v", attr.Value)
   653  	}
   654  	if attr.Name.Local != "attr" {
   655  		t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   656  	}
   657  }
   658  
   659  func TestValuelessAttrs(t *testing.T) {
   660  	tests := [][3]string{
   661  		{"<p nowrap>", "p", "nowrap"},
   662  		{"<p nowrap >", "p", "nowrap"},
   663  		{"<input checked/>", "input", "checked"},
   664  		{"<input checked />", "input", "checked"},
   665  	}
   666  	for _, test := range tests {
   667  		d := NewDecoder(strings.NewReader(test[0]))
   668  		d.Strict = false
   669  		token, err := d.Token()
   670  		if _, ok := err.(*SyntaxError); ok {
   671  			t.Errorf("Unexpected error: %v", err)
   672  		}
   673  		if token.(StartElement).Name.Local != test[1] {
   674  			t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
   675  		}
   676  		attr := token.(StartElement).Attr[0]
   677  		if attr.Value != test[2] {
   678  			t.Errorf("Unexpected attribute value: %v", attr.Value)
   679  		}
   680  		if attr.Name.Local != test[2] {
   681  			t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
   682  		}
   683  	}
   684  }
   685  
   686  func TestCopyTokenCharData(t *testing.T) {
   687  	data := []byte("same data")
   688  	var tok1 Token = CharData(data)
   689  	tok2 := CopyToken(tok1)
   690  	if !reflect.DeepEqual(tok1, tok2) {
   691  		t.Error("CopyToken(CharData) != CharData")
   692  	}
   693  	data[1] = 'o'
   694  	if reflect.DeepEqual(tok1, tok2) {
   695  		t.Error("CopyToken(CharData) uses same buffer.")
   696  	}
   697  }
   698  
   699  func TestCopyTokenStartElement(t *testing.T) {
   700  	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
   701  	var tok1 Token = elt
   702  	tok2 := CopyToken(tok1)
   703  	if tok1.(StartElement).Attr[0].Value != "en" {
   704  		t.Error("CopyToken overwrote Attr[0]")
   705  	}
   706  	if !reflect.DeepEqual(tok1, tok2) {
   707  		t.Error("CopyToken(StartElement) != StartElement")
   708  	}
   709  	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
   710  	if reflect.DeepEqual(tok1, tok2) {
   711  		t.Error("CopyToken(CharData) uses same buffer.")
   712  	}
   713  }
   714  
   715  func TestCopyTokenComment(t *testing.T) {
   716  	data := []byte("<!-- some comment -->")
   717  	var tok1 Token = Comment(data)
   718  	tok2 := CopyToken(tok1)
   719  	if !reflect.DeepEqual(tok1, tok2) {
   720  		t.Error("CopyToken(Comment) != Comment")
   721  	}
   722  	data[1] = 'o'
   723  	if reflect.DeepEqual(tok1, tok2) {
   724  		t.Error("CopyToken(Comment) uses same buffer.")
   725  	}
   726  }
   727  
   728  func TestSyntaxErrorLineNum(t *testing.T) {
   729  	testInput := "<P>Foo<P>\n\n<P>Bar</>\n"
   730  	d := NewDecoder(strings.NewReader(testInput))
   731  	var err error
   732  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   733  	}
   734  	synerr, ok := err.(*SyntaxError)
   735  	if !ok {
   736  		t.Error("Expected SyntaxError.")
   737  	}
   738  	if synerr.Line != 3 {
   739  		t.Error("SyntaxError didn't have correct line number.")
   740  	}
   741  }
   742  
   743  func TestTrailingRawToken(t *testing.T) {
   744  	input := `<FOO></FOO>  `
   745  	d := NewDecoder(strings.NewReader(input))
   746  	var err error
   747  	for _, err = d.RawToken(); err == nil; _, err = d.RawToken() {
   748  	}
   749  	if err != io.EOF {
   750  		t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
   751  	}
   752  }
   753  
   754  func TestTrailingToken(t *testing.T) {
   755  	input := `<FOO></FOO>  `
   756  	d := NewDecoder(strings.NewReader(input))
   757  	var err error
   758  	for _, err = d.Token(); err == nil; _, err = d.Token() {
   759  	}
   760  	if err != io.EOF {
   761  		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
   762  	}
   763  }
   764  
// TestEntityInsideCDATA checks that a bare '&' inside a CDATA section
// does not produce an error: reading every token must end with io.EOF.
func TestEntityInsideCDATA(t *testing.T) {
	input := `<test><![CDATA[ &val=foo ]]></test>`
	d := NewDecoder(strings.NewReader(input))
	var err error
	// Drain the decoder; the loop exits on the first non-nil error.
	for _, err = d.Token(); err == nil; _, err = d.Token() {
	}
	if err != io.EOF {
		t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
	}
}
   775  
// characterTests pairs an input containing a disallowed character or an
// invalid entity reference with the SyntaxError.Msg that
// TestDisallowedCharacters expects for it.
var characterTests = []struct {
	in  string
	err string
}{
	{"\x12<doc/>", "illegal character code U+0012"},
	{"<?xml version=\"1.0\"?>\x0b<doc/>", "illegal character code U+000B"},
	{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
	{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
	{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
	{"<doc>&abc\x01;</doc>", "invalid character entity &abc (no semicolon)"},
	{"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
	{"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &\uFFFE;"},
	{"<doc>&hello;</doc>", "invalid character entity &hello;"},
}
   790  
   791  func TestDisallowedCharacters(t *testing.T) {
   792  
   793  	for i, tt := range characterTests {
   794  		d := NewDecoder(strings.NewReader(tt.in))
   795  		var err error
   796  
   797  		for err == nil {
   798  			_, err = d.Token()
   799  		}
   800  		synerr, ok := err.(*SyntaxError)
   801  		if !ok {
   802  			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", i, err)
   803  		}
   804  		if synerr.Msg != tt.err {
   805  			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", i, tt.err, synerr.Msg)
   806  		}
   807  	}
   808  }
   809  
   810  func TestIsInCharacterRange(t *testing.T) {
   811  	invalid := []rune{
   812  		utf8.MaxRune + 1,
   813  		0xD800, // surrogate min
   814  		0xDFFF, // surrogate max
   815  		-1,
   816  	}
   817  	for _, r := range invalid {
   818  		if isInCharacterRange(r) {
   819  			t.Errorf("rune %U considered valid", r)
   820  		}
   821  	}
   822  }
   823  
// procInstTests pairs the body of an XML declaration with the values
// TestProcInstEncoding expects procInst to extract from it: expect[0] for
// "version" and expect[1] for "encoding".
var procInstTests = []struct {
	input  string
	expect [2]string
}{
	{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
	{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
	{`encoding="FOO" `, [2]string{"", "FOO"}},
	{`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
	{`version= encoding=`, [2]string{"", ""}},
	{`encoding="version=1.0"`, [2]string{"", "version=1.0"}},
	{``, [2]string{"", ""}},
	// TODO: what's the right approach to handle these nested cases?
	{`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}},
	{`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}},
}
   841  
   842  func TestProcInstEncoding(t *testing.T) {
   843  	for _, test := range procInstTests {
   844  		if got := procInst("version", test.input); got != test.expect[0] {
   845  			t.Errorf("procInst(version, %q) = %q; want %q", test.input, got, test.expect[0])
   846  		}
   847  		if got := procInst("encoding", test.input); got != test.expect[1] {
   848  			t.Errorf("procInst(encoding, %q) = %q; want %q", test.input, got, test.expect[1])
   849  		}
   850  	}
   851  }
   852  
// Ensure that directives with comments include the complete
// text of any nested directives.

// directivesWithCommentsInput is the document read by
// TestDirectivesWithComments: DOCTYPE directives with comments placed
// before, after, and between nested declarations.
var directivesWithCommentsInput = `
<!DOCTYPE [<!-- a comment --><!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
<!DOCTYPE [<!ENTITY go "Golang"><!-- a comment-->]>
<!DOCTYPE <!-> <!> <!----> <!-->--> <!--->--> [<!ENTITY go "Golang"><!-- a comment-->]>
`

// directivesWithCommentsTokens is the Token sequence expected for
// directivesWithCommentsInput. In the expected Directive text each
// comment from the input appears as a single space.
var directivesWithCommentsTokens = []Token{
	CharData("\n"),
	Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
	CharData("\n"),
	Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
	Directive(`DOCTYPE <!-> <!>       [<!ENTITY go "Golang"> ]`),
	CharData("\n"),
}
   871  
   872  func TestDirectivesWithComments(t *testing.T) {
   873  	d := NewDecoder(strings.NewReader(directivesWithCommentsInput))
   874  
   875  	for i, want := range directivesWithCommentsTokens {
   876  		have, err := d.Token()
   877  		if err != nil {
   878  			t.Fatalf("token %d: unexpected error: %s", i, err)
   879  		}
   880  		if !reflect.DeepEqual(have, want) {
   881  			t.Errorf("token %d = %#v want %#v", i, have, want)
   882  		}
   883  	}
   884  }
   885  
   886  // Writer whose Write method always returns an error.
   887  type errWriter struct{}
   888  
   889  func (errWriter) Write(p []byte) (n int, err error) { return 0, fmt.Errorf("unwritable") }
   890  
   891  func TestEscapeTextIOErrors(t *testing.T) {
   892  	expectErr := "unwritable"
   893  	err := EscapeText(errWriter{}, []byte{'A'})
   894  
   895  	if err == nil || err.Error() != expectErr {
   896  		t.Errorf("have %v, want %v", err, expectErr)
   897  	}
   898  }
   899  
   900  func TestEscapeTextInvalidChar(t *testing.T) {
   901  	input := []byte("A \x00 terminated string.")
   902  	expected := "A \uFFFD terminated string."
   903  
   904  	buff := new(strings.Builder)
   905  	if err := EscapeText(buff, input); err != nil {
   906  		t.Fatalf("have %v, want nil", err)
   907  	}
   908  	text := buff.String()
   909  
   910  	if text != expected {
   911  		t.Errorf("have %v, want %v", text, expected)
   912  	}
   913  }
   914  
   915  func TestIssue5880(t *testing.T) {
   916  	type T []byte
   917  	data, err := Marshal(T{192, 168, 0, 1})
   918  	if err != nil {
   919  		t.Errorf("Marshal error: %v", err)
   920  	}
   921  	if !utf8.Valid(data) {
   922  		t.Errorf("Marshal generated invalid UTF-8: %x", data)
   923  	}
   924  }
   925  
   926  func TestIssue8535(t *testing.T) {
   927  
   928  	type ExampleConflict struct {
   929  		XMLName  Name   `xml:"example"`
   930  		Link     string `xml:"link"`
   931  		AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space
   932  	}
   933  	testCase := `<example>
   934  			<title>Example</title>
   935  			<link>http://example.com/default</link> <!-- not assigned -->
   936  			<link>http://example.com/home</link> <!-- not assigned -->
   937  			<ns:link xmlns:ns="http://www.w3.org/2005/Atom">http://example.com/ns</ns:link>
   938  		</example>`
   939  
   940  	var dest ExampleConflict
   941  	d := NewDecoder(strings.NewReader(testCase))
   942  	if err := d.Decode(&dest); err != nil {
   943  		t.Fatal(err)
   944  	}
   945  }
   946  
   947  func TestEncodeXMLNS(t *testing.T) {
   948  	testCases := []struct {
   949  		f    func() ([]byte, error)
   950  		want string
   951  		ok   bool
   952  	}{
   953  		{encodeXMLNS1, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
   954  		{encodeXMLNS2, `<Test><body xmlns="http://example.com/ns">hello world</body></Test>`, true},
   955  		{encodeXMLNS3, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, true},
   956  		{encodeXMLNS4, `<Test xmlns="http://example.com/ns"><Body>hello world</Body></Test>`, false},
   957  	}
   958  
   959  	for i, tc := range testCases {
   960  		if b, err := tc.f(); err == nil {
   961  			if got, want := string(b), tc.want; got != want {
   962  				t.Errorf("%d: got %s, want %s \n", i, got, want)
   963  			}
   964  		} else {
   965  			t.Errorf("%d: marshal failed with %s", i, err)
   966  		}
   967  	}
   968  }
   969  
   970  func encodeXMLNS1() ([]byte, error) {
   971  
   972  	type T struct {
   973  		XMLName Name   `xml:"Test"`
   974  		Ns      string `xml:"xmlns,attr"`
   975  		Body    string
   976  	}
   977  
   978  	s := &T{Ns: "http://example.com/ns", Body: "hello world"}
   979  	return Marshal(s)
   980  }
   981  
   982  func encodeXMLNS2() ([]byte, error) {
   983  
   984  	type Test struct {
   985  		Body string `xml:"http://example.com/ns body"`
   986  	}
   987  
   988  	s := &Test{Body: "hello world"}
   989  	return Marshal(s)
   990  }
   991  
   992  func encodeXMLNS3() ([]byte, error) {
   993  
   994  	type Test struct {
   995  		XMLName Name `xml:"http://example.com/ns Test"`
   996  		Body    string
   997  	}
   998  
   999  	//s := &Test{XMLName: Name{"http://example.com/ns",""}, Body: "hello world"} is unusable as the "-" is missing
  1000  	// as documentation states
  1001  	s := &Test{Body: "hello world"}
  1002  	return Marshal(s)
  1003  }
  1004  
  1005  func encodeXMLNS4() ([]byte, error) {
  1006  
  1007  	type Test struct {
  1008  		Ns   string `xml:"xmlns,attr"`
  1009  		Body string
  1010  	}
  1011  
  1012  	s := &Test{Ns: "http://example.com/ns", Body: "hello world"}
  1013  	return Marshal(s)
  1014  }
  1015  
  1016  func TestIssue11405(t *testing.T) {
  1017  	testCases := []string{
  1018  		"<root>",
  1019  		"<root><foo>",
  1020  		"<root><foo></foo>",
  1021  	}
  1022  	for _, tc := range testCases {
  1023  		d := NewDecoder(strings.NewReader(tc))
  1024  		var err error
  1025  		for {
  1026  			_, err = d.Token()
  1027  			if err != nil {
  1028  				break
  1029  			}
  1030  		}
  1031  		if _, ok := err.(*SyntaxError); !ok {
  1032  			t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
  1033  		}
  1034  	}
  1035  }
  1036  
  1037  func TestIssue12417(t *testing.T) {
  1038  	testCases := []struct {
  1039  		s  string
  1040  		ok bool
  1041  	}{
  1042  		{`<?xml encoding="UtF-8" version="1.0"?><root/>`, true},
  1043  		{`<?xml encoding="UTF-8" version="1.0"?><root/>`, true},
  1044  		{`<?xml encoding="utf-8" version="1.0"?><root/>`, true},
  1045  		{`<?xml encoding="uuu-9" version="1.0"?><root/>`, false},
  1046  	}
  1047  	for _, tc := range testCases {
  1048  		d := NewDecoder(strings.NewReader(tc.s))
  1049  		var err error
  1050  		for {
  1051  			_, err = d.Token()
  1052  			if err != nil {
  1053  				if err == io.EOF {
  1054  					err = nil
  1055  				}
  1056  				break
  1057  			}
  1058  		}
  1059  		if err != nil && tc.ok {
  1060  			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
  1061  			continue
  1062  		}
  1063  		if err == nil && !tc.ok {
  1064  			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
  1065  		}
  1066  	}
  1067  }
  1068  
  1069  func TestIssue7113(t *testing.T) {
  1070  	type C struct {
  1071  		XMLName Name `xml:""` // Sets empty namespace
  1072  	}
  1073  
  1074  	type D struct {
  1075  		XMLName Name `xml:"d"`
  1076  	}
  1077  
  1078  	type A struct {
  1079  		XMLName Name `xml:""`
  1080  		C       C    `xml:""`
  1081  		D       D
  1082  	}
  1083  
  1084  	var a A
  1085  	structSpace := "b"
  1086  	xmlTest := `<A xmlns="` + structSpace + `"><C xmlns=""></C><d></d></A>`
  1087  	t.Log(xmlTest)
  1088  	err := Unmarshal([]byte(xmlTest), &a)
  1089  	if err != nil {
  1090  		t.Fatal(err)
  1091  	}
  1092  
  1093  	if a.XMLName.Space != structSpace {
  1094  		t.Errorf("overidding with empty namespace: unmarshaling, got %s, want %s\n", a.XMLName.Space, structSpace)
  1095  	}
  1096  	if len(a.C.XMLName.Space) != 0 {
  1097  		t.Fatalf("overidding with empty namespace: unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
  1098  	}
  1099  
  1100  	var b []byte
  1101  	b, err = Marshal(&a)
  1102  	if err != nil {
  1103  		t.Fatal(err)
  1104  	}
  1105  	if len(a.C.XMLName.Space) != 0 {
  1106  		t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
  1107  	}
  1108  	if string(b) != xmlTest {
  1109  		t.Fatalf("overidding with empty namespace: marshaling, got %s, want %s\n", b, xmlTest)
  1110  	}
  1111  	var c A
  1112  	err = Unmarshal(b, &c)
  1113  	if err != nil {
  1114  		t.Fatalf("second Unmarshal failed: %s", err)
  1115  	}
  1116  	if c.XMLName.Space != "b" {
  1117  		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", a.XMLName.Space, structSpace)
  1118  	}
  1119  	if len(c.C.XMLName.Space) != 0 {
  1120  		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
  1121  	}
  1122  }
  1123  
  1124  func TestIssue20396(t *testing.T) {
  1125  
  1126  	var attrError = UnmarshalError("XML syntax error on line 1: expected attribute name in element")
  1127  
  1128  	testCases := []struct {
  1129  		s       string
  1130  		wantErr error
  1131  	}{
  1132  		{`<a:te:st xmlns:a="abcd"/>`, // Issue 20396
  1133  			UnmarshalError("XML syntax error on line 1: expected element name after <")},
  1134  		{`<a:te=st xmlns:a="abcd"/>`, attrError},
  1135  		{`<a:te&st xmlns:a="abcd"/>`, attrError},
  1136  		{`<a:test xmlns:a="abcd"/>`, nil},
  1137  		{`<a:te:st xmlns:a="abcd">1</a:te:st>`,
  1138  			UnmarshalError("XML syntax error on line 1: expected element name after <")},
  1139  		{`<a:te=st xmlns:a="abcd">1</a:te=st>`, attrError},
  1140  		{`<a:te&st xmlns:a="abcd">1</a:te&st>`, attrError},
  1141  		{`<a:test xmlns:a="abcd">1</a:test>`, nil},
  1142  	}
  1143  
  1144  	var dest string
  1145  	for _, tc := range testCases {
  1146  		if got, want := Unmarshal([]byte(tc.s), &dest), tc.wantErr; got != want {
  1147  			if got == nil {
  1148  				t.Errorf("%s: Unexpected success, want %v", tc.s, want)
  1149  			} else if want == nil {
  1150  				t.Errorf("%s: Unexpected error, got %v", tc.s, got)
  1151  			} else if got.Error() != want.Error() {
  1152  				t.Errorf("%s: got %v, want %v", tc.s, got, want)
  1153  			}
  1154  		}
  1155  	}
  1156  }
  1157  
  1158  func TestIssue20685(t *testing.T) {
  1159  	testCases := []struct {
  1160  		s  string
  1161  		ok bool
  1162  	}{
  1163  		{`<x:book xmlns:x="abcd" xmlns:y="abcd"><unclosetag>one</x:book>`, false},
  1164  		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</x:book>`, true},
  1165  		{`<x:book xmlns:x="abcd" xmlns:y="abcd">one</y:book>`, false},
  1166  		{`<x:book xmlns:y="abcd" xmlns:x="abcd">one</y:book>`, false},
  1167  		{`<x:book xmlns:x="abcd">one</y:book>`, false},
  1168  		{`<x:book>one</y:book>`, false},
  1169  		{`<xbook>one</ybook>`, false},
  1170  	}
  1171  	for _, tc := range testCases {
  1172  		d := NewDecoder(strings.NewReader(tc.s))
  1173  		var err error
  1174  		for {
  1175  			_, err = d.Token()
  1176  			if err != nil {
  1177  				if err == io.EOF {
  1178  					err = nil
  1179  				}
  1180  				break
  1181  			}
  1182  		}
  1183  		if err != nil && tc.ok {
  1184  			t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
  1185  			continue
  1186  		}
  1187  		if err == nil && !tc.ok {
  1188  			t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
  1189  		}
  1190  	}
  1191  }
  1192  
  1193  func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
  1194  	return func(src TokenReader) TokenReader {
  1195  		return mapper{
  1196  			t: src,
  1197  			f: mapping,
  1198  		}
  1199  	}
  1200  }
  1201  
  1202  type mapper struct {
  1203  	t TokenReader
  1204  	f func(Token) Token
  1205  }
  1206  
  1207  func (m mapper) Token() (Token, error) {
  1208  	tok, err := m.t.Token()
  1209  	if err != nil {
  1210  		return nil, err
  1211  	}
  1212  	return m.f(tok), nil
  1213  }
  1214  
  1215  func TestNewTokenDecoderIdempotent(t *testing.T) {
  1216  	d := NewDecoder(strings.NewReader(`<br>`))
  1217  	d2 := NewTokenDecoder(d)
  1218  	if d != d2 {
  1219  		t.Error("NewTokenDecoder did not detect underlying Decoder")
  1220  	}
  1221  }
  1222  
  1223  func TestWrapDecoder(t *testing.T) {
  1224  	d := NewDecoder(strings.NewReader(`<quote>[Re-enter Clown with a letter, and FABIAN]</quote>`))
  1225  	m := tokenMap(func(t Token) Token {
  1226  		switch tok := t.(type) {
  1227  		case StartElement:
  1228  			if tok.Name.Local == "quote" {
  1229  				tok.Name.Local = "blocking"
  1230  				return tok
  1231  			}
  1232  		case EndElement:
  1233  			if tok.Name.Local == "quote" {
  1234  				tok.Name.Local = "blocking"
  1235  				return tok
  1236  			}
  1237  		}
  1238  		return t
  1239  	})
  1240  
  1241  	d = NewTokenDecoder(m(d))
  1242  
  1243  	o := struct {
  1244  		XMLName  Name   `xml:"blocking"`
  1245  		Chardata string `xml:",chardata"`
  1246  	}{}
  1247  
  1248  	if err := d.Decode(&o); err != nil {
  1249  		t.Fatal("Got unexpected error while decoding:", err)
  1250  	}
  1251  
  1252  	if o.Chardata != "[Re-enter Clown with a letter, and FABIAN]" {
  1253  		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
  1254  	}
  1255  }
  1256  
  1257  type tokReader struct{}
  1258  
  1259  func (tokReader) Token() (Token, error) {
  1260  	return StartElement{}, nil
  1261  }
  1262  
  1263  type Failure struct{}
  1264  
  1265  func (Failure) UnmarshalXML(*Decoder, StartElement) error {
  1266  	return nil
  1267  }
  1268  
  1269  func TestTokenUnmarshaler(t *testing.T) {
  1270  	defer func() {
  1271  		if r := recover(); r != nil {
  1272  			t.Error("Unexpected panic using custom token unmarshaler")
  1273  		}
  1274  	}()
  1275  
  1276  	d := NewTokenDecoder(tokReader{})
  1277  	d.Decode(&Failure{})
  1278  }
  1279  
  1280  func testRoundTrip(t *testing.T, input string) {
  1281  	d := NewDecoder(strings.NewReader(input))
  1282  	var tokens []Token
  1283  	var buf bytes.Buffer
  1284  	e := NewEncoder(&buf)
  1285  	for {
  1286  		tok, err := d.Token()
  1287  		if err == io.EOF {
  1288  			break
  1289  		}
  1290  		if err != nil {
  1291  			t.Fatalf("invalid input: %v", err)
  1292  		}
  1293  		if err := e.EncodeToken(tok); err != nil {
  1294  			t.Fatalf("failed to re-encode input: %v", err)
  1295  		}
  1296  		tokens = append(tokens, CopyToken(tok))
  1297  	}
  1298  	if err := e.Flush(); err != nil {
  1299  		t.Fatal(err)
  1300  	}
  1301  
  1302  	d = NewDecoder(&buf)
  1303  	for {
  1304  		tok, err := d.Token()
  1305  		if err == io.EOF {
  1306  			break
  1307  		}
  1308  		if err != nil {
  1309  			t.Fatalf("failed to decode output: %v", err)
  1310  		}
  1311  		if len(tokens) == 0 {
  1312  			t.Fatalf("unexpected token: %#v", tok)
  1313  		}
  1314  		a, b := tokens[0], tok
  1315  		if !reflect.DeepEqual(a, b) {
  1316  			t.Fatalf("token mismatch: %#v vs %#v", a, b)
  1317  		}
  1318  		tokens = tokens[1:]
  1319  	}
  1320  	if len(tokens) > 0 {
  1321  		t.Fatalf("lost tokens: %#v", tokens)
  1322  	}
  1323  }
  1324  
  1325  func TestRoundTrip(t *testing.T) {
  1326  	tests := map[string]string{
  1327  		"trailing colon":         `<foo abc:="x"></foo>`,
  1328  		"comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
  1329  	}
  1330  	for name, input := range tests {
  1331  		t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })
  1332  	}
  1333  }
  1334  
  1335  func TestParseErrors(t *testing.T) {
  1336  	withDefaultHeader := func(s string) string {
  1337  		return `<?xml version="1.0" encoding="UTF-8"?>` + s
  1338  	}
  1339  	tests := []struct {
  1340  		src string
  1341  		err string
  1342  	}{
  1343  		{withDefaultHeader(`</foo>`), `unexpected end element </foo>`},
  1344  		{withDefaultHeader(`<x:foo></y:foo>`), `element <foo> in space x closed by </foo> in space y`},
  1345  		{withDefaultHeader(`<? not ok ?>`), `expected target name after <?`},
  1346  		{withDefaultHeader(`<!- not ok -->`), `invalid sequence <!- not part of <!--`},
  1347  		{withDefaultHeader(`<!-? not ok -->`), `invalid sequence <!- not part of <!--`},
  1348  		{withDefaultHeader(`<![not ok]>`), `invalid <![ sequence`},
  1349  		{withDefaultHeader(`<zzz:foo xmlns:zzz="http://example.com"><bar>baz</bar></foo>`),
  1350  			`element <foo> in space zzz closed by </foo> in space ""`},
  1351  		{withDefaultHeader("\xf1"), `invalid UTF-8`},
  1352  
  1353  		// Header-related errors.
  1354  		{`<?xml version="1.1" encoding="UTF-8"?>`, `unsupported version "1.1"; only version 1.0 is supported`},
  1355  
  1356  		// Cases below are for "no errors".
  1357  		{withDefaultHeader(`<?ok?>`), ``},
  1358  		{withDefaultHeader(`<?ok version="ok"?>`), ``},
  1359  	}
  1360  
  1361  	for _, test := range tests {
  1362  		d := NewDecoder(strings.NewReader(test.src))
  1363  		var err error
  1364  		for {
  1365  			_, err = d.Token()
  1366  			if err != nil {
  1367  				break
  1368  			}
  1369  		}
  1370  		if test.err == "" {
  1371  			if err != io.EOF {
  1372  				t.Errorf("parse %s: have %q error, expected none", test.src, err)
  1373  			}
  1374  			continue
  1375  		}
  1376  		// Inv: err != nil
  1377  		if err == io.EOF {
  1378  			t.Errorf("parse %s: unexpected EOF", test.src)
  1379  			continue
  1380  		}
  1381  		if !strings.Contains(err.Error(), test.err) {
  1382  			t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err)
  1383  			continue
  1384  		}
  1385  	}
  1386  }
  1387  
// testInputHTMLAutoClose is the input document shared by
// BenchmarkHTMLAutoClose and TestHTMLAutoClose. It mixes unclosed,
// self-closing, and explicitly closed br tags in several letter cases, plus
// a span element carrying an attribute and text.
const testInputHTMLAutoClose = `<?xml version="1.0" encoding="UTF-8"?>
<br>
<br/><br/>
<br><br>
<br></br>
<BR>
<BR/><BR/>
<Br></Br>
<BR><span id="test">abc</span><br/><br/>`
  1397  
  1398  func BenchmarkHTMLAutoClose(b *testing.B) {
  1399  	b.RunParallel(func(p *testing.PB) {
  1400  		for p.Next() {
  1401  			d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
  1402  			d.Strict = false
  1403  			d.AutoClose = HTMLAutoClose
  1404  			d.Entity = HTMLEntity
  1405  			for {
  1406  				_, err := d.Token()
  1407  				if err != nil {
  1408  					if err == io.EOF {
  1409  						break
  1410  					}
  1411  					b.Fatalf("unexpected error: %v", err)
  1412  				}
  1413  			}
  1414  		}
  1415  	})
  1416  }
  1417  
  1418  func TestHTMLAutoClose(t *testing.T) {
  1419  	wantTokens := []Token{
  1420  		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
  1421  		CharData("\n"),
  1422  		StartElement{Name{"", "br"}, []Attr{}},
  1423  		EndElement{Name{"", "br"}},
  1424  		CharData("\n"),
  1425  		StartElement{Name{"", "br"}, []Attr{}},
  1426  		EndElement{Name{"", "br"}},
  1427  		StartElement{Name{"", "br"}, []Attr{}},
  1428  		EndElement{Name{"", "br"}},
  1429  		CharData("\n"),
  1430  		StartElement{Name{"", "br"}, []Attr{}},
  1431  		EndElement{Name{"", "br"}},
  1432  		StartElement{Name{"", "br"}, []Attr{}},
  1433  		EndElement{Name{"", "br"}},
  1434  		CharData("\n"),
  1435  		StartElement{Name{"", "br"}, []Attr{}},
  1436  		EndElement{Name{"", "br"}},
  1437  		CharData("\n"),
  1438  		StartElement{Name{"", "BR"}, []Attr{}},
  1439  		EndElement{Name{"", "BR"}},
  1440  		CharData("\n"),
  1441  		StartElement{Name{"", "BR"}, []Attr{}},
  1442  		EndElement{Name{"", "BR"}},
  1443  		StartElement{Name{"", "BR"}, []Attr{}},
  1444  		EndElement{Name{"", "BR"}},
  1445  		CharData("\n"),
  1446  		StartElement{Name{"", "Br"}, []Attr{}},
  1447  		EndElement{Name{"", "Br"}},
  1448  		CharData("\n"),
  1449  		StartElement{Name{"", "BR"}, []Attr{}},
  1450  		EndElement{Name{"", "BR"}},
  1451  		StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
  1452  		CharData("abc"),
  1453  		EndElement{Name{"", "span"}},
  1454  		StartElement{Name{"", "br"}, []Attr{}},
  1455  		EndElement{Name{"", "br"}},
  1456  		StartElement{Name{"", "br"}, []Attr{}},
  1457  		EndElement{Name{"", "br"}},
  1458  	}
  1459  
  1460  	d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
  1461  	d.Strict = false
  1462  	d.AutoClose = HTMLAutoClose
  1463  	d.Entity = HTMLEntity
  1464  	var haveTokens []Token
  1465  	for {
  1466  		tok, err := d.Token()
  1467  		if err != nil {
  1468  			if err == io.EOF {
  1469  				break
  1470  			}
  1471  			t.Fatalf("unexpected error: %v", err)
  1472  		}
  1473  		haveTokens = append(haveTokens, CopyToken(tok))
  1474  	}
  1475  	if len(haveTokens) != len(wantTokens) {
  1476  		t.Errorf("tokens count mismatch: have %d, want %d", len(haveTokens), len(wantTokens))
  1477  	}
  1478  	for i, want := range wantTokens {
  1479  		if i >= len(haveTokens) {
  1480  			t.Errorf("token[%d] expected %#v, have no token", i, want)
  1481  		} else {
  1482  			have := haveTokens[i]
  1483  			if !reflect.DeepEqual(have, want) {
  1484  				t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
  1485  			}
  1486  		}
  1487  	}
  1488  }
  1489  

View as plain text