"
d := NewDecoder(strings.NewReader(data))
d.Strict = false
token, err := d.Token()
if _, ok := err.(*SyntaxError); ok {
t.Errorf("Unexpected error: %v", err)
}
if token.(StartElement).Name.Local != "tag" {
t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
}
attr := token.(StartElement).Attr[0]
if attr.Value != "azAZ09:-_" {
t.Errorf("Unexpected attribute value: %v", attr.Value)
}
if attr.Name.Local != "attr" {
t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
}
}
func TestValuelessAttrs(t *testing.T) {
tests := [][3]string{
{"", "p", "nowrap"},
{"
", "p", "nowrap"},
{"", "input", "checked"},
{"", "input", "checked"},
}
for _, test := range tests {
d := NewDecoder(strings.NewReader(test[0]))
d.Strict = false
token, err := d.Token()
if _, ok := err.(*SyntaxError); ok {
t.Errorf("Unexpected error: %v", err)
}
if token.(StartElement).Name.Local != test[1] {
t.Errorf("Unexpected tag name: %v", token.(StartElement).Name.Local)
}
attr := token.(StartElement).Attr[0]
if attr.Value != test[2] {
t.Errorf("Unexpected attribute value: %v", attr.Value)
}
if attr.Name.Local != test[2] {
t.Errorf("Unexpected attribute name: %v", attr.Name.Local)
}
}
}
// TestCopyTokenCharData verifies that CopyToken yields a CharData equal to
// its argument that does not share the argument's backing bytes.
func TestCopyTokenCharData(t *testing.T) {
	buf := []byte("same data")
	var orig Token = CharData(buf)
	dup := CopyToken(orig)
	if equal := reflect.DeepEqual(orig, dup); !equal {
		t.Error("CopyToken(CharData) != CharData")
	}
	// Mutating the source bytes must make the two tokens diverge.
	buf[1] = 'o'
	if equal := reflect.DeepEqual(orig, dup); equal {
		t.Error("CopyToken(CharData) uses same buffer.")
	}
}
// TestCopyTokenStartElement verifies that CopyToken leaves the original
// StartElement untouched, returns an equal token, and does not share the
// attribute slice with the original.
func TestCopyTokenStartElement(t *testing.T) {
	elt := StartElement{Name{"", "hello"}, []Attr{{Name{"", "lang"}, "en"}}}
	var tok1 Token = elt
	tok2 := CopyToken(tok1)
	if tok1.(StartElement).Attr[0].Value != "en" {
		t.Error("CopyToken overwrote Attr[0]")
	}
	if !reflect.DeepEqual(tok1, tok2) {
		t.Error("CopyToken(StartElement) != StartElement")
	}
	// Overwrite the original's attribute in place; an independent copy must
	// now compare unequal.
	tok1.(StartElement).Attr[0] = Attr{Name{"", "lang"}, "de"}
	if reflect.DeepEqual(tok1, tok2) {
		t.Error("CopyToken(StartElement) uses same buffer.")
	}
}
// TestCopyTokenComment verifies that CopyToken yields a Comment equal to its
// argument that does not share the argument's backing bytes.
func TestCopyTokenComment(t *testing.T) {
	// The data must be at least two bytes long, with data[1] != 'o', so the
	// in-place mutation below is both valid and observable.
	data := []byte("<!-- some comment -->")
	var tok1 Token = Comment(data)
	tok2 := CopyToken(tok1)
	if !reflect.DeepEqual(tok1, tok2) {
		t.Error("CopyToken(Comment) != Comment")
	}
	data[1] = 'o'
	if reflect.DeepEqual(tok1, tok2) {
		t.Error("CopyToken(Comment) uses same buffer.")
	}
}
func TestSyntaxErrorLineNum(t *testing.T) {
testInput := "
Foo
\n\n
Bar>\n"
d := NewDecoder(strings.NewReader(testInput))
var err error
for _, err = d.Token(); err == nil; _, err = d.Token() {
}
synerr, ok := err.(*SyntaxError)
if !ok {
t.Error("Expected SyntaxError.")
}
if synerr.Line != 3 {
t.Error("SyntaxError didn't have correct line number.")
}
}
// TestTrailingRawToken drains the decoder with RawToken and requires the
// first error encountered to be io.EOF.
func TestTrailingRawToken(t *testing.T) {
	dec := NewDecoder(strings.NewReader(` `))
	var err error
	for err == nil {
		_, err = dec.RawToken()
	}
	if err == io.EOF {
		return
	}
	t.Fatalf("d.RawToken() = _, %v, want _, io.EOF", err)
}
// TestTrailingToken drains the decoder with Token and requires the first
// error encountered to be io.EOF.
func TestTrailingToken(t *testing.T) {
	dec := NewDecoder(strings.NewReader(` `))
	var err error
	for err == nil {
		_, err = dec.Token()
	}
	if err == io.EOF {
		return
	}
	t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
}
// TestEntityInsideCDATA reads every token from the input and requires that
// decoding ends with io.EOF rather than any other error.
func TestEntityInsideCDATA(t *testing.T) {
	dec := NewDecoder(strings.NewReader(``))
	var err error
	for err == nil {
		_, err = dec.Token()
	}
	if err == io.EOF {
		return
	}
	t.Fatalf("d.Token() = _, %v, want _, io.EOF", err)
}
// characterTests pairs a decoder input (in) with the SyntaxError message
// (err) that TestDisallowedCharacters expects reading that input to produce.
// NOTE(review): several inputs contain no element markup (for example
// "what's up" is paired with an element-attribute error) — confirm the
// inputs are complete.
var characterTests = []struct {
in string
err string
}{
{"\x12", "illegal character code U+0012"},
{"\x0b", "illegal character code U+000B"},
{"\xef\xbf\xbe", "illegal character code U+FFFE"},
{"\r\n\x07", "illegal character code U+0007"},
{"what's up", "expected attribute name in element"},
{"&abc\x01;", "invalid character entity &abc (no semicolon)"},
{"&\x01;", "invalid character entity & (no semicolon)"},
{"&\xef\xbf\xbe;", "invalid character entity &\uFFFE;"},
{"&hello;", "invalid character entity &hello;"},
}
// TestDisallowedCharacters decodes each characterTests input until an error
// occurs and requires that error to be a *SyntaxError whose Msg matches the
// table entry exactly.
func TestDisallowedCharacters(t *testing.T) {
	for idx, tc := range characterTests {
		dec := NewDecoder(strings.NewReader(tc.in))
		var err error
		for {
			if _, err = dec.Token(); err != nil {
				break
			}
		}
		synerr, isSyntax := err.(*SyntaxError)
		if !isSyntax {
			t.Fatalf("input %d d.Token() = _, %v, want _, *SyntaxError", idx, err)
		}
		if got := synerr.Msg; got != tc.err {
			t.Fatalf("input %d synerr.Msg wrong: want %q, got %q", idx, tc.err, got)
		}
	}
}
// TestIsInCharacterRange checks that isInCharacterRange rejects a value past
// utf8.MaxRune, both ends of the surrogate range, and a negative rune.
func TestIsInCharacterRange(t *testing.T) {
	const (
		surrogateMin = 0xD800
		surrogateMax = 0xDFFF
	)
	rejected := []rune{utf8.MaxRune + 1, surrogateMin, surrogateMax, -1}
	for i := 0; i < len(rejected); i++ {
		if r := rejected[i]; isInCharacterRange(r) {
			t.Errorf("rune %U considered valid", r)
		}
	}
}
// procInstTests lists processing-instruction contents (input) together with
// the values TestProcInstEncoding expects procInst to return for the
// "version" parameter (expect[0]) and the "encoding" parameter (expect[1]).
var procInstTests = []struct {
input string
expect [2]string
}{
{`version="1.0" encoding="utf-8"`, [2]string{"1.0", "utf-8"}},
{`version="1.0" encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
{`version="1.0" encoding='utf-8' `, [2]string{"1.0", "utf-8"}},
{`version="1.0" encoding=utf-8`, [2]string{"1.0", ""}},
{`encoding="FOO" `, [2]string{"", "FOO"}},
{`version=2.0 version="1.0" encoding=utf-7 encoding='utf-8'`, [2]string{"1.0", "utf-8"}},
{`version= encoding=`, [2]string{"", ""}},
{`encoding="version=1.0"`, [2]string{"", "version=1.0"}},
{``, [2]string{"", ""}},
// TODO: what's the right approach to handle these nested cases?
// As listed, a parameter nested inside another parameter's quoted value
// is still expected to be found.
{`encoding="version='1.0'"`, [2]string{"1.0", "version='1.0'"}},
{`version="encoding='utf-8'"`, [2]string{"encoding='utf-8'", "utf-8"}},
}
// TestProcInstEncoding runs procInst over every procInstTests input, first
// for the "version" parameter and then for "encoding", and compares each
// result with the expected value from the table.
func TestProcInstEncoding(t *testing.T) {
	for _, tc := range procInstTests {
		wantVersion, wantEncoding := tc.expect[0], tc.expect[1]

		version := procInst("version", tc.input)
		if version != wantVersion {
			t.Errorf("procInst(version, %q) = %q; want %q", tc.input, version, wantVersion)
		}

		encoding := procInst("encoding", tc.input)
		if encoding != wantEncoding {
			t.Errorf("procInst(encoding, %q) = %q; want %q", tc.input, encoding, wantEncoding)
		}
	}
}
// Ensure that directives with comments include the complete
// text of any nested directives.
//
// directivesWithCommentsInput is the document fed to the decoder by
// TestDirectivesWithComments.
// NOTE(review): the literal below looks truncated (directive openers are
// absent) — confirm against the intended input.
var directivesWithCommentsInput = `
]>
]>
--> --> []>
`
// directivesWithCommentsTokens is the exact token sequence, in order, that
// TestDirectivesWithComments expects the decoder to return for
// directivesWithCommentsInput.
var directivesWithCommentsTokens = []Token{
CharData("\n"),
Directive(`DOCTYPE [ ]`),
CharData("\n"),
Directive(`DOCTYPE [ ]`),
CharData("\n"),
Directive(`DOCTYPE [ ]`),
CharData("\n"),
}
// TestDirectivesWithComments decodes directivesWithCommentsInput and compares
// each token, in order, with directivesWithCommentsTokens. A decode error
// aborts the test; a mismatched token is reported and the comparison goes on.
func TestDirectivesWithComments(t *testing.T) {
	dec := NewDecoder(strings.NewReader(directivesWithCommentsInput))
	for idx := range directivesWithCommentsTokens {
		expected := directivesWithCommentsTokens[idx]
		got, err := dec.Token()
		if err != nil {
			t.Fatalf("token %d: unexpected error: %s", idx, err)
		}
		if reflect.DeepEqual(got, expected) {
			continue
		}
		t.Errorf("token %d = %#v want %#v", idx, got, expected)
	}
}
// errWriter is a writer that rejects every write.
type errWriter struct{}

// Write ignores its argument and reports zero bytes written together with an
// "unwritable" error.
func (errWriter) Write([]byte) (int, error) {
	return 0, fmt.Errorf("unwritable")
}
// TestEscapeTextIOErrors checks that EscapeText returns the error produced by
// a writer that always fails.
func TestEscapeTextIOErrors(t *testing.T) {
	const expectErr = "unwritable"
	err := EscapeText(errWriter{}, []byte("A"))
	if err != nil && err.Error() == expectErr {
		return
	}
	t.Errorf("have %v, want %v", err, expectErr)
}
// TestEscapeTextInvalidChar checks that EscapeText writes U+FFFD in place of
// a NUL byte and leaves the surrounding text unchanged.
func TestEscapeTextInvalidChar(t *testing.T) {
	const expected = "A \uFFFD terminated string."
	var out strings.Builder
	if err := EscapeText(&out, []byte("A \x00 terminated string.")); err != nil {
		t.Fatalf("have %v, want nil", err)
	}
	if text := out.String(); text != expected {
		t.Errorf("have %v, want %v", text, expected)
	}
}
// TestIssue5880 marshals a named byte-slice type holding non-ASCII byte
// values and requires the output to be valid UTF-8.
func TestIssue5880(t *testing.T) {
	type T []byte
	addr := T{192, 168, 0, 1}
	out, err := Marshal(addr)
	if err != nil {
		t.Errorf("Marshal error: %v", err)
	}
	if valid := utf8.Valid(out); !valid {
		t.Errorf("Marshal generated invalid UTF-8: %x", out)
	}
}
// TestIssue8535 decodes into a struct with two fields that share the local
// name "link" but differ in name space, and requires Decode to succeed.
func TestIssue8535(t *testing.T) {
	type ExampleConflict struct {
		XMLName  Name   `xml:"example"`
		Link     string `xml:"link"`
		AtomLink string `xml:"http://www.w3.org/2005/Atom link"` // Same name in a different name space
	}
	const testCase = `
Example
http://example.com/default
http://example.com/home
http://example.com/ns
`
	var dest ExampleConflict
	err := NewDecoder(strings.NewReader(testCase)).Decode(&dest)
	if err != nil {
		t.Fatal(err)
	}
}
// TestEncodeXMLNS runs each encodeXMLNS helper and compares the marshaled
// output with the expected text. Any marshal error is reported as a failure;
// the ok field of the table is not consulted by the loop.
func TestEncodeXMLNS(t *testing.T) {
	testCases := []struct {
		f    func() ([]byte, error)
		want string
		ok   bool
	}{
		{encodeXMLNS1, `hello world`, true},
		{encodeXMLNS2, `hello world`, true},
		{encodeXMLNS3, `hello world`, true},
		{encodeXMLNS4, `hello world`, false},
	}
	for idx, tc := range testCases {
		out, err := tc.f()
		if err != nil {
			t.Errorf("%d: marshal failed with %s", idx, err)
			continue
		}
		if got := string(out); got != tc.want {
			t.Errorf("%d: got %s, want %s \n", idx, got, tc.want)
		}
	}
}
// encodeXMLNS1 marshals a struct whose element name comes from XMLName and
// whose Ns field is tagged as an "xmlns" attribute.
func encodeXMLNS1() ([]byte, error) {
	type T struct {
		XMLName Name   `xml:"Test"`
		Ns      string `xml:"xmlns,attr"`
		Body    string
	}
	return Marshal(&T{
		Ns:   "http://example.com/ns",
		Body: "hello world",
	})
}
// encodeXMLNS2 marshals a struct whose Body field tag carries both a name
// space and a local name.
func encodeXMLNS2() ([]byte, error) {
	type Test struct {
		Body string `xml:"http://example.com/ns body"`
	}
	var v Test
	v.Body = "hello world"
	return Marshal(&v)
}
// encodeXMLNS3 marshals a struct whose XMLName field tag supplies the name
// space and element name; the XMLName value itself is left at its zero value.
func encodeXMLNS3() ([]byte, error) {
	type Test struct {
		XMLName Name `xml:"http://example.com/ns Test"`
		Body    string
	}
	// The original author noted that setting XMLName explicitly, as in
	//   &Test{XMLName: Name{"http://example.com/ns", ""}, Body: "hello world"}
	// is unusable here, so only Body is populated.
	var v Test
	v.Body = "hello world"
	return Marshal(&v)
}
// encodeXMLNS4 marshals a struct that has an "xmlns" attribute field but no
// XMLName field.
func encodeXMLNS4() ([]byte, error) {
	type Test struct {
		Ns   string `xml:"xmlns,attr"`
		Body string
	}
	return Marshal(&Test{
		Ns:   "http://example.com/ns",
		Body: "hello world",
	})
}
// TestIssue11405 reads each input to the first error and requires that error
// to be a *SyntaxError.
func TestIssue11405(t *testing.T) {
	testCases := []string{
		"",
		"",
		"",
	}
	for _, tc := range testCases {
		dec := NewDecoder(strings.NewReader(tc))
		var err error
		for err == nil {
			_, err = dec.Token()
		}
		if _, isSyntax := err.(*SyntaxError); isSyntax {
			continue
		}
		t.Errorf("%s: Token: Got error %v, want SyntaxError", tc, err)
	}
}
// TestIssue12417 reads each input to completion and checks that an error
// other than io.EOF occurs exactly for the rows marked ok == false.
func TestIssue12417(t *testing.T) {
	testCases := []struct {
		s  string
		ok bool
	}{
		{``, true},
		{``, true},
		{``, true},
		{``, false},
	}
	for _, tc := range testCases {
		dec := NewDecoder(strings.NewReader(tc.s))
		var err error
		for err == nil {
			_, err = dec.Token()
		}
		// Reaching the end of input is the success outcome.
		if err == io.EOF {
			err = nil
		}
		switch failed := err != nil; {
		case failed && tc.ok:
			t.Errorf("%q: Encoding charset: expected no error, got %s", tc.s, err)
		case !failed && !tc.ok:
			t.Errorf("%q: Encoding charset: expected error, got nil", tc.s)
		}
	}
}
// TestIssue7113 round-trips a document through Unmarshal, Marshal and
// Unmarshal again, checking that the outer element keeps its name space
// while the nested C element keeps an empty one.
func TestIssue7113(t *testing.T) {
	type C struct {
		XMLName Name `xml:""` // Sets empty namespace
	}
	type D struct {
		XMLName Name `xml:"d"`
	}
	type A struct {
		XMLName Name `xml:""`
		C       C    `xml:""`
		D       D
	}
	var a A
	structSpace := "b"
	// NOTE(review): xmlTest is empty as written — confirm the intended document.
	xmlTest := ``
	t.Log(xmlTest)
	err := Unmarshal([]byte(xmlTest), &a)
	if err != nil {
		t.Fatal(err)
	}
	if a.XMLName.Space != structSpace {
		t.Errorf("overidding with empty namespace: unmarshaling, got %s, want %s\n", a.XMLName.Space, structSpace)
	}
	if len(a.C.XMLName.Space) != 0 {
		t.Fatalf("overidding with empty namespace: unmarshaling, got %s, want empty\n", a.C.XMLName.Space)
	}
	var b []byte
	b, err = Marshal(&a)
	if err != nil {
		t.Fatal(err)
	}
	if len(a.C.XMLName.Space) != 0 {
		t.Errorf("overidding with empty namespace: marshaling, got %s in C tag which should be empty\n", a.C.XMLName.Space)
	}
	if string(b) != xmlTest {
		t.Fatalf("overidding with empty namespace: marshaling, got %s, want %s\n", b, xmlTest)
	}
	var c A
	err = Unmarshal(b, &c)
	if err != nil {
		t.Fatalf("second Unmarshal failed: %s", err)
	}
	// Report the fields of c, the value actually under test after the
	// round trip, rather than those of a.
	if c.XMLName.Space != structSpace {
		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, XML name space: got %s, want %s\n", c.XMLName.Space, structSpace)
	}
	if len(c.C.XMLName.Space) != 0 {
		t.Errorf("overidding with empty namespace: after marshaling & unmarshaling, got %s, want empty\n", c.C.XMLName.Space)
	}
}
// TestIssue20396 unmarshals each input into a string and compares the
// returned error with the expected one: identical values pass outright,
// otherwise the two are compared by message, with a nil on either side
// reported as an unexpected success or an unexpected error.
func TestIssue20396(t *testing.T) {
	attrError := UnmarshalError("XML syntax error on line 1: expected attribute name in element")
	testCases := []struct {
		s       string
		wantErr error
	}{
		{``, // Issue 20396
			UnmarshalError("XML syntax error on line 1: expected element name after <")},
		{``, attrError},
		{``, attrError},
		{``, nil},
		{`1`,
			UnmarshalError("XML syntax error on line 1: expected element name after <")},
		{`1`, attrError},
		{`1`, attrError},
		{`1`, nil},
	}
	var dest string
	for _, tc := range testCases {
		got := Unmarshal([]byte(tc.s), &dest)
		want := tc.wantErr
		if got == want {
			continue
		}
		switch {
		case got == nil:
			t.Errorf("%s: Unexpected success, want %v", tc.s, want)
		case want == nil:
			t.Errorf("%s: Unexpected error, got %v", tc.s, got)
		case got.Error() != want.Error():
			t.Errorf("%s: got %v, want %v", tc.s, got, want)
		}
	}
}
// TestIssue20685 reads each input to completion and checks that an error
// other than io.EOF occurs exactly for the rows marked ok == false.
func TestIssue20685(t *testing.T) {
	testCases := []struct {
		s  string
		ok bool
	}{
		{`one`, false},
		{`one`, true},
		{`one`, false},
		{`one`, false},
		{`one`, false},
		{`one`, false},
		{`one`, false},
	}
	for _, tc := range testCases {
		dec := NewDecoder(strings.NewReader(tc.s))
		var err error
		for err == nil {
			_, err = dec.Token()
		}
		// Reaching the end of input is the success outcome.
		if err == io.EOF {
			err = nil
		}
		switch failed := err != nil; {
		case failed && tc.ok:
			t.Errorf("%q: Closing tag with namespace : expected no error, got %s", tc.s, err)
		case !failed && !tc.ok:
			t.Errorf("%q: Closing tag with namespace : expected error, got nil", tc.s)
		}
	}
}
// tokenMap returns a decorator that wraps a TokenReader in a mapper, so that
// every token read from the source is passed through mapping before being
// returned.
func tokenMap(mapping func(t Token) Token) func(TokenReader) TokenReader {
	wrap := func(src TokenReader) TokenReader {
		var m mapper
		m.t = src
		m.f = mapping
		return m
	}
	return wrap
}
// mapper is a TokenReader that applies f to every token read from t.
type mapper struct {
	t TokenReader
	f func(Token) Token
}

// Token reads the next token from the wrapped reader. On success it returns
// the token transformed by f; on failure it returns a nil token and the
// reader's error unchanged, without calling f.
func (m mapper) Token() (Token, error) {
	next, err := m.t.Token()
	if err == nil {
		return m.f(next), nil
	}
	return nil, err
}
// TestNewTokenDecoderIdempotent checks that passing a *Decoder to
// NewTokenDecoder returns that same decoder.
func TestNewTokenDecoderIdempotent(t *testing.T) {
	orig := NewDecoder(strings.NewReader(`
`))
	if wrapped := NewTokenDecoder(orig); wrapped != orig {
		t.Error("NewTokenDecoder did not detect underlying Decoder")
	}
}
// TestWrapDecoder decodes through a token-mapping reader that renames
// "quote" start and end elements to "blocking", then checks that the result
// decodes into a struct named "blocking" with the expected character data.
func TestWrapDecoder(t *testing.T) {
	const wantChardata = "[Re-enter Clown with a letter, and FABIAN]"
	d := NewDecoder(strings.NewReader(`[Re-enter Clown with a letter, and FABIAN]
`))
	rename := tokenMap(func(in Token) Token {
		if start, ok := in.(StartElement); ok {
			if start.Name.Local != "quote" {
				return in
			}
			start.Name.Local = "blocking"
			return start
		}
		if end, ok := in.(EndElement); ok {
			if end.Name.Local != "quote" {
				return in
			}
			end.Name.Local = "blocking"
			return end
		}
		// Every other token kind passes through untouched.
		return in
	})
	d = NewTokenDecoder(rename(d))
	var o struct {
		XMLName  Name   `xml:"blocking"`
		Chardata string `xml:",chardata"`
	}
	if err := d.Decode(&o); err != nil {
		t.Fatal("Got unexpected error while decoding:", err)
	}
	if o.Chardata != wantChardata {
		t.Fatalf("Got unexpected chardata: `%s`\n", o.Chardata)
	}
}
// tokReader is a token source that never fails and never ends.
type tokReader struct{}

// Token returns a zero-valued StartElement and a nil error on every call.
func (tokReader) Token() (Token, error) {
	var start StartElement
	return start, nil
}
// Failure is an empty type with a custom UnmarshalXML method.
type Failure struct{}

// UnmarshalXML ignores both arguments, reads nothing from the decoder, and
// reports success.
func (Failure) UnmarshalXML(_ *Decoder, _ StartElement) error {
	return nil
}
// TestTokenUnmarshaler decodes from a custom TokenReader into a value with a
// custom UnmarshalXML method and fails if the call panics.
func TestTokenUnmarshaler(t *testing.T) {
	defer func() {
		if recover() == nil {
			return
		}
		t.Error("Unexpected panic using custom token unmarshaler")
	}()
	dec := NewTokenDecoder(tokReader{})
	// Only the absence of a panic is checked; the returned error is
	// deliberately discarded.
	_ = dec.Decode(&Failure{})
}
func testRoundTrip(t *testing.T, input string) {
d := NewDecoder(strings.NewReader(input))
var tokens []Token
var buf bytes.Buffer
e := NewEncoder(&buf)
for {
tok, err := d.Token()
if err == io.EOF {
break
}
if err != nil {
t.Fatalf("invalid input: %v", err)
}
if err := e.EncodeToken(tok); err != nil {
t.Fatalf("failed to re-encode input: %v", err)
}
tokens = append(tokens, CopyToken(tok))
}
if err := e.Flush(); err != nil {
t.Fatal(err)
}
d = NewDecoder(&buf)
for {
tok, err := d.Token()
if err == io.EOF {
break
}
if err != nil {
t.Fatalf("failed to decode output: %v", err)
}
if len(tokens) == 0 {
t.Fatalf("unexpected token: %#v", tok)
}
a, b := tokens[0], tok
if !reflect.DeepEqual(a, b) {
t.Fatalf("token mismatch: %#v vs %#v", a, b)
}
tokens = tokens[1:]
}
if len(tokens) > 0 {
t.Fatalf("lost tokens: %#v", tokens)
}
}
// TestRoundTrip runs testRoundTrip as a named subtest for each input.
func TestRoundTrip(t *testing.T) {
	cases := map[string]string{
		"trailing colon":         ``,
		"comments in directives": `--x --> > --x ]>`,
	}
	for name, input := range cases {
		doc := input
		t.Run(name, func(t *testing.T) {
			testRoundTrip(t, doc)
		})
	}
}
// TestParseErrors reads each src to the first error. A row with an empty err
// must end in io.EOF; any other row must fail with a non-EOF error whose text
// contains err as a substring.
// NOTE(review): several rows of the table appear to have lost markup (the
// row after "expected target name after" does not form a valid literal) —
// confirm the table against its intended contents.
func TestParseErrors(t *testing.T) {
// withDefaultHeader returns s with a fixed header string prepended.
withDefaultHeader := func(s string) string {
return `` + s
}
tests := []struct {
src string
err string
}{
{withDefaultHeader(``), `unexpected end element `},
{withDefaultHeader(``), `element in space x closed by in space y`},
{withDefaultHeader(` not ok ?>`), `expected target name after `},
{withDefaultHeader(``), `invalid sequence `), `invalid sequence `), `invalid baz`),
`element in space zzz closed by in space ""`},
{withDefaultHeader("\xf1"), `invalid UTF-8`},
// Header-related errors.
{``, `unsupported version "1.1"; only version 1.0 is supported`},
// Cases below are for "no errors".
{withDefaultHeader(``), ``},
{withDefaultHeader(``), ``},
}
for _, test := range tests {
d := NewDecoder(strings.NewReader(test.src))
var err error
// Drain tokens until the decoder reports any error, io.EOF included.
for {
_, err = d.Token()
if err != nil {
break
}
}
// An empty expected message means the input must parse cleanly.
if test.err == "" {
if err != io.EOF {
t.Errorf("parse %s: have %q error, expected none", test.src, err)
}
continue
}
// Inv: err != nil
if err == io.EOF {
t.Errorf("parse %s: unexpected EOF", test.src)
continue
}
// Only a substring match is required, not the full error text.
if !strings.Contains(err.Error(), test.err) {
t.Errorf("parse %s: can't find %q error substring\nerror: %q", test.src, test.err, err)
continue
}
}
}
// testInputHTMLAutoClose is the document decoded by BenchmarkHTMLAutoClose
// and TestHTMLAutoClose, both of which disable Strict and set AutoClose to
// HTMLAutoClose and Entity to HTMLEntity.
// NOTE(review): the literal holds only "abc" between newlines, while
// TestHTMLAutoClose expects many element tokens — confirm the contents.
const testInputHTMLAutoClose = `
abc
`
// BenchmarkHTMLAutoClose measures decoding testInputHTMLAutoClose in parallel
// with Strict disabled and the HTML auto-close and entity tables installed.
func BenchmarkHTMLAutoClose(b *testing.B) {
	b.RunParallel(func(p *testing.PB) {
		for p.Next() {
			d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
			d.Strict = false
			d.AutoClose = HTMLAutoClose
			d.Entity = HTMLEntity
			for {
				_, err := d.Token()
				if err == io.EOF {
					break
				}
				if err != nil {
					// This body runs on RunParallel's worker goroutines,
					// where Fatalf/FailNow must not be called; record the
					// failure and stop this worker instead.
					b.Errorf("unexpected error: %v", err)
					return
				}
			}
		}
	})
}
// TestHTMLAutoClose decodes testInputHTMLAutoClose with Strict disabled and
// the HTML auto-close and entity tables installed, then compares the decoded
// tokens one by one with the expected sequence.
func TestHTMLAutoClose(t *testing.T) {
	// closedPair returns the start/end token pair for an attribute-less
	// element with the given local name.
	closedPair := func(local string) []Token {
		return []Token{
			StartElement{Name{"", local}, []Attr{}},
			EndElement{Name{"", local}},
		}
	}

	wantTokens := []Token{
		ProcInst{"xml", []byte(`version="1.0" encoding="UTF-8"`)},
	}
	// Each group is preceded by a newline CharData token and followed by
	// one start/end pair per listed name.
	groups := [][]string{
		{"br"},
		{"br", "br"},
		{"br", "br"},
		{"br"},
		{"BR"},
		{"BR", "BR"},
		{"Br"},
		{"BR"},
	}
	for _, group := range groups {
		wantTokens = append(wantTokens, CharData("\n"))
		for _, local := range group {
			wantTokens = append(wantTokens, closedPair(local)...)
		}
	}
	wantTokens = append(wantTokens,
		StartElement{Name{"", "span"}, []Attr{{Name: Name{"", "id"}, Value: "test"}}},
		CharData("abc"),
		EndElement{Name{"", "span"}},
	)
	wantTokens = append(wantTokens, closedPair("br")...)
	wantTokens = append(wantTokens, closedPair("br")...)

	d := NewDecoder(strings.NewReader(testInputHTMLAutoClose))
	d.Strict = false
	d.AutoClose = HTMLAutoClose
	d.Entity = HTMLEntity

	var haveTokens []Token
	for tok, err := d.Token(); err != io.EOF; tok, err = d.Token() {
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		haveTokens = append(haveTokens, CopyToken(tok))
	}

	if have, want := len(haveTokens), len(wantTokens); have != want {
		t.Errorf("tokens count mismatch: have %d, want %d", have, want)
	}
	for i, want := range wantTokens {
		if i >= len(haveTokens) {
			t.Errorf("token[%d] expected %#v, have no token", i, want)
			continue
		}
		if have := haveTokens[i]; !reflect.DeepEqual(have, want) {
			t.Errorf("token[%d] mismatch:\nhave: %#v\nwant: %#v", i, have, want)
		}
	}
}