Source file src/encoding/csv/reader_test.go

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package csv
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"reflect"
    12  	"slices"
    13  	"strings"
    14  	"testing"
    15  	"unicode/utf8"
    16  )
    17  
// readTest describes one table-driven case for TestRead: the annotated input,
// the records and errors expected from it, and the Reader settings to apply.
type readTest struct {
	Name      string     // subtest name passed to t.Run
	Input     string     // CSV text; may carry the §, ¶ and ∑ markers stripped by makePositions
	Output    [][]string // records expected from ReadAll and, one by one, from Read
	Positions [][][2]int // NOTE(review): not read by the test code visible in this file; TestRead derives positions from Input
	Errors    []error    // Errors[i], when non-nil, is the error expected for record i

	// These fields are copied into the Reader
	Comma              rune // copied only when non-zero
	Comment            rune
	UseFieldsPerRecord bool // false (default) means FieldsPerRecord is -1
	FieldsPerRecord    int
	LazyQuotes         bool
	TrimLeadingSpace   bool
	ReuseRecord        bool
}
    34  
    35  // In these tests, the §, ¶ and ∑ characters in readTest.Input are used to denote
    36  // the start of a field, a record boundary and the position of an error respectively.
    37  // They are removed before parsing and are used to verify the position
    38  // information reported by FieldPos.
    39  
// readTests is the table of cases run by TestRead. Each entry gives an Input
// annotated with the §, ¶ and ∑ markers, the Reader settings to apply, and the
// records expected in Output and/or the per-record errors expected in Errors.
var readTests = []readTest{{
	Name:   "Simple",
	Input:  "§a,§b,§c\n",
	Output: [][]string{{"a", "b", "c"}},
}, {
	Name:   "CRLF",
	Input:  "§a,§b\r\n¶§c,§d\r\n",
	Output: [][]string{{"a", "b"}, {"c", "d"}},
}, {
	// A \r that is not immediately followed by \n is expected to stay in the field.
	Name:   "BareCR",
	Input:  "§a,§b\rc,§d\r\n",
	Output: [][]string{{"a", "b\rc", "d"}},
}, {
	Name: "RFC4180test",
	Input: `§#field1,§field2,§field3
¶§"aaa",§"bb
b",§"ccc"
¶§"a,a",§"b""bb",§"ccc"
¶§zzz,§yyy,§xxx
`,
	Output: [][]string{
		{"#field1", "field2", "field3"},
		{"aaa", "bb\nb", "ccc"},
		{"a,a", `b"bb`, "ccc"},
		{"zzz", "yyy", "xxx"},
	},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:   "NoEOLTest",
	Input:  "§a,§b,§c",
	Output: [][]string{{"a", "b", "c"}},
}, {
	Name:   "Semicolon",
	Input:  "§a;§b;§c\n",
	Output: [][]string{{"a", "b", "c"}},
	Comma:  ';',
}, {
	Name: "MultiLine",
	Input: `§"two
line",§"one line",§"three
line
field"`,
	Output: [][]string{{"two\nline", "one line", "three\nline\nfield"}},
}, {
	// Empty lines are expected to produce no records.
	Name:  "BlankLine",
	Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
	Output: [][]string{
		{"a", "b", "c"},
		{"d", "e", "f"},
	},
}, {
	Name:  "BlankLineFieldCount",
	Input: "§a,§b,§c\n\n¶§d,§e,§f\n\n",
	Output: [][]string{
		{"a", "b", "c"},
		{"d", "e", "f"},
	},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:             "TrimSpace",
	Input:            " §a,  §b,   §c\n",
	Output:           [][]string{{"a", "b", "c"}},
	TrimLeadingSpace: true,
}, {
	Name:   "LeadingSpace",
	Input:  "§ a,§  b,§   c\n",
	Output: [][]string{{" a", "  b", "   c"}},
}, {
	// Lines starting with the Comment rune yield no record, so they carry no § markers.
	Name:    "Comment",
	Input:   "#1,2,3\n§a,§b,§c\n#comment",
	Output:  [][]string{{"a", "b", "c"}},
	Comment: '#',
}, {
	Name:   "NoComment",
	Input:  "§#1,§2,§3\n¶§a,§b,§c",
	Output: [][]string{{"#1", "2", "3"}, {"a", "b", "c"}},
}, {
	Name:       "LazyQuotes",
	Input:      `§a "word",§"1"2",§a",§"b`,
	Output:     [][]string{{`a "word"`, `1"2`, `a"`, `b`}},
	LazyQuotes: true,
}, {
	Name:       "BareQuotes",
	Input:      `§a "word",§"1"2",§a"`,
	Output:     [][]string{{`a "word"`, `1"2`, `a"`}},
	LazyQuotes: true,
}, {
	Name:       "BareDoubleQuotes",
	Input:      `§a""b,§c`,
	Output:     [][]string{{`a""b`, `c`}},
	LazyQuotes: true,
}, {
	Name:   "BadDoubleQuotes",
	Input:  `§a∑""b,c`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:             "TrimQuote",
	Input:            ` §"a",§" b",§c`,
	Output:           [][]string{{"a", " b", "c"}},
	TrimLeadingSpace: true,
}, {
	Name:   "BadBareQuote",
	Input:  `§a ∑"word","b"`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:   "BadTrailingQuote",
	Input:  `§"a word",b∑"`,
	Errors: []error{&ParseError{Err: ErrBareQuote}},
}, {
	Name:   "ExtraneousQuote",
	Input:  `§"a ∑"word","b"`,
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	// Errors is indexed by record: record 0 is fine, record 1 has the wrong
	// field count. Output still lists the short record because TestRead
	// compares records returned alongside ErrFieldCount.
	Name:               "BadFieldCount",
	Input:              "§a,§b,§c\n¶∑§d,§e",
	Errors:             []error{nil, &ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}, {"d", "e"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:               "BadFieldCountMultiple",
	Input:              "§a,§b,§c\n¶∑§d,§e\n¶∑§f",
	Errors:             []error{nil, &ParseError{Err: ErrFieldCount}, &ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}, {"d", "e"}, {"f"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    0,
}, {
	Name:               "BadFieldCount1",
	Input:              `§∑a,§b,§c`,
	Errors:             []error{&ParseError{Err: ErrFieldCount}},
	Output:             [][]string{{"a", "b", "c"}},
	UseFieldsPerRecord: true,
	FieldsPerRecord:    2,
}, {
	Name:   "FieldCount",
	Input:  "§a,§b,§c\n¶§d,§e",
	Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
}, {
	Name:   "TrailingCommaEOF",
	Input:  "§a,§b,§c,§",
	Output: [][]string{{"a", "b", "c", ""}},
}, {
	Name:   "TrailingCommaEOL",
	Input:  "§a,§b,§c,§\n",
	Output: [][]string{{"a", "b", "c", ""}},
}, {
	Name:             "TrailingCommaSpaceEOF",
	Input:            "§a,§b,§c, §",
	Output:           [][]string{{"a", "b", "c", ""}},
	TrimLeadingSpace: true,
}, {
	Name:             "TrailingCommaSpaceEOL",
	Input:            "§a,§b,§c, §\n",
	Output:           [][]string{{"a", "b", "c", ""}},
	TrimLeadingSpace: true,
}, {
	Name:             "TrailingCommaLine3",
	Input:            "§a,§b,§c\n¶§d,§e,§f\n¶§g,§hi,§",
	Output:           [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
	TrimLeadingSpace: true,
}, {
	Name:   "NotTrailingComma3",
	Input:  "§a,§b,§c,§ \n",
	Output: [][]string{{"a", "b", "c", " "}},
}, {
	Name: "CommaFieldTest",
	Input: `§x,§y,§z,§w
¶§x,§y,§z,§
¶§x,§y,§,§
¶§x,§,§,§
¶§,§,§,§
¶§"x",§"y",§"z",§"w"
¶§"x",§"y",§"z",§""
¶§"x",§"y",§"",§""
¶§"x",§"",§"",§""
¶§"",§"",§"",§""
`,
	Output: [][]string{
		{"x", "y", "z", "w"},
		{"x", "y", "z", ""},
		{"x", "y", "", ""},
		{"x", "", "", ""},
		{"", "", "", ""},
		{"x", "y", "z", "w"},
		{"x", "y", "z", ""},
		{"x", "y", "", ""},
		{"x", "", "", ""},
		{"", "", "", ""},
	},
}, {
	Name:  "TrailingCommaIneffective1",
	Input: "§a,§b,§\n¶§c,§d,§e",
	Output: [][]string{
		{"a", "b", ""},
		{"c", "d", "e"},
	},
	TrimLeadingSpace: true,
}, {
	Name:  "ReadAllReuseRecord",
	Input: "§a,§b\n¶§c,§d",
	Output: [][]string{
		{"a", "b"},
		{"c", "d"},
	},
	ReuseRecord: true,
}, {
	Name:   "StartLine1", // Issue 19019
	Input:  "§a,\"b\nc∑\"d,e",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:   "StartLine2",
	Input:  "§a,§b\n¶§\"d\n\n,e∑",
	Errors: []error{nil, &ParseError{Err: ErrQuote}},
	Output: [][]string{{"a", "b"}},
}, {
	Name:  "CRLFInQuotedField", // Issue 21201
	Input: "§A,§\"Hello\r\nHi\",§B\r\n",
	Output: [][]string{
		{"A", "Hello\nHi", "B"},
	},
}, {
	Name:   "BinaryBlobField", // Issue 19410
	Input:  "§x09\x41\xb4\x1c,§aktau",
	Output: [][]string{{"x09A\xb4\x1c", "aktau"}},
}, {
	Name:   "TrailingCR",
	Input:  "§field1,§field2\r",
	Output: [][]string{{"field1", "field2"}},
}, {
	Name:   "QuotedTrailingCR",
	Input:  "§\"field\"\r",
	Output: [][]string{{"field"}},
}, {
	Name:   "QuotedTrailingCRCR",
	Input:  "§\"field∑\"\r\r",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:   "FieldCR",
	Input:  "§field\rfield\r",
	Output: [][]string{{"field\rfield"}},
}, {
	Name:   "FieldCRCR",
	Input:  "§field\r\rfield\r\r",
	Output: [][]string{{"field\r\rfield\r"}},
}, {
	Name:   "FieldCRCRLF",
	Input:  "§field\r\r\n¶§field\r\r\n",
	Output: [][]string{{"field\r"}, {"field\r"}},
}, {
	Name:   "FieldCRCRLFCR",
	Input:  "§field\r\r\n¶§\rfield\r\r\n\r",
	Output: [][]string{{"field\r"}, {"\rfield\r"}},
}, {
	Name:   "FieldCRCRLFCRCR",
	Input:  "§field\r\r\n¶§\r\rfield\r\r\n¶§\r\r",
	Output: [][]string{{"field\r"}, {"\r\rfield\r"}, {"\r"}},
}, {
	Name:  "MultiFieldCRCRLFCRCR",
	Input: "§field1,§field2\r\r\n¶§\r\rfield1,§field2\r\r\n¶§\r\r,§",
	Output: [][]string{
		{"field1", "field2\r"},
		{"\r\rfield1", "field2\r"},
		{"\r\r", ""},
	},
}, {
	Name:             "NonASCIICommaAndComment",
	Input:            "§a£§b,c£ \t§d,e\n€ comment\n",
	Output:           [][]string{{"a", "b,c", "d,e"}},
	TrimLeadingSpace: true,
	Comma:            '£',
	Comment:          '€',
}, {
	Name:    "NonASCIICommaAndCommentWithQuotes",
	Input:   "§a€§\"  b,\"€§ c\nλ comment\n",
	Output:  [][]string{{"a", "  b,", " c"}},
	Comma:   '€',
	Comment: 'λ',
}, {
	// λ and θ start with the same byte.
	// This tests that the parser doesn't confuse such characters.
	Name:    "NonASCIICommaConfusion",
	Input:   "§\"abθcd\"λ§efθgh",
	Output:  [][]string{{"abθcd", "efθgh"}},
	Comma:   'λ',
	Comment: '€',
}, {
	Name:    "NonASCIICommentConfusion",
	Input:   "§λ\n¶§λ\nθ\n¶§λ\n",
	Output:  [][]string{{"λ"}, {"λ"}, {"λ"}},
	Comment: 'θ',
}, {
	Name:   "QuotedFieldMultipleLF",
	Input:  "§\"\n\n\n\n\"",
	Output: [][]string{{"\n\n\n\n"}},
}, {
	// Input made only of line endings: neither records nor errors are expected.
	Name:  "MultipleCRLF",
	Input: "\r\n\r\n\r\n\r\n",
}, {
	// The implementation may read each line in several chunks if it doesn't fit entirely
	// in the read buffer, so we should test the code to handle that condition.
	Name:    "HugeLines",
	Input:   strings.Repeat("#ignore\n", 10000) + "§" + strings.Repeat("@", 5000) + ",§" + strings.Repeat("*", 5000),
	Output:  [][]string{{strings.Repeat("@", 5000), strings.Repeat("*", 5000)}},
	Comment: '#',
}, {
	Name:   "QuoteWithTrailingCRLF",
	Input:  "§\"foo∑\"bar\"\r\n",
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:       "LazyQuoteWithTrailingCRLF",
	Input:      "§\"foo\"bar\"\r\n",
	Output:     [][]string{{`foo"bar`}},
	LazyQuotes: true,
}, {
	Name:   "DoubleQuoteWithTrailingCRLF",
	Input:  "§\"foo\"\"bar\"\r\n",
	Output: [][]string{{`foo"bar`}},
}, {
	Name:   "EvenQuotes",
	Input:  `§""""""""`,
	Output: [][]string{{`"""`}},
}, {
	Name:   "OddQuotes",
	Input:  `§"""""""∑`,
	Errors: []error{&ParseError{Err: ErrQuote}},
}, {
	Name:       "LazyOddQuotes",
	Input:      `§"""""""`,
	Output:     [][]string{{`"""`}},
	LazyQuotes: true,
}, {
	// The remaining cases supply no Input; each pairs a Comma and/or Comment
	// setting with the expectation that reading fails with errInvalidDelim.
	Name:   "BadComma1",
	Comma:  '\n',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma2",
	Comma:  '\r',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma3",
	Comma:  '"',
	Errors: []error{errInvalidDelim},
}, {
	Name:   "BadComma4",
	Comma:  utf8.RuneError,
	Errors: []error{errInvalidDelim},
}, {
	Name:    "BadComment1",
	Comment: '\n',
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadComment2",
	Comment: '\r',
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadComment3",
	Comment: utf8.RuneError,
	Errors:  []error{errInvalidDelim},
}, {
	Name:    "BadCommaComment",
	Comma:   'X',
	Comment: 'X',
	Errors:  []error{errInvalidDelim},
}}
   406  
   407  func TestRead(t *testing.T) {
   408  	newReader := func(tt readTest) (*Reader, [][][2]int, map[int][2]int, string) {
   409  		positions, errPositions, input := makePositions(tt.Input)
   410  		r := NewReader(strings.NewReader(input))
   411  
   412  		if tt.Comma != 0 {
   413  			r.Comma = tt.Comma
   414  		}
   415  		r.Comment = tt.Comment
   416  		if tt.UseFieldsPerRecord {
   417  			r.FieldsPerRecord = tt.FieldsPerRecord
   418  		} else {
   419  			r.FieldsPerRecord = -1
   420  		}
   421  		r.LazyQuotes = tt.LazyQuotes
   422  		r.TrimLeadingSpace = tt.TrimLeadingSpace
   423  		r.ReuseRecord = tt.ReuseRecord
   424  		return r, positions, errPositions, input
   425  	}
   426  
   427  	for _, tt := range readTests {
   428  		t.Run(tt.Name, func(t *testing.T) {
   429  			r, positions, errPositions, input := newReader(tt)
   430  			out, err := r.ReadAll()
   431  			if wantErr := firstError(tt.Errors, positions, errPositions); wantErr != nil {
   432  				if !reflect.DeepEqual(err, wantErr) {
   433  					t.Fatalf("ReadAll() error mismatch:\ngot  %v (%#v)\nwant %v (%#v)", err, err, wantErr, wantErr)
   434  				}
   435  				if out != nil {
   436  					t.Fatalf("ReadAll() output:\ngot  %q\nwant nil", out)
   437  				}
   438  			} else {
   439  				if err != nil {
   440  					t.Fatalf("unexpected Readall() error: %v", err)
   441  				}
   442  				if !reflect.DeepEqual(out, tt.Output) {
   443  					t.Fatalf("ReadAll() output:\ngot  %q\nwant %q", out, tt.Output)
   444  				}
   445  			}
   446  
   447  			// Check input offset after call ReadAll()
   448  			inputByteSize := len(input)
   449  			inputOffset := r.InputOffset()
   450  			if err == nil && int64(inputByteSize) != inputOffset {
   451  				t.Errorf("wrong input offset after call ReadAll():\ngot:  %d\nwant: %d\ninput: %s", inputOffset, inputByteSize, input)
   452  			}
   453  
   454  			// Check field and error positions.
   455  			r, _, _, _ = newReader(tt)
   456  			for recNum := 0; ; recNum++ {
   457  				rec, err := r.Read()
   458  				var wantErr error
   459  				if recNum < len(tt.Errors) && tt.Errors[recNum] != nil {
   460  					wantErr = errorWithPosition(tt.Errors[recNum], recNum, positions, errPositions)
   461  				} else if recNum >= len(tt.Output) {
   462  					wantErr = io.EOF
   463  				}
   464  				if !reflect.DeepEqual(err, wantErr) {
   465  					t.Fatalf("Read() error at record %d:\ngot %v (%#v)\nwant %v (%#v)", recNum, err, err, wantErr, wantErr)
   466  				}
   467  				// ErrFieldCount is explicitly non-fatal.
   468  				if err != nil && !errors.Is(err, ErrFieldCount) {
   469  					if recNum < len(tt.Output) {
   470  						t.Fatalf("need more records; got %d want %d", recNum, len(tt.Output))
   471  					}
   472  					break
   473  				}
   474  				if got, want := rec, tt.Output[recNum]; !slices.Equal(got, want) {
   475  					t.Errorf("Read vs ReadAll mismatch;\ngot %q\nwant %q", got, want)
   476  				}
   477  				pos := positions[recNum]
   478  				if len(pos) != len(rec) {
   479  					t.Fatalf("mismatched position length at record %d", recNum)
   480  				}
   481  				for i := range rec {
   482  					line, col := r.FieldPos(i)
   483  					if got, want := [2]int{line, col}, pos[i]; got != want {
   484  						t.Errorf("position mismatch at record %d, field %d;\ngot %v\nwant %v", recNum, i, got, want)
   485  					}
   486  				}
   487  			}
   488  		})
   489  	}
   490  }
   491  
   492  // firstError returns the first non-nil error in errs,
   493  // with the position adjusted according to the error's
   494  // index inside positions.
   495  func firstError(errs []error, positions [][][2]int, errPositions map[int][2]int) error {
   496  	for i, err := range errs {
   497  		if err != nil {
   498  			return errorWithPosition(err, i, positions, errPositions)
   499  		}
   500  	}
   501  	return nil
   502  }
   503  
   504  func errorWithPosition(err error, recNum int, positions [][][2]int, errPositions map[int][2]int) error {
   505  	parseErr, ok := err.(*ParseError)
   506  	if !ok {
   507  		return err
   508  	}
   509  	if recNum >= len(positions) {
   510  		panic(fmt.Errorf("no positions found for error at record %d", recNum))
   511  	}
   512  	errPos, ok := errPositions[recNum]
   513  	if !ok {
   514  		panic(fmt.Errorf("no error position found for error at record %d", recNum))
   515  	}
   516  	parseErr1 := *parseErr
   517  	parseErr1.StartLine = positions[recNum][0][0]
   518  	parseErr1.Line = errPos[0]
   519  	parseErr1.Column = errPos[1]
   520  	return &parseErr1
   521  }
   522  
   523  // makePositions returns the expected field positions of all
   524  // the fields in text, the positions of any errors, and the text with the position markers
   525  // removed.
   526  //
   527  // The start of each field is marked with a § symbol;
   528  // CSV lines are separated by ¶ symbols;
   529  // Error positions are marked with ∑ symbols.
   530  func makePositions(text string) ([][][2]int, map[int][2]int, string) {
   531  	buf := make([]byte, 0, len(text))
   532  	var positions [][][2]int
   533  	errPositions := make(map[int][2]int)
   534  	line, col := 1, 1
   535  	recNum := 0
   536  
   537  	for len(text) > 0 {
   538  		r, size := utf8.DecodeRuneInString(text)
   539  		switch r {
   540  		case '\n':
   541  			line++
   542  			col = 1
   543  			buf = append(buf, '\n')
   544  		case '§':
   545  			if len(positions) == 0 {
   546  				positions = append(positions, [][2]int{})
   547  			}
   548  			positions[len(positions)-1] = append(positions[len(positions)-1], [2]int{line, col})
   549  		case '¶':
   550  			positions = append(positions, [][2]int{})
   551  			recNum++
   552  		case '∑':
   553  			errPositions[recNum] = [2]int{line, col}
   554  		default:
   555  			buf = append(buf, text[:size]...)
   556  			col += size
   557  		}
   558  		text = text[size:]
   559  	}
   560  	return positions, errPositions, string(buf)
   561  }
   562  
   563  // nTimes is an io.Reader which yields the string s n times.
   564  type nTimes struct {
   565  	s   string
   566  	n   int
   567  	off int
   568  }
   569  
   570  func (r *nTimes) Read(p []byte) (n int, err error) {
   571  	for {
   572  		if r.n <= 0 || r.s == "" {
   573  			return n, io.EOF
   574  		}
   575  		n0 := copy(p, r.s[r.off:])
   576  		p = p[n0:]
   577  		n += n0
   578  		r.off += n0
   579  		if r.off == len(r.s) {
   580  			r.off = 0
   581  			r.n--
   582  		}
   583  		if len(p) == 0 {
   584  			return
   585  		}
   586  	}
   587  }
   588  
   589  // benchmarkRead measures reading the provided CSV rows data.
   590  // initReader, if non-nil, modifies the Reader before it's used.
   591  func benchmarkRead(b *testing.B, initReader func(*Reader), rows string) {
   592  	b.ReportAllocs()
   593  	r := NewReader(&nTimes{s: rows, n: b.N})
   594  	if initReader != nil {
   595  		initReader(r)
   596  	}
   597  	for {
   598  		_, err := r.Read()
   599  		if err == io.EOF {
   600  			break
   601  		}
   602  		if err != nil {
   603  			b.Fatal(err)
   604  		}
   605  	}
   606  }
   607  
// benchmarkCSVData is the small input used by the benchmarks in this file
// that do not exercise large fields: ten rows of four fields each, the first
// five written unquoted and the last five written with every field quoted.
const benchmarkCSVData = `x,y,z,w
x,y,z,
x,y,,
x,,,
,,,
"x","y","z","w"
"x","y","z",""
"x","y","",""
"x","","",""
"","","",""
`
   619  
   620  func BenchmarkRead(b *testing.B) {
   621  	benchmarkRead(b, nil, benchmarkCSVData)
   622  }
   623  
   624  func BenchmarkReadWithFieldsPerRecord(b *testing.B) {
   625  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = 4 }, benchmarkCSVData)
   626  }
   627  
   628  func BenchmarkReadWithoutFieldsPerRecord(b *testing.B) {
   629  	benchmarkRead(b, func(r *Reader) { r.FieldsPerRecord = -1 }, benchmarkCSVData)
   630  }
   631  
// BenchmarkReadLargeFields measures Read, with no Reader settings changed, on
// rows whose fields are much longer than those of benchmarkCSVData. The
// four-row literal below is repeated three times by strings.Repeat to form
// the input handed to benchmarkRead.
func BenchmarkReadLargeFields(b *testing.B) {
	benchmarkRead(b, nil, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}
   639  
   640  func BenchmarkReadReuseRecord(b *testing.B) {
   641  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, benchmarkCSVData)
   642  }
   643  
   644  func BenchmarkReadReuseRecordWithFieldsPerRecord(b *testing.B) {
   645  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = 4 }, benchmarkCSVData)
   646  }
   647  
   648  func BenchmarkReadReuseRecordWithoutFieldsPerRecord(b *testing.B) {
   649  	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true; r.FieldsPerRecord = -1 }, benchmarkCSVData)
   650  }
   651  
// BenchmarkReadReuseRecordLargeFields measures Read with ReuseRecord enabled
// on rows whose fields are much longer than those of benchmarkCSVData. The
// four-row literal below is repeated three times by strings.Repeat to form
// the input handed to benchmarkRead.
func BenchmarkReadReuseRecordLargeFields(b *testing.B) {
	benchmarkRead(b, func(r *Reader) { r.ReuseRecord = true }, strings.Repeat(`xxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvv
,,zzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy,zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz,wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww,vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
`, 3))
}
   659  

View as plain text