1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53 package csv
54
55 import (
56 "bufio"
57 "bytes"
58 "errors"
59 "fmt"
60 "io"
61 "unicode"
62 "unicode/utf8"
63 )
64
65
66
// A ParseError describes a problem encountered while parsing CSV input.
// Line numbers and columns both count from 1; columns are measured in bytes.
type ParseError struct {
	// StartLine is the line on which the offending record begins.
	StartLine int
	// Line is the line on which the problem was detected.
	Line int
	// Column is the 1-based byte index within Line where the problem was detected.
	Column int
	// Err is the underlying cause.
	Err error
}
73
74 func (e *ParseError) Error() string {
75 if e.Err == ErrFieldCount {
76 return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
77 }
78 if e.StartLine != e.Line {
79 return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
80 }
81 return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
82 }
83
84 func (e *ParseError) Unwrap() error { return e.Err }
85
86
// Sentinel errors that can be carried in ParseError.Err.
var (
	// ErrBareQuote reports a '"' inside a field that is not quoted.
	ErrBareQuote = errors.New(`bare " in non-quoted-field`)
	// ErrQuote reports a stray or unterminated '"' inside a quoted field.
	ErrQuote = errors.New(`extraneous or missing " in quoted-field`)
	// ErrFieldCount reports a record whose number of fields differs from
	// the expected count.
	ErrFieldCount = errors.New(`wrong number of fields`)

	// ErrTrailingComma is not returned by any code visible in this section.
	// NOTE(review): presumably retained only for backward compatibility — confirm.
	ErrTrailingComma = errors.New(`extra delimiter at end of line`)
)
95
// errInvalidDelim is returned when the configured field or comment
// delimiter is unusable (invalid rune, or both set to the same rune).
var errInvalidDelim = errors.New(`csv: invalid field or comment delimiter`)
97
// validDelim reports whether r may be used as a field or comment delimiter.
// NUL, the double quote, carriage return, line feed and the Unicode
// replacement character are rejected, as is anything that is not a valid rune.
func validDelim(r rune) bool {
	switch r {
	case 0, '"', '\r', '\n', utf8.RuneError:
		return false
	}
	return utf8.ValidRune(r)
}
101
102
103
104
105
106
107
108
109
110
111 type Reader struct {
112
113
114
115
116 Comma rune
117
118
119
120
121
122
123
124
125 Comment rune
126
127
128
129
130
131
132
133 FieldsPerRecord int
134
135
136
137 LazyQuotes bool
138
139
140
141 TrimLeadingSpace bool
142
143
144
145
146 ReuseRecord bool
147
148
149 TrailingComma bool
150
151 r *bufio.Reader
152
153
154 numLine int
155
156
157 offset int64
158
159
160 rawBuffer []byte
161
162
163
164
165
166 recordBuffer []byte
167
168
169
170 fieldIndexes []int
171
172
173
174 fieldPositions []position
175
176
177 lastRecord []string
178 }
179
180
181 func NewReader(r io.Reader) *Reader {
182 return &Reader{
183 Comma: ',',
184 r: bufio.NewReader(r),
185 }
186 }
187
188
189
190
191
192
193
194
195
196
197 func (r *Reader) Read() (record []string, err error) {
198 if r.ReuseRecord {
199 record, err = r.readRecord(r.lastRecord)
200 r.lastRecord = record
201 } else {
202 record, err = r.readRecord(nil)
203 }
204 return record, err
205 }
206
207
208
209
210
211
212
213 func (r *Reader) FieldPos(field int) (line, column int) {
214 if field < 0 || field >= len(r.fieldPositions) {
215 panic("out of range index passed to FieldPos")
216 }
217 p := &r.fieldPositions[field]
218 return p.line, p.col
219 }
220
221
222
223
224 func (r *Reader) InputOffset() int64 {
225 return r.offset
226 }
227
228
// position records where a field starts in the input.
type position struct {
	line int // line number, counted from 1
	col  int // byte column within the line, counted from 1
}
232
233
234
235
236
237
238 func (r *Reader) ReadAll() (records [][]string, err error) {
239 for {
240 record, err := r.readRecord(nil)
241 if err == io.EOF {
242 return records, nil
243 }
244 if err != nil {
245 return nil, err
246 }
247 records = append(records, record)
248 }
249 }
250
251
252
253
254
255 func (r *Reader) readLine() ([]byte, error) {
256 line, err := r.r.ReadSlice('\n')
257 if err == bufio.ErrBufferFull {
258 r.rawBuffer = append(r.rawBuffer[:0], line...)
259 for err == bufio.ErrBufferFull {
260 line, err = r.r.ReadSlice('\n')
261 r.rawBuffer = append(r.rawBuffer, line...)
262 }
263 line = r.rawBuffer
264 }
265 readSize := len(line)
266 if readSize > 0 && err == io.EOF {
267 err = nil
268
269 if line[readSize-1] == '\r' {
270 line = line[:readSize-1]
271 }
272 }
273 r.numLine++
274 r.offset += int64(readSize)
275
276 if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
277 line[n-2] = '\n'
278 line = line[:n-1]
279 }
280 return line, err
281 }
282
283
// lengthNL returns the length of the trailing newline in b:
// 1 if b ends in '\n', 0 otherwise (including when b is empty).
func lengthNL(b []byte) int {
	if n := len(b); n == 0 || b[n-1] != '\n' {
		return 0
	}
	return 1
}
290
291
// nextRune decodes and returns the first rune in b. For empty or invalidly
// encoded input this is utf8.RuneError, as reported by utf8.DecodeRune.
func nextRune(b []byte) (first rune) {
	first, _ = utf8.DecodeRune(b)
	return first
}
296
297 func (r *Reader) readRecord(dst []string) ([]string, error) {
298 if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
299 return nil, errInvalidDelim
300 }
301
302
303 var line []byte
304 var errRead error
305 for errRead == nil {
306 line, errRead = r.readLine()
307 if r.Comment != 0 && nextRune(line) == r.Comment {
308 line = nil
309 continue
310 }
311 if errRead == nil && len(line) == lengthNL(line) {
312 line = nil
313 continue
314 }
315 break
316 }
317 if errRead == io.EOF {
318 return nil, errRead
319 }
320
321
322 var err error
323 const quoteLen = len(`"`)
324 commaLen := utf8.RuneLen(r.Comma)
325 recLine := r.numLine
326 r.recordBuffer = r.recordBuffer[:0]
327 r.fieldIndexes = r.fieldIndexes[:0]
328 r.fieldPositions = r.fieldPositions[:0]
329 pos := position{line: r.numLine, col: 1}
330 parseField:
331 for {
332 if r.TrimLeadingSpace {
333 i := bytes.IndexFunc(line, func(r rune) bool {
334 return !unicode.IsSpace(r)
335 })
336 if i < 0 {
337 i = len(line)
338 pos.col -= lengthNL(line)
339 }
340 line = line[i:]
341 pos.col += i
342 }
343 if len(line) == 0 || line[0] != '"' {
344
345 i := bytes.IndexRune(line, r.Comma)
346 field := line
347 if i >= 0 {
348 field = field[:i]
349 } else {
350 field = field[:len(field)-lengthNL(field)]
351 }
352
353 if !r.LazyQuotes {
354 if j := bytes.IndexByte(field, '"'); j >= 0 {
355 col := pos.col + j
356 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
357 break parseField
358 }
359 }
360 r.recordBuffer = append(r.recordBuffer, field...)
361 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
362 r.fieldPositions = append(r.fieldPositions, pos)
363 if i >= 0 {
364 line = line[i+commaLen:]
365 pos.col += i + commaLen
366 continue parseField
367 }
368 break parseField
369 } else {
370
371 fieldPos := pos
372 line = line[quoteLen:]
373 pos.col += quoteLen
374 for {
375 i := bytes.IndexByte(line, '"')
376 if i >= 0 {
377
378 r.recordBuffer = append(r.recordBuffer, line[:i]...)
379 line = line[i+quoteLen:]
380 pos.col += i + quoteLen
381 switch rn := nextRune(line); {
382 case rn == '"':
383
384 r.recordBuffer = append(r.recordBuffer, '"')
385 line = line[quoteLen:]
386 pos.col += quoteLen
387 case rn == r.Comma:
388
389 line = line[commaLen:]
390 pos.col += commaLen
391 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
392 r.fieldPositions = append(r.fieldPositions, fieldPos)
393 continue parseField
394 case lengthNL(line) == len(line):
395
396 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
397 r.fieldPositions = append(r.fieldPositions, fieldPos)
398 break parseField
399 case r.LazyQuotes:
400
401 r.recordBuffer = append(r.recordBuffer, '"')
402 default:
403
404 err = &ParseError{StartLine: recLine, Line: r.numLine, Column: pos.col - quoteLen, Err: ErrQuote}
405 break parseField
406 }
407 } else if len(line) > 0 {
408
409 r.recordBuffer = append(r.recordBuffer, line...)
410 if errRead != nil {
411 break parseField
412 }
413 pos.col += len(line)
414 line, errRead = r.readLine()
415 if len(line) > 0 {
416 pos.line++
417 pos.col = 1
418 }
419 if errRead == io.EOF {
420 errRead = nil
421 }
422 } else {
423
424 if !r.LazyQuotes && errRead == nil {
425 err = &ParseError{StartLine: recLine, Line: pos.line, Column: pos.col, Err: ErrQuote}
426 break parseField
427 }
428 r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
429 r.fieldPositions = append(r.fieldPositions, fieldPos)
430 break parseField
431 }
432 }
433 }
434 }
435 if err == nil {
436 err = errRead
437 }
438
439
440
441 str := string(r.recordBuffer)
442 dst = dst[:0]
443 if cap(dst) < len(r.fieldIndexes) {
444 dst = make([]string, len(r.fieldIndexes))
445 }
446 dst = dst[:len(r.fieldIndexes)]
447 var preIdx int
448 for i, idx := range r.fieldIndexes {
449 dst[i] = str[preIdx:idx]
450 preIdx = idx
451 }
452
453
454 if r.FieldsPerRecord > 0 {
455 if len(dst) != r.FieldsPerRecord && err == nil {
456 err = &ParseError{
457 StartLine: recLine,
458 Line: recLine,
459 Column: 1,
460 Err: ErrFieldCount,
461 }
462 }
463 } else if r.FieldsPerRecord == 0 {
464 r.FieldsPerRecord = len(dst)
465 }
466 return dst, err
467 }
468
View as plain text