123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275 |
- const { Transform } = require('stream')
// Byte values of the two line-terminator characters the parser compares against.
const [cr] = Buffer.from('\r')
const [nl] = Buffer.from('\n')

/**
 * Option values used for any key the caller does not supply.
 *
 * Frozen because this object is shared by every parser instance: the parser
 * copies it with `Object.assign({}, defaults, opts)` and must never see a
 * value changed by an accidental write.
 */
const defaults = Object.freeze({
  // character that escapes a quote inside a quoted cell
  escape: '"',
  // null: take column names from the first parsed line
  headers: null,
  // identity mappers: leave header names and cell values untouched
  mapHeaders: ({ header }) => header,
  mapValues: ({ value }) => value,
  newline: '\n',
  quote: '"',
  // false: cells are decoded to UTF-8 strings instead of Buffer slices
  raw: false,
  separator: ',',
  skipComments: false,
  skipLines: null,
  // upper bound on the number of bytes scanned for a single row
  maxRowBytes: Number.MAX_SAFE_INTEGER,
  // true: a row whose cell count differs from the header count is an error
  strict: false
})
/**
 * Transform stream that parses CSV bytes and pushes one plain object per row.
 *
 * The stream runs in object mode. Unless `headers` is supplied (or set to
 * `false`), the first non-skipped line provides the column names and is
 * announced through a `'headers'` event instead of being pushed as a row.
 */
class CsvParser extends Transform {
  /**
   * @param {Object|Array} [opts] - options merged over `defaults`; an array is
   *   shorthand for `{ headers: opts }`.
   */
  constructor (opts = {}) {
    super({ objectMode: true, highWaterMark: 16 })

    if (Array.isArray(opts)) opts = { headers: opts }

    const options = Object.assign({}, defaults, opts)

    options.customNewline = options.newline !== defaults.newline

    // the scanner compares single bytes, so each delimiter is reduced to its first byte
    for (const key of ['newline', 'quote', 'separator']) {
      if (typeof options[key] !== 'undefined') {
        ([options[key]] = Buffer.from(options[key]))
      }
    }

    // if escape is not defined on the passed options, use the end value of quote
    options.escape = (opts || {}).escape ? Buffer.from(options.escape)[0] : options.quote

    this.state = {
      empty: options.raw ? Buffer.alloc(0) : '',
      escaped: false,
      first: true,
      lineNumber: 0,
      previousEnd: 0,
      rowLength: 0,
      quoted: false
    }

    // bytes carried over from earlier chunks that do not yet form a complete line
    this._prev = null
    // true when the last byte of the previous chunk was a CR whose role
    // (lone-CR terminator or first half of CRLF) could not be decided yet
    this._pendingCr = false

    if (options.headers === false) {
      // enforce, as the column length check will fail if headers:false
      options.strict = false
    }

    if (options.headers || options.headers === false) {
      // column names are already settled, so the first line is data
      this.state.first = false
    }

    this.options = options
    this.headers = options.headers
  }

  /**
   * Extracts one cell from `buffer[start, end)`.
   *
   * Surrounding quotes are removed and escape bytes that precede a quote are
   * dropped. The cell is compacted in place, so `buffer` is modified.
   *
   * @param {Buffer} buffer
   * @param {number} start - index of the first byte of the cell
   * @param {number} end - index one past the last byte of the cell
   * @returns {string|Buffer} the value produced by `parseValue`
   */
  parseCell (buffer, start, end) {
    const { escape, quote } = this.options

    // remove quotes from quoted cells
    if (buffer[start] === quote && buffer[end - 1] === quote) {
      start++
      end--
    }

    let y = start
    for (let i = start; i < end; i++) {
      // check for escape characters and skip them
      if (buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote) {
        i++
      }

      if (y !== i) {
        buffer[y] = buffer[i]
      }
      y++
    }

    return this.parseValue(buffer, start, y)
  }

  /**
   * Parses the line stored in `buffer[start, end)`, where the final byte is
   * the line terminator, and either records it as the header line, pushes it
   * as a row, skips it, or emits a strict-mode `'error'`.
   *
   * @param {Buffer} buffer
   * @param {number} start - index of the first byte of the line
   * @param {number} end - index one past the line terminator
   */
  parseLine (buffer, start, end) {
    const { customNewline, escape, mapHeaders, mapValues, quote, separator, skipComments, skipLines } = this.options

    end-- // trim newline
    // `end > start` stops an empty line from trimming the CR that terminated
    // the previous line, which would pull that line's trailing bytes into view
    if (!customNewline && end > start && buffer[end - 1] === cr) {
      end--
    }

    if (skipComments) {
      const char = typeof skipComments === 'string' ? skipComments : '#'
      if (buffer[start] === Buffer.from(char)[0]) {
        // comment lines are dropped before the line counter advances
        return
      }
    }

    const cells = []

    // header cells are kept verbatim; data cells go through the user's mapValues
    const mapValue = (value) => {
      if (this.state.first) {
        return value
      }

      const index = cells.length
      const header = this.headers[index]

      return mapValues({ header, index, value })
    }

    let isQuoted = false
    let offset = start

    for (let i = start; i < end; i++) {
      const isStartingQuote = !isQuoted && buffer[i] === quote
      // a quote only closes the cell when a separator follows it directly
      const isEndingQuote = isQuoted && buffer[i] === quote && buffer[i + 1] === separator
      const isEscape = isQuoted && buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote

      if (isStartingQuote || isEndingQuote) {
        isQuoted = !isQuoted
        continue
      } else if (isEscape) {
        // step over the escaped quote as well
        i++
        continue
      }

      if (buffer[i] === separator && !isQuoted) {
        let value = this.parseCell(buffer, offset, i)
        value = mapValue(value)
        cells.push(value)
        offset = i + 1
      }
    }

    if (offset < end) {
      let value = this.parseCell(buffer, offset, end)
      value = mapValue(value)
      cells.push(value)
    }

    // a trailing separator means the line ends with an empty cell
    if (buffer[end - 1] === separator) {
      cells.push(mapValue(this.state.empty))
    }

    const skip = skipLines && skipLines > this.state.lineNumber
    this.state.lineNumber++

    if (this.state.first && !skip) {
      this.state.first = false
      this.headers = cells.map((header, index) => mapHeaders({ header, index }))

      this.emit('headers', this.headers)
      return
    }

    if (!skip && this.options.strict && cells.length !== this.headers.length) {
      const e = new RangeError('Row length does not match headers')
      this.emit('error', e)
    } else {
      if (!skip) this.writeRow(cells)
    }
  }

  /**
   * Converts `buffer[start, end)` into the configured cell representation.
   *
   * @param {Buffer} buffer
   * @param {number} start
   * @param {number} end
   * @returns {string|Buffer} a Buffer slice when `raw` is set, otherwise a UTF-8 string
   */
  parseValue (buffer, start, end) {
    if (this.options.raw) {
      return buffer.slice(start, end)
    }

    return buffer.toString('utf-8', start, end)
  }

  /**
   * Builds a row object from `cells` and pushes it downstream.
   *
   * Columns whose header is `null` are omitted; cells without a header are
   * stored under `_<index>`; with `headers: false` the keys are the indexes.
   *
   * @param {Array<string|Buffer>} cells
   */
  writeRow (cells) {
    const headers = (this.headers === false) ? cells.map((value, index) => index) : this.headers
    const row = {}

    cells.forEach((cell, index) => {
      const header = headers[index]
      if (header === null) return // skip columns

      const key = header !== undefined ? header : `_${index}`
      // normalise the way property access would, so a Buffer header (raw mode)
      // spelling "__proto__" is recognised too
      const name = (typeof key === 'string' || typeof key === 'symbol') ? key : String(key)

      if (name === '__proto__') {
        // plain assignment would invoke the Object.prototype setter: the column
        // would vanish, or a Buffer cell would become the row's prototype
        Object.defineProperty(row, name, {
          value: cell,
          enumerable: true,
          writable: true,
          configurable: true
        })
      } else {
        row[name] = cell
      }
    })

    this.push(row)
  }

  /**
   * Parses whatever is left in the carry-over buffer as a final line that has
   * no terminator. Nothing is parsed while an escape sequence is still open.
   *
   * @param {Function} cb - stream callback
   */
  _flush (cb) {
    if (this.state.escaped || !this._prev) return cb()
    // parseLine always drops one terminator byte, so pass an end one past the data
    this.parseLine(this._prev, this.state.previousEnd, this._prev.length + 1)
    cb()
  }

  /**
   * Scans a chunk for line terminators outside quoted cells and hands every
   * complete line to `parseLine`. Incomplete trailing data is kept in
   * `this._prev` for the next call.
   *
   * @param {Buffer|string} data - incoming chunk
   * @param {string} enc - unused
   * @param {Function} cb - called with an Error when a row exceeds `maxRowBytes`
   */
  _transform (data, enc, cb) {
    if (typeof data === 'string') {
      data = Buffer.from(data)
    }

    const { escape, quote } = this.options
    let start = 0
    let buffer = data

    if (this._prev) {
      start = this._prev.length
      buffer = Buffer.concat([this._prev, data])
      this._prev = null
    }

    // resume on the CR that was left undecided at the end of the previous chunk
    let scanFrom = start
    if (this._pendingCr) {
      this._pendingCr = false
      if (start > 0) scanFrom = start - 1
    }

    const bufferLength = buffer.length

    for (let i = scanFrom; i < bufferLength; i++) {
      const chr = buffer[i]
      const nextChr = i + 1 < bufferLength ? buffer[i + 1] : null

      // While the newline style is still being detected, a CR in the very last
      // position may be either a lone-CR terminator or the first half of a CRLF
      // pair whose LF is in the next chunk. Leave it unprocessed until more
      // data (or the flush) arrives.
      if (
        nextChr === null &&
        chr === cr &&
        chr !== quote &&
        !this.state.quoted &&
        this.state.first &&
        !this.options.customNewline
      ) {
        this._pendingCr = true
        break
      }

      this.state.rowLength++
      if (this.state.rowLength > this.options.maxRowBytes) {
        return cb(new Error('Row exceeds the maximum size'))
      }

      // `i !== start`: the first byte of a new chunk is never treated as an
      // escape, so a quote pair split across two chunks toggles `quoted` twice
      if (!this.state.escaped && chr === escape && nextChr === quote && i !== start) {
        this.state.escaped = true
        continue
      } else if (chr === quote) {
        if (this.state.escaped) {
          this.state.escaped = false
          // non-escaped quote (quoting the cell)
        } else {
          this.state.quoted = !this.state.quoted
        }
        continue
      }

      if (!this.state.quoted) {
        // auto-detect the terminator from the first line: LF (also covers CRLF) or lone CR
        if (this.state.first && !this.options.customNewline) {
          if (chr === nl) {
            this.options.newline = nl
          } else if (chr === cr) {
            if (nextChr !== nl) {
              this.options.newline = cr
            }
          }
        }

        if (chr === this.options.newline) {
          this.parseLine(buffer, this.state.previousEnd, i + 1)
          this.state.previousEnd = i + 1
          this.state.rowLength = 0
        }
      }
    }

    // everything was consumed: nothing to carry over
    if (this.state.previousEnd === bufferLength) {
      this.state.previousEnd = 0
      return cb()
    }

    // the unfinished line lies entirely inside the new chunk: keep only that chunk
    if (bufferLength - this.state.previousEnd < data.length) {
      this._prev = data
      this.state.previousEnd -= (bufferLength - data.length)
      return cb()
    }

    this._prev = buffer
    cb()
  }
}
- module.exports = (opts) => new CsvParser(opts)
|