// index.js (6.9 KB)

  const { Transform } = require('stream')
  // Byte values of carriage return (13) and line feed (10); the parser works on
  // raw bytes, so control characters are compared numerically.
  const [cr] = Buffer.from('\r')
  const [nl] = Buffer.from('\n')

  // Option defaults. They are copied into a fresh object for every parser
  // instance, so the shared record is frozen to guard against accidental mutation.
  const defaults = Object.freeze({
    escape: '"',
    headers: null,
    mapHeaders: ({ header }) => header, // identity: keep header names as parsed
    mapValues: ({ value }) => value, // identity: keep cell values as parsed
    newline: '\n',
    quote: '"',
    raw: false,
    separator: ',',
    skipComments: false,
    skipLines: null,
    maxRowBytes: Number.MAX_SAFE_INTEGER,
    strict: false
  })
  /**
   * Streaming CSV parser implemented as an object-mode Transform stream: it
   * consumes Buffer (or string) chunks and pushes one plain object per data row.
   *
   * Besides the standard stream events it emits:
   *  - 'headers' (Array): once, after the header line was parsed and mapped.
   *  - 'error' (RangeError): in strict mode, for a row whose number of cells
   *    differs from the number of headers.
   */
  class CsvParser extends Transform {
    /**
     * @param {Object|Array} [opts] Parser options, merged over `defaults`.
     *   Passing an array is shorthand for `{ headers: array }`.
     *   `newline`, `quote`, `separator` and `escape` are reduced to the FIRST
     *   byte of the given string. When `escape` is not supplied (or falsy) the
     *   quote byte doubles as the escape byte.
     */
    constructor (opts = {}) {
      super({ objectMode: true, highWaterMark: 16 })

      if (Array.isArray(opts)) opts = { headers: opts }

      const options = Object.assign({}, defaults, opts)

      // Automatic "\n" vs "\r" detection only runs when the caller kept the default newline
      options.customNewline = options.newline !== defaults.newline

      // Replace the single-character strings by their first byte value
      for (const key of ['newline', 'quote', 'separator']) {
        if (typeof options[key] !== 'undefined') {
          ([options[key]] = Buffer.from(options[key]))
        }
      }

      // if escape is not defined on the passed options, use the end value of quote
      options.escape = (opts || {}).escape ? Buffer.from(options.escape)[0] : options.quote

      this.state = {
        empty: options.raw ? Buffer.alloc(0) : '', // value used for a trailing empty cell
        escaped: false, // previous byte was an escape byte preceding a quote
        first: true, // the header line has not been consumed yet
        lineNumber: 0, // number of lines handled so far (drives skipLines)
        previousEnd: 0, // offset just past the last newline that was processed
        rowLength: 0, // bytes seen in the current row (drives maxRowBytes)
        quoted: false // currently inside a quoted cell
      }

      // Unprocessed tail carried over to the next chunk
      this._prev = null

      if (options.headers === false) {
        // enforce, as the column length check will fail if headers:false
        options.strict = false
      }

      // Headers supplied (or explicitly disabled): the first line is data, not a header
      if (options.headers || options.headers === false) {
        this.state.first = false
      }

      this.options = options
      this.headers = options.headers
    }

    /**
     * Extracts one cell from `buffer[start, end)`: strips a surrounding quote
     * pair and collapses every escape+quote pair into a single quote byte.
     *
     * NOTE: the collapsing is done IN PLACE, i.e. bytes of `buffer` are
     * overwritten. When a chunk is parsed without concatenation this is the
     * caller's own Buffer.
     *
     * @param {Buffer} buffer
     * @param {number} start Inclusive start offset of the cell.
     * @param {number} end Exclusive end offset of the cell.
     * @returns {string|Buffer} See `parseValue`.
     */
    parseCell (buffer, start, end) {
      const { escape, quote } = this.options

      // remove quotes from quoted cells
      if (buffer[start] === quote && buffer[end - 1] === quote) {
        start++
        end--
      }

      // y is the write cursor, i the read cursor; they diverge after the first escape
      let y = start

      for (let i = start; i < end; i++) {
        // check for escape characters and skip them
        if (buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote) {
          i++
        }

        if (y !== i) {
          buffer[y] = buffer[i]
        }
        y++
      }

      return this.parseValue(buffer, start, y)
    }

    /**
     * Parses the line stored in `buffer[start, end)` where `end` points one past
     * the newline byte. Depending on the parser state the line is dropped
     * (comment / skipLines), becomes the header row, or is pushed as a data row.
     *
     * @param {Buffer} buffer
     * @param {number} start Inclusive start offset of the line.
     * @param {number} end Exclusive end offset, including the newline byte.
     */
    parseLine (buffer, start, end) {
      const { customNewline, escape, mapHeaders, mapValues, quote, separator, skipComments, skipLines } = this.options

      end-- // trim newline
      // Trim the CR of a CRLF pair. `end > start` keeps the look-behind inside
      // this line, otherwise a blank line would inspect the previous line's bytes.
      if (!customNewline && end > start && buffer[end - 1] === cr) {
        end--
      }

      const comma = separator
      const cells = []
      let isQuoted = false
      let offset = start

      if (skipComments) {
        // `true` selects '#', a string selects its first byte as comment marker
        const char = typeof skipComments === 'string' ? skipComments : '#'
        if (buffer[start] === Buffer.from(char)[0]) {
          return
        }
      }

      // Applies the user's mapValues to data cells; header cells pass through untouched
      const mapValue = (value) => {
        if (this.state.first) {
          return value
        }

        const index = cells.length
        const header = this.headers[index]

        return mapValues({ header, index, value })
      }

      for (let i = start; i < end; i++) {
        const isStartingQuote = !isQuoted && buffer[i] === quote
        const isEndingQuote = isQuoted && buffer[i] === quote && i + 1 <= end && buffer[i + 1] === comma
        const isEscape = isQuoted && buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote

        if (isStartingQuote || isEndingQuote) {
          isQuoted = !isQuoted
          continue
        } else if (isEscape) {
          i++ // jump over the escaped quote
          continue
        }

        if (buffer[i] === comma && !isQuoted) {
          let value = this.parseCell(buffer, offset, i)
          value = mapValue(value)
          cells.push(value)
          offset = i + 1
        }
      }

      // Last cell of the line (no separator follows it)
      if (offset < end) {
        let value = this.parseCell(buffer, offset, end)
        value = mapValue(value)
        cells.push(value)
      }

      // A trailing separator denotes one more, empty, cell. Only bytes that
      // belong to this line are considered.
      if (end > start && buffer[end - 1] === comma) {
        cells.push(mapValue(this.state.empty))
      }

      const skip = skipLines && skipLines > this.state.lineNumber
      this.state.lineNumber++

      if (this.state.first && !skip) {
        this.state.first = false
        this.headers = cells.map((header, index) => mapHeaders({ header, index }))

        this.emit('headers', this.headers)
        return
      }

      if (!skip && this.options.strict && cells.length !== this.headers.length) {
        const e = new RangeError('Row length does not match headers')
        this.emit('error', e)
      } else {
        if (!skip) this.writeRow(cells)
      }
    }

    /**
     * Converts `buffer[start, end)` into a cell value.
     *
     * @param {Buffer} buffer
     * @param {number} start
     * @param {number} end
     * @returns {string|Buffer} A slice of `buffer` in raw mode, otherwise the
     *   bytes decoded as UTF-8.
     */
    parseValue (buffer, start, end) {
      if (this.options.raw) {
        return buffer.slice(start, end)
      }

      return buffer.toString('utf-8', start, end)
    }

    /**
     * Builds the row object for `cells` and pushes it downstream.
     * Keys come from the headers (column indexes when `headers === false`),
     * a `null` header drops the column, and a cell without a header is stored
     * under `_<index>`.
     *
     * @param {Array} cells Parsed cell values of one line.
     */
    writeRow (cells) {
      const headers = (this.headers === false) ? cells.map((value, index) => index) : this.headers

      const row = cells.reduce((o, cell, index) => {
        const header = headers[index]
        if (header === null) return o // skip columns

        const key = header !== undefined ? header : `_${index}`
        // Same key conversion a plain `o[key]` would perform, done once so it can be inspected
        const prop = typeof key === 'symbol' ? key : String(key)

        if (prop === '__proto__') {
          // Plain assignment would invoke the Object.prototype.__proto__ setter:
          // the column would vanish (string cell) or replace the row's prototype
          // (Buffer cell). Define it as an ordinary own property instead.
          Object.defineProperty(o, prop, { value: cell, writable: true, enumerable: true, configurable: true })
        } else {
          o[prop] = cell
        }
        return o
      }, {})

      this.push(row)
    }

    /**
     * Transform hook called once the input ended: parses a last line that was
     * not terminated by a newline. Nothing is parsed when there is no pending
     * data or the input stopped right after an escape byte.
     *
     * @param {Function} cb Completion callback.
     */
    _flush (cb) {
      if (this.state.escaped || !this._prev) return cb()
      this.parseLine(this._prev, this.state.previousEnd, this._prev.length + 1) // plus one, since parseLine subtracts one for the newline
      cb()
    }

    /**
     * Transform hook: scans a chunk for unquoted newline bytes, hands every
     * complete line to `parseLine` and keeps the unfinished remainder in
     * `this._prev` for the next call.
     *
     * @param {Buffer|string} data Incoming chunk; strings are converted with Buffer.from.
     * @param {string} enc Encoding reported by the stream (unused).
     * @param {Function} cb Completion callback; receives an Error when a row
     *   grows beyond `maxRowBytes`.
     */
    _transform (data, enc, cb) {
      if (typeof data === 'string') {
        data = Buffer.from(data)
      }

      const { escape, quote } = this.options
      let start = 0
      let buffer = data

      if (this._prev) {
        // Continue scanning where the previous chunk stopped
        start = this._prev.length
        buffer = Buffer.concat([this._prev, data])
        this._prev = null
      }

      const bufferLength = buffer.length

      for (let i = start; i < bufferLength; i++) {
        const chr = buffer[i]
        const nextChr = i + 1 < bufferLength ? buffer[i + 1] : null

        this.state.rowLength++
        if (this.state.rowLength > this.options.maxRowBytes) {
          return cb(new Error('Row exceeds the maximum size'))
        }

        if (!this.state.escaped && chr === escape && nextChr === quote && i !== start) {
          this.state.escaped = true
          continue
        } else if (chr === quote) {
          if (this.state.escaped) {
            this.state.escaped = false
            // non-escaped quote (quoting the cell)
          } else {
            this.state.quoted = !this.state.quoted
          }
          continue
        }

        if (!this.state.quoted) {
          // Until the header line is consumed, detect whether lines end with LF or a lone CR
          if (this.state.first && !this.options.customNewline) {
            if (chr === nl) {
              this.options.newline = nl
            } else if (chr === cr) {
              if (nextChr !== nl) {
                this.options.newline = cr
              }
            }
          }

          if (chr === this.options.newline) {
            this.parseLine(buffer, this.state.previousEnd, i + 1)
            this.state.previousEnd = i + 1
            this.state.rowLength = 0
          }
        }
      }

      // Everything consumed: nothing to carry over
      if (this.state.previousEnd === bufferLength) {
        this.state.previousEnd = 0
        return cb()
      }

      // The remainder lies completely inside the new chunk: keep only that chunk
      // and rebase previousEnd onto it
      if (bufferLength - this.state.previousEnd < data.length) {
        this._prev = data
        this.state.previousEnd -= (bufferLength - data.length)
        return cb()
      }

      this._prev = buffer
      cb()
    }
  }
  /**
   * Module entry point: factory that creates a new CsvParser.
   *
   * @param {Object|Array} [opts] Passed unchanged to the CsvParser constructor.
   * @returns {CsvParser} A new parser instance.
   */
  module.exports = (opts) => new CsvParser(opts)