From 0a5f91f3d03242857b2fbd19061f29967747f5f2 Mon Sep 17 00:00:00 2001 From: Fabien O'Carroll Date: Tue, 27 Oct 2020 10:44:06 +0000 Subject: [PATCH] Refactored members-csv to remove unused columnsToExtract no-issue - Was not used by the importer and removed for simplicity. - Updates the header mapping to happen in place, rather than in a loop - Updates the parsing of values to give correct types --- ghost/members-csv/lib/parse.js | 188 +++++++++------------------ ghost/members-csv/test/parse.test.js | 24 ++-- 2 files changed, 74 insertions(+), 138 deletions(-) diff --git a/ghost/members-csv/lib/parse.js b/ghost/members-csv/lib/parse.js index 8d5556fdaf..34c9525287 100644 --- a/ghost/members-csv/lib/parse.js +++ b/ghost/members-csv/lib/parse.js @@ -1,134 +1,76 @@ const Promise = require('bluebird'); +const pump = require('pump'); const papaparse = require('papaparse'); -const _ = require('lodash'); const fs = require('fs-extra'); -const mapRowsWithRegexes = (rows, columnsToExtract) => { - let results = []; - const columnMap = {}; - // If CSV is single column - return all values including header - const headers = _.keys(rows[0]); - - if (columnsToExtract.length === 1 && headers.length === 1) { - results = _.map(rows, function (value) { - let result = {}; - result[columnsToExtract[0].name] = value[headers[0]]; - return result; - }); - } else { - // If there are multiple columns in csv file - // try to match headers using lookup value - _.map(columnsToExtract, function findMatches(column) { - _.each(headers, function checkheader(header) { - if (column.lookup.test(header)) { - columnMap[column.name] = header; - } - }); - }); - - results = _.map(rows, function evaluateRow(row) { - const result = {}; - _.each(columnMap, function returnMatches(value, key) { - result[key] = row[value]; - }); - return result; - }); - } - - return results; -}; - -const mapRowsWithMappings = (rows, mapping) => { - const results = rows.map((row) => { - for (const key in mapping) { - row[key] = row[mapping[key]]; - - if (key !== mapping[key]) { - delete row[mapping[key]]; - } - } - - return row; - }); - - return results; -}; - -const readCSV = ({path, columnsToExtract, mapping}) => { - const rows = []; - +module.exports = (path, mapping, defaultLabels = []) => { return new Promise(function (resolve, reject) { - const readFile = fs.createReadStream(path); - - readFile.on('err', function (err) { - reject(err); - }) - .pipe(papaparse.parse(papaparse.NODE_STREAM_INPUT, { - header: true, - transform: function (value) { - if (value === '') { - return null; + const csvFileStream = fs.createReadStream(path); + const csvParserStream = papaparse.parse(papaparse.NODE_STREAM_INPUT, { + header: true, + transformHeader(_header) { + let header = _header; + if (mapping && Reflect.has(mapping, _header)) { + header = mapping[_header]; + } + if (header === 'subscribed_to_emails') { + return 'subscribed'; + } + return header; + }, + transform(value, header) { + if (header === 'labels') { + if (value && typeof value === 'string') { + return value.split(',').map(name => ({name})); } - - return value; - } - })) - .on('data', function (row) { - rows.push(row); - }) - .on('end', function () { - let results = []; - - if (columnsToExtract) { - results = mapRowsWithRegexes(rows, columnsToExtract); - } else { - results = mapRowsWithMappings(rows, mapping); } - resolve(results); - }); + if (header === 'subscribed_to_emails') { + return value.toLowerCase() !== 'false'; + } + + if (header === 'complimentary_plan') { + return value.toLowerCase() === 'true'; + } + + if (value === '') { + return null; + } + + if (value === 'undefined') { + return null; + } + + if (value.toLowerCase() === 'false') { + return false; + } + + if (value.toLowerCase() === 'true') { + return true; + } + + return value; + } + }); + + const rows = []; + const parsedCSVStream = pump(csvFileStream, csvParserStream, (err) => { + if (err) { + return reject(err); + } + resolve(rows); + }); + + parsedCSVStream.on('data', (row) => { + if (row.labels) { + row.labels = row.labels.concat(defaultLabels); + } else { + row.labels = defaultLabels; + } + rows.push(row); + }); }); }; -const parse = async (filePath, mapping) => { - const columnsToExtract = [{ - name: 'email', - lookup: /^email/i - }, { - name: 'name', - lookup: /name/i - }, { - name: 'note', - lookup: /note/i - }, { - name: 'subscribed_to_emails', - lookup: /subscribed_to_emails/i - }, { - name: 'stripe_customer_id', - lookup: /stripe_customer_id/i - }, { - name: 'complimentary_plan', - lookup: /complimentary_plan/i - }, { - name: 'labels', - lookup: /labels/i - }, { - name: 'created_at', - lookup: /created_at/i - }]; - - const options = { - path: filePath - }; - - if (mapping) { - options.mapping = mapping; - } else { - options.columnsToExtract = columnsToExtract; - } - - return await readCSV(options); -}; - -module.exports = parse; -module.exports.readCSV = readCSV; +// @TODO do we need this??? +module.exports.readCSV = ({path, mapping, defaultLabels}) => module.exports(path, mapping, defaultLabels); diff --git a/ghost/members-csv/test/parse.test.js b/ghost/members-csv/test/parse.test.js index c62bac1b17..ac06e46dbe 100644 --- a/ghost/members-csv/test/parse.test.js +++ b/ghost/members-csv/test/parse.test.js @@ -6,8 +6,7 @@ const csvPath = path.join(__dirname, '/fixtures/'); describe('parse', function () { it('read csv: empty file', async function () { const result = await readCSV({ - path: csvPath + 'empty.csv', - columnsToExtract: [{name: 'email', lookup: /email/i}] + path: csvPath + 'empty.csv' }); should.exist(result); @@ -16,8 +15,7 @@ describe('parse', function () { it('read csv: one column', async function () { const result = await readCSV({ - path: csvPath + 'single-column-with-header.csv', - columnsToExtract: [{name: 'email', lookup: /email/i}] + path: csvPath + 'single-column-with-header.csv' }); should.exist(result); @@ -28,24 +26,21 @@ describe('parse', function () { it('read csv: two columns, 1 filter', async function () { const result = await readCSV({ - path: csvPath + 'two-columns-with-header.csv', - columnsToExtract: [{name: 'email', lookup: /email/i}] + path: csvPath + 'two-columns-with-header.csv' }); should.exist(result); result.length.should.eql(2); result[0].email.should.eql('jbloggs@example.com'); result[1].email.should.eql('test@example.com'); - should.not.exist(result[0].id); }); it('read csv: two columns, 2 filters', async function () { const result = await readCSV({ path: csvPath + 'two-columns-obscure-header.csv', - columnsToExtract: [ - {name: 'email', lookup: /email/i}, - {name: 'id', lookup: /id/i} - ] + mapping: { + 'Email Address': 'email' + } }); should.exist(result); @@ -60,9 +55,8 @@ describe('parse', function () { const result = await readCSV({ path: csvPath + 'two-columns-mapping-header.csv', mapping: { - email: 'correo_electronico', - name: 'nombre', - id: 'id' + correo_electronico: 'email', + nombre: 'name' } }); @@ -81,7 +75,7 @@ describe('parse', function () { const result = await readCSV({ path: csvPath + 'two-columns-mapping-header.csv', mapping: { - email: 'correo_electronico' + correo_electronico: 'email' } });