From 5c46786ebc0abf14a64aef482455f504a1cd6e55 Mon Sep 17 00:00:00 2001 From: Nazar Gargol Date: Fri, 19 Jun 2020 18:24:51 +1200 Subject: [PATCH] Colocated readCSV util with members input serializer refs 8185b42d9e5fd9e9051f08ce3395a648ec02f3a4 - Allows to move files and keep the history in one go. - 'csv-parser' will be upgraded to 'papaparse' lib in a new package --- ghost/members-csv/parse | 60 +++++++++++++++++++++++++++- ghost/members-csv/test/parse.test.js | 53 ++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 ghost/members-csv/test/parse.test.js diff --git a/ghost/members-csv/parse b/ghost/members-csv/parse index 54e77c4072..202705a866 100644 --- a/ghost/members-csv/parse +++ b/ghost/members-csv/parse @@ -1,4 +1,59 @@ -const fsLib = require('../../../../../../lib/fs'); +const Promise = require('bluebird'); +const csvParser = require('csv-parser'); +const _ = require('lodash'); +const fs = require('fs-extra'); + +const readCSV = (options) => { + const columnsToExtract = options.columnsToExtract || []; + let results = []; + const rows = []; + + return new Promise(function (resolve, reject) { + const readFile = fs.createReadStream(options.path); + + readFile.on('err', function (err) { + reject(err); + }) + .pipe(csvParser()) + .on('data', function (row) { + rows.push(row); + }) + .on('end', function () { + // If CSV is single column - return all values including header + const headers = _.keys(rows[0]); + + let result = {}; + const columnMap = {}; + if (columnsToExtract.length === 1 && headers.length === 1) { + results = _.map(rows, function (value) { + result = {}; + result[columnsToExtract[0].name] = value[headers[0]]; + return result; + }); + } else { + // If there are multiple columns in csv file + // try to match headers using lookup value + + _.map(columnsToExtract, function findMatches(column) { + _.each(headers, function checkheader(header) { + if (column.lookup.test(header)) { + columnMap[column.name] = header; + } + }); + }); + + results = _.map(rows, function evaluateRow(row) { + const result = {}; + _.each(columnMap, function returnMatches(value, key) { + result[key] = row[value]; + }); + return result; + }); + } + resolve(results); + }); + }); +}; const parse = async (filePath) => { const columnsToExtract = [{ @@ -27,10 +82,11 @@ const parse = async (filePath) => { lookup: /created_at/i }]; - return await fsLib.readCSV({ + return await readCSV({ path: filePath, columnsToExtract: columnsToExtract }); }; module.exports.parse = parse; +module.exports.readCSV = readCSV; diff --git a/ghost/members-csv/test/parse.test.js b/ghost/members-csv/test/parse.test.js new file mode 100644 index 0000000000..19bbc184c8 --- /dev/null +++ b/ghost/members-csv/test/parse.test.js @@ -0,0 +1,53 @@ +const should = require('should'); +const path = require('path'); +const fsLib = require('../../../../../../../../core/server/api/canary/utils/serializers/input/utils/members-import-csv'); + +const csvPath = path.join(__dirname, '../../../../../../../utils/fixtures/csv/'); + +describe('members-import-csv: read csv', function () { + it('read csv: one column', function (done) { + fsLib.readCSV({ + path: csvPath + 'single-column-with-header.csv', + columnsToExtract: [{name: 'email', lookup: /email/i}] + }).then(function (result) { + should.exist(result); + result.length.should.eql(2); + result[0].email.should.eql('jbloggs@example.com'); + result[1].email.should.eql('test@example.com'); + done(); + }).catch(done); + }); + + it('read csv: two columns, 1 filter', function (done) { + fsLib.readCSV({ + path: csvPath + 'two-columns-with-header.csv', + columnsToExtract: [{name: 'email', lookup: /email/i}] + }).then(function (result) { + should.exist(result); + result.length.should.eql(2); + result[0].email.should.eql('jbloggs@example.com'); + result[1].email.should.eql('test@example.com'); + should.not.exist(result[0].id); + + done(); + }).catch(done); + }); + + it('read csv: two columns, 2 filters', function (done) { + fsLib.readCSV({ + path: csvPath + 'two-columns-obscure-header.csv', + columnsToExtract: [ + {name: 'email', lookup: /email/i}, + {name: 'id', lookup: /id/i} + ] + }).then(function (result) { + should.exist(result); + result.length.should.eql(2); + result[0].email.should.eql('jbloggs@example.com'); + result[0].id.should.eql('1'); + result[1].email.should.eql('test@example.com'); + result[1].id.should.eql('2'); + done(); + }).catch(done); + }); +});