mirror of
https://github.com/TryGhost/Ghost.git
synced 2025-01-06 22:40:14 -05:00
Add CSV parser for csv read utility
closes #6865 - switch csv-read to use csv-parser for more reliable parsing and better string handling when importing a CSV file
This commit is contained in:
parent
db3df16c21
commit
0f0ca5a304
8 changed files with 76 additions and 189 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -7,6 +7,8 @@ b-cov
|
|||
*.pid
|
||||
*.gz
|
||||
|
||||
!core/test/utils/fixtures/**/*.csv
|
||||
|
||||
pids
|
||||
logs
|
||||
results
|
||||
|
|
|
@ -288,7 +288,7 @@ subscribers = {
|
|||
|
||||
return serverUtils.readCSV({
|
||||
path: filePath,
|
||||
columnsToExtract: ['email']
|
||||
columnsToExtract: [{name: 'email', lookup: /email/i}]
|
||||
}).then(function (result) {
|
||||
return Promise.all(result.map(function (entry) {
|
||||
return subscribers.add(
|
||||
|
|
|
@ -1,64 +1,57 @@
|
|||
var readline = require('readline'),
|
||||
Promise = require('bluebird'),
|
||||
lodash = require('lodash'),
|
||||
errors = require('../errors'),
|
||||
var Promise = require('bluebird'),
|
||||
csvParser = require('csv-parser'),
|
||||
_ = require('lodash'),
|
||||
fs = require('fs');
|
||||
|
||||
function readCSV(options) {
|
||||
var path = options.path,
|
||||
columnsToExtract = options.columnsToExtract || [],
|
||||
firstLine = true,
|
||||
mapping = {},
|
||||
toReturn = [],
|
||||
rl;
|
||||
var columnsToExtract = options.columnsToExtract || [],
|
||||
results = [], rows = [];
|
||||
|
||||
return new Promise(function (resolve, reject) {
|
||||
rl = readline.createInterface({
|
||||
input: fs.createReadStream(path),
|
||||
terminal: false
|
||||
});
|
||||
var readFile = fs.createReadStream(options.path);
|
||||
|
||||
rl.on('line', function (line) {
|
||||
var values = line.split(','),
|
||||
entry = {};
|
||||
|
||||
// CASE: column headers
|
||||
if (firstLine) {
|
||||
if (values.length === 1) {
|
||||
mapping[columnsToExtract[0]] = 0;
|
||||
} else {
|
||||
try {
|
||||
lodash.each(columnsToExtract, function (columnToExtract) {
|
||||
mapping[columnToExtract] = lodash.findIndex(values, function (value) {
|
||||
if (value.match(columnToExtract)) {
|
||||
return true;
|
||||
}
|
||||
});
|
||||
|
||||
// CASE: column does not exist
|
||||
if (mapping[columnToExtract] === -1) {
|
||||
throw new errors.ValidationError(
|
||||
'Column header missing: "{{column}}".'.replace('{{column}}', columnToExtract)
|
||||
);
|
||||
}
|
||||
});
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
}
|
||||
|
||||
firstLine = false;
|
||||
} else {
|
||||
lodash.each(mapping, function (index, columnName) {
|
||||
entry[columnName] = values[index];
|
||||
readFile.on('err', function (err) {
|
||||
reject(err);
|
||||
})
|
||||
.pipe(csvParser())
|
||||
.on('data', function (row) {
|
||||
rows.push(row);
|
||||
})
|
||||
.on('end', function () {
|
||||
// If CSV is single column - return all values including header
|
||||
var headers = _.keys(rows[0]), result = {}, columnMap = {};
|
||||
if (columnsToExtract.length === 1 && headers.length === 1) {
|
||||
results = _.map(rows, function (value) {
|
||||
result = {};
|
||||
result[columnsToExtract[0].name] = value[headers[0]];
|
||||
return result;
|
||||
});
|
||||
|
||||
toReturn.push(entry);
|
||||
}
|
||||
});
|
||||
// Add first row
|
||||
result = {};
|
||||
result[columnsToExtract[0].name] = headers[0];
|
||||
results = [result].concat(results);
|
||||
} else {
|
||||
// If there are multiple columns in csv file
|
||||
// try to match headers using lookup value
|
||||
|
||||
rl.on('close', function () {
|
||||
resolve(toReturn);
|
||||
_.map(columnsToExtract, function findMatches(column) {
|
||||
_.each(headers, function checkheader(header) {
|
||||
if (column.lookup.test(header)) {
|
||||
columnMap[column.name] = header;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
results = _.map(rows, function evaluateRow(row) {
|
||||
var result = {};
|
||||
_.each(columnMap, function returnMatches(value, key) {
|
||||
result[key] = row[value];
|
||||
});
|
||||
return result;
|
||||
});
|
||||
}
|
||||
resolve(results);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
|
|
@ -1,71 +1,29 @@
|
|||
/*globals describe, beforeEach, afterEach, it*/
|
||||
/*globals describe, it*/
|
||||
|
||||
var utils = require('../../../server/utils'),
|
||||
errors = require('../../../server/errors'),
|
||||
sinon = require('sinon'),
|
||||
should = require('should'),
|
||||
fs = require('fs'),
|
||||
lodash = require('lodash'),
|
||||
readline = require('readline');
|
||||
path = require ('path'),
|
||||
csvPath = path.join(__dirname, '../../utils/fixtures/csv/');
|
||||
|
||||
describe('read csv', function () {
|
||||
var scope = {};
|
||||
|
||||
beforeEach(function () {
|
||||
sinon.stub(fs, 'createReadStream');
|
||||
|
||||
sinon.stub(readline, 'createInterface', function () {
|
||||
return {
|
||||
on: function (eventName, cb) {
|
||||
switch (eventName) {
|
||||
case 'line':
|
||||
lodash.each(scope.csv, function (line) {
|
||||
cb(line);
|
||||
});
|
||||
break;
|
||||
case 'close':
|
||||
cb();
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(function () {
|
||||
fs.createReadStream.restore();
|
||||
readline.createInterface.restore();
|
||||
});
|
||||
|
||||
it('read csv: one column', function (done) {
|
||||
scope.csv = [
|
||||
'email',
|
||||
'hannah@ghost.org',
|
||||
'kate@ghost.org'
|
||||
];
|
||||
|
||||
utils.readCSV({
|
||||
path: 'read-file-is-mocked',
|
||||
columnsToExtract: ['email']
|
||||
path: csvPath + 'single-column-with-header.csv',
|
||||
columnsToExtract: [{name: 'email', lookup: /email/i}]
|
||||
}).then(function (result) {
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('hannah@ghost.org');
|
||||
result[1].email.should.eql('kate@ghost.org');
|
||||
result.length.should.eql(3);
|
||||
result[0].email.should.eql('email');
|
||||
result[1].email.should.eql('hannah@ghost.org');
|
||||
result[2].email.should.eql('kate@ghost.org');
|
||||
done();
|
||||
}).catch(done);
|
||||
});
|
||||
|
||||
it('read csv: two columns', function (done) {
|
||||
scope.csv = [
|
||||
'id,email',
|
||||
'1,hannah@ghost.org',
|
||||
'1,kate@ghost.org'
|
||||
];
|
||||
|
||||
it('read csv: two columns, 1 filter', function (done) {
|
||||
utils.readCSV({
|
||||
path: 'read-file-is-mocked',
|
||||
columnsToExtract: ['email']
|
||||
path: csvPath + 'two-columns-with-header.csv',
|
||||
columnsToExtract: [{name: 'email', lookup: /email/i}]
|
||||
}).then(function (result) {
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
|
@ -77,16 +35,13 @@ describe('read csv', function () {
|
|||
}).catch(done);
|
||||
});
|
||||
|
||||
it('read csv: two columns', function (done) {
|
||||
scope.csv = [
|
||||
'id,email',
|
||||
'1,hannah@ghost.org',
|
||||
'2,kate@ghost.org'
|
||||
];
|
||||
|
||||
it('read csv: two columns, 2 filters', function (done) {
|
||||
utils.readCSV({
|
||||
path: 'read-file-is-mocked',
|
||||
columnsToExtract: ['email', 'id']
|
||||
path: csvPath + 'two-columns-obscure-header.csv',
|
||||
columnsToExtract: [
|
||||
{name: 'email', lookup: /email/i},
|
||||
{name: 'id', lookup: /id/i}
|
||||
]
|
||||
}).then(function (result) {
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
|
@ -97,77 +52,4 @@ describe('read csv', function () {
|
|||
done();
|
||||
}).catch(done);
|
||||
});
|
||||
|
||||
it('read csv: test email regex', function (done) {
|
||||
scope.csv = [
|
||||
'email_address',
|
||||
'hannah@ghost.org',
|
||||
'kate@ghost.org'
|
||||
];
|
||||
|
||||
utils.readCSV({
|
||||
path: 'read-file-is-mocked',
|
||||
columnsToExtract: ['email']
|
||||
}).then(function (result) {
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('hannah@ghost.org');
|
||||
result[1].email.should.eql('kate@ghost.org');
|
||||
done();
|
||||
}).catch(done);
|
||||
});
|
||||
|
||||
it('read csv: support single column use case', function (done) {
|
||||
scope.csv = [
|
||||
'a_column',
|
||||
'hannah@ghost.org',
|
||||
'kate@ghost.org'
|
||||
];
|
||||
|
||||
utils.readCSV({
|
||||
path: 'read-file-is-mocked',
|
||||
columnsToExtract: ['email']
|
||||
}).then(function (result) {
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('hannah@ghost.org');
|
||||
result[1].email.should.eql('kate@ghost.org');
|
||||
done();
|
||||
}).catch(done);
|
||||
});
|
||||
|
||||
it('read csv: support single column use case (we would loose the first entry)', function (done) {
|
||||
scope.csv = [
|
||||
'hannah@ghost.org',
|
||||
'kate@ghost.org'
|
||||
];
|
||||
|
||||
utils.readCSV({
|
||||
path: 'read-file-is-mocked',
|
||||
columnsToExtract: ['email']
|
||||
}).then(function (result) {
|
||||
should.exist(result);
|
||||
result.length.should.eql(1);
|
||||
result[0].email.should.eql('kate@ghost.org');
|
||||
done();
|
||||
}).catch(done);
|
||||
});
|
||||
|
||||
it('read csv: broken', function (done) {
|
||||
scope.csv = [
|
||||
'id,test',
|
||||
'1,2',
|
||||
'1,2'
|
||||
];
|
||||
|
||||
utils.readCSV({
|
||||
path: 'read-file-is-mocked',
|
||||
columnsToExtract: ['email', 'id']
|
||||
}).then(function () {
|
||||
return done(new Error('we expected an error from read csv!'));
|
||||
}).catch(function (err) {
|
||||
(err instanceof errors.ValidationError).should.eql(true);
|
||||
done();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
email
|
||||
hannah@ghost.org
|
||||
kate@ghost.org
|
|
|
@ -0,0 +1,3 @@
|
|||
id,Email Address
|
||||
1,"hannah@ghost.org"
|
||||
2,kate@ghost.org
|
|
3
core/test/utils/fixtures/csv/two-columns-with-header.csv
Normal file
3
core/test/utils/fixtures/csv/two-columns-with-header.csv
Normal file
|
@ -0,0 +1,3 @@
|
|||
id,email
|
||||
1,"hannah@ghost.org"
|
||||
1,kate@ghost.org
|
|
|
@ -36,6 +36,7 @@
|
|||
"connect-slashes": "1.3.1",
|
||||
"cookie-session": "1.2.0",
|
||||
"cors": "2.7.1",
|
||||
"csv-parser": "1.9.3",
|
||||
"downsize": "0.0.8",
|
||||
"express": "4.13.4",
|
||||
"express-hbs": "1.0.1",
|
||||
|
|
Loading…
Reference in a new issue