0
Fork 0
mirror of https://github.com/TryGhost/Ghost.git synced 2025-02-17 23:44:39 -05:00

Added mapping support to CSV parser

no issue

- When processing CSV files, the `parse` function now accepts an optional "mapping" parameter: a hash in the following format:
{ destination_property_name: 'source_column_name'}
e.g.:
{
  name: 'weird_name_column',
  email: 'email_column'
}
- This allows the end user to provide an exact mapping of the CSV columns to the properties of the resulting JSON.
This commit is contained in:
Nazar Gargol 2020-07-01 01:27:11 +12:00
parent bf0e890751
commit fd982fef73
3 changed files with 154 additions and 66 deletions

View file

@ -3,13 +3,62 @@ const csvParser = require('csv-parser');
const _ = require('lodash');
const fs = require('fs-extra');
const readCSV = (options) => {
const columnsToExtract = options.columnsToExtract || [];
/**
 * Builds result objects from parsed CSV rows by matching CSV headers against
 * the `lookup` regex of each requested column.
 *
 * Headers are taken from the keys of the first row.
 *
 * @param {Object[]} rows - parsed CSV rows keyed by column header
 * @param {Object[]} columnsToExtract - descriptors in the form {name, lookup},
 *        where `lookup` is a RegExp tested against every header
 * @returns {Object[]} one object per row, keyed by the descriptors' `name`
 */
const mapRowsWithRegexes = (rows, columnsToExtract) => {
    const headers = _.keys(rows[0]);

    // One requested column and one CSV column: take the only value of each
    // row as-is, without consulting the lookup regex
    if (columnsToExtract.length === 1 && headers.length === 1) {
        return _.map(rows, (row) => {
            const single = {};
            single[columnsToExtract[0].name] = row[headers[0]];
            return single;
        });
    }

    // Several columns: work out which header feeds each requested name.
    // When more than one header matches a lookup, the last matching header wins.
    const headerByName = {};
    _.each(columnsToExtract, (column) => {
        _.each(headers, (header) => {
            if (column.lookup.test(header)) {
                headerByName[column.name] = header;
            }
        });
    });

    // Project every row onto the matched headers only
    return _.map(rows, (row) => {
        const picked = {};
        _.each(headerByName, (header, name) => {
            picked[name] = row[header];
        });
        return picked;
    });
};
/**
 * Renames CSV columns in every row according to an explicit mapping.
 *
 * Columns that are not used as a source in `mapping` are carried over
 * unchanged. Each row is rebuilt as a fresh object (the input rows are not
 * mutated) and every mapped value is read from the original row, so the
 * outcome does not depend on the order of the mapping's keys: one source
 * column may feed several destinations and two columns may be swapped.
 *
 * @param {Object[]} rows - parsed CSV rows keyed by column header
 * @param {Object} [mapping] - hash in the form
 *        {destination_property_name: 'source_column_name'}; when omitted or
 *        empty the rows' columns are returned as they are
 * @returns {Object[]} new row objects with the mapping applied
 */
const mapRowsWithMappings = (rows, mapping) => {
    // Own enumerable keys only; a missing mapping behaves like an empty one
    const pairs = Object.entries(mapping || {});
    const sourceColumns = new Set(pairs.map(([, source]) => source));

    return rows.map((row) => {
        const mapped = {};

        // Keep every column the mapping does not consume
        for (const [column, value] of Object.entries(row)) {
            if (!sourceColumns.has(column)) {
                mapped[column] = value;
            }
        }

        // Read from the untouched original row so an earlier rename can
        // never remove the value a later mapping entry needs
        for (const [destination, source] of pairs) {
            mapped[destination] = row[source];
        }

        return mapped;
    });
};
const readCSV = ({path, columnsToExtract, mapping}) => {
const rows = [];
return new Promise(function (resolve, reject) {
const readFile = fs.createReadStream(options.path);
const readFile = fs.createReadStream(path);
readFile.on('err', function (err) {
reject(err);
@ -19,43 +68,20 @@ const readCSV = (options) => {
rows.push(row);
})
.on('end', function () {
// If CSV is single column - return all values including header
const headers = _.keys(rows[0]);
let results = [];
let result = {};
const columnMap = {};
if (columnsToExtract.length === 1 && headers.length === 1) {
results = _.map(rows, function (value) {
result = {};
result[columnsToExtract[0].name] = value[headers[0]];
return result;
});
if (columnsToExtract) {
results = mapRowsWithRegexes(rows, columnsToExtract);
} else {
// If there are multiple columns in csv file
// try to match headers using lookup value
_.map(columnsToExtract, function findMatches(column) {
_.each(headers, function checkheader(header) {
if (column.lookup.test(header)) {
columnMap[column.name] = header;
}
});
});
results = _.map(rows, function evaluateRow(row) {
const result = {};
_.each(columnMap, function returnMatches(value, key) {
result[key] = row[value];
});
return result;
});
results = mapRowsWithMappings(rows, mapping);
}
resolve(results);
});
});
};
const parse = async (filePath) => {
const parse = async (filePath, mapping) => {
const columnsToExtract = [{
name: 'email',
lookup: /^email/i
@ -82,10 +108,17 @@ const parse = async (filePath) => {
lookup: /created_at/i
}];
return await readCSV({
path: filePath,
columnsToExtract: columnsToExtract
});
const options = {
path: filePath
};
if (mapping) {
options.mapping = mapping;
} else {
options.columnsToExtract = columnsToExtract;
}
return await readCSV(options);
};
module.exports = parse;

View file

@ -0,0 +1,3 @@
id,correo_electronico,nombre
1,"jbloggs@example.com","joe"
2,test@example.com,"test"
1 id correo_electronico nombre
2 1 jbloggs@example.com joe
3 2 test@example.com test

View file

@ -4,49 +4,101 @@ const {readCSV} = require('../lib/parse');
const csvPath = path.join(__dirname, '/fixtures/');
describe('read csv', function () {
it('read csv: one column', function (done) {
readCSV({
it('read csv: one column', async function () {
const result = await readCSV({
path: csvPath + 'single-column-with-header.csv',
columnsToExtract: [{name: 'email', lookup: /email/i}]
}).then(function (result) {
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('jbloggs@example.com');
result[1].email.should.eql('test@example.com');
done();
}).catch(done);
});
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('jbloggs@example.com');
result[1].email.should.eql('test@example.com');
});
it('read csv: two columns, 1 filter', function (done) {
readCSV({
it('read csv: two columns, 1 filter', async function () {
const result = await readCSV({
path: csvPath + 'two-columns-with-header.csv',
columnsToExtract: [{name: 'email', lookup: /email/i}]
}).then(function (result) {
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('jbloggs@example.com');
result[1].email.should.eql('test@example.com');
should.not.exist(result[0].id);
});
done();
}).catch(done);
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('jbloggs@example.com');
result[1].email.should.eql('test@example.com');
should.not.exist(result[0].id);
});
it('read csv: two columns, 2 filters', function (done) {
readCSV({
it('read csv: two columns, 2 filters', async function () {
const result = await readCSV({
path: csvPath + 'two-columns-obscure-header.csv',
columnsToExtract: [
{name: 'email', lookup: /email/i},
{name: 'id', lookup: /id/i}
]
}).then(function (result) {
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('jbloggs@example.com');
result[0].id.should.eql('1');
result[1].email.should.eql('test@example.com');
result[1].id.should.eql('2');
done();
}).catch(done);
});
should.exist(result);
result.length.should.eql(2);
result[0].email.should.eql('jbloggs@example.com');
result[0].id.should.eql('1');
result[1].email.should.eql('test@example.com');
result[1].id.should.eql('2');
});
it('read csv: two columns with mapping', async function () {
    // Every CSV column is renamed explicitly; `id` is mapped onto itself
    const columnMapping = {
        email: 'correo_electronico',
        name: 'nombre',
        id: 'id'
    };
    const expectedRows = [
        {email: 'jbloggs@example.com', name: 'joe', id: '1'},
        {email: 'test@example.com', name: 'test', id: '2'}
    ];

    const result = await readCSV({
        path: csvPath + 'two-columns-mapping-header.csv',
        mapping: columnMapping
    });

    should.exist(result);
    result.length.should.eql(2);

    expectedRows.forEach((expected, index) => {
        const actual = result[index];
        actual.email.should.eql(expected.email);
        actual.name.should.eql(expected.name);
        actual.id.should.eql(expected.id);
    });
});
it('read csv: two columns with partial mapping', async function () {
    // Only the email column is renamed; the other columns keep their CSV headers
    const expectedRows = [
        {email: 'jbloggs@example.com', nombre: 'joe', id: '1'},
        {email: 'test@example.com', nombre: 'test', id: '2'}
    ];

    const result = await readCSV({
        path: csvPath + 'two-columns-mapping-header.csv',
        mapping: {
            email: 'correo_electronico'
        }
    });

    should.exist(result);
    result.length.should.eql(2);

    expectedRows.forEach((expected, index) => {
        const actual = result[index];
        actual.email.should.eql(expected.email);
        actual.nombre.should.eql(expected.nombre);
        actual.id.should.eql(expected.id);
    });
});
it('read csv: two columns with empty mapping', async function () {
    // An empty mapping renames nothing, so rows are keyed by the original CSV headers
    const expectedRows = [
        {correo_electronico: 'jbloggs@example.com', nombre: 'joe', id: '1'},
        {correo_electronico: 'test@example.com', nombre: 'test', id: '2'}
    ];

    const result = await readCSV({
        path: csvPath + 'two-columns-mapping-header.csv',
        mapping: {}
    });

    should.exist(result);
    result.length.should.eql(2);

    expectedRows.forEach((expected, index) => {
        const actual = result[index];
        actual.correo_electronico.should.eql(expected.correo_electronico);
        actual.nombre.should.eql(expected.nombre);
        actual.id.should.eql(expected.id);
    });
});
});