mirror of
https://github.com/TryGhost/Ghost.git
synced 2025-02-17 23:44:39 -05:00
✨ Added mapping support to CSV parser
no issue - When processing CSV files, the `parse` function now allows the client to specify a "mapping" parameter in the form of a hash: { destination_property_name: 'source_column_name' }, e.g. { name: 'weird_name_column', email: 'email_column' } - This lets the end user provide an exact mapping of the CSV fields to be transformed into JSON.
This commit is contained in:
parent
bf0e890751
commit
fd982fef73
3 changed files with 154 additions and 66 deletions
|
@ -3,13 +3,62 @@ const csvParser = require('csv-parser');
|
|||
const _ = require('lodash');
|
||||
const fs = require('fs-extra');
|
||||
|
||||
const readCSV = (options) => {
|
||||
const columnsToExtract = options.columnsToExtract || [];
|
||||
/**
 * Projects parsed CSV rows onto a set of named output columns.
 *
 * Two strategies are used:
 *  - when exactly one column is requested and the rows have exactly one
 *    header, the single value of every row is returned under the requested
 *    name, without consulting `lookup` at all;
 *  - otherwise each requested column is matched against the headers of the
 *    first row via `column.lookup.test(header)`; when several headers match,
 *    the last matching header wins. Columns without a match are omitted.
 *
 * @param {Object[]} rows - parsed rows; headers are taken from the keys of the first row
 * @param {Object[]} columnsToExtract - descriptors shaped like `{name, lookup}`,
 *        where `lookup` exposes a `test(header)` method (a RegExp in the visible callers)
 * @returns {Object[]} one object per row, keyed by the requested column names
 */
const mapRowsWithRegexes = (rows, columnsToExtract) => {
    const headers = _.keys(rows[0]);
    const isSingleColumn = columnsToExtract.length === 1 && headers.length === 1;

    if (isSingleColumn) {
        // Only one value per row, so take it as-is under the requested name.
        const onlyHeader = headers[0];

        return _.map(rows, row => ({
            [columnsToExtract[0].name]: row[onlyHeader]
        }));
    }

    // Resolve which CSV header feeds each requested column.
    const headerByName = {};

    _.each(columnsToExtract, (column) => {
        _.each(headers, (header) => {
            if (column.lookup.test(header)) {
                headerByName[column.name] = header;
            }
        });
    });

    // Build one output object per row containing only the matched columns.
    return _.map(rows, row => _.mapValues(headerByName, header => row[header]));
};
|
||||
|
||||
/**
 * Renames row properties according to an explicit mapping.
 *
 * `mapping` has the shape `{destination_property_name: 'source_column_name'}`.
 * Every source column that is mapped to a different name is dropped from the
 * result; columns that are not mentioned in the mapping are kept untouched.
 *
 * The input rows are never mutated: each result is a fresh object and every
 * value is read from the original row. This keeps swapped mappings
 * (`{a: 'b', b: 'a'}`) and several destinations sharing one source column
 * correct, which an in-place rename cannot guarantee.
 *
 * @param {Object[]} rows - parsed rows keyed by source column name
 * @param {Object} [mapping] - destination name -> source column name; a
 *        missing or empty mapping returns shallow copies of the rows
 * @returns {Object[]} new row objects with the renamed properties
 */
const mapRowsWithMappings = (rows, mapping) => {
    // Own enumerable keys only, so inherited properties never leak into rows.
    const renames = Object.entries(mapping || {});

    // Source columns that get a new name and must not survive under the old one.
    const renamedSources = new Set(
        renames
            .filter(([destination, source]) => destination !== source)
            .map(([, source]) => source)
    );

    return rows.map((row) => {
        const mapped = {};

        // Carry over every column that is not being renamed away.
        for (const [column, value] of Object.entries(row)) {
            if (!renamedSources.has(column)) {
                mapped[column] = value;
            }
        }

        // Always read from the pristine row, never from the partially built result.
        for (const [destination, source] of renames) {
            mapped[destination] = row[source];
        }

        return mapped;
    });
};
|
||||
|
||||
const readCSV = ({path, columnsToExtract, mapping}) => {
|
||||
const rows = [];
|
||||
|
||||
return new Promise(function (resolve, reject) {
|
||||
const readFile = fs.createReadStream(options.path);
|
||||
const readFile = fs.createReadStream(path);
|
||||
|
||||
readFile.on('err', function (err) {
|
||||
reject(err);
|
||||
|
@ -19,43 +68,20 @@ const readCSV = (options) => {
|
|||
rows.push(row);
|
||||
})
|
||||
.on('end', function () {
|
||||
// If CSV is single column - return all values including header
|
||||
const headers = _.keys(rows[0]);
|
||||
let results = [];
|
||||
|
||||
let result = {};
|
||||
const columnMap = {};
|
||||
if (columnsToExtract.length === 1 && headers.length === 1) {
|
||||
results = _.map(rows, function (value) {
|
||||
result = {};
|
||||
result[columnsToExtract[0].name] = value[headers[0]];
|
||||
return result;
|
||||
});
|
||||
if (columnsToExtract) {
|
||||
results = mapRowsWithRegexes(rows, columnsToExtract);
|
||||
} else {
|
||||
// If there are multiple columns in csv file
|
||||
// try to match headers using lookup value
|
||||
|
||||
_.map(columnsToExtract, function findMatches(column) {
|
||||
_.each(headers, function checkheader(header) {
|
||||
if (column.lookup.test(header)) {
|
||||
columnMap[column.name] = header;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
results = _.map(rows, function evaluateRow(row) {
|
||||
const result = {};
|
||||
_.each(columnMap, function returnMatches(value, key) {
|
||||
result[key] = row[value];
|
||||
});
|
||||
return result;
|
||||
});
|
||||
results = mapRowsWithMappings(rows, mapping);
|
||||
}
|
||||
|
||||
resolve(results);
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
const parse = async (filePath) => {
|
||||
const parse = async (filePath, mapping) => {
|
||||
const columnsToExtract = [{
|
||||
name: 'email',
|
||||
lookup: /^email/i
|
||||
|
@ -82,10 +108,17 @@ const parse = async (filePath) => {
|
|||
lookup: /created_at/i
|
||||
}];
|
||||
|
||||
return await readCSV({
|
||||
path: filePath,
|
||||
columnsToExtract: columnsToExtract
|
||||
});
|
||||
const options = {
|
||||
path: filePath
|
||||
};
|
||||
|
||||
if (mapping) {
|
||||
options.mapping = mapping;
|
||||
} else {
|
||||
options.columnsToExtract = columnsToExtract;
|
||||
}
|
||||
|
||||
return await readCSV(options);
|
||||
};
|
||||
|
||||
module.exports = parse;
|
||||
|
|
3
ghost/members-csv/test/fixtures/two-columns-mapping-header.csv
vendored
Normal file
3
ghost/members-csv/test/fixtures/two-columns-mapping-header.csv
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
id,correo_electronico,nombre
|
||||
1,"jbloggs@example.com","joe"
|
||||
2,test@example.com,"test"
|
|
|
@ -4,49 +4,101 @@ const {readCSV} = require('../lib/parse');
|
|||
const csvPath = path.join(__dirname, '/fixtures/');
|
||||
|
||||
describe('read csv', function () {
|
||||
it('read csv: one column', function (done) {
|
||||
readCSV({
|
||||
it('read csv: one column', async function () {
|
||||
const result = await readCSV({
|
||||
path: csvPath + 'single-column-with-header.csv',
|
||||
columnsToExtract: [{name: 'email', lookup: /email/i}]
|
||||
}).then(function (result) {
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('jbloggs@example.com');
|
||||
result[1].email.should.eql('test@example.com');
|
||||
done();
|
||||
}).catch(done);
|
||||
});
|
||||
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('jbloggs@example.com');
|
||||
result[1].email.should.eql('test@example.com');
|
||||
});
|
||||
|
||||
it('read csv: two columns, 1 filter', function (done) {
|
||||
readCSV({
|
||||
it('read csv: two columns, 1 filter', async function () {
|
||||
const result = await readCSV({
|
||||
path: csvPath + 'two-columns-with-header.csv',
|
||||
columnsToExtract: [{name: 'email', lookup: /email/i}]
|
||||
}).then(function (result) {
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('jbloggs@example.com');
|
||||
result[1].email.should.eql('test@example.com');
|
||||
should.not.exist(result[0].id);
|
||||
});
|
||||
|
||||
done();
|
||||
}).catch(done);
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('jbloggs@example.com');
|
||||
result[1].email.should.eql('test@example.com');
|
||||
should.not.exist(result[0].id);
|
||||
});
|
||||
|
||||
it('read csv: two columns, 2 filters', function (done) {
|
||||
readCSV({
|
||||
it('read csv: two columns, 2 filters', async function () {
|
||||
const result = await readCSV({
|
||||
path: csvPath + 'two-columns-obscure-header.csv',
|
||||
columnsToExtract: [
|
||||
{name: 'email', lookup: /email/i},
|
||||
{name: 'id', lookup: /id/i}
|
||||
]
|
||||
}).then(function (result) {
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('jbloggs@example.com');
|
||||
result[0].id.should.eql('1');
|
||||
result[1].email.should.eql('test@example.com');
|
||||
result[1].id.should.eql('2');
|
||||
done();
|
||||
}).catch(done);
|
||||
});
|
||||
|
||||
should.exist(result);
|
||||
result.length.should.eql(2);
|
||||
result[0].email.should.eql('jbloggs@example.com');
|
||||
result[0].id.should.eql('1');
|
||||
result[1].email.should.eql('test@example.com');
|
||||
result[1].id.should.eql('2');
|
||||
});
|
||||
|
||||
it('read csv: two columns with mapping', async function () {
    // Every column of the fixture is mapped, including the identity entry id -> id.
    const mapping = {
        email: 'correo_electronico',
        name: 'nombre',
        id: 'id'
    };

    const result = await readCSV({
        path: `${csvPath}two-columns-mapping-header.csv`,
        mapping
    });

    should.exist(result);
    result.length.should.eql(2);

    const firstRow = result[0];
    firstRow.email.should.eql('jbloggs@example.com');
    firstRow.name.should.eql('joe');
    firstRow.id.should.eql('1');

    const secondRow = result[1];
    secondRow.email.should.eql('test@example.com');
    secondRow.name.should.eql('test');
    secondRow.id.should.eql('2');
});
|
||||
|
||||
it('read csv: two columns with partial mapping', async function () {
    // Only the email column is renamed; the remaining columns keep their CSV header names.
    const mapping = {
        email: 'correo_electronico'
    };

    const result = await readCSV({
        path: `${csvPath}two-columns-mapping-header.csv`,
        mapping
    });

    should.exist(result);
    result.length.should.eql(2);

    const firstRow = result[0];
    firstRow.email.should.eql('jbloggs@example.com');
    firstRow.nombre.should.eql('joe');
    firstRow.id.should.eql('1');

    const secondRow = result[1];
    secondRow.email.should.eql('test@example.com');
    secondRow.nombre.should.eql('test');
    secondRow.id.should.eql('2');
});
|
||||
it('read csv: two columns with empty mapping', async function () {
    // With nothing to rename, rows are expected under their original CSV header names.
    const mapping = {};

    const result = await readCSV({
        path: `${csvPath}two-columns-mapping-header.csv`,
        mapping
    });

    should.exist(result);
    result.length.should.eql(2);

    const firstRow = result[0];
    firstRow.correo_electronico.should.eql('jbloggs@example.com');
    firstRow.nombre.should.eql('joe');
    firstRow.id.should.eql('1');

    const secondRow = result[1];
    secondRow.correo_electronico.should.eql('test@example.com');
    secondRow.nombre.should.eql('test');
    secondRow.id.should.eql('2');
});
|
||||
});
|
||||
|
|
Loading…
Add table
Reference in a new issue