✨ Added mapping support to CSV parser

no issue - When processing CSV files, the `parse` function now allows the client to specify a "mapping" parameter as a hash in the following format: { destination_property_name: 'source_column_name' }, e.g. { name: 'weird_name_column', email: 'email_column' } - This allows the end user to provide an exact mapping of the fields to be transformed into JSON.
2025-04-01 02:41:39 -05:00 · 2020-07-01 01:27:11 +12:00 · 2020-07-01 01:27:11 +12:00 · fd982fef73
commit fd982fef73
parent bf0e890751
3 changed files with 154 additions and 66 deletions
--- a/ghost/members-csv/lib/parse.js
+++ b/ghost/members-csv/lib/parse.js
@ -3,13 +3,62 @@ const csvParser = require('csv-parser');
 const _ = require('lodash');
 const fs = require('fs-extra');

-const readCSV = (options) => {
-    const columnsToExtract = options.columnsToExtract || [];
+const mapRowsWithRegexes = (rows, columnsToExtract) => {
    let results = [];
+    const columnMap = {};
+    // If CSV is single column - return all values including header
+    const headers = _.keys(rows[0]);
+
+    if (columnsToExtract.length === 1 && headers.length === 1) {
+        results = _.map(rows, function (value) {
+            let result = {};
+            result[columnsToExtract[0].name] = value[headers[0]];
+            return result;
+        });
+    } else {
+        // If there are multiple columns in csv file
+        // try to match headers using lookup value
+        _.map(columnsToExtract, function findMatches(column) {
+            _.each(headers, function checkheader(header) {
+                if (column.lookup.test(header)) {
+                    columnMap[column.name] = header;
+                }
+            });
+        });
+
+        results = _.map(rows, function evaluateRow(row) {
+            const result = {};
+            _.each(columnMap, function returnMatches(value, key) {
+                result[key] = row[value];
+            });
+            return result;
+        });
+    }
+
+    return results;
+};
+
+const mapRowsWithMappings = (rows, mapping) => {
+    const results = rows.map((row) => {
+        for (const key in mapping) {
+            row[key] = row[mapping[key]];
+
+            if (key !== mapping[key]) {
+                delete row[mapping[key]];
+            }
+        }
+
+        return row;
+    });
+
+    return results;
+};
+
+const readCSV = ({path, columnsToExtract, mapping}) => {
    const rows = [];

    return new Promise(function (resolve, reject) {
-        const readFile = fs.createReadStream(options.path);
+        const readFile = fs.createReadStream(path);

        readFile.on('err', function (err) {
            reject(err);
@ -19,43 +68,20 @@ const readCSV = (options) => {
                rows.push(row);
            })
            .on('end', function () {
-            // If CSV is single column - return all values including header
-                const headers = _.keys(rows[0]);
+                let results = [];

-                let result = {};
-                const columnMap = {};
-                if (columnsToExtract.length === 1 && headers.length === 1) {
-                    results = _.map(rows, function (value) {
-                        result = {};
-                        result[columnsToExtract[0].name] = value[headers[0]];
-                        return result;
-                    });
+                if (columnsToExtract) {
+                    results = mapRowsWithRegexes(rows, columnsToExtract);
                } else {
-                // If there are multiple columns in csv file
-                // try to match headers using lookup value
-
-                    _.map(columnsToExtract, function findMatches(column) {
-                        _.each(headers, function checkheader(header) {
-                            if (column.lookup.test(header)) {
-                                columnMap[column.name] = header;
-                            }
-                        });
-                    });
-
-                    results = _.map(rows, function evaluateRow(row) {
-                        const result = {};
-                        _.each(columnMap, function returnMatches(value, key) {
-                            result[key] = row[value];
-                        });
-                        return result;
-                    });
+                    results = mapRowsWithMappings(rows, mapping);
                }
+
                resolve(results);
            });
    });
 };

-const parse = async (filePath) => {
+const parse = async (filePath, mapping) => {
    const columnsToExtract = [{
        name: 'email',
        lookup: /^email/i
@ -82,10 +108,17 @@ const parse = async (filePath) => {
        lookup: /created_at/i
    }];

-    return await readCSV({
-        path: filePath,
-        columnsToExtract: columnsToExtract
-    });
+    const options = {
+        path: filePath
+    };
+
+    if (mapping) {
+        options.mapping = mapping;
+    } else {
+        options.columnsToExtract = columnsToExtract;
+    }
+
+    return await readCSV(options);
 };

 module.exports = parse;
--- a/ghost/members-csv/test/fixtures/two-columns-mapping-header.csv
+++ b/ghost/members-csv/test/fixtures/two-columns-mapping-header.csv
@ -0,0 +1,3 @@
+id,correo_electronico,nombre
+1,"jbloggs@example.com","joe"
+2,test@example.com,"test"
--- a/ghost/members-csv/test/parse.test.js
+++ b/ghost/members-csv/test/parse.test.js
@ -4,49 +4,101 @@ const {readCSV} = require('../lib/parse');
 const csvPath = path.join(__dirname, '/fixtures/');

 describe('read csv', function () {
-    it('read csv: one column', function (done) {
-        readCSV({
+    it('read csv: one column', async function () {
+        const result = await readCSV({
            path: csvPath + 'single-column-with-header.csv',
            columnsToExtract: [{name: 'email', lookup: /email/i}]
-        }).then(function (result) {
-            should.exist(result);
-            result.length.should.eql(2);
-            result[0].email.should.eql('jbloggs@example.com');
-            result[1].email.should.eql('test@example.com');
-            done();
-        }).catch(done);
+        });
+
+        should.exist(result);
+        result.length.should.eql(2);
+        result[0].email.should.eql('jbloggs@example.com');
+        result[1].email.should.eql('test@example.com');
    });

-    it('read csv: two columns, 1 filter', function (done) {
-        readCSV({
+    it('read csv: two columns, 1 filter', async function () {
+        const result = await readCSV({
            path: csvPath + 'two-columns-with-header.csv',
            columnsToExtract: [{name: 'email', lookup: /email/i}]
-        }).then(function (result) {
-            should.exist(result);
-            result.length.should.eql(2);
-            result[0].email.should.eql('jbloggs@example.com');
-            result[1].email.should.eql('test@example.com');
-            should.not.exist(result[0].id);
+        });

-            done();
-        }).catch(done);
+        should.exist(result);
+        result.length.should.eql(2);
+        result[0].email.should.eql('jbloggs@example.com');
+        result[1].email.should.eql('test@example.com');
+        should.not.exist(result[0].id);
    });

-    it('read csv: two columns, 2 filters', function (done) {
-        readCSV({
+    it('read csv: two columns, 2 filters', async function () {
+        const result = await readCSV({
            path: csvPath + 'two-columns-obscure-header.csv',
            columnsToExtract: [
                {name: 'email', lookup: /email/i},
                {name: 'id', lookup: /id/i}
            ]
-        }).then(function (result) {
-            should.exist(result);
-            result.length.should.eql(2);
-            result[0].email.should.eql('jbloggs@example.com');
-            result[0].id.should.eql('1');
-            result[1].email.should.eql('test@example.com');
-            result[1].id.should.eql('2');
-            done();
-        }).catch(done);
+        });
+
+        should.exist(result);
+        result.length.should.eql(2);
+        result[0].email.should.eql('jbloggs@example.com');
+        result[0].id.should.eql('1');
+        result[1].email.should.eql('test@example.com');
+        result[1].id.should.eql('2');
+    });
+
+    it('read csv: two columns with mapping', async function () {
+        const result = await readCSV({
+            path: csvPath + 'two-columns-mapping-header.csv',
+            mapping: {
+                email: 'correo_electronico',
+                name: 'nombre',
+                id: 'id'
+            }
+        });
+
+        should.exist(result);
+        result.length.should.eql(2);
+        result[0].email.should.eql('jbloggs@example.com');
+        result[0].name.should.eql('joe');
+        result[0].id.should.eql('1');
+
+        result[1].email.should.eql('test@example.com');
+        result[1].name.should.eql('test');
+        result[1].id.should.eql('2');
+    });
+
+    it('read csv: two columns with partial mapping', async function () {
+        const result = await readCSV({
+            path: csvPath + 'two-columns-mapping-header.csv',
+            mapping: {
+                email: 'correo_electronico'
+            }
+        });
+
+        should.exist(result);
+        result.length.should.eql(2);
+        result[0].email.should.eql('jbloggs@example.com');
+        result[0].nombre.should.eql('joe');
+        result[0].id.should.eql('1');
+
+        result[1].email.should.eql('test@example.com');
+        result[1].nombre.should.eql('test');
+        result[1].id.should.eql('2');
+    });
+    it('read csv: two columns with empty mapping', async function () {
+        const result = await readCSV({
+            path: csvPath + 'two-columns-mapping-header.csv',
+            mapping: {}
+        });
+
+        should.exist(result);
+        result.length.should.eql(2);
+        result[0].correo_electronico.should.eql('jbloggs@example.com');
+        result[0].nombre.should.eql('joe');
+        result[0].id.should.eql('1');
+
+        result[1].correo_electronico.should.eql('test@example.com');
+        result[1].nombre.should.eql('test');
+        result[1].id.should.eql('2');
    });
 });