From 0a5f91f3d03242857b2fbd19061f29967747f5f2 Mon Sep 17 00:00:00 2001
From: Fabien O'Carroll <fabien@allou.is>
Date: Tue, 27 Oct 2020 10:44:06 +0000
Subject: [PATCH] Refactored members-csv to remove unused columnsToExtract

no-issue

 - columnsToExtract was not used by the importer, so it has been removed for simplicity.
 - Maps headers while parsing (via transformHeader) rather than in a loop over the parsed rows
 - Parses values into their correct types (booleans, null, label objects)
---
 ghost/members-csv/lib/parse.js       | 188 +++++++++------------------
 ghost/members-csv/test/parse.test.js |  24 ++--
 2 files changed, 74 insertions(+), 138 deletions(-)

diff --git a/ghost/members-csv/lib/parse.js b/ghost/members-csv/lib/parse.js
index 8d5556fdaf..34c9525287 100644
--- a/ghost/members-csv/lib/parse.js
+++ b/ghost/members-csv/lib/parse.js
@@ -1,134 +1,76 @@
 const Promise = require('bluebird');
+const pump = require('pump');
 const papaparse = require('papaparse');
-const _ = require('lodash');
 const fs = require('fs-extra');
 
-const mapRowsWithRegexes = (rows, columnsToExtract) => {
-    let results = [];
-    const columnMap = {};
-    // If CSV is single column - return all values including header
-    const headers = _.keys(rows[0]);
-
-    if (columnsToExtract.length === 1 && headers.length === 1) {
-        results = _.map(rows, function (value) {
-            let result = {};
-            result[columnsToExtract[0].name] = value[headers[0]];
-            return result;
-        });
-    } else {
-        // If there are multiple columns in csv file
-        // try to match headers using lookup value
-        _.map(columnsToExtract, function findMatches(column) {
-            _.each(headers, function checkheader(header) {
-                if (column.lookup.test(header)) {
-                    columnMap[column.name] = header;
-                }
-            });
-        });
-
-        results = _.map(rows, function evaluateRow(row) {
-            const result = {};
-            _.each(columnMap, function returnMatches(value, key) {
-                result[key] = row[value];
-            });
-            return result;
-        });
-    }
-
-    return results;
-};
-
-const mapRowsWithMappings = (rows, mapping) => {
-    const results = rows.map((row) => {
-        for (const key in mapping) {
-            row[key] = row[mapping[key]];
-
-            if (key !== mapping[key]) {
-                delete row[mapping[key]];
-            }
-        }
-
-        return row;
-    });
-
-    return results;
-};
-
-const readCSV = ({path, columnsToExtract, mapping}) => {
-    const rows = [];
-
+/**
+ * Reads a members CSV file and resolves with one object per data row.
+ *
+ * @param {string} path - Path of the CSV file to read.
+ * @param {Object} [mapping] - Maps CSV header names to the field names used on the rows.
+ * @param {Array} [defaultLabels=[]] - Labels appended to the `labels` of every row.
+ * @returns {Promise<Object[]>} Resolves with the parsed rows; rejects if a stream errors.
+ */
+module.exports = (path, mapping, defaultLabels = []) => {
     return new Promise(function (resolve, reject) {
-        const readFile = fs.createReadStream(path);
-
-        readFile.on('err', function (err) {
-            reject(err);
-        })
-            .pipe(papaparse.parse(papaparse.NODE_STREAM_INPUT, {
-                header: true,
-                transform: function (value) {
-                    if (value === '') {
-                        return null;
+        const csvFileStream = fs.createReadStream(path);
+        const csvParserStream = papaparse.parse(papaparse.NODE_STREAM_INPUT, {
+            header: true,
+            transformHeader(_header) {
+                // Apply the caller's mapping first, then rename 'subscribed_to_emails'
+                const header = (mapping && Reflect.has(mapping, _header)) ? mapping[_header] : _header;
+                return header === 'subscribed_to_emails' ? 'subscribed' : header;
+            },
+            transform(value, header) {
+                if (header === 'labels') {
+                    if (value && typeof value === 'string') {
+                        return value.split(',').map(name => ({name}));
                     }
-
-                    return value;
-                }
-            }))
-            .on('data', function (row) {
-                rows.push(row);
-            })
-            .on('end', function () {
-                let results = [];
-
-                if (columnsToExtract) {
-                    results = mapRowsWithRegexes(rows, columnsToExtract);
-                } else {
-                    results = mapRowsWithMappings(rows, mapping);
                 }
 
-                resolve(results);
-            });
+                // `header` has already been through transformHeader, so the subscription
+                // column arrives as 'subscribed'; anything other than "false" means subscribed
+                if (header === 'subscribed') {
+                    return value.toLowerCase() !== 'false';
+                }
+
+                if (header === 'complimentary_plan') {
+                    return value.toLowerCase() === 'true';
+                }
+
+                // Empty cells and the literal string "undefined" are treated as missing
+                if (value === '' || value === 'undefined') {
+                    return null;
+                }
+
+                if (value.toLowerCase() === 'false') {
+                    return false;
+                }
+
+                if (value.toLowerCase() === 'true') {
+                    return true;
+                }
+
+                return value;
+            }
+        });
+
+        const rows = [];
+        // pump pipes the streams together and calls back on error or once the pipeline has finished
+        const parsedCSVStream = pump(csvFileStream, csvParserStream, (err) => {
+            if (err) {
+                return reject(err);
+            }
+            resolve(rows);
+        });
+
+        parsedCSVStream.on('data', (row) => {
+            // Copy so rows never share (or alias the caller's) defaultLabels array
+            row.labels = row.labels ? row.labels.concat(defaultLabels) : [...defaultLabels];
+            rows.push(row);
+        });
     });
 };
 
-const parse = async (filePath, mapping) => {
-    const columnsToExtract = [{
-        name: 'email',
-        lookup: /^email/i
-    }, {
-        name: 'name',
-        lookup: /name/i
-    }, {
-        name: 'note',
-        lookup: /note/i
-    }, {
-        name: 'subscribed_to_emails',
-        lookup: /subscribed_to_emails/i
-    }, {
-        name: 'stripe_customer_id',
-        lookup: /stripe_customer_id/i
-    }, {
-        name: 'complimentary_plan',
-        lookup: /complimentary_plan/i
-    }, {
-        name: 'labels',
-        lookup: /labels/i
-    }, {
-        name: 'created_at',
-        lookup: /created_at/i
-    }];
-
-    const options = {
-        path: filePath
-    };
-
-    if (mapping) {
-        options.mapping = mapping;
-    } else {
-        options.columnsToExtract = columnsToExtract;
-    }
-
-    return await readCSV(options);
-};
-
-module.exports = parse;
-module.exports.readCSV = readCSV;
+// Options-object wrapper that forwards to the positional default export. @TODO do we need this???
+module.exports.readCSV = ({path, mapping, defaultLabels}) => module.exports(path, mapping, defaultLabels);
diff --git a/ghost/members-csv/test/parse.test.js b/ghost/members-csv/test/parse.test.js
index c62bac1b17..ac06e46dbe 100644
--- a/ghost/members-csv/test/parse.test.js
+++ b/ghost/members-csv/test/parse.test.js
@@ -6,8 +6,7 @@ const csvPath = path.join(__dirname, '/fixtures/');
 describe('parse', function () {
     it('read csv: empty file', async function () {
         const result = await readCSV({
-            path: csvPath + 'empty.csv',
-            columnsToExtract: [{name: 'email', lookup: /email/i}]
+            path: csvPath + 'empty.csv'
         });
 
         should.exist(result);
@@ -16,8 +15,7 @@ describe('parse', function () {
 
     it('read csv: one column', async function () {
         const result = await readCSV({
-            path: csvPath + 'single-column-with-header.csv',
-            columnsToExtract: [{name: 'email', lookup: /email/i}]
+            path: csvPath + 'single-column-with-header.csv'
         });
 
         should.exist(result);
@@ -28,24 +26,21 @@ describe('parse', function () {
 
     it('read csv: two columns, 1 filter', async function () {
         const result = await readCSV({
-            path: csvPath + 'two-columns-with-header.csv',
-            columnsToExtract: [{name: 'email', lookup: /email/i}]
+            path: csvPath + 'two-columns-with-header.csv'
         });
 
         should.exist(result);
         result.length.should.eql(2);
         result[0].email.should.eql('jbloggs@example.com');
         result[1].email.should.eql('test@example.com');
-        should.not.exist(result[0].id);
     });
 
     it('read csv: two columns, 2 filters', async function () {
         const result = await readCSV({
             path: csvPath + 'two-columns-obscure-header.csv',
-            columnsToExtract: [
-                {name: 'email', lookup: /email/i},
-                {name: 'id', lookup: /id/i}
-            ]
+            mapping: {
+                'Email Address': 'email'
+            }
         });
 
         should.exist(result);
@@ -60,9 +55,8 @@ describe('parse', function () {
         const result = await readCSV({
             path: csvPath + 'two-columns-mapping-header.csv',
             mapping: {
-                email: 'correo_electronico',
-                name: 'nombre',
-                id: 'id'
+                correo_electronico: 'email',
+                nombre: 'name'
             }
         });
 
@@ -81,7 +75,7 @@ describe('parse', function () {
         const result = await readCSV({
             path: csvPath + 'two-columns-mapping-header.csv',
             mapping: {
-                email: 'correo_electronico'
+                correo_electronico: 'email'
             }
         });