From 51c9a50c4f6fbeb8a98fa97584fd23b5a55bbd6a Mon Sep 17 00:00:00 2001 From: naz Date: Mon, 17 Aug 2020 23:28:57 +1200 Subject: [PATCH] Added validation layer to members import endpoint (#12131) no issue - Additional validation is needed for imported data because in case of bulk insertions (through knex) we bypass model layer validation - this could lead to invalid data in the database, which would be hard to fix. - Chose validation method we use for other endpoints - through JSON Schema. It proved to be very performant (200ms overhead for 50k records). When comparing it with iterative method (validating each record separately) this was adding about 17s of overhead. - Refactored returned values from "sanitizeInput" method to encapsulate more logic so that the caller doesn't have to calculate amount of invalid records and deal with error types - Whole sanitizeInput method could now be easily extracted into separate module (somewhere close to members importer) - Bumped members-csv package. It is meant to handle empty string values - '' and null, which should allow validating member records more consistently! --- core/server/api/canary/members.js | 86 ++++++++++++++----- .../input/schemas/members-upload.json | 49 +++++++++++ .../api/canary/admin/members_spec.js | 18 +++- .../fixtures/csv/member-invalid-email.csv | 2 - .../fixtures/csv/members-invalid-values.csv | 3 + 5 files changed, 131 insertions(+), 27 deletions(-) create mode 100644 core/server/api/canary/utils/validators/input/schemas/members-upload.json delete mode 100644 test/utils/fixtures/csv/member-invalid-email.csv create mode 100644 test/utils/fixtures/csv/members-invalid-values.csv diff --git a/core/server/api/canary/members.js b/core/server/api/canary/members.js index 7b6f3b76bd..92c0412b10 100644 --- a/core/server/api/canary/members.js +++ b/core/server/api/canary/members.js @@ -26,8 +26,50 @@ const cleanupUndefined = (obj) => { } }; -// NOTE: this method can be removed once unique constraints are introduced ref.: https://github.com/TryGhost/Ghost/blob/e277c6b/core/server/data/schema/schema.js#L339 -const sanitizeInput = (members) => { +const sanitizeInput = async (members) => { + const validationErrors = []; + let invalidCount = 0; + + const jsonSchema = require('./utils/validators/utils/json-schema'); + const schema = require('./utils/validators/input/schemas/members-upload'); + const definitions = require('./utils/validators/input/schemas/members'); + + let invalidValidationCount = 0; + try { + await jsonSchema.validate(schema, definitions, members); + } catch (error) { + if (error.errorDetails && error.errorDetails.length) { + const jsonPointerIndexRegex = /\[(?\d+)\]/; + + let invalidRecordIndexes = error.errorDetails.map((errorDetail) => { + if (errorDetail.dataPath) { + const key = errorDetail.dataPath.split('.').pop(); + const [, index] = errorDetail.dataPath.match(jsonPointerIndexRegex); + validationErrors.push(new errors.ValidationError({ + message: i18n.t('notices.data.validation.index.schemaValidationFailed', { + key + }), + context: `${key} ${errorDetail.message}`, + errorDetails: `${errorDetail.dataPath} with value ${members[index][key]}` + })); + + return Number(index); + } + }); + + invalidRecordIndexes = _.uniq(invalidRecordIndexes); + invalidRecordIndexes = invalidRecordIndexes.filter(index => (index !== undefined)); + + invalidRecordIndexes.forEach((index) => { + members[index] = undefined; + }); + members = members.filter(record => (record !== undefined)); + invalidValidationCount += invalidRecordIndexes.length; + } + } + + invalidCount += invalidValidationCount; + const customersMap = members.reduce((acc, member) => { if (member.stripe_customer_id && member.stripe_customer_id !== 'undefined') { if (acc[member.stripe_customer_id]) { @@ -51,11 +93,21 @@ const sanitizeInput = (members) => { return !(toRemove.includes(member.stripe_customer_id)); }); - const duplicateStripeCount = members.length - sanitized.length; + const duplicateStripeCustomersCount = (members.length - sanitized.length); + if (duplicateStripeCustomersCount) { + validationErrors.push(new errors.ValidationError({ + message: i18n.t('errors.api.members.duplicateStripeCustomerIds.message'), + context: i18n.t('errors.api.members.duplicateStripeCustomerIds.context'), + help: i18n.t('errors.api.members.duplicateStripeCustomerIds.help') + })); + } + + invalidCount += duplicateStripeCustomersCount; return { sanitized, - duplicateStripeCount + invalidCount, + validationErrors }; }; @@ -432,16 +484,12 @@ module.exports = { const memberLabels = serializeMemberLabels(getUniqueMemberLabels(frame.data.members)); await findOrCreateLabels(memberLabels, frame.options); - return Promise.resolve().then(() => { - const {sanitized, duplicateStripeCount} = sanitizeInput(frame.data.members); - invalid.count += duplicateStripeCount; + return Promise.resolve().then(async () => { + const {sanitized, invalidCount, validationErrors} = await sanitizeInput(frame.data.members); + invalid.count += invalidCount; - if (duplicateStripeCount) { - invalid.errors.push(new errors.ValidationError({ - message: i18n.t('errors.api.members.duplicateStripeCustomerIds.message'), - context: i18n.t('errors.api.members.duplicateStripeCustomerIds.context'), - help: i18n.t('errors.api.members.duplicateStripeCustomerIds.help') - })); + if (validationErrors.length) { + invalid.errors.push(...validationErrors); } return Promise.map(sanitized, ((entry) => { @@ -588,15 +636,11 @@ module.exports = { const allLabelModels = [...importSetLabelModels, ...memberLabelModels].filter(model => model !== undefined); return Promise.resolve().then(async () => { - const {sanitized, duplicateStripeCount} = sanitizeInput(frame.data.members); - invalid.count += duplicateStripeCount; + const {sanitized, invalidCount, validationErrors} = await sanitizeInput(frame.data.members); + invalid.count += invalidCount; - if (duplicateStripeCount) { - invalid.errors.push(new errors.ValidationError({ - message: i18n.t('errors.api.members.duplicateStripeCustomerIds.message'), - context: i18n.t('errors.api.members.duplicateStripeCustomerIds.context'), - help: i18n.t('errors.api.members.duplicateStripeCustomerIds.help') - })); + if (validationErrors.length) { + invalid.errors.push(...validationErrors); } return doImport({ diff --git a/core/server/api/canary/utils/validators/input/schemas/members-upload.json b/core/server/api/canary/utils/validators/input/schemas/members-upload.json new file mode 100644 index 0000000000..88b32d22f2 --- /dev/null +++ b/core/server/api/canary/utils/validators/input/schemas/members-upload.json @@ -0,0 +1,49 @@ + +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "members.upload", + "title": "members.upload", + "description": "Schema for members.upload", + "type": "array", + "items": { + "type": "object", + "additionalProperties": true, + "required": ["email"], + "properties": { + "name": { + "type": ["null", "string"], + "maxLength": 191, + "pattern": "^([^,]|$)" + }, + "email": { + "type": "string", + "minLength": 1, + "maxLength": 191, + "format": "email" + }, + "note": { + "type": ["null", "string"], + "minLength": 0, + "maxLength": 2000 + }, + "subscribed": { + "type": ["string"], + "enum": ["true", "false", "TRUE", "FALSE", ""] + }, + "labels": { + "type": "string" + }, + "created_at": { + "type": ["string", "null"], + "format": "date-time" + }, + "stripe_customer_id": { + "type": ["string", "null"] + }, + "complimentary_plan": { + "type": ["string"], + "enum": ["true", "false", "TRUE", "FALSE", ""] + } + } + } + } diff --git a/test/regression/api/canary/admin/members_spec.js b/test/regression/api/canary/admin/members_spec.js index 213204f5be..a924cb0d2e 100644 --- a/test/regression/api/canary/admin/members_spec.js +++ b/test/regression/api/canary/admin/members_spec.js @@ -456,7 +456,7 @@ describe('Members API', function () { it('Fails to import memmber with invalid values', function () { return request .post(localUtils.API.getApiQuery(`members/upload/`)) - .attach('membersfile', path.join(__dirname, '/../../../../utils/fixtures/csv/member-invalid-email.csv')) + .attach('membersfile', path.join(__dirname, '/../../../../utils/fixtures/csv/members-invalid-values.csv')) .set('Origin', config.get('url')) .expect('Content-Type', /json/) .expect('Cache-Control', testUtils.cacheRules.private) @@ -470,10 +470,20 @@ describe('Members API', function () { should.exist(jsonResponse.meta.stats); jsonResponse.meta.stats.imported.count.should.equal(0); - jsonResponse.meta.stats.invalid.count.should.equal(1); + jsonResponse.meta.stats.invalid.count.should.equal(2); - should.equal(jsonResponse.meta.stats.invalid.errors.length, 1); - jsonResponse.meta.stats.invalid.errors[0].message.should.equal('Validation (isEmail) failed for email'); + should.equal(jsonResponse.meta.stats.invalid.errors.length, 4); + jsonResponse.meta.stats.invalid.errors[0].message.should.equal('Validation failed for \'name\''); + jsonResponse.meta.stats.invalid.errors[0].count.should.equal(1); + + jsonResponse.meta.stats.invalid.errors[1].message.should.equal('Validation failed for \'email\''); + jsonResponse.meta.stats.invalid.errors[1].count.should.equal(2); + + jsonResponse.meta.stats.invalid.errors[2].message.should.equal('Validation failed for \'created_at\''); + jsonResponse.meta.stats.invalid.errors[2].count.should.equal(1); + + jsonResponse.meta.stats.invalid.errors[3].message.should.equal('Validation failed for \'complimentary_plan\''); + jsonResponse.meta.stats.invalid.errors[3].count.should.equal(1); }); }); diff --git a/test/utils/fixtures/csv/member-invalid-email.csv b/test/utils/fixtures/csv/member-invalid-email.csv deleted file mode 100644 index 330109c06d..0000000000 --- a/test/utils/fixtures/csv/member-invalid-email.csv +++ /dev/null @@ -1,2 +0,0 @@ -email -invalid_email_value diff --git a/test/utils/fixtures/csv/members-invalid-values.csv b/test/utils/fixtures/csv/members-invalid-values.csv new file mode 100644 index 0000000000..49511a8900 --- /dev/null +++ b/test/utils/fixtures/csv/members-invalid-values.csv @@ -0,0 +1,3 @@ +email,name,note,subscribed_to_emails,complimentary_plan,stripe_customer_id,created_at,labels +invalid_email_value1,",name starting with coma",,not_boolean,false,,not_a_date,labels +invalid_email_value2,"good name",,true,not_boolean,,2019-10-30T14:52:08.000Z,more-labels