From c507ea9600ffa4f8c9ac538015c8218316e095e4 Mon Sep 17 00:00:00 2001 From: Naz Date: Fri, 9 Jun 2023 13:50:53 +0700 Subject: [PATCH] Improved oversized batch correction logic refs https://github.com/naz/Ghost/commit/551532f8743b324c288e887f3c8798140db5c0ba refs https://github.com/TryGhost/Team/issues/3324 - After analyzing data dumps, the data revealed that we have extra data from a stray batch. The filtering logic manually filters out the data to the recipients that belong to a "current batch". - Hunting down the root cause of the data mixup proved to be too expensive of an investigation, so this is a "good enough patch" to deal with the problem. - Most likely cause is the concurrent batch sending, but reducing the concurrency would be too expensive of a performance price to pay instead of filtering the data rarely. --- .../email-service/lib/BatchSendingService.js | 41 ++++++++----------- .../test/batch-sending-service.test.js | 19 +++++---- ghost/email-service/test/utils/index.js | 1 + 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/ghost/email-service/lib/BatchSendingService.js b/ghost/email-service/lib/BatchSendingService.js index 33235b1c32..9b4af57e80 100644 --- a/ghost/email-service/lib/BatchSendingService.js +++ b/ghost/email-service/lib/BatchSendingService.js @@ -1,4 +1,3 @@ -const uniqBy = require('lodash/uniqBy'); const logging = require('@tryghost/logging'); const ObjectID = require('bson-objectid').default; const errors = require('@tryghost/errors'); @@ -416,19 +415,6 @@ class BatchSendingService { {...this.#getBeforeRetryConfig(email), description: `getBatchMembers batch ${originalBatch.id}`} ); - if (members.length > this.#sendingService.getMaximumRecipients()) { - // @NOTE the unique by member_id is a best effort to make sure we don't send the same email to the same member twice - logging.error(`Email batch ${originalBatch.id} has ${members.length} members, which exceeds the maximum of ${this.#sendingService.getMaximumRecipients()}. Filtering to unique members`); - members = uniqBy(members, 'email'); - - if (members.length > this.#sendingService.getMaximumRecipients()) { - // @NOTE this is a best effort logic to still try sending an email batch - // even if it exceeds the maximum recipients limit of the sending service - logging.error(`Email batch ${originalBatch.id} has ${members.length} members, which exceeds the maximum of ${this.#sendingService.getMaximumRecipients()}. Truncating to ${this.#sendingService.getMaximumRecipients()}`); - members = members.slice(0, this.#sendingService.getMaximumRecipients()); - } - } - const response = await this.retryDb(async () => { return await this.#sendingService.send({ emailId: email.id, @@ -515,9 +501,25 @@ class BatchSendingService { * @returns {Promise} */ async getBatchMembers(batchId) { - const models = await this.#models.EmailRecipient.findAll({filter: `batch_id:${batchId}`, withRelated: ['member', 'member.stripeSubscriptions', 'member.products']}); + let models = await this.#models.EmailRecipient.findAll({filter: `batch_id:${batchId}`, withRelated: ['member', 'member.stripeSubscriptions', 'member.products']}); - const mappedMemberLikes = models.map((model) => { + const BATCH_SIZE = this.#sendingService.getMaximumRecipients(); + if (models.length > BATCH_SIZE) { + // @NOTE: filtering by batch_id is our best effort to "correct" returned data + logging.warn(`Email batch ${batchId} has ${models.length} members, which exceeds the maximum of ${BATCH_SIZE} members per batch. Filtering by batch_id: ${batchId}`); + models = models.filter(m => m.get('batch_id') === batchId); + + if (models.length > BATCH_SIZE) { + // @NOTE this is a best effort logic to still try sending an email batch + // even if it exceeds the maximum recipients limit of the sending service. + // In theory this should never happen, but being extra safe to make sure + // the email delivery still happens. + logging.error(`Email batch ${batchId} has ${models.length} members, which exceeds the maximum of ${BATCH_SIZE}. Truncating to ${BATCH_SIZE}`); + models = models.slice(0, BATCH_SIZE); + } + } + + return models.map((model) => { // Map subscriptions const subscriptions = model.related('member').related('stripeSubscriptions').toJSON(); const tiers = model.related('member').related('products').toJSON(); @@ -533,13 +535,6 @@ class BatchSendingService { tiers }; }); - - const BATCH_SIZE = this.#sendingService.getMaximumRecipients(); - if (mappedMemberLikes.length > BATCH_SIZE) { - logging.warn(`Batch ${batchId} has ${mappedMemberLikes.length} members, but the sending service only supports ${BATCH_SIZE} members per batch.`); - } - - return mappedMemberLikes; } /** diff --git a/ghost/email-service/test/batch-sending-service.test.js b/ghost/email-service/test/batch-sending-service.test.js index 15ba9359b3..e2ca295090 100644 --- a/ghost/email-service/test/batch-sending-service.test.js +++ b/ghost/email-service/test/batch-sending-service.test.js @@ -887,6 +887,7 @@ describe('Batch Sending Service', function () { it('Truncates recipients if more than the maximum are returned in a batch', async function () { const EmailBatch = createModelClass({ findOne: { + id: '123_batch_id', status: 'pending', member_segment: null } @@ -898,6 +899,7 @@ describe('Batch Sending Service', function () { { member_id: '123', member_uuid: '123', + batch_id: '123_batch_id', member_email: 'example@example.com', member_name: 'Test User', loaded: ['member'], @@ -912,6 +914,7 @@ describe('Batch Sending Service', function () { { member_id: '124', member_uuid: '124', + batch_id: '123_batch_id', member_email: 'example2@example.com', member_name: 'Test User 2', loaded: ['member'], @@ -926,6 +929,7 @@ describe('Batch Sending Service', function () { { member_id: '125', member_uuid: '125', + batch_id: '123_batch_id', member_email: 'example3@example.com', member_name: 'Test User 3', loaded: ['member'], @@ -937,10 +941,11 @@ describe('Batch Sending Service', function () { products: [] }) }, - // NOTE: one recipient with a duplicate data + // NOTE: one recipient from a different batch { member_id: '125', member_uuid: '125', + batch_id: '124_ANOTHER_batch_id', member_email: 'example3@example.com', member_name: 'Test User 3', loaded: ['member'], @@ -968,7 +973,9 @@ describe('Batch Sending Service', function () { const result = await service.sendBatch({ email: createModel({}), - batch: createModel({}), + batch: createModel({ + id: '123_batch_id' + }), post: createModel({}), newsletter: createModel({}) }); @@ -977,14 +984,12 @@ describe('Batch Sending Service', function () { sinon.assert.calledOnce(warnLog); const firstLoggedWarn = warnLog.firstCall.args[0]; - assert.match(firstLoggedWarn, /Batch [a-f0-9]{24} has 4 members, but the sending service only supports 2 members per batch/); + assert.match(firstLoggedWarn, /Email batch 123_batch_id has 4 members, which exceeds the maximum of 2 members per batch. Filtering by batch_id: 123_batch_id/); - sinon.assert.calledTwice(errorLog); + sinon.assert.calledOnce(errorLog); const firstLoggedError = errorLog.firstCall.args[0]; - const secondLoggedError = errorLog.secondCall.args[0]; - assert.match(firstLoggedError, /Email batch [a-f0-9]{24} has 4 members, which exceeds the maximum of 2. Filtering to unique members/); - assert.match(secondLoggedError, /Email batch [a-f0-9]{24} has 3 members, which exceeds the maximum of 2. Truncating to 2/); + assert.match(firstLoggedError, /Email batch 123_batch_id has 3 members, which exceeds the maximum of 2. Truncating to 2/); sinon.assert.calledOnce(sendingService.send); const {members} = sendingService.send.firstCall.args[0]; diff --git a/ghost/email-service/test/utils/index.js b/ghost/email-service/test/utils/index.js index 51f9828711..d4fd95ac03 100644 --- a/ghost/email-service/test/utils/index.js +++ b/ghost/email-service/test/utils/index.js @@ -85,6 +85,7 @@ const createModelClass = (options = {}) => { return Promise.resolve({ models, map: models.map.bind(models), + filter: models.filter.bind(models), length: models.length }); },