From 888acfb0c52ee08f7ca4c07ce6b6fc78fbc41e31 Mon Sep 17 00:00:00 2001 From: Steve Larson <9larsons@gmail.com> Date: Thu, 9 Nov 2023 19:24:56 -0600 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20edge=20case=20resulting=20?= =?UTF-8?q?in=20duplicate=20emails=20for=20some=20recipients=20(#18941)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refs https://ghost.slack.com/archives/CTH5NDJMS/p1699359241142969 It's possible for `ObjectIDs` to have only numeric characters. We were previously letting the type be inferred, which created a very rare but possible edge case where the last recipient of an email batch had a numeric ObjectID, resulting in a numeric comparison against alphanumeric `ObjectIDs` in the database. - updated the filter to add `'`'s around the `lastId` parameter - updated tests to check for the type of the id filter parameter value - can't fully test for numeric object IDs using what we have because javascript cannot handle numerics of that size; may be able to look at using fixture data loaded directly into the db --- .../email-service/lib/BatchSendingService.js | 2 +- .../test/batch-sending-service.test.js | 113 ++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) diff --git a/ghost/email-service/lib/BatchSendingService.js b/ghost/email-service/lib/BatchSendingService.js index 407bb3d786..4250913342 100644 --- a/ghost/email-service/lib/BatchSendingService.js +++ b/ghost/email-service/lib/BatchSendingService.js @@ -247,7 +247,7 @@ class BatchSendingService { while (!members || lastId) { logging.info(`Fetching members batch for email ${email.id} segment ${segment}, lastId: ${lastId}`); - const filter = segmentFilter + `+id:<${lastId}`; + const filter = segmentFilter + `+id:<'${lastId}'`; members = await this.#models.Member.getFilteredCollectionQuery({filter}) .orderByRaw('id DESC') .select('members.id', 'members.uuid', 'members.email', 'members.name').limit(BATCH_SIZE + 1); diff --git a/ghost/email-service/test/batch-sending-service.test.js b/ghost/email-service/test/batch-sending-service.test.js index 7221ae84af..59411f91e5 100644 --- a/ghost/email-service/test/batch-sending-service.test.js +++ b/ghost/email-service/test/batch-sending-service.test.js @@ -298,6 +298,10 @@ describe('Batch Sending Service', function () { })); const q = nql(filter); + // Check that the filter id:<${lastId} is a string + // In rare cases when the object ID is numeric, the query returns unexpected results + assert.equal(typeof q.toJSON().$and[1].id.$lt, 'string'); + const all = members.filter((member) => { return q.queryJSON(member.toJSON()); }); @@ -394,6 +398,10 @@ describe('Batch Sending Service', function () { Member.getFilteredCollectionQuery = ({filter}) => { const q = nql(filter); + // Check that the filter id:<${lastId} is a string + // In rare cases when the object ID is numeric, the query returns unexpected results + assert.equal(typeof q.toJSON().$and[2].id.$lt, 'string'); + const all = members.filter((member) => { return q.queryJSON(member.toJSON()); }); @@ -451,6 +459,111 @@ describe('Batch Sending Service', function () { // Check email_count set assert.equal(email.get('email_count'), 4); }); + + // NOTE: we can't fully test this because javascript can't handle a large number (e.g. 650706040078550001536020) - it uses scientific notation + // so we have to use a string + // ref: https://ghost.slack.com/archives/CTH5NDJMS/p1699359241142969 + it('sends expected emails if a batch ends on a numeric id', async function () { + const Member = createModelClass({}); + const EmailBatch = createModelClass({}); + const newsletter = createModel({}); + + const members = [ + createModel({ + id: '61a55008a9d68c003baec6df', + email: `test1@numericid.com`, + uuid: 'test1', + status: 'free', + newsletters: [ + newsletter + ] + }), + createModel({ + id: '650706040078550001536020', // numeric object id + email: `test2@numericid.com`, + uuid: 'test2', + status: 'free', + newsletters: [ + newsletter + ] + }), + createModel({ + id: '65070957007855000153605b', + email: `test3@numericid.com`, + uuid: 'test3', + status: 'free', + newsletters: [ + newsletter + ] + }) + ]; + + const initialMembers = members.slice(); + + Member.getFilteredCollectionQuery = ({filter}) => { + const q = nql(filter); + // Check that the filter id:<${lastId} is a string + // In rare cases when the object ID is numeric, the query returns unexpected results + assert.equal(typeof q.toJSON().$and[2].id.$lt, 'string'); + + const all = members.filter((member) => { + return q.queryJSON(member.toJSON()); + }); + + // Sort all by id desc (string) - this is how we keep the order of members consistent (object id is a proxy for created_at) + all.sort((a, b) => { + return b.id.localeCompare(a.id); + }); + + return createDb({ + all: all.map(member => member.toJSON()) + }); + }; + + const db = createDb({}); + const insert = sinon.spy(db, 'insert'); + + const service = new BatchSendingService({ + models: {Member, EmailBatch}, + emailRenderer: { + getSegments() { + return ['status:free']; + } + }, + sendingService: { + getMaximumRecipients() { + return 2; // pick a batch size that ends with a numeric member object id + } + }, + emailSegmenter: { + getMemberFilterForSegment(n, _, segment) { + return `newsletters.id:${n.id}+(${segment})`; + } + }, + db + }); + + const email = createModel({}); + + const batches = await service.createBatches({ + email, + post: createModel({}), + newsletter + }); + assert.equal(batches.length, 2); + + const calls = insert.getCalls(); + assert.equal(calls.length, 2); + + const insertedRecipients = calls.flatMap(call => call.args[0]); + assert.equal(insertedRecipients.length, 3); + + // Check all recipients match initialMembers + assert.deepEqual(insertedRecipients.map(recipient => recipient.member_id).sort(), initialMembers.map(member => member.id).sort()); + + // Check email_count set + assert.equal(email.get('email_count'), 3); + }); }); describe('createBatch', function () {