From 7d1d6ec6eb4d413e55a57bb2abf3dc00d5191fb8 Mon Sep 17 00:00:00 2001 From: Hannah Wolfe Date: Fri, 17 Sep 2021 11:13:42 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fixed=20error=20in=20sitemap=20w?= =?UTF-8?q?ith=20>50k=20posts=20(#13317)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit closes: CORE-34 refs: https://github.com/TryGhost/Team/issues/1044 - this is a super basic fix, it adds a max nodes concept and limits the node in each sub-sitemap to 50k by default - this will prevent the error in google console - a better fix is in progress, but we want to at least solve the errors ASAP --- .../services/sitemap/base-generator.js | 18 ++++++++----- test/unit/services/sitemap/generator.test.js | 26 +++++++++++++++++++ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/core/frontend/services/sitemap/base-generator.js b/core/frontend/services/sitemap/base-generator.js index 83fa169e5d..c880b6bf19 100644 --- a/core/frontend/services/sitemap/base-generator.js +++ b/core/frontend/services/sitemap/base-generator.js @@ -19,30 +19,34 @@ class BaseSiteMapGenerator { this.nodeTimeLookup = {}; this.siteMapContent = null; this.lastModified = 0; + this.maxNodes = 50000; } generateXmlFromNodes() { - const self = this; - // Get a mapping of node to timestamp - const timedNodes = _.map(this.nodeLookup, function (node, id) { + let nodesToProcess = _.map(this.nodeLookup, (node, id) => { return { id: id, // Using negative here to sort newest to oldest - ts: -(self.nodeTimeLookup[id] || 0), + ts: -(this.nodeTimeLookup[id] || 0), node: node }; }); + // Limit to 50k nodes - this is a quick fix to prevent errors in google console + if (this.maxNodes) { + nodesToProcess = nodesToProcess.slice(0, this.maxNodes); + } + // Sort nodes by timestamp - const sortedNodes = _.sortBy(timedNodes, 'ts'); + nodesToProcess = _.sortBy(nodesToProcess, 'ts'); // Grab just the nodes - const urlElements = _.map(sortedNodes, 'node'); + nodesToProcess = _.map(nodesToProcess, 'node'); const data = { // Concat the elements to the _attr declaration - urlset: [XMLNS_DECLS].concat(urlElements) + urlset: [XMLNS_DECLS].concat(nodesToProcess) }; // Generate full xml diff --git a/test/unit/services/sitemap/generator.test.js b/test/unit/services/sitemap/generator.test.js index e49e38e3b9..54968e3d4e 100644 --- a/test/unit/services/sitemap/generator.test.js +++ b/test/unit/services/sitemap/generator.test.js @@ -52,6 +52,32 @@ describe('Generators', function () { sinon.restore(); }); + it('max node setting results in the right number of nodes', function () { + generator = new PostGenerator({maxNodes: 5}); + + for (let i = 0; i < 10; i++) { + generator.addUrl(`http://my-ghost-blog.com/episode-${i}/`, testUtils.DataGenerator.forKnex.createPost({ + created_at: (Date.UTC(2014, 11, 22, 12) - 360000) + 200, + updated_at: null, + published_at: null, + slug: `episode-${i}` + })); + } + + generator.getXml(); + + // We end up with 10 nodes + Object.keys(generator.nodeLookup).should.be.Array().with.lengthOf(10); + + // But only 5 are output in the xml + generator.siteMapContent.match(//g).should.be.Array().with.lengthOf(5); + }); + + it('default is 50k', function () { + generator = new PostGenerator(); + generator.maxNodes.should.eql(50000); + }); + describe('IndexGenerator', function () { beforeEach(function () { generator = new IndexGenerator({