0
Fork 0
mirror of https://github.com/TryGhost/Ghost.git synced 2025-01-20 22:42:53 -05:00

Excludes posts and pages with canonical_url meta from sitemap. (#16376)

closes https://github.com/TryGhost/Team/issues/2531

This commit fixes the issue where non-canonical URLs are included in the
XML sitemap, leading to poor SEO for our users' sites. The solution
implemented is to exclude any page or post that specifies a canonical
URL in its metadata from the sitemap.

To achieve this, a condition has been added to the 'addUrl' method,
which checks for the existence of a canonical URL in the metadata of the
resource being added to the sitemap. If a canonical URL is present, the
resource is excluded from the sitemap.

With this fix, our users' sites will have better SEO and improved search
engine visibility.
This commit is contained in:
Ronald Langeveld 2023-03-09 16:38:43 +08:00 committed by GitHub
parent 0b13d542f8
commit 7539a681fe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 57 additions and 1 deletions

View file

@ -22,6 +22,10 @@ class BaseSiteMapGenerator {
this.maxPerPage = 50000; this.maxPerPage = 50000;
} }
hasCanonicalUrl(datum) {
return Boolean(datum?.canonical_url);
}
generateXmlFromNodes(page) { generateXmlFromNodes(page) {
// Get a mapping of node to timestamp // Get a mapping of node to timestamp
let nodesToProcess = _.map(this.nodeLookup, (node, id) => { let nodesToProcess = _.map(this.nodeLookup, (node, id) => {
@ -75,7 +79,7 @@ class BaseSiteMapGenerator {
addUrl(url, datum) { addUrl(url, datum) {
const node = this.createUrlNodeFromDatum(url, datum); const node = this.createUrlNodeFromDatum(url, datum);
if (node) { if (node && !this.hasCanonicalUrl(datum)) {
this.updateLastModified(datum); this.updateLastModified(datum);
this.updateLookups(datum, node); this.updateLookups(datum, node);
// force regeneration of xml // force regeneration of xml

View file

@ -153,6 +153,44 @@ describe('Generators', function () {
}); });
}); });
// Unit tests for hasCanonicalUrl: truthy canonical_url -> true, null -> false.
describe('fn: hasCanonicalUrl', function () {
    it('can check for canonical url', function () {
        // Fixture post that points at a canonical URL.
        const postWithCanonical = testUtils.DataGenerator.forKnex.createPost({
            page: false,
            slug: 'some-cool-page',
            canonical_url: 'https://myblog.com/test/'
        });
        const result = generator.hasCanonicalUrl(postWithCanonical);
        result.should.eql(true);
    });

    it('returns false if no canonical url', function () {
        // Fixture post whose canonical_url is explicitly null.
        const postWithoutCanonical = testUtils.DataGenerator.forKnex.createPost({
            page: false,
            slug: 'some-cool-page',
            canonical_url: null
        });
        const result = generator.hasCanonicalUrl(postWithoutCanonical);
        result.should.eql(false);
    });
});
// Verifies that addUrl skips resources that declare a canonical_url.
describe('fn: addUrl', function () {
    it('does not include posts containing canonical_url', function () {
        // Post without a canonical URL: expected to be listed.
        generator.addUrl('https://myblog.com/test2/', testUtils.DataGenerator.forKnex.createPost({
            page: false,
            slug: 'test2',
            canonical_url: null
        }));
        // Post with a canonical URL: expected to be skipped.
        generator.addUrl('https://myblog.com/test/', testUtils.DataGenerator.forKnex.createPost({
            page: false,
            slug: 'test',
            canonical_url: 'https://external.com/test/'
        }));
        const xml = generator.getXml();
        // The sitemap node is built from the url passed to addUrl, so the
        // exclusion has to be checked against that url. Asserting only on the
        // canonical_url value would presumably pass even without the exclusion.
        xml.should.match(/https:\/\/myblog\.com\/test2\//);
        xml.should.not.match(/https:\/\/myblog\.com\/test\//);
        xml.should.not.match(/https:\/\/external\.com\/test\//);
    });
});
describe('fn: getXml', function () { describe('fn: getXml', function () {
beforeEach(function () { beforeEach(function () {
sinon.stub(urlUtils, 'urlFor'); sinon.stub(urlUtils, 'urlFor');
@ -319,6 +357,20 @@ describe('Generators', function () {
// <loc> should exist exactly one time // <loc> should exist exactly one time
generator.siteMapContent.get(1).match(/<loc>/g).length.should.eql(3); generator.siteMapContent.get(1).match(/<loc>/g).length.should.eql(3);
}); });
// Verifies that pages declaring a canonical_url are left out of the sitemap.
it('does not include pages containing canonical_url', function () {
    // Page without a canonical URL: expected to be listed.
    generator.addUrl('https://myblog.com/test2/', testUtils.DataGenerator.forKnex.createPost({
        page: true,
        slug: 'test2',
        canonical_url: null
    }));
    // Page with a canonical URL: expected to be skipped.
    generator.addUrl('https://myblog.com/test/', testUtils.DataGenerator.forKnex.createPost({
        page: true,
        slug: 'test',
        canonical_url: 'https://external.com/test/'
    }));
    const xml = generator.getXml();
    // The sitemap node is built from the url passed to addUrl, so the
    // exclusion has to be checked against that url. Asserting only on the
    // canonical_url value would presumably pass even without the exclusion.
    xml.should.match(/https:\/\/myblog\.com\/test2\//);
    xml.should.not.match(/https:\/\/myblog\.com\/test\//);
    xml.should.not.match(/https:\/\/external\.com\/test\//);
});
}); });
}); });