From 0e22462d1534afc8f7bb6782f86db680c7a5f245 Mon Sep 17 00:00:00 2001 From: Robin Gisler <63500142+gislerro@users.noreply.github.com> Date: Thu, 18 Apr 2024 09:09:25 +0200 Subject: [PATCH] fix(sitemap): Trailing slashes on root url (#10772) * add tests that reveal issue * fix trailing slash root page issue * add changeset --- .changeset/late-bags-marry.md | 5 ++ packages/integrations/sitemap/package.json | 1 + packages/integrations/sitemap/src/index.ts | 11 ++- .../integrations/sitemap/src/write-sitemap.ts | 69 +++++++++++++++++++ .../trailing-slash/src/pages/index.astro | 8 +++ .../sitemap/test/trailing-slash.test.js | 29 ++++++-- pnpm-lock.yaml | 7 ++ 7 files changed, 117 insertions(+), 13 deletions(-) create mode 100644 .changeset/late-bags-marry.md create mode 100644 packages/integrations/sitemap/src/write-sitemap.ts create mode 100644 packages/integrations/sitemap/test/fixtures/trailing-slash/src/pages/index.astro diff --git a/.changeset/late-bags-marry.md b/.changeset/late-bags-marry.md new file mode 100644 index 0000000000..af6b4ba3ef --- /dev/null +++ b/.changeset/late-bags-marry.md @@ -0,0 +1,5 @@ +--- +"@astrojs/sitemap": patch +--- + +Fixes an issue where the root url does not follow the `trailingSlash` config option diff --git a/packages/integrations/sitemap/package.json b/packages/integrations/sitemap/package.json index 9ccbf3d5be..cdff0a8aab 100644 --- a/packages/integrations/sitemap/package.json +++ b/packages/integrations/sitemap/package.json @@ -34,6 +34,7 @@ }, "dependencies": { "sitemap": "^7.1.1", + "stream-replace-string": "^2.0.0", "zod": "^3.22.4" }, "devDependencies": { diff --git a/packages/integrations/sitemap/src/index.ts b/packages/integrations/sitemap/src/index.ts index b0548d8f11..a2fae5b5ad 100644 --- a/packages/integrations/sitemap/src/index.ts +++ b/packages/integrations/sitemap/src/index.ts @@ -2,11 +2,11 @@ import path from 'node:path'; import { fileURLToPath } from 'node:url'; import type { AstroConfig, AstroIntegration } from 
'astro'; import type { EnumChangefreq, LinkItem as LinkItemBase, SitemapItemLoose } from 'sitemap'; -import { simpleSitemapAndIndex } from 'sitemap'; import { ZodError } from 'zod'; -import { generateSitemap } from './generate-sitemap.js'; import { validateOptions } from './validate-options.js'; +import { generateSitemap } from './generate-sitemap.js'; +import { writeSitemap } from './write-sitemap.js'; export { EnumChangefreq as ChangeFreqEnum } from 'sitemap'; export type ChangeFreq = `${EnumChangefreq}`; @@ -167,14 +167,13 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => { } } const destDir = fileURLToPath(dir); - await simpleSitemapAndIndex({ + await writeSitemap({ hostname: finalSiteUrl.href, destinationDir: destDir, publicBasePath: config.base, sourceData: urlData, - limit: entryLimit, - gzip: false, - }); + limit: entryLimit + }, config) logger.info(`\`${OUTFILE}\` created at \`${path.relative(process.cwd(), destDir)}\``); } catch (err) { if (err instanceof ZodError) { diff --git a/packages/integrations/sitemap/src/write-sitemap.ts b/packages/integrations/sitemap/src/write-sitemap.ts new file mode 100644 index 0000000000..d55d4fc50b --- /dev/null +++ b/packages/integrations/sitemap/src/write-sitemap.ts @@ -0,0 +1,69 @@ +import { normalize, resolve } from 'path'; +import { createWriteStream, type WriteStream } from 'fs' +import { mkdir } from 'fs/promises'; +import { promisify } from 'util'; +import { Readable, pipeline } from 'stream'; +import replace from 'stream-replace-string' + +import { SitemapAndIndexStream, SitemapStream } from 'sitemap'; + +import type { AstroConfig } from 'astro'; +import type { SitemapItem } from "./index.js"; + +type WriteSitemapConfig = { + hostname: string; + sitemapHostname?: string; + sourceData: SitemapItem[]; + destinationDir: string; + publicBasePath?: string; + limit?: number; +} + +// adapted from sitemap.js/sitemap-simple +export async function writeSitemap({ hostname, sitemapHostname = hostname, 
+sourceData, destinationDir, limit = 50000, publicBasePath = './', }: WriteSitemapConfig, astroConfig: AstroConfig) { + + await mkdir(destinationDir, { recursive: true }) + + const sitemapAndIndexStream = new SitemapAndIndexStream({ + limit, + getSitemapStream: (i) => { + const sitemapStream = new SitemapStream({ + hostname, + }); + const path = `./sitemap-${i}.xml`; + const writePath = resolve(destinationDir, path); + if (!publicBasePath.endsWith('/')) { + publicBasePath += '/'; + } + const publicPath = normalize(publicBasePath + path); + + let stream: WriteStream + if (astroConfig.trailingSlash === 'never' || astroConfig.build.format === 'file') { + // workaround for trailing slash issue in sitemap.js: https://github.com/ekalinin/sitemap.js/issues/403 + const host = hostname.endsWith('/') ? hostname.slice(0, -1) : hostname + const searchStr = `<loc>${host}/</loc>` + const replaceStr = `<loc>${host}</loc>` + stream = sitemapStream.pipe(replace(searchStr, replaceStr)).pipe(createWriteStream(writePath)) + } else { + stream = sitemapStream.pipe(createWriteStream(writePath)) + } + + return [ + new URL( + publicPath, + sitemapHostname + ).toString(), + sitemapStream, + stream, + ]; + }, + }); + + let src = Readable.from(sourceData) + const indexPath = resolve( + destinationDir, + `./sitemap-index.xml` + ); + return promisify(pipeline)(src, sitemapAndIndexStream, createWriteStream(indexPath)); +} \ No newline at end of file diff --git a/packages/integrations/sitemap/test/fixtures/trailing-slash/src/pages/index.astro b/packages/integrations/sitemap/test/fixtures/trailing-slash/src/pages/index.astro new file mode 100644 index 0000000000..5a29cbdbe8 --- /dev/null +++ b/packages/integrations/sitemap/test/fixtures/trailing-slash/src/pages/index.astro @@ -0,0 +1,8 @@ + + + Index + + +

Index

+ + \ No newline at end of file diff --git a/packages/integrations/sitemap/test/trailing-slash.test.js b/packages/integrations/sitemap/test/trailing-slash.test.js index e0a6158fbe..181f0def53 100644 --- a/packages/integrations/sitemap/test/trailing-slash.test.js +++ b/packages/integrations/sitemap/test/trailing-slash.test.js @@ -22,7 +22,10 @@ describe('Trailing slash', () => { it('URLs end with trailing slash', async () => { const data = await readXML(fixture.readFile('/sitemap-0.xml')); const urls = data.urlset.url; - assert.equal(urls[0].loc[0], 'http://example.com/one/'); + + assert.equal(urls[0].loc[0], 'http://example.com/'); + assert.equal(urls[1].loc[0], 'http://example.com/one/'); + assert.equal(urls[2].loc[0], 'http://example.com/two/'); }); }); @@ -41,7 +44,10 @@ describe('Trailing slash', () => { it('URLs do not end with trailing slash', async () => { const data = await readXML(fixture.readFile('/sitemap-0.xml')); const urls = data.urlset.url; - assert.equal(urls[0].loc[0], 'http://example.com/one'); + + assert.equal(urls[0].loc[0], 'http://example.com'); + assert.equal(urls[1].loc[0], 'http://example.com/one'); + assert.equal(urls[2].loc[0], 'http://example.com/two'); }); }); }); @@ -55,10 +61,13 @@ describe('Trailing slash', () => { await fixture.build(); }); - it('URLs do no end with trailing slash', async () => { + it('URLs do not end with trailing slash', async () => { const data = await readXML(fixture.readFile('/sitemap-0.xml')); const urls = data.urlset.url; - assert.equal(urls[0].loc[0], 'http://example.com/one'); + + assert.equal(urls[0].loc[0], 'http://example.com'); + assert.equal(urls[1].loc[0], 'http://example.com/one'); + assert.equal(urls[2].loc[0], 'http://example.com/two'); }); describe('with base path', () => { before(async () => { @@ -73,7 +82,9 @@ describe('Trailing slash', () => { it('URLs do not end with trailing slash', async () => { const data = await readXML(fixture.readFile('/sitemap-0.xml')); const urls = data.urlset.url; - 
assert.equal(urls[0].loc[0], 'http://example.com/base/one'); + assert.equal(urls[0].loc[0], 'http://example.com/base'); + assert.equal(urls[1].loc[0], 'http://example.com/base/one'); + assert.equal(urls[2].loc[0], 'http://example.com/base/two'); }); }); }); @@ -90,7 +101,9 @@ describe('Trailing slash', () => { it('URLs end with trailing slash', async () => { const data = await readXML(fixture.readFile('/sitemap-0.xml')); const urls = data.urlset.url; - assert.equal(urls[0].loc[0], 'http://example.com/one/'); + assert.equal(urls[0].loc[0], 'http://example.com/'); + assert.equal(urls[1].loc[0], 'http://example.com/one/'); + assert.equal(urls[2].loc[0], 'http://example.com/two/'); }); describe('with base path', () => { before(async () => { @@ -105,7 +118,9 @@ describe('Trailing slash', () => { it('URLs end with trailing slash', async () => { const data = await readXML(fixture.readFile('/sitemap-0.xml')); const urls = data.urlset.url; - assert.equal(urls[0].loc[0], 'http://example.com/base/one/'); + assert.equal(urls[0].loc[0], 'http://example.com/base/'); + assert.equal(urls[1].loc[0], 'http://example.com/base/one/'); + assert.equal(urls[2].loc[0], 'http://example.com/base/two/'); }); }); }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3b50248aac..8b81958b2d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -4888,6 +4888,9 @@ importers: sitemap: specifier: ^7.1.1 version: 7.1.1 + stream-replace-string: + specifier: ^2.0.0 + version: 2.0.0 zod: specifier: ^3.22.4 version: 3.22.4 @@ -15642,6 +15645,10 @@ packages: bl: 5.1.0 dev: false + /stream-replace-string@2.0.0: + resolution: {integrity: sha512-TlnjJ1C0QrmxRNrON00JvaFFlNh5TTG00APw23j74ET7gkQpTASi6/L2fuiav8pzK715HXtUeClpBTw2NPSn6w==} + dev: false + /stream-transform@2.1.3: resolution: {integrity: sha512-9GHUiM5hMiCi6Y03jD2ARC1ettBXkQBoQAe7nJsPknnI0ow10aXjTnew8QtYQmLjzn974BnmWEAJgCY6ZP1DeQ==} dependencies: