From a7185735da7413e132f313ff5086db74304d7654 Mon Sep 17 00:00:00 2001 From: Drew Powers <1369770+drwpow@users.noreply.github.com> Date: Wed, 21 Apr 2021 14:14:24 -0600 Subject: [PATCH] Add sitemap generation (#120) * Add sitemap generation * Update README * Fix index handling, sort sitemap * Add --no-sitemap CLI flag (and config option) --- README.md | 12 ++++ examples/blog/astro.config.mjs | 3 + prettier-plugin-astro/package-lock.json | 2 +- src/@types/astro.ts | 4 ++ src/build.ts | 66 ++++++++++++++----- src/build/sitemap.ts | 15 +++++ src/cli.ts | 39 +++++++---- src/config.ts | 13 +++- test/fixtures/astro-dynamic/astro.config.mjs | 3 + test/fixtures/astro-markdown/astro.config.mjs | 1 + 10 files changed, 128 insertions(+), 30 deletions(-) create mode 100644 examples/blog/astro.config.mjs create mode 100644 src/build/sitemap.ts create mode 100644 test/fixtures/astro-dynamic/astro.config.mjs diff --git a/README.md b/README.md index 73bd618e1a..32ddd71096 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,8 @@ export default { }, /** Your public domain, e.g.: https://my-site.dev/ */ site: '', + /** Generate sitemap (set to "false" to disable) */ + sitemap: true, }; ``` @@ -154,6 +156,16 @@ const localData = Astro.fetchContent('../post/*.md'); --- ``` +### 🗺️ Sitemap + +Astro will automatically create a `/sitemap.xml` for you for SEO! Be sure to set the `site` URL in your [Astro config][config] so the URLs can be generated properly. + +⚠️ Note that Astro won’t inject this into your HTML for you! 
You’ll have to add the tag yourself in your `<head>` on all pages that need it: + +```html +<link rel="sitemap" href="/sitemap.xml" /> +``` + ##### Examples - [Blog Example][example-blog] diff --git a/examples/blog/astro.config.mjs b/examples/blog/astro.config.mjs new file mode 100644 index 0000000000..5fc4abd1c2 --- /dev/null +++ b/examples/blog/astro.config.mjs @@ -0,0 +1,3 @@ +export default { + site: 'https://muppet-blog.github.io/', +}; diff --git a/prettier-plugin-astro/package-lock.json b/prettier-plugin-astro/package-lock.json index dbd72e80f9..ec1c701399 100644 --- a/prettier-plugin-astro/package-lock.json +++ b/prettier-plugin-astro/package-lock.json @@ -189,7 +189,7 @@ "rollup": "^2.43.1", "rollup-plugin-terser": "^7.0.2", "sass": "^1.32.8", - "snowpack": "^3.3.2", + "snowpack": "^3.3.4", "svelte": "^3.35.0", "tiny-glob": "^0.2.8", "unified": "^9.2.1", diff --git a/src/@types/astro.ts b/src/@types/astro.ts index e12ceed844..4ecab847de 100644 --- a/src/@types/astro.ts +++ b/src/@types/astro.ts @@ -14,6 +14,10 @@ export interface AstroConfig { astroRoot: URL; public: URL; extensions?: Record; + /** Public URL base (e.g. 'https://mysite.com'). Used in generating sitemaps and canonical URLs. */ + site?: string; + /** Generate a sitemap? 
*/ + sitemap: boolean; } export interface JsxItem { diff --git a/src/build.ts b/src/build.ts index 4c693ca073..9f9f8f3ea1 100644 --- a/src/build.ts +++ b/src/build.ts @@ -3,25 +3,29 @@ import type { LogOptions } from './logger'; import type { AstroRuntime, LoadResult } from './runtime'; import { existsSync, promises as fsPromises } from 'fs'; -import { relative as pathRelative } from 'path'; +import path from 'path'; +import cheerio from 'cheerio'; import { fileURLToPath } from 'url'; import { fdir } from 'fdir'; -import { defaultLogDestination, error } from './logger.js'; +import { defaultLogDestination, error, info } from './logger.js'; import { createRuntime } from './runtime.js'; import { bundle, collectDynamicImports } from './build/bundle.js'; +import { generateSitemap } from './build/sitemap.js'; import { collectStatics } from './build/static.js'; -const { mkdir, readdir, readFile, stat, writeFile } = fsPromises; +const { mkdir, readFile, writeFile } = fsPromises; interface PageBuildOptions { astroRoot: URL; dist: URL; filepath: URL; runtime: AstroRuntime; + sitemap: boolean; statics: Set; } interface PageResult { + canonicalURLs: string[]; statusCode: number; } @@ -49,14 +53,14 @@ function mergeSet(a: Set, b: Set) { } /** Utility for writing to file (async) */ -async function writeFilep(outPath: URL, bytes: string | Buffer, encoding: 'utf-8' | null) { +async function writeFilep(outPath: URL, bytes: string | Buffer, encoding: 'utf8' | null) { const outFolder = new URL('./', outPath); await mkdir(outFolder, { recursive: true }); await writeFile(outPath, bytes, encoding || 'binary'); } /** Utility for writing a build result to disk */ -async function writeResult(result: LoadResult, outPath: URL, encoding: null | 'utf-8') { +async function writeResult(result: LoadResult, outPath: URL, encoding: null | 'utf8') { if (result.statusCode === 500 || result.statusCode === 404) { error(logging, 'build', result.error || result.statusCode); } else if (result.statusCode 
!== 200) { @@ -75,7 +79,7 @@ function getPageType(filepath: URL): 'collection' | 'static' { /** Build collection */ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics }: PageBuildOptions): Promise { - const rel = pathRelative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro + const rel = path.relative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro const pagePath = `/${rel.replace(/\$([^.]+)\.astro$/, '$1')}`; const builtURLs = new Set(); // !important: internal cache that prevents building the same URLs @@ -86,8 +90,8 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics builtURLs.add(url); if (result.statusCode === 200) { const outPath = new URL('./' + url + '/index.html', dist); - await writeResult(result, outPath, 'utf-8'); - mergeSet(statics, collectStatics(result.contents.toString('utf-8'))); + await writeResult(result, outPath, 'utf8'); + mergeSet(statics, collectStatics(result.contents.toString('utf8'))); } return result; } @@ -103,6 +107,7 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics [...result.collectionInfo.additionalURLs].map(async (url) => { // for the top set of additional URLs, we render every new URL generated const addlResult = await loadCollection(url); + builtURLs.add(url); if (addlResult && addlResult.collectionInfo) { // believe it or not, we may still have a few unbuilt pages left. 
this is our last crawl: await Promise.all([...addlResult.collectionInfo.additionalURLs].map(async (url2) => loadCollection(url2))); @@ -112,14 +117,16 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics } return { + canonicalURLs: [...builtURLs].filter((url) => !url.endsWith('/1')), // note: canonical URLs are controlled by the collection, so these are canonical (but exclude "/1" pages as those are duplicates of the index) statusCode: result.statusCode, }; } /** Build static page */ -async function buildStaticPage({ astroRoot, dist, filepath, runtime, statics }: PageBuildOptions): Promise { - const rel = pathRelative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro +async function buildStaticPage({ astroRoot, dist, filepath, runtime, sitemap, statics }: PageBuildOptions): Promise { + const rel = path.relative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro const pagePath = `/${rel.replace(/\.(astro|md)$/, '')}`; + let canonicalURLs: string[] = []; let relPath = './' + rel.replace(/\.(astro|md)$/, '.html'); if (!relPath.endsWith('index.html')) { @@ -129,12 +136,21 @@ async function buildStaticPage({ astroRoot, dist, filepath, runtime, statics }: const outPath = new URL(relPath, dist); const result = await runtime.load(pagePath); - await writeResult(result, outPath, 'utf-8'); + await writeResult(result, outPath, 'utf8'); + if (result.statusCode === 200) { - mergeSet(statics, collectStatics(result.contents.toString('utf-8'))); + mergeSet(statics, collectStatics(result.contents.toString('utf8'))); + + // get Canonical URL (if user has specified one manually, use that) + if (sitemap) { + const $ = cheerio.load(result.contents); + const canonicalTag = $('link[rel="canonical"]'); + canonicalURLs.push(canonicalTag.attr('href') || pagePath.replace(/index$/, '')); + } } return { + canonicalURLs, statusCode: result.statusCode, }; } @@ -162,6 +178,7 @@ export async function 
build(astroConfig: AstroConfig): Promise<0 | 1> { const collectImportsOptions = { astroConfig, logging, resolvePackageUrl, mode }; const pages = await allPages(pageRoot); + let builtURLs: string[] = []; try { await Promise.all( @@ -169,11 +186,13 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> { const filepath = new URL(`file://${pathname}`); const pageType = getPageType(filepath); - const pageOptions: PageBuildOptions = { astroRoot, dist, filepath, runtime, statics }; + const pageOptions: PageBuildOptions = { astroRoot, dist, filepath, runtime, sitemap: astroConfig.sitemap, statics }; if (pageType === 'collection') { - await buildCollectionPage(pageOptions); + const { canonicalURLs } = await buildCollectionPage(pageOptions); + builtURLs.push(...canonicalURLs); } else { - await buildStaticPage(pageOptions); + const { canonicalURLs } = await buildStaticPage(pageOptions); + builtURLs.push(...canonicalURLs); } mergeSet(imports, await collectDynamicImports(filepath, collectImportsOptions)); @@ -211,7 +230,7 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> { const publicFiles = (await new fdir().withFullPaths().crawl(fileURLToPath(pub)).withPromise()) as string[]; for (const filepath of publicFiles) { const fileUrl = new URL(`file://${filepath}`); - const rel = pathRelative(pub.pathname, fileUrl.pathname); + const rel = path.relative(pub.pathname, fileUrl.pathname); const outUrl = new URL('./' + rel, dist); const bytes = await readFile(fileUrl); @@ -219,6 +238,21 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> { } } + // build sitemap + if (astroConfig.sitemap && astroConfig.site) { + const sitemap = generateSitemap( + builtURLs.map((url) => ({ + canonicalURL: new URL( + path.extname(url) ? 
url : url.replace(/\/?$/, '/'), // add trailing slash if there’s no extension + astroConfig.site + ).href, + })) + ); + await writeFile(new URL('./sitemap.xml', dist), sitemap, 'utf8'); + } else if (astroConfig.sitemap) { + info(logging, 'tip', `Set your "site" in astro.config.mjs to generate a sitemap.xml`); + } + await runtime.shutdown(); return 0; } diff --git a/src/build/sitemap.ts b/src/build/sitemap.ts new file mode 100644 index 0000000000..1cb3f3e406 --- /dev/null +++ b/src/build/sitemap.ts @@ -0,0 +1,15 @@ +export interface PageMeta { + /** (required) The canonical URL of the page */ + canonicalURL: string; +} + +/** Construct sitemap.xml given a set of URLs */ +export function generateSitemap(pages: PageMeta[]): string { + let sitemap = ``; + pages.sort((a, b) => a.canonicalURL.localeCompare(b.canonicalURL, 'en', { numeric: true })); // sort alphabetically + for (const page of pages) { + sitemap += `${page.canonicalURL}`; + } + sitemap += `\n`; + return sitemap; +} diff --git a/src/cli.ts b/src/cli.ts index 7adf0c8100..c56a4c0984 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -16,24 +16,34 @@ const buildAndExit = async (...args: Parameters) => { }; type Arguments = yargs.Arguments; -type cliState = 'help' | 'version' | 'dev' | 'build'; +type cliCommand = 'help' | 'version' | 'dev' | 'build'; +interface CLIState { + cmd: cliCommand; + options: { + sitemap?: boolean; + }; +} /** Determine which action the user requested */ -function resolveArgs(flags: Arguments): cliState { +function resolveArgs(flags: Arguments): CLIState { + const options: CLIState['options'] = { + sitemap: typeof flags.sitemap === 'boolean' ? 
flags.sitemap : undefined, + }; + if (flags.version) { - return 'version'; + return { cmd: 'version', options }; } else if (flags.help) { - return 'help'; + return { cmd: 'help', options }; } const cmd = flags._[2]; switch (cmd) { case 'dev': - return 'dev'; + return { cmd: 'dev', options }; case 'build': - return 'build'; + return { cmd: 'build', options }; default: - return 'help'; + return { cmd: 'help', options }; } } @@ -48,6 +58,7 @@ function printHelp() { ${colors.bold('Flags:')} --version Show the version number and exit. --help Show this help message. + --no-sitemap Disable sitemap generation (build only). `); } @@ -57,10 +68,16 @@ async function printVersion() { console.error(pkg.version); } +/** Merge CLI flags & config options (CLI flags take priority) */ +function mergeCLIFlags(astroConfig: AstroConfig, flags: CLIState['options']) { + if (typeof flags.sitemap === 'boolean') astroConfig.sitemap = flags.sitemap; +} + /** Handle `astro run` command */ -async function runCommand(rawRoot: string, cmd: (a: AstroConfig) => Promise) { +async function runCommand(rawRoot: string, cmd: (a: AstroConfig) => Promise, options: CLIState['options']) { try { const astroConfig = await loadConfig(rawRoot); + mergeCLIFlags(astroConfig, options); return cmd(astroConfig); } catch (err) { console.error(colors.red(err.toString() || err)); @@ -78,7 +95,7 @@ export async function cli(args: string[]) { const flags = yargs(args); const state = resolveArgs(flags); - switch (state) { + switch (state.cmd) { case 'help': { printHelp(); process.exit(1); @@ -92,8 +109,8 @@ export async function cli(args: string[]) { case 'build': case 'dev': { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion - const cmd = cmdMap.get(state)!; - runCommand(flags._[3], cmd); + const cmd = cmdMap.get(state.cmd)!; + runCommand(flags._[3], cmd, state.options); } } } diff --git a/src/config.ts b/src/config.ts index 70463fee78..9374999236 100644 --- a/src/config.ts +++ b/src/config.ts @@ 
-12,11 +12,18 @@ function validateConfig(config: any): void { if (typeof config !== 'object') throw new Error(`[astro config] Expected object, received ${typeof config}`); // strings - for (const key of ['projectRoot', 'astroRoot', 'dist', 'public']) { - if (config[key] && typeof config[key] !== 'string') { + for (const key of ['projectRoot', 'astroRoot', 'dist', 'public', 'site']) { + if (config[key] !== undefined && config[key] !== null && typeof config[key] !== 'string') { throw new Error(`[astro config] ${key}: ${JSON.stringify(config[key])}\n Expected string, received ${type(config[key])}.`); } } + + // booleans + for (const key of ['sitemap']) { + if (config[key] !== undefined && config[key] !== null && typeof config[key] !== 'boolean') { + throw new Error(`[astro config] ${key}: ${JSON.stringify(config[key])}\n Expected boolean, received ${type(config[key])}.`); + } + } } /** Set default config values */ @@ -28,6 +35,8 @@ function configDefaults(userConfig?: any): any { if (!config.dist) config.dist = './_site'; if (!config.public) config.public = './public'; + if (typeof config.sitemap === 'undefined') config.sitemap = true; + return config; } diff --git a/test/fixtures/astro-dynamic/astro.config.mjs b/test/fixtures/astro-dynamic/astro.config.mjs new file mode 100644 index 0000000000..12ec645aa4 --- /dev/null +++ b/test/fixtures/astro-dynamic/astro.config.mjs @@ -0,0 +1,3 @@ +export default { + sitemap: false, +}; diff --git a/test/fixtures/astro-markdown/astro.config.mjs b/test/fixtures/astro-markdown/astro.config.mjs index f50751cfdc..c3bdc353c1 100644 --- a/test/fixtures/astro-markdown/astro.config.mjs +++ b/test/fixtures/astro-markdown/astro.config.mjs @@ -2,4 +2,5 @@ export default { extensions: { '.jsx': 'preact', }, + sitemap: false, };