From a7185735da7413e132f313ff5086db74304d7654 Mon Sep 17 00:00:00 2001
From: Drew Powers <1369770+drwpow@users.noreply.github.com>
Date: Wed, 21 Apr 2021 14:14:24 -0600
Subject: [PATCH] Add sitemap generation (#120)
* Add sitemap generation
* Update README
* Fix index handling, sort sitemap
* Add --no-sitemap CLI flag (and config option)
---
README.md | 12 ++++
examples/blog/astro.config.mjs | 3 +
prettier-plugin-astro/package-lock.json | 2 +-
src/@types/astro.ts | 4 ++
src/build.ts | 66 ++++++++++++++-----
src/build/sitemap.ts | 15 +++++
src/cli.ts | 39 +++++++----
src/config.ts | 13 +++-
test/fixtures/astro-dynamic/astro.config.mjs | 3 +
test/fixtures/astro-markdown/astro.config.mjs | 1 +
10 files changed, 128 insertions(+), 30 deletions(-)
create mode 100644 examples/blog/astro.config.mjs
create mode 100644 src/build/sitemap.ts
create mode 100644 test/fixtures/astro-dynamic/astro.config.mjs
diff --git a/README.md b/README.md
index 73bd618e1a..32ddd71096 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,8 @@ export default {
},
/** Your public domain, e.g.: https://my-site.dev/ */
site: '',
+ /** Generate sitemap (set to `false` to disable) */
+ sitemap: true,
};
```
@@ -154,6 +156,16 @@ const localData = Astro.fetchContent('../post/*.md');
---
```
+### 🗺️ Sitemap
+
+Astro will automatically create a `/sitemap.xml` for you for SEO! Be sure to set the `site` URL in your [Astro config][config] so the URLs can be generated properly.
+
+⚠️ Note that Astro won’t inject this into your HTML for you! You’ll have to add the tag yourself in your `<head>` on all pages that need it:
+
+```html
+<link rel="sitemap" href="/sitemap.xml" />
+```
+
##### Examples
- [Blog Example][example-blog]
diff --git a/examples/blog/astro.config.mjs b/examples/blog/astro.config.mjs
new file mode 100644
index 0000000000..5fc4abd1c2
--- /dev/null
+++ b/examples/blog/astro.config.mjs
@@ -0,0 +1,3 @@
+export default {
+ site: 'https://muppet-blog.github.io/',
+};
diff --git a/prettier-plugin-astro/package-lock.json b/prettier-plugin-astro/package-lock.json
index dbd72e80f9..ec1c701399 100644
--- a/prettier-plugin-astro/package-lock.json
+++ b/prettier-plugin-astro/package-lock.json
@@ -189,7 +189,7 @@
"rollup": "^2.43.1",
"rollup-plugin-terser": "^7.0.2",
"sass": "^1.32.8",
- "snowpack": "^3.3.2",
+ "snowpack": "^3.3.4",
"svelte": "^3.35.0",
"tiny-glob": "^0.2.8",
"unified": "^9.2.1",
diff --git a/src/@types/astro.ts b/src/@types/astro.ts
index e12ceed844..4ecab847de 100644
--- a/src/@types/astro.ts
+++ b/src/@types/astro.ts
@@ -14,6 +14,10 @@ export interface AstroConfig {
astroRoot: URL;
public: URL;
extensions?: Record;
+ /** Public URL base (e.g. 'https://mysite.com'). Used in generating sitemaps and canonical URLs. */
+ site?: string;
+ /** Generate a sitemap? */
+ sitemap: boolean;
}
export interface JsxItem {
diff --git a/src/build.ts b/src/build.ts
index 4c693ca073..9f9f8f3ea1 100644
--- a/src/build.ts
+++ b/src/build.ts
@@ -3,25 +3,29 @@ import type { LogOptions } from './logger';
import type { AstroRuntime, LoadResult } from './runtime';
import { existsSync, promises as fsPromises } from 'fs';
-import { relative as pathRelative } from 'path';
+import path from 'path';
+import cheerio from 'cheerio';
import { fileURLToPath } from 'url';
import { fdir } from 'fdir';
-import { defaultLogDestination, error } from './logger.js';
+import { defaultLogDestination, error, info } from './logger.js';
import { createRuntime } from './runtime.js';
import { bundle, collectDynamicImports } from './build/bundle.js';
+import { generateSitemap } from './build/sitemap.js';
import { collectStatics } from './build/static.js';
-const { mkdir, readdir, readFile, stat, writeFile } = fsPromises;
+const { mkdir, readFile, writeFile } = fsPromises;
interface PageBuildOptions {
astroRoot: URL;
dist: URL;
filepath: URL;
runtime: AstroRuntime;
+ sitemap: boolean;
statics: Set;
}
interface PageResult {
+ canonicalURLs: string[];
statusCode: number;
}
@@ -49,14 +53,14 @@ function mergeSet(a: Set, b: Set) {
}
/** Utility for writing to file (async) */
-async function writeFilep(outPath: URL, bytes: string | Buffer, encoding: 'utf-8' | null) {
+async function writeFilep(outPath: URL, bytes: string | Buffer, encoding: 'utf8' | null) {
const outFolder = new URL('./', outPath);
await mkdir(outFolder, { recursive: true });
await writeFile(outPath, bytes, encoding || 'binary');
}
/** Utility for writing a build result to disk */
-async function writeResult(result: LoadResult, outPath: URL, encoding: null | 'utf-8') {
+async function writeResult(result: LoadResult, outPath: URL, encoding: null | 'utf8') {
if (result.statusCode === 500 || result.statusCode === 404) {
error(logging, 'build', result.error || result.statusCode);
} else if (result.statusCode !== 200) {
@@ -75,7 +79,7 @@ function getPageType(filepath: URL): 'collection' | 'static' {
/** Build collection */
async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics }: PageBuildOptions): Promise {
- const rel = pathRelative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
+ const rel = path.relative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
const pagePath = `/${rel.replace(/\$([^.]+)\.astro$/, '$1')}`;
const builtURLs = new Set(); // !important: internal cache that prevents building the same URLs
@@ -86,8 +90,8 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics
builtURLs.add(url);
if (result.statusCode === 200) {
const outPath = new URL('./' + url + '/index.html', dist);
- await writeResult(result, outPath, 'utf-8');
- mergeSet(statics, collectStatics(result.contents.toString('utf-8')));
+ await writeResult(result, outPath, 'utf8');
+ mergeSet(statics, collectStatics(result.contents.toString('utf8')));
}
return result;
}
@@ -103,6 +107,7 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics
[...result.collectionInfo.additionalURLs].map(async (url) => {
// for the top set of additional URLs, we render every new URL generated
const addlResult = await loadCollection(url);
+ builtURLs.add(url);
if (addlResult && addlResult.collectionInfo) {
// believe it or not, we may still have a few unbuilt pages left. this is our last crawl:
await Promise.all([...addlResult.collectionInfo.additionalURLs].map(async (url2) => loadCollection(url2)));
@@ -112,14 +117,16 @@ async function buildCollectionPage({ astroRoot, dist, filepath, runtime, statics
}
return {
+ canonicalURLs: [...builtURLs].filter((url) => !url.endsWith('/1')), // note: canonical URLs are controlled by the collection, so these are canonical (but exclude "/1" pages as those are duplicates of the index)
statusCode: result.statusCode,
};
}
/** Build static page */
-async function buildStaticPage({ astroRoot, dist, filepath, runtime, statics }: PageBuildOptions): Promise {
- const rel = pathRelative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
+async function buildStaticPage({ astroRoot, dist, filepath, runtime, sitemap, statics }: PageBuildOptions): Promise {
+ const rel = path.relative(fileURLToPath(astroRoot) + '/pages', fileURLToPath(filepath)); // pages/index.astro
const pagePath = `/${rel.replace(/\.(astro|md)$/, '')}`;
+ let canonicalURLs: string[] = [];
let relPath = './' + rel.replace(/\.(astro|md)$/, '.html');
if (!relPath.endsWith('index.html')) {
@@ -129,12 +136,21 @@ async function buildStaticPage({ astroRoot, dist, filepath, runtime, statics }:
const outPath = new URL(relPath, dist);
const result = await runtime.load(pagePath);
- await writeResult(result, outPath, 'utf-8');
+ await writeResult(result, outPath, 'utf8');
+
if (result.statusCode === 200) {
- mergeSet(statics, collectStatics(result.contents.toString('utf-8')));
+ mergeSet(statics, collectStatics(result.contents.toString('utf8')));
+
+ // get Canonical URL (if user has specified one manually, use that)
+ if (sitemap) {
+ const $ = cheerio.load(result.contents);
+ const canonicalTag = $('link[rel="canonical"]');
+ canonicalURLs.push(canonicalTag.attr('href') || pagePath.replace(/index$/, ''));
+ }
}
return {
+ canonicalURLs,
statusCode: result.statusCode,
};
}
@@ -162,6 +178,7 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
const collectImportsOptions = { astroConfig, logging, resolvePackageUrl, mode };
const pages = await allPages(pageRoot);
+ let builtURLs: string[] = [];
try {
await Promise.all(
@@ -169,11 +186,13 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
const filepath = new URL(`file://${pathname}`);
const pageType = getPageType(filepath);
- const pageOptions: PageBuildOptions = { astroRoot, dist, filepath, runtime, statics };
+ const pageOptions: PageBuildOptions = { astroRoot, dist, filepath, runtime, sitemap: astroConfig.sitemap, statics };
if (pageType === 'collection') {
- await buildCollectionPage(pageOptions);
+ const { canonicalURLs } = await buildCollectionPage(pageOptions);
+ builtURLs.push(...canonicalURLs);
} else {
- await buildStaticPage(pageOptions);
+ const { canonicalURLs } = await buildStaticPage(pageOptions);
+ builtURLs.push(...canonicalURLs);
}
mergeSet(imports, await collectDynamicImports(filepath, collectImportsOptions));
@@ -211,7 +230,7 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
const publicFiles = (await new fdir().withFullPaths().crawl(fileURLToPath(pub)).withPromise()) as string[];
for (const filepath of publicFiles) {
const fileUrl = new URL(`file://${filepath}`);
- const rel = pathRelative(pub.pathname, fileUrl.pathname);
+ const rel = path.relative(pub.pathname, fileUrl.pathname);
const outUrl = new URL('./' + rel, dist);
const bytes = await readFile(fileUrl);
@@ -219,6 +238,21 @@ export async function build(astroConfig: AstroConfig): Promise<0 | 1> {
}
}
+ // build sitemap
+ if (astroConfig.sitemap && astroConfig.site) {
+ const sitemap = generateSitemap(
+ builtURLs.map((url) => ({
+ canonicalURL: new URL(
+ path.extname(url) ? url : url.replace(/\/?$/, '/'), // add trailing slash if there’s no extension
+ astroConfig.site
+ ).href,
+ }))
+ );
+ await writeFile(new URL('./sitemap.xml', dist), sitemap, 'utf8');
+ } else if (astroConfig.sitemap) {
+ info(logging, 'tip', `Set your "site" in astro.config.mjs to generate a sitemap.xml`);
+ }
+
await runtime.shutdown();
return 0;
}
diff --git a/src/build/sitemap.ts b/src/build/sitemap.ts
new file mode 100644
index 0000000000..1cb3f3e406
--- /dev/null
+++ b/src/build/sitemap.ts
@@ -0,0 +1,15 @@
+export interface PageMeta {
+  /** (required) The canonical URL of the page */
+  canonicalURL: string;
+}
+
+/**
+ * Construct sitemap.xml (a sitemaps.org 0.9 `<urlset>`) from `pages`; returns the XML string with a trailing newline.
+ * URLs are sorted (numeric-aware) on a copy so `pages` is left untouched, and XML-escaped as the protocol requires.
+ */
+export function generateSitemap(pages: PageMeta[]): string {
+  const escapeXML = (value: string) => value.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;').replace(/'/g, '&apos;');
+  const urls = pages.map((page) => page.canonicalURL).sort((a, b) => a.localeCompare(b, 'en', { numeric: true }));
+  const entries = urls.map((url) => `<url><loc>${escapeXML(url)}</loc></url>`).join('');
+  return `<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">${entries}</urlset>\n`;
+}
diff --git a/src/cli.ts b/src/cli.ts
index 7adf0c8100..c56a4c0984 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -16,24 +16,34 @@ const buildAndExit = async (...args: Parameters) => {
};
type Arguments = yargs.Arguments;
-type cliState = 'help' | 'version' | 'dev' | 'build';
+type cliCommand = 'help' | 'version' | 'dev' | 'build';
+/** Result of parsing the CLI arguments: the command to run plus any flag-based config overrides. */
+interface CLIState {
+ cmd: cliCommand;
+ options: {
+ /** Only set when the sitemap flag was given as a boolean (e.g. `--no-sitemap`); otherwise undefined. */
+ sitemap?: boolean;
+ };
+}
/** Determine which action the user requested */
-function resolveArgs(flags: Arguments): cliState {
+function resolveArgs(flags: Arguments): CLIState {
+ const options: CLIState['options'] = {
+ sitemap: typeof flags.sitemap === 'boolean' ? flags.sitemap : undefined,
+ };
+
if (flags.version) {
- return 'version';
+ return { cmd: 'version', options };
} else if (flags.help) {
- return 'help';
+ return { cmd: 'help', options };
}
const cmd = flags._[2];
switch (cmd) {
case 'dev':
- return 'dev';
+ return { cmd: 'dev', options };
case 'build':
- return 'build';
+ return { cmd: 'build', options };
default:
- return 'help';
+ return { cmd: 'help', options };
}
}
@@ -48,6 +58,7 @@ function printHelp() {
${colors.bold('Flags:')}
--version Show the version number and exit.
--help Show this help message.
+ --no-sitemap Disable sitemap generation (build only).
`);
}
@@ -57,10 +68,16 @@ async function printVersion() {
console.error(pkg.version);
}
+/**
+ * Merge CLI flags & config options (CLI flags take priority).
+ * Mutates `astroConfig` in place; `sitemap` is only overwritten when the flag is an actual boolean,
+ * so an undefined flag leaves the loaded config value as-is.
+ */
+function mergeCLIFlags(astroConfig: AstroConfig, flags: CLIState['options']) {
+ if (typeof flags.sitemap === 'boolean') astroConfig.sitemap = flags.sitemap;
+}
+
/** Handle `astro run` command */
-async function runCommand(rawRoot: string, cmd: (a: AstroConfig) => Promise) {
+async function runCommand(rawRoot: string, cmd: (a: AstroConfig) => Promise, options: CLIState['options']) {
try {
const astroConfig = await loadConfig(rawRoot);
+ mergeCLIFlags(astroConfig, options);
return cmd(astroConfig);
} catch (err) {
console.error(colors.red(err.toString() || err));
@@ -78,7 +95,7 @@ export async function cli(args: string[]) {
const flags = yargs(args);
const state = resolveArgs(flags);
- switch (state) {
+ switch (state.cmd) {
case 'help': {
printHelp();
process.exit(1);
@@ -92,8 +109,8 @@ export async function cli(args: string[]) {
case 'build':
case 'dev': {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
- const cmd = cmdMap.get(state)!;
- runCommand(flags._[3], cmd);
+ const cmd = cmdMap.get(state.cmd)!;
+ runCommand(flags._[3], cmd, state.options);
}
}
}
diff --git a/src/config.ts b/src/config.ts
index 70463fee78..9374999236 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -12,11 +12,18 @@ function validateConfig(config: any): void {
if (typeof config !== 'object') throw new Error(`[astro config] Expected object, received ${typeof config}`);
// strings
- for (const key of ['projectRoot', 'astroRoot', 'dist', 'public']) {
- if (config[key] && typeof config[key] !== 'string') {
+ for (const key of ['projectRoot', 'astroRoot', 'dist', 'public', 'site']) {
+ if (config[key] !== undefined && config[key] !== null && typeof config[key] !== 'string') {
throw new Error(`[astro config] ${key}: ${JSON.stringify(config[key])}\n Expected string, received ${type(config[key])}.`);
}
}
+
+ // booleans
+ for (const key of ['sitemap']) {
+ if (config[key] !== undefined && config[key] !== null && typeof config[key] !== 'boolean') {
+ throw new Error(`[astro config] ${key}: ${JSON.stringify(config[key])}\n Expected boolean, received ${type(config[key])}.`);
+ }
+ }
}
/** Set default config values */
@@ -28,6 +35,8 @@ function configDefaults(userConfig?: any): any {
if (!config.dist) config.dist = './_site';
if (!config.public) config.public = './public';
+ if (typeof config.sitemap === 'undefined') config.sitemap = true;
+
return config;
}
diff --git a/test/fixtures/astro-dynamic/astro.config.mjs b/test/fixtures/astro-dynamic/astro.config.mjs
new file mode 100644
index 0000000000..12ec645aa4
--- /dev/null
+++ b/test/fixtures/astro-dynamic/astro.config.mjs
@@ -0,0 +1,3 @@
+export default {
+ sitemap: false,
+};
diff --git a/test/fixtures/astro-markdown/astro.config.mjs b/test/fixtures/astro-markdown/astro.config.mjs
index f50751cfdc..c3bdc353c1 100644
--- a/test/fixtures/astro-markdown/astro.config.mjs
+++ b/test/fixtures/astro-markdown/astro.config.mjs
@@ -2,4 +2,5 @@ export default {
extensions: {
'.jsx': 'preact',
},
+ sitemap: false,
};