2023-02-17 13:46:00 -05:00
|
|
|
import { EnumChangefreq } from 'sitemap';
|
2022-06-21 10:29:18 -05:00
|
|
|
import type { SitemapItem, SitemapOptions } from './index.js';
|
|
|
|
import { parseUrl } from './utils/parse-url.js';
|
2022-06-16 14:06:48 -05:00
|
|
|
|
|
|
|
/** Matches URLs that end in a three-digit path segment (e.g. `/404` or `/500/`), with an optional trailing slash. */
const STATUS_CODE_PAGE_REGEXP = /\/[0-9]{3}\/?$/;

/**
 * Construct sitemap.xml given a set of URLs.
 *
 * URLs matching `STATUS_CODE_PAGE_REGEXP` are dropped and the rest are sorted so the
 * output is stable between runs. When `opts.i18n` supplies both `defaultLocale` and
 * `locales`, URLs that `parseUrl` resolves to the same path are cross-linked through
 * each item's `links`.
 *
 * @param pages Page URLs to include; the array itself is not mutated.
 * @param finalSiteUrl Site base URL, forwarded to `parseUrl`.
 * @param opts Sitemap options; `changefreq`, `priority`, `lastmod` and `i18n` are read.
 *   A missing value is treated like an empty options object.
 * @returns One sitemap item per retained URL, in sorted order.
 */
export function generateSitemap(pages: string[], finalSiteUrl: string, opts: SitemapOptions) {
  const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts ?? {};
  // TODO: find way to respect <link rel="canonical"> URLs here
  // `filter` already returns a fresh array, so sorting below never touches `pages`.
  const urls = pages.filter((url) => !STATUS_CODE_PAGE_REGEXP.test(url));
  urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time

  const lastmod = lastmodSrc?.toISOString();

  const { locales, defaultLocale } = i18n || {};

  // `urlPaths[i]` is the parsed path of `urls[i]`; it stays empty when i18n is not configured.
  const urlPaths: (string | undefined)[] = [];
  // Every URL sharing a path, in sorted order, already paired with its language.
  const linksByPath = new Map<string, NonNullable<SitemapItem['links']>>();

  if (defaultLocale && locales) {
    const localeCodes = Object.keys(locales);
    // Parse each URL exactly once instead of re-parsing the whole list for every URL.
    for (const url of urls) {
      const parsed = parseUrl(url, defaultLocale, localeCodes, finalSiteUrl);
      urlPaths.push(parsed?.path);
      if (!parsed?.path) continue; // no usable path: this URL takes no part in alternates

      const link = { url, lang: locales[parsed.locale] };
      const group = linksByPath.get(parsed.path);
      if (group) {
        group.push(link);
      } else {
        linksByPath.set(parsed.path, [link]);
      }
    }
  }

  const urlData: SitemapItem[] = urls.map((url, index) => {
    const path = urlPaths[index];
    const group = path ? linksByPath.get(path) : undefined;
    // Alternates are only emitted when at least two URLs share the path. Each item gets
    // its own copies so that mutating one item's links cannot leak into another item.
    const links = group && group.length > 1 ? group.map((link) => ({ ...link })) : undefined;

    return {
      url,
      links,
      lastmod,
      priority,
      changefreq: changefreq as EnumChangefreq,
    };
  });

  return urlData;
}
|