0
Fork 0
mirror of https://github.com/withastro/astro.git synced 2025-02-17 22:44:24 -05:00

Refactor shikiji syntax highlighting code (#9083)

This commit is contained in:
Bjorn Lu 2023-11-14 23:00:17 +08:00 committed by GitHub
parent 6f5de8dfba
commit 4537ecf0d0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 198 additions and 293 deletions

View file

@ -0,0 +1,5 @@
---
'@astrojs/markdown-remark': minor
---
Exports `createShikiHighlighter` for low-level syntax highlighting usage

View file

@ -0,0 +1,6 @@
---
'@astrojs/markdoc': patch
'astro': patch
---
Uses new `createShikiHighlighter` API from `@astrojs/markdown-remark` to avoid code duplication

View file

@ -10,8 +10,7 @@ import type {
ThemeRegistration,
ThemeRegistrationRaw,
} from 'shikiji';
import { visit } from 'unist-util-visit';
import { getCachedHighlighter, replaceCssVariables } from '../dist/core/shiki.js';
import { getCachedHighlighter } from '../dist/core/shiki.js';
interface Props {
/** The code to highlight. Required. */
@ -94,60 +93,13 @@ if (typeof lang === 'object') {
const highlighter = await getCachedHighlighter({
langs: [lang],
themes: Object.values(experimentalThemes).length ? Object.values(experimentalThemes) : [theme],
theme,
experimentalThemes,
wrap,
});
const themeOptions = Object.values(experimentalThemes).length
? { themes: experimentalThemes }
: { theme };
const html = highlighter.codeToHtml(code, {
lang: typeof lang === 'string' ? lang : lang.name,
...themeOptions,
transforms: {
pre(node) {
// Swap to `code` tag if inline
if (inline) {
node.tagName = 'code';
}
// Cast to string as shikiji will always pass them as strings instead of any other types
const classValue = (node.properties.class as string) ?? '';
const styleValue = (node.properties.style as string) ?? '';
// Replace "shiki" class naming with "astro-code"
node.properties.class = classValue.replace(/shiki/g, 'astro-code');
// Handle code wrapping
// if wrap=null, do nothing.
if (wrap === false) {
node.properties.style = styleValue + '; overflow-x: auto;';
} else if (wrap === true) {
node.properties.style =
styleValue + '; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;';
}
},
code(node) {
if (inline) {
return node.children[0] as typeof node;
}
},
root(node) {
if (Object.values(experimentalThemes).length) {
return;
}
// theme.id for shiki -> shikiji compat
const themeName = typeof theme === 'string' ? theme : theme.name;
if (themeName === 'css-variables') {
// Replace special color tokens to CSS variables
visit(node as any, 'element', (child) => {
if (child.properties?.style) {
child.properties.style = replaceCssVariables(child.properties.style);
}
});
}
},
},
const html = highlighter.highlight(code, typeof lang === 'string' ? lang : lang.name, {
inline,
});
---

View file

@ -2,8 +2,8 @@ import * as fs from 'node:fs';
import { fileURLToPath } from 'node:url';
import { codeToHtml } from 'shikiji';
import type { ErrorPayload } from 'vite';
import { replaceCssVariables } from '@astrojs/markdown-remark';
import type { ModuleLoader } from '../../module-loader/index.js';
import { replaceCssVariables } from '../../shiki.js';
import { FailedToLoadModuleSSR, InvalidGlob, MdxIntegrationMissingError } from '../errors-data.js';
import { AstroError, type ErrorWithMetadata } from '../errors.js';
import { createSafeError } from '../utils.js';

View file

@ -1,36 +1,13 @@
import { getHighlighter, type Highlighter } from 'shikiji';
import {
createShikiHighlighter,
type ShikiHighlighter,
type ShikiConfig,
} from '@astrojs/markdown-remark';
type HighlighterOptions = NonNullable<Parameters<typeof getHighlighter>[0]>;
const ASTRO_COLOR_REPLACEMENTS: Record<string, string> = {
'#000001': 'var(--astro-code-color-text)',
'#000002': 'var(--astro-code-color-background)',
'#000004': 'var(--astro-code-token-constant)',
'#000005': 'var(--astro-code-token-string)',
'#000006': 'var(--astro-code-token-comment)',
'#000007': 'var(--astro-code-token-keyword)',
'#000008': 'var(--astro-code-token-parameter)',
'#000009': 'var(--astro-code-token-function)',
'#000010': 'var(--astro-code-token-string-expression)',
'#000011': 'var(--astro-code-token-punctuation)',
'#000012': 'var(--astro-code-token-link)',
};
const COLOR_REPLACEMENT_REGEX = new RegExp(
`(${Object.keys(ASTRO_COLOR_REPLACEMENTS).join('|')})`,
'g'
);
// Caches Promise<Highlighter> for reuse when the same theme and langs are provided
// Caches Promise<ShikiHighlighter> for reuse when the same theme and langs are provided
const cachedHighlighters = new Map();
/**
* shiki -> shikiji compat as we need to manually replace it
*/
export function replaceCssVariables(str: string) {
return str.replace(COLOR_REPLACEMENT_REGEX, (match) => ASTRO_COLOR_REPLACEMENTS[match] || match);
}
export function getCachedHighlighter(opts: HighlighterOptions): Promise<Highlighter> {
export function getCachedHighlighter(opts: ShikiConfig): Promise<ShikiHighlighter> {
// Always sort keys before stringifying to make sure objects match regardless of parameter ordering
const key = JSON.stringify(opts, Object.keys(opts).sort());
@ -39,7 +16,7 @@ export function getCachedHighlighter(opts: HighlighterOptions): Promise<Highligh
return cachedHighlighters.get(key);
}
const highlighter = getHighlighter(opts);
const highlighter = createShikiHighlighter(opts);
cachedHighlighters.set(key, highlighter);
return highlighter;

View file

@ -1,107 +1,19 @@
import Markdoc from '@markdoc/markdoc';
import { createShikiHighlighter } from '@astrojs/markdown-remark';
import type { ShikiConfig } from 'astro';
import { unescapeHTML } from 'astro/runtime/server/index.js';
import { bundledLanguages, getHighlighter, type Highlighter } from 'shikiji';
import type { AstroMarkdocConfig } from '../config.js';
const ASTRO_COLOR_REPLACEMENTS: Record<string, string> = {
'#000001': 'var(--astro-code-color-text)',
'#000002': 'var(--astro-code-color-background)',
'#000004': 'var(--astro-code-token-constant)',
'#000005': 'var(--astro-code-token-string)',
'#000006': 'var(--astro-code-token-comment)',
'#000007': 'var(--astro-code-token-keyword)',
'#000008': 'var(--astro-code-token-parameter)',
'#000009': 'var(--astro-code-token-function)',
'#000010': 'var(--astro-code-token-string-expression)',
'#000011': 'var(--astro-code-token-punctuation)',
'#000012': 'var(--astro-code-token-link)',
};
const COLOR_REPLACEMENT_REGEX = new RegExp(
`(${Object.keys(ASTRO_COLOR_REPLACEMENTS).join('|')})`,
'g'
);
const PRE_SELECTOR = /<pre class="(.*?)shiki(.*?)"/;
const LINE_SELECTOR = /<span class="line"><span style="(.*?)">([\+|\-])/g;
const INLINE_STYLE_SELECTOR = /style="(.*?)"/;
const INLINE_STYLE_SELECTOR_GLOBAL = /style="(.*?)"/g;
/**
* Note: cache only needed for dev server reloads, internal test suites, and manual calls to `Markdoc.transform` by the user.
* Otherwise, `shiki()` is only called once per build, NOT once per page, so a cache isn't needed!
*/
const highlighterCache = new Map<string, Highlighter>();
export default async function shiki({
langs = [],
theme = 'github-dark',
wrap = false,
}: ShikiConfig = {}): Promise<AstroMarkdocConfig> {
const cacheId = typeof theme === 'string' ? theme : theme.name || '';
let highlighter = highlighterCache.get(cacheId)!;
if (!highlighter) {
highlighter = await getHighlighter({
langs: langs.length ? langs : Object.keys(bundledLanguages),
themes: [theme],
});
highlighterCache.set(cacheId, highlighter);
}
export default async function shiki(config?: ShikiConfig): Promise<AstroMarkdocConfig> {
const highlighter = await createShikiHighlighter(config);
return {
nodes: {
fence: {
attributes: Markdoc.nodes.fence.attributes!,
transform({ attributes }) {
let lang: string;
if (typeof attributes.language === 'string') {
const langExists = highlighter
.getLoadedLanguages()
.includes(attributes.language as any);
if (langExists) {
lang = attributes.language;
} else {
console.warn(
`[Shiki highlighter] The language "${attributes.language}" doesn't exist, falling back to plaintext.`
);
lang = 'plaintext';
}
} else {
lang = 'plaintext';
}
let html = highlighter.codeToHtml(attributes.content, { lang, theme });
// Q: Could these regexes match on a user's inputted code blocks?
// A: Nope! All rendered HTML is properly escaped.
// Ex. If a user typed `<span class="line"` into a code block,
// It would become this before hitting our regexes:
// &lt;span class=&quot;line&quot;
html = html.replace(PRE_SELECTOR, `<pre class="$1astro-code$2"`);
// Add "user-select: none;" for "+"/"-" diff symbols
if (attributes.language === 'diff') {
html = html.replace(
LINE_SELECTOR,
'<span class="line"><span style="$1"><span style="user-select: none;">$2</span>'
);
}
if (wrap === false) {
html = html.replace(INLINE_STYLE_SELECTOR, 'style="$1; overflow-x: auto;"');
} else if (wrap === true) {
html = html.replace(
INLINE_STYLE_SELECTOR,
'style="$1; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;"'
);
}
// theme.id for shiki -> shikiji compat
const themeName = typeof theme === 'string' ? theme : theme.name;
if (themeName === 'css-variables') {
html = html.replace(INLINE_STYLE_SELECTOR_GLOBAL, (m) => replaceCssVariables(m));
}
const lang = typeof attributes.language === 'string' ? attributes.language : 'plaintext';
const html = highlighter.highlight(attributes.content, lang);
// Use `unescapeHTML` to return `HTMLString` for Astro renderer to inline as HTML
return unescapeHTML(html) as any;
@ -110,10 +22,3 @@ export default async function shiki({
},
};
}
/**
* shiki -> shikiji compat as we need to manually replace it
*/
function replaceCssVariables(str: string) {
return str.replace(COLOR_REPLACEMENT_REGEX, (match) => ASTRO_COLOR_REPLACEMENTS[match] || match);
}

View file

@ -32,6 +32,7 @@ export { rehypeHeadingIds } from './rehype-collect-headings.js';
export { remarkCollectImages } from './remark-collect-images.js';
export { remarkPrism } from './remark-prism.js';
export { remarkShiki } from './remark-shiki.js';
export { createShikiHighlighter, replaceCssVariables, type ShikiHighlighter } from './shiki.js';
export * from './types.js';
export const markdownConfigDefaults: Omit<Required<AstroMarkdownOptions>, 'drafts'> = {

View file

@ -1,109 +1,17 @@
import { bundledLanguages, getHighlighter, type Highlighter } from 'shikiji';
import { visit } from 'unist-util-visit';
import type { RemarkPlugin, ShikiConfig } from './types.js';
import { createShikiHighlighter, type ShikiHighlighter } from './shiki.js';
const ASTRO_COLOR_REPLACEMENTS: Record<string, string> = {
'#000001': 'var(--astro-code-color-text)',
'#000002': 'var(--astro-code-color-background)',
'#000004': 'var(--astro-code-token-constant)',
'#000005': 'var(--astro-code-token-string)',
'#000006': 'var(--astro-code-token-comment)',
'#000007': 'var(--astro-code-token-keyword)',
'#000008': 'var(--astro-code-token-parameter)',
'#000009': 'var(--astro-code-token-function)',
'#000010': 'var(--astro-code-token-string-expression)',
'#000011': 'var(--astro-code-token-punctuation)',
'#000012': 'var(--astro-code-token-link)',
};
const COLOR_REPLACEMENT_REGEX = new RegExp(
`(${Object.keys(ASTRO_COLOR_REPLACEMENTS).join('|')})`,
'g'
);
/**
* getHighlighter() is the most expensive step of Shiki. Instead of calling it on every page,
* cache it here as much as possible. Make sure that your highlighters can be cached, state-free.
* We make this async, so that multiple calls to parse markdown still share the same highlighter.
*/
const highlighterCacheAsync = new Map<string, Promise<Highlighter>>();
export function remarkShiki({
langs = [],
theme = 'github-dark',
experimentalThemes = {},
wrap = false,
}: ShikiConfig = {}): ReturnType<RemarkPlugin> {
const themes = experimentalThemes;
const cacheId =
Object.values(themes)
.map((t) => (typeof t === 'string' ? t : t.name ?? ''))
.join(',') +
(typeof theme === 'string' ? theme : theme.name ?? '') +
langs.map((l) => l.name ?? (l as any).id).join(',');
let highlighterAsync = highlighterCacheAsync.get(cacheId);
if (!highlighterAsync) {
highlighterAsync = getHighlighter({
langs: langs.length ? langs : Object.keys(bundledLanguages),
themes: Object.values(themes).length ? Object.values(themes) : [theme],
});
highlighterCacheAsync.set(cacheId, highlighterAsync);
}
export function remarkShiki(config?: ShikiConfig): ReturnType<RemarkPlugin> {
let highlighterAsync: Promise<ShikiHighlighter> | undefined;
return async (tree: any) => {
const highlighter = await highlighterAsync!;
highlighterAsync ??= createShikiHighlighter(config);
const highlighter = await highlighterAsync;
visit(tree, 'code', (node) => {
let lang: string;
if (typeof node.lang === 'string') {
const langExists = highlighter.getLoadedLanguages().includes(node.lang);
if (langExists) {
lang = node.lang;
} else {
// eslint-disable-next-line no-console
console.warn(`The language "${node.lang}" doesn't exist, falling back to plaintext.`);
lang = 'plaintext';
}
} else {
lang = 'plaintext';
}
let themeOptions = Object.values(themes).length ? { themes } : { theme };
let html = highlighter.codeToHtml(node.value, { ...themeOptions, lang });
// Q: Couldn't these regexes match on a user's inputted code blocks?
// A: Nope! All rendered HTML is properly escaped.
// Ex. If a user typed `<span class="line"` into a code block,
// It would become this before hitting our regexes:
// &lt;span class=&quot;line&quot;
// Replace "shiki" class naming with "astro".
html = html.replace(/<pre class="(.*?)shiki(.*?)"/, `<pre class="$1astro-code$2"`);
// Add "user-select: none;" for "+"/"-" diff symbols
if (node.lang === 'diff') {
html = html.replace(
/<span class="line"><span style="(.*?)">([\+|\-])/g,
'<span class="line"><span style="$1"><span style="user-select: none;">$2</span>'
);
}
// Handle code wrapping
// if wrap=null, do nothing.
if (wrap === false) {
html = html.replace(/style="(.*?)"/, 'style="$1; overflow-x: auto;"');
} else if (wrap === true) {
html = html.replace(
/style="(.*?)"/,
'style="$1; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;"'
);
}
// theme.id for shiki -> shikiji compat
const themeName = typeof theme === 'string' ? theme : theme.name;
if (themeName === 'css-variables') {
html = html.replace(/style="(.*?)"/g, (m) => replaceCssVariables(m));
}
const lang = typeof node.lang === 'string' ? node.lang : 'plaintext';
const html = highlighter.highlight(node.value, lang);
node.type = 'html';
node.value = html;
@ -111,10 +19,3 @@ export function remarkShiki({
});
};
}
/**
* shiki -> shikiji compat as we need to manually replace it
*/
function replaceCssVariables(str: string) {
return str.replace(COLOR_REPLACEMENT_REGEX, (match) => ASTRO_COLOR_REPLACEMENTS[match] || match);
}

View file

@ -0,0 +1,135 @@
import { bundledLanguages, getHighlighter } from 'shikiji';
import { visit } from 'unist-util-visit';
import type { ShikiConfig } from './types.js';
/**
 * Minimal highlighter surface returned by `createShikiHighlighter`.
 */
export interface ShikiHighlighter {
/**
 * Renders `code` as a highlighted HTML string.
 *
 * @param code The source text to highlight.
 * @param lang Language to highlight as. The implementation in this file defaults to
 * `'plaintext'` and falls back to it (with a console warning) when the language is not loaded.
 * @param options `inline: true` renders a `<code>` element instead of a `<pre>` block.
 * @returns The generated HTML.
 */
highlight(code: string, lang?: string, options?: { inline?: boolean }): string;
}
// Maps the placeholder hex colors found in highlighted output to the `--astro-code-*`
// CSS variables that replace them (see `replaceCssVariables`).
// NOTE(review): there is no '#000003' entry — presumably intentional; confirm against the theme.
const ASTRO_COLOR_REPLACEMENTS: Record<string, string> = {
'#000001': 'var(--astro-code-color-text)',
'#000002': 'var(--astro-code-color-background)',
'#000004': 'var(--astro-code-token-constant)',
'#000005': 'var(--astro-code-token-string)',
'#000006': 'var(--astro-code-token-comment)',
'#000007': 'var(--astro-code-token-keyword)',
'#000008': 'var(--astro-code-token-parameter)',
'#000009': 'var(--astro-code-token-function)',
'#000010': 'var(--astro-code-token-string-expression)',
'#000011': 'var(--astro-code-token-punctuation)',
'#000012': 'var(--astro-code-token-link)',
};
// Global alternation over every placeholder color above, so a single `String.replace`
// pass can swap all occurrences in a style string.
const COLOR_REPLACEMENT_REGEX = new RegExp(
`(${Object.keys(ASTRO_COLOR_REPLACEMENTS).join('|')})`,
'g'
);
/**
 * Creates a highlighter preconfigured from Astro's Shiki options.
 *
 * The underlying shikiji highlighter is created once. The returned object exposes a single
 * `highlight(code, lang, options)` method that renders code to an HTML string and applies
 * Astro's adjustments to the generated tree:
 * - the `shiki` class on the wrapper is renamed to `astro-code`;
 * - wrapping styles are appended to the wrapper according to `wrap`;
 * - in `diff` blocks, a leading `+`/`-` is moved into a `user-select: none` span;
 * - for the single `css-variables` theme, placeholder colors are replaced with CSS variables;
 * - with `options.inline`, a `<code>` element is emitted instead of `<pre>`.
 *
 * Options (all optional): `langs` defaults to every bundled language, `theme` to
 * `'github-dark'`, `experimentalThemes` to `{}` (when non-empty it is used instead of
 * `theme`), and `wrap` to `false`.
 *
 * @returns A promise resolving to the configured `ShikiHighlighter`.
 */
export async function createShikiHighlighter({
  langs = [],
  theme = 'github-dark',
  experimentalThemes = {},
  wrap = false,
}: ShikiConfig = {}): Promise<ShikiHighlighter> {
  // Everything derived here depends only on the config, so it is resolved once
  // instead of on every `highlight()` call.
  const themes = experimentalThemes;
  const hasMultipleThemes = Object.values(themes).length > 0;

  // A theme is either a bundled theme name or a theme object carrying its own `name`.
  const themeName = typeof theme === 'string' ? theme : theme.name;
  // Color placeholders are only rewritten for the single-theme `css-variables` setup.
  const usesCssVariables = !hasMultipleThemes && themeName === 'css-variables';

  // `wrap === null` (anything that is not strictly a boolean) leaves the style untouched.
  let wrapStyle: string | undefined;
  if (wrap === true) {
    wrapStyle = '; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;';
  } else if (wrap === false) {
    wrapStyle = '; overflow-x: auto;';
  }

  const highlighter = await getHighlighter({
    langs: langs.length ? langs : Object.keys(bundledLanguages),
    themes: hasMultipleThemes ? Object.values(themes) : [theme],
  });
  const loadedLanguages = highlighter.getLoadedLanguages();

  return {
    highlight(code, lang = 'plaintext', options) {
      // Resolve into a local instead of reassigning the parameter
      let resolvedLang = lang;
      if (resolvedLang !== 'plaintext' && !loadedLanguages.includes(resolvedLang)) {
        // eslint-disable-next-line no-console
        console.warn(`[Shiki] The language "${lang}" doesn't exist, falling back to "plaintext".`);
        resolvedLang = 'plaintext';
      }

      // Built per call so the options object handed to shikiji is never shared between renders
      const themeOptions = hasMultipleThemes ? { themes } : { theme };
      const inline = options?.inline ?? false;

      return highlighter.codeToHtml(code, {
        ...themeOptions,
        lang: resolvedLang,
        transforms: {
          pre(node) {
            // Swap to `code` tag if inline
            if (inline) {
              node.tagName = 'code';
            }

            // Cast to string as shikiji will always pass them as strings instead of any other types
            const classValue = (node.properties.class as string) ?? '';
            const styleValue = (node.properties.style as string) ?? '';

            // Replace "shiki" class naming with "astro-code"
            node.properties.class = classValue.replace(/shiki/g, 'astro-code');

            // Handle code wrapping (skipped entirely when `wrap` is not a boolean)
            if (wrapStyle !== undefined) {
              node.properties.style = styleValue + wrapStyle;
            }
          },
          line(node) {
            // Add "user-select: none;" for "+"/"-" diff symbols.
            // Transform `<span class="line"><span style="...">+ something</span></span>
            // into `<span class="line"><span style="..."><span style="user-select: none;">+</span> something</span></span>`
            if (resolvedLang !== 'diff') return;

            const innerSpanNode = node.children[0];
            if (innerSpanNode?.type !== 'element') return;

            const innerSpanTextNode = innerSpanNode.children?.[0];
            if (innerSpanTextNode?.type !== 'text') return;

            const start = innerSpanTextNode.value[0];
            if (start !== '+' && start !== '-') return;

            // Move the marker out of the text and into its own unselectable span
            innerSpanTextNode.value = innerSpanTextNode.value.slice(1);
            innerSpanNode.children.unshift({
              type: 'element',
              tagName: 'span',
              properties: { style: 'user-select: none;' },
              children: [{ type: 'text', value: start }],
            });
          },
          code(node) {
            // Inline output replaces the `code` node with its first child
            if (inline) {
              return node.children[0] as typeof node;
            }
          },
          root(node) {
            if (!usesCssVariables) {
              return;
            }
            // Replace special color tokens to CSS variables
            visit(node as any, 'element', (child) => {
              if (child.properties?.style) {
                child.properties.style = replaceCssVariables(child.properties.style);
              }
            });
          },
        },
      });
    },
  };
}
/**
 * Rewrites every placeholder color listed in `ASTRO_COLOR_REPLACEMENTS` that occurs in `str`
 * into its `var(--astro-code-*)` counterpart; all other text is returned unchanged.
 *
 * Needed for shiki -> shikiji compat, as the replacement has to be done manually.
 * @internal Exported for error overlay use only
 */
export function replaceCssVariables(str: string) {
  return str.replace(COLOR_REPLACEMENT_REGEX, (placeholder) => {
    const cssVariable = ASTRO_COLOR_REPLACEMENTS[placeholder];
    // Keep the matched text as-is when the table has no usable entry for it
    return cssVariable || placeholder;
  });
}

View file

@ -1,4 +1,4 @@
import { createMarkdownProcessor } from '../dist/index.js';
import { createMarkdownProcessor, createShikiHighlighter } from '../dist/index.js';
import chai from 'chai';
describe('shiki syntax highlighting', () => {
@ -27,4 +27,27 @@ describe('shiki syntax highlighting', () => {
chai.expect(code).to.contain('--shiki-dark-bg:');
chai.expect(code).to.contain('github-dark');
});
it('createShikiHighlighter works', async () => {
const highlighter = await createShikiHighlighter();
const html = highlighter.highlight('const foo = "bar";', 'js');
chai.expect(html).to.contain('astro-code github-dark');
chai.expect(html).to.contain('background-color:#24292e;color:#e1e4e8;');
});
it('diff +/- text has user-select: none', async () => {
const highlighter = await createShikiHighlighter();
const html = highlighter.highlight(
`\
- const foo = "bar";
+ const foo = "world";`,
'diff'
);
chai.expect(html).to.contain('user-select: none');
chai.expect(html).to.contain('>-</span>');
chai.expect(html).to.contain('>+</span>');
});
});