From a31bbd7ff8f3ec62ee507f72d1d25140b82ffc18 Mon Sep 17 00:00:00 2001 From: Remco Haszing Date: Fri, 8 Mar 2024 11:53:39 +0100 Subject: [PATCH] =?UTF-8?q?fix(markdown):=20don=E2=80=99t=20generate=20mda?= =?UTF-8?q?st=20html=20nodes=20(#10104)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(markdown): don’t generate mdast html nodes `html` nodes from mdast are converted to `raw` hast nodes. These nodes are then not processed by proper rehype plugins. Typically if a remark plugin generates `html` nodes, this indicates it should have actually been a rehype plugin. This changes the remark plugins that generate `html` nodes into rehype plugins. These were `remarkPrism` and `remarkShiki`. Closes #9909 * Apply suggestions from code review * refactor(mdx): move user defined rehype plugins after syntax highlighting * fix(mdx): fix issue in mdx rehype plugin ordering * docs: explain why html/raw nodes are avoided in changeset This also includes some hints on what users could do to upgrade if they rely on these nodes. * Fix MDX rehype plugin ordering * refactor(remark): restore remarkPrism and remarkShiki They aren’t used anymore, but removing would be a breaking change. 
* chore: mark deprecated * Apply suggestions from code review Co-authored-by: Sarah Rainsberger * Update .changeset/thirty-beds-smoke.md Co-authored-by: Sarah Rainsberger --------- Co-authored-by: Emanuele Stoppa Co-authored-by: Bjorn Lu Co-authored-by: Sarah Rainsberger --- .changeset/thirty-beds-smoke.md | 12 ++++ packages/integrations/mdx/src/plugins.ts | 47 ++++++------- packages/markdown/remark/package.json | 4 ++ packages/markdown/remark/src/highlight.ts | 70 ++++++++++++++++++++ packages/markdown/remark/src/index.ts | 22 +++--- packages/markdown/remark/src/rehype-prism.ts | 12 ++++ packages/markdown/remark/src/rehype-shiki.ts | 16 +++++ packages/markdown/remark/src/remark-prism.ts | 3 + packages/markdown/remark/src/remark-shiki.ts | 3 + pnpm-lock.yaml | 14 +++- 10 files changed, 166 insertions(+), 37 deletions(-) create mode 100644 .changeset/thirty-beds-smoke.md create mode 100644 packages/markdown/remark/src/highlight.ts create mode 100644 packages/markdown/remark/src/rehype-prism.ts create mode 100644 packages/markdown/remark/src/rehype-shiki.ts diff --git a/.changeset/thirty-beds-smoke.md b/.changeset/thirty-beds-smoke.md new file mode 100644 index 0000000000..6d57166c12 --- /dev/null +++ b/.changeset/thirty-beds-smoke.md @@ -0,0 +1,12 @@ +--- +"@astrojs/mdx": minor +"@astrojs/markdown-remark": minor +--- + +Changes Astro's internal syntax highlighting to use rehype plugins instead of remark plugins. This provides better interoperability with other [rehype plugins](https://github.com/rehypejs/rehype/blob/main/doc/plugins.md#list-of-plugins) that deal with code blocks, in particular with third party syntax highlighting plugins and [`rehype-mermaid`](https://github.com/remcohaszing/rehype-mermaid). + +This may be a breaking change if you are currently using: +- a remark plugin that relies on nodes of type `html` +- a rehype plugin that depends on nodes of type `raw`. 
+ +Please review your rendered code samples carefully, and if necessary, consider using a rehype plugin that deals with the generated `element` nodes instead. You can transform the AST of raw HTML strings, or alternatively use [`hast-util-to-html`](https://github.com/syntax-tree/hast-util-to-html) to get a string from a `raw` node. diff --git a/packages/integrations/mdx/src/plugins.ts b/packages/integrations/mdx/src/plugins.ts index d52f303019..5bc7ca982c 100644 --- a/packages/integrations/mdx/src/plugins.ts +++ b/packages/integrations/mdx/src/plugins.ts @@ -1,8 +1,8 @@ import { rehypeHeadingIds, + rehypePrism, + rehypeShiki, remarkCollectImages, - remarkPrism, - remarkShiki, } from '@astrojs/markdown-remark'; import { createProcessor, nodeTypes } from '@mdx-js/mdx'; import rehypeRaw from 'rehype-raw'; @@ -54,22 +54,7 @@ function getRemarkPlugins(mdxOptions: MdxOptions): PluggableList { } } - remarkPlugins = [ - ...remarkPlugins, - ...mdxOptions.remarkPlugins, - remarkCollectImages, - remarkImageToComponent, - ]; - - if (!isPerformanceBenchmark) { - // Apply syntax highlighters after user plugins to match `markdown/remark` behavior - if (mdxOptions.syntaxHighlight === 'shiki') { - remarkPlugins.push([remarkShiki, mdxOptions.shikiConfig]); - } - if (mdxOptions.syntaxHighlight === 'prism') { - remarkPlugins.push(remarkPrism); - } - } + remarkPlugins.push(...mdxOptions.remarkPlugins, remarkCollectImages, remarkImageToComponent); return remarkPlugins; } @@ -79,18 +64,28 @@ function getRehypePlugins(mdxOptions: MdxOptions): PluggableList { // ensure `data.meta` is preserved in `properties.metastring` for rehype syntax highlighters rehypeMetaString, // rehypeRaw allows custom syntax highlighters to work without added config - [rehypeRaw, { passThrough: nodeTypes }] as any, + [rehypeRaw, { passThrough: nodeTypes }], ]; - rehypePlugins = [ - ...rehypePlugins, - ...mdxOptions.rehypePlugins, + if (!isPerformanceBenchmark) { + // Apply syntax highlighters after user plugins 
to match `markdown/remark` behavior + if (mdxOptions.syntaxHighlight === 'shiki') { + rehypePlugins.push([rehypeShiki, mdxOptions.shikiConfig]); + } else if (mdxOptions.syntaxHighlight === 'prism') { + rehypePlugins.push(rehypePrism); + } + } + + rehypePlugins.push(...mdxOptions.rehypePlugins); + + if (!isPerformanceBenchmark) { // getHeadings() is guaranteed by TS, so this must be included. // We run `rehypeHeadingIds` _last_ to respect any custom IDs set by user plugins. - ...(isPerformanceBenchmark ? [] : [rehypeHeadingIds, rehypeInjectHeadingsExport]), - // computed from `astro.data.frontmatter` in VFile data - rehypeApplyFrontmatterExport, - ]; + rehypePlugins.push(rehypeHeadingIds, rehypeInjectHeadingsExport); + } + + // computed from `astro.data.frontmatter` in VFile data + rehypePlugins.push(rehypeApplyFrontmatterExport); if (mdxOptions.optimize) { // Convert user `optimize` option to compatible `rehypeOptimizeStatic` option diff --git a/packages/markdown/remark/package.json b/packages/markdown/remark/package.json index 98a2393013..3e07f49d15 100644 --- a/packages/markdown/remark/package.json +++ b/packages/markdown/remark/package.json @@ -36,6 +36,8 @@ "dependencies": { "@astrojs/prism": "^3.0.0", "github-slugger": "^2.0.0", + "hast-util-from-html": "^2.0.0", + "hast-util-to-text": "^4.0.0", "import-meta-resolve": "^4.0.0", "mdast-util-definitions": "^6.0.0", "rehype-raw": "^7.0.0", @@ -46,7 +48,9 @@ "remark-smartypants": "^2.0.0", "shiki": "^1.1.2", "unified": "^11.0.4", + "unist-util-remove-position": "^5.0.0", "unist-util-visit": "^5.0.0", + "unist-util-visit-parents": "^6.0.0", "vfile": "^6.0.1" }, "devDependencies": { diff --git a/packages/markdown/remark/src/highlight.ts b/packages/markdown/remark/src/highlight.ts new file mode 100644 index 0000000000..eaf4c9bdf9 --- /dev/null +++ b/packages/markdown/remark/src/highlight.ts @@ -0,0 +1,70 @@ +import type { Element, Root } from 'hast'; +import { fromHtml } from 'hast-util-from-html'; +import { toText } 
from 'hast-util-to-text'; +import { removePosition } from 'unist-util-remove-position'; +import { visitParents } from 'unist-util-visit-parents'; + +type Highlighter = (code: string, language: string) => string; + +const languagePattern = /\blanguage-(\S+)\b/; + +/** + * A hast utility to syntax highlight code blocks with a given syntax highlighter. + * + * @param tree + * The hast tree in which to syntax highlight code blocks. + * @param highlighter + * A function which receives the code and language, and returns the HTML of a syntax + * highlighted `<pre>` element.
+ */
+export function highlightCodeBlocks(tree: Root, highlighter: Highlighter) {
+	// We’re looking for `<code>` elements
+	visitParents(tree, { type: 'element', tagName: 'code' }, (node, ancestors) => {
+		const parent = ancestors.at(-1);
+
+		// Whose parent is a `<pre>`.
+		if (parent?.type !== 'element' || parent.tagName !== 'pre') {
+			return;
+		}
+
+		// Where the `<code>` is the only child.
+		if (parent.children.length !== 1) {
+			return;
+		}
+
+		// And the `<code>` has a class name that starts with `language-`.
+		let languageMatch: RegExpMatchArray | null | undefined;
+		let { className } = node.properties;
+		if (typeof className === 'string') {
+			languageMatch = className.match(languagePattern);
+		} else if (Array.isArray(className)) {
+			for (const cls of className) {
+				if (typeof cls !== 'string') {
+					continue;
+				}
+
+				languageMatch = cls.match(languagePattern);
+				if (languageMatch) {
+					break;
+				}
+			}
+		}
+
+		// Don’t highlight math code blocks.
+		if (languageMatch?.[1] === 'math') {
+			return;
+		}
+
+		const code = toText(node, { whitespace: 'pre' });
+		const html = highlighter(code, languageMatch?.[1] || 'plaintext');
+		// The replacement returns a root node with 1 child, the `<pre>` element replacement.
+		const replacement = fromHtml(html, { fragment: true }).children[0] as Element;
+		// We just generated this node, so any positional information is invalid.
+		removePosition(replacement);
+
+		// We replace the parent in its parent with the new `<pre>` element.
+		const grandParent = ancestors.at(-2)!;
+		const index = grandParent.children.indexOf(parent);
+		grandParent.children[index] = replacement;
+	});
+}
diff --git a/packages/markdown/remark/src/index.ts b/packages/markdown/remark/src/index.ts
index 6ab0e5c676..ef1afc8fba 100644
--- a/packages/markdown/remark/src/index.ts
+++ b/packages/markdown/remark/src/index.ts
@@ -7,9 +7,9 @@ import {
 } from './frontmatter-injection.js';
 import { loadPlugins } from './load-plugins.js';
 import { rehypeHeadingIds } from './rehype-collect-headings.js';
+import { rehypePrism } from './rehype-prism.js';
+import { rehypeShiki } from './rehype-shiki.js';
 import { remarkCollectImages } from './remark-collect-images.js';
-import { remarkPrism } from './remark-prism.js';
-import { remarkShiki } from './remark-shiki.js';
 
 import rehypeRaw from 'rehype-raw';
 import rehypeStringify from 'rehype-stringify';
@@ -24,6 +24,8 @@ import { rehypeImages } from './rehype-images.js';
 export { InvalidAstroDataError, setVfileFrontmatter } from './frontmatter-injection.js';
 export { rehypeHeadingIds } from './rehype-collect-headings.js';
 export { remarkCollectImages } from './remark-collect-images.js';
+export { rehypePrism } from './rehype-prism.js';
+export { rehypeShiki } from './rehype-shiki.js';
 export { remarkPrism } from './remark-prism.js';
 export { remarkShiki } from './remark-shiki.js';
 export { createShikiHighlighter, replaceCssVariables, type ShikiHighlighter } from './shiki.js';
@@ -85,13 +87,6 @@ export async function createMarkdownProcessor(
 	}
 
 	if (!isPerformanceBenchmark) {
-		// Syntax highlighting
-		if (syntaxHighlight === 'shiki') {
-			parser.use(remarkShiki, shikiConfig);
-		} else if (syntaxHighlight === 'prism') {
-			parser.use(remarkPrism);
-		}
-
 		// Apply later in case user plugins resolve relative image paths
 		parser.use(remarkCollectImages);
 	}
@@ -103,6 +98,15 @@ export async function createMarkdownProcessor(
 		...remarkRehypeOptions,
 	});
 
+	if (!isPerformanceBenchmark) {
+		// Syntax highlighting
+		if (syntaxHighlight === 'shiki') {
+			parser.use(rehypeShiki, shikiConfig);
+		} else if (syntaxHighlight === 'prism') {
+			parser.use(rehypePrism);
+		}
+	}
+
 	// User rehype plugins
 	for (const [plugin, pluginOpts] of loadedRehypePlugins) {
 		parser.use(plugin, pluginOpts);
diff --git a/packages/markdown/remark/src/rehype-prism.ts b/packages/markdown/remark/src/rehype-prism.ts
new file mode 100644
index 0000000000..4305a06767
--- /dev/null
+++ b/packages/markdown/remark/src/rehype-prism.ts
@@ -0,0 +1,12 @@
+import { runHighlighterWithAstro } from '@astrojs/prism/dist/highlighter';
+import type { Root } from 'hast';
+import type { Plugin } from 'unified';
+import { highlightCodeBlocks } from './highlight.js';
+
+export const rehypePrism: Plugin<[], Root> = () => (tree) => {
+	highlightCodeBlocks(tree, (code, language) => {
+		let { html, classLanguage } = runHighlighterWithAstro(language, code);
+
+		return `
${html}
`; + }); +}; diff --git a/packages/markdown/remark/src/rehype-shiki.ts b/packages/markdown/remark/src/rehype-shiki.ts new file mode 100644 index 0000000000..137cb81411 --- /dev/null +++ b/packages/markdown/remark/src/rehype-shiki.ts @@ -0,0 +1,16 @@ +import type { Root } from 'hast'; +import type { Plugin } from 'unified'; +import { createShikiHighlighter, type ShikiHighlighter } from './shiki.js'; +import type { ShikiConfig } from './types.js'; +import { highlightCodeBlocks } from './highlight.js'; + +export const rehypeShiki: Plugin<[ShikiConfig?], Root> = (config) => { + let highlighterAsync: Promise | undefined; + + return async (tree) => { + highlighterAsync ??= createShikiHighlighter(config); + const highlighter = await highlighterAsync; + + highlightCodeBlocks(tree, highlighter.highlight); + }; +}; diff --git a/packages/markdown/remark/src/remark-prism.ts b/packages/markdown/remark/src/remark-prism.ts index a3f476d6e4..49e38d73cf 100644 --- a/packages/markdown/remark/src/remark-prism.ts +++ b/packages/markdown/remark/src/remark-prism.ts @@ -2,6 +2,9 @@ import { runHighlighterWithAstro } from '@astrojs/prism/dist/highlighter'; import { visit } from 'unist-util-visit'; import type { RemarkPlugin } from './types.js'; +/** + * @deprecated Use `rehypePrism` instead + */ export function remarkPrism(): ReturnType { return function (tree: any) { visit(tree, 'code', (node) => { diff --git a/packages/markdown/remark/src/remark-shiki.ts b/packages/markdown/remark/src/remark-shiki.ts index 7c1cdd0750..512cd453aa 100644 --- a/packages/markdown/remark/src/remark-shiki.ts +++ b/packages/markdown/remark/src/remark-shiki.ts @@ -2,6 +2,9 @@ import { visit } from 'unist-util-visit'; import { type ShikiHighlighter, createShikiHighlighter } from './shiki.js'; import type { RemarkPlugin, ShikiConfig } from './types.js'; +/** + * @deprecated Use `rehypeShiki` instead + */ export function remarkShiki(config?: ShikiConfig): ReturnType { let highlighterAsync: Promise | undefined; 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e32f61f2b0..b8e8d09977 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -5212,6 +5212,12 @@ importers: github-slugger: specifier: ^2.0.0 version: 2.0.0 + hast-util-from-html: + specifier: ^2.0.0 + version: 2.0.1 + hast-util-to-text: + specifier: ^4.0.0 + version: 4.0.0 import-meta-resolve: specifier: ^4.0.0 version: 4.0.0 @@ -5242,9 +5248,15 @@ importers: unified: specifier: ^11.0.4 version: 11.0.4 + unist-util-remove-position: + specifier: ^5.0.0 + version: 5.0.0 unist-util-visit: specifier: ^5.0.0 version: 5.0.0 + unist-util-visit-parents: + specifier: ^6.0.0 + version: 6.0.1 vfile: specifier: ^6.0.1 version: 6.0.1 @@ -11259,7 +11271,6 @@ packages: '@types/unist': 3.0.2 hast-util-is-element: 3.0.0 unist-util-find-after: 5.0.0 - dev: true /hast-util-whitespace@3.0.0: resolution: {integrity: sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==} @@ -16068,7 +16079,6 @@ packages: dependencies: '@types/unist': 3.0.2 unist-util-is: 6.0.0 - dev: true /unist-util-is@3.0.0: resolution: {integrity: sha512-sVZZX3+kspVNmLWBPAB6r+7D9ZgAFPNWm66f7YNb420RlQSbn+n8rG8dGZSkrER7ZIXGQYNm5pqC3v3HopH24A==}