Improve Static Asset Generation Performance (#12922)

* fix(perf): do not `await` in `for` loop to enqueue asset transforms. Fixes #12845 * fix: avoid `queue.add()` during async for loop * Update changeset --------- Co-authored-by: Matt Kane <m@mk.gg>
2025-04-14 23:51:49 -05:00 · 2025-01-10 04:15:54 -05:00 · 2025-01-10 04:15:54 -05:00 · faf74af522
commit faf74af522
parent 1c36331782
3 changed files with 73 additions and 8 deletions
--- a/.changeset/new-radios-pay.md
+++ b/.changeset/new-radios-pay.md
@ -0,0 +1,5 @@
+---
+'astro': patch
+---
+
+Improves performance of static asset generation by fixing a bug that caused image transforms to be performed serially. This fix ensures that processing uses all CPUs when running in a multi-core environment.
--- a/packages/astro/src/assets/build/generate.ts
+++ b/packages/astro/src/assets/build/generate.ts
@ -1,7 +1,6 @@
 import fs, { readFileSync } from 'node:fs';
 import { basename } from 'node:path/posix';
 import { dim, green } from 'kleur/colors';
-import type PQueue from 'p-queue';
 import { getOutDirWithinCwd } from '../../core/build/common.js';
 import type { BuildPipeline } from '../../core/build/pipeline.js';
 import { getTimeStat } from '../../core/build/util.js';
@ -101,16 +100,11 @@ export async function generateImagesForPath(
 	originalFilePath: string,
 	transformsAndPath: MapValue<AssetsGlobalStaticImagesList>,
 	env: AssetEnv,
-	queue: PQueue,
 ) {
 	let originalImage: ImageData;

 	for (const [_, transform] of transformsAndPath.transforms) {
-		await queue
-			.add(async () => generateImage(transform.finalPath, transform.transform))
-			.catch((e) => {
-				throw e;
-			});
+		await generateImage(transform.finalPath, transform.transform);
 	}

 	// In SSR, we cannot know if an image is referenced in a server-rendered page, so we can't delete anything
--- a/packages/astro/src/core/build/generate.ts
+++ b/packages/astro/src/core/build/generate.ts
@ -125,7 +125,73 @@ export async function generatePages(options: StaticBuildOptions, internals: Buil

 		const assetsTimer = performance.now();
 		for (const [originalPath, transforms] of staticImageList) {
-			await generateImagesForPath(originalPath, transforms, assetsCreationPipeline, queue);
+			// Process each source image in parallel based on the queue’s concurrency
+			// (`cpuCount`). Process each transform for a source image sequentially.
+			//
+			// # Design Decision:
+			// We have 3 source images (A.png, B.png, C.png) and 3 transforms for
+			// each:
+			// ```
+			// A1.png A2.png A3.png
+			// B1.png B2.png B3.png
+			// C1.png C2.png C3.png
+			// ```
+			//
+			// ## Option 1
+			// Enqueue all transforms indiscriminantly
+			// ```
+			// |_A1.png   |_B2.png   |_C1.png
+			// |_B3.png   |_A2.png   |_C3.png
+			// |_C2.png   |_A3.png   |_B1.png
+			// ```
+			// * Advantage: Maximum parallelism, saturate CPU
+			// * Disadvantage: Spike in context switching
+			//
+			// ## Option 2
+			// Enqueue all transforms, but constrain processing order by source image
+			// ```
+			// |_A3.png   |_B1.png   |_C2.png
+			// |_A1.png   |_B3.png   |_C1.png
+			// |_A2.png   |_B2.png   |_C3.png
+			// ```
+			// * Advantage: Maximum parallelism, saturate CPU (same as Option 1) in
+			//   hope to avoid context switching
+			// * Disadvantage: Context switching still occurs and performance still
+			//   suffers
+			//
+			// ## Option 3
+			// Enqueue each source image, but perform the transforms for that source
+			// image sequentially
+			// ```
+			// \_A1.png   \_B1.png   \_C1.png
+			//  \_A2.png   \_B2.png   \_C2.png
+			//   \_A3.png   \_B3.png   \_C3.png
+			// ```
+			// * Advantage: Less context switching
+			// * Disadvantage: If you have a low number of source images with high
+			//   number of transforms then this is suboptimal.
+			//
+			// ## BEST OPTION:
+			// **Option 3**. Most projects will have a higher number of source images
+			// with a few transforms on each. Even though Option 2 should be faster
+			// and _should_ prevent context switching, this was not observed in
+			// nascent tests. Context switching was high and the overall performance
+			// was half of Option 3.
+			//
+			// If looking to optimize further, please consider the following:
+			// * Avoid `queue.add()` in an async for loop. Notice the `await
+			//   queue.onIdle();` after this loop. We do not want to create a scenario
+			//   where tasks are added to the queue after the queue.onIdle() resolves.
+			//   This can break tests and create annoying race conditions.
+			// * Exposing a concurrency property in `astro.config.mjs` to allow users
+			//   to override Node’s os.cpus().length default.
+			// * Create a proper performance benchmark for asset transformations of
+			//   projects in varying sizes of source images and transforms.
+			queue
+				.add(() => generateImagesForPath(originalPath, transforms, assetsCreationPipeline))
+				.catch((e) => {
+					throw e;
+				});
 		}

 		await queue.onIdle();