From 3dc02c57e4060cb2bde7c4e05d91841dd5dd8eb7 Mon Sep 17 00:00:00 2001 From: oliverlynch <59594611+oliverlynch@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:20:12 +0800 Subject: [PATCH] feat(assets): Use entity-tags to revalidate cached remote images (#12426) * feat(assets): Store etag to refresh cached images without a full download * Separate loading and revalidating functions * Add changeset * Updates based on requested changes * Wording changes, use stale cache on failure to revalidate * Add If-Modified-Since as cache revalidation method * Update .changeset/red-poems-pay.md Co-authored-by: Sarah Rainsberger <5098874+sarah11918@users.noreply.github.com> --------- Co-authored-by: Matt Kane Co-authored-by: Sarah Rainsberger <5098874+sarah11918@users.noreply.github.com> --- .changeset/red-poems-pay.md | 7 ++ packages/astro/src/assets/build/generate.ts | 101 +++++++++++++++----- packages/astro/src/assets/build/remote.ts | 62 +++++++++++- 3 files changed, 144 insertions(+), 26 deletions(-) create mode 100644 .changeset/red-poems-pay.md diff --git a/.changeset/red-poems-pay.md b/.changeset/red-poems-pay.md new file mode 100644 index 0000000000..250b12de6b --- /dev/null +++ b/.changeset/red-poems-pay.md @@ -0,0 +1,7 @@ +--- +'astro': minor +--- + +Improves asset caching of remote images + +Astro will now store [entity tags](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag) and the [Last-Modified](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified) date for cached remote images and use them to revalidate the cache when it goes stale. 
diff --git a/packages/astro/src/assets/build/generate.ts b/packages/astro/src/assets/build/generate.ts index 37b6915255..89358e3c36 100644 --- a/packages/astro/src/assets/build/generate.ts +++ b/packages/astro/src/assets/build/generate.ts @@ -15,10 +15,10 @@ import { getConfiguredImageService } from '../internal.js'; import type { LocalImageService } from '../services/service.js'; import type { AssetsGlobalStaticImagesList, ImageMetadata, ImageTransform } from '../types.js'; import { isESMImportedImage } from '../utils/imageKind.js'; -import { type RemoteCacheEntry, loadRemoteImage } from './remote.js'; +import { type RemoteCacheEntry, loadRemoteImage, revalidateRemoteImage } from './remote.js'; interface GenerationDataUncached { - cached: false; + cached: 'miss'; weight: { before: number; after: number; @@ -26,7 +26,7 @@ interface GenerationDataUncached { } interface GenerationDataCached { - cached: true; + cached: 'revalidated' | 'hit'; } type GenerationData = GenerationDataUncached | GenerationDataCached; @@ -43,7 +43,12 @@ type AssetEnv = { assetsFolder: AstroConfig['build']['assets']; }; -type ImageData = { data: Uint8Array; expires: number }; +type ImageData = { + data: Uint8Array; + expires: number; + etag?: string; + lastModified?: string; +}; export async function prepareAssetsGenerationEnv( pipeline: BuildPipeline, @@ -135,9 +140,12 @@ export async function generateImagesForPath( const timeEnd = performance.now(); const timeChange = getTimeStat(timeStart, timeEnd); const timeIncrease = `(+${timeChange})`; - const statsText = generationData.cached - ? `(reused cache entry)` - : `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`; + const statsText = + generationData.cached !== 'miss' + ? generationData.cached === 'hit' + ? 
`(reused cache entry)` + : `(revalidated cache entry)` + : `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`; const count = `(${env.count.current}/${env.count.total})`; env.logger.info( null, @@ -156,7 +164,7 @@ export async function generateImagesForPath( const finalFolderURL = new URL('./', finalFileURL); await fs.promises.mkdir(finalFolderURL, { recursive: true }); - // For remote images, instead of saving the image directly, we save a JSON file with the image data and expiration date from the server + // For remote images, instead of saving the image directly, we save a JSON file with the image data, expiration date, etag and last-modified date from the server const cacheFile = basename(filepath) + (isLocalImage ? '' : '.json'); const cachedFileURL = new URL(cacheFile, env.assetsCacheDir); @@ -166,7 +174,7 @@ export async function generateImagesForPath( await fs.promises.copyFile(cachedFileURL, finalFileURL, fs.constants.COPYFILE_FICLONE); return { - cached: true, + cached: 'hit', }; } else { const JSONData = JSON.parse(readFileSync(cachedFileURL, 'utf-8')) as RemoteCacheEntry; @@ -184,11 +192,43 @@ export async function generateImagesForPath( await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64')); return { - cached: true, + cached: 'hit', }; - } else { - await fs.promises.unlink(cachedFileURL); } + + // Try to revalidate the cache + if (JSONData.etag || JSONData.lastModified) { + try { + const revalidatedData = await revalidateRemoteImage(options.src as string, { + etag: JSONData.etag, + lastModified: JSONData.lastModified, + }); + + if (revalidatedData.data.length) { + // Image cache was stale, update original image to avoid redownload + originalImage = revalidatedData; + } else { + revalidatedData.data = Buffer.from(JSONData.data, 'base64'); + + // Freshen cache on disk + await writeRemoteCacheFile(cachedFileURL, revalidatedData, env); + + await fs.promises.writeFile(finalFileURL, 
revalidatedData.data); + return { cached: 'revalidated' }; + } + } catch (e) { + // Reuse stale cache if revalidation fails + env.logger.warn( + null, + `An error was encountered while revalidating a cached remote asset. Proceeding with stale cache. ${e}`, + ); + + await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64')); + return { cached: 'hit' }; + } + } + + await fs.promises.unlink(cachedFileURL); } } catch (e: any) { if (e.code !== 'ENOENT') { @@ -209,6 +249,8 @@ export async function generateImagesForPath( let resultData: Partial = { data: undefined, expires: originalImage.expires, + etag: originalImage.etag, + lastModified: originalImage.lastModified, }; const imageService = (await getConfiguredImageService()) as LocalImageService; @@ -239,13 +281,7 @@ export async function generateImagesForPath( if (isLocalImage) { await fs.promises.writeFile(cachedFileURL, resultData.data); } else { - await fs.promises.writeFile( - cachedFileURL, - JSON.stringify({ - data: Buffer.from(resultData.data).toString('base64'), - expires: resultData.expires, - }), - ); + await writeRemoteCacheFile(cachedFileURL, resultData as ImageData, env); } } } catch (e) { @@ -259,7 +295,7 @@ export async function generateImagesForPath( } return { - cached: false, + cached: 'miss', weight: { // Divide by 1024 to get size in kilobytes before: Math.trunc(originalImage.data.byteLength / 1024), @@ -269,6 +305,25 @@ export async function generateImagesForPath( } } +async function writeRemoteCacheFile(cachedFileURL: URL, resultData: ImageData, env: AssetEnv) { + try { + return await fs.promises.writeFile( + cachedFileURL, + JSON.stringify({ + data: Buffer.from(resultData.data).toString('base64'), + expires: resultData.expires, + etag: resultData.etag, + lastModified: resultData.lastModified, + }), + ); + } catch (e) { + env.logger.warn( + null, + `An error was encountered while writing the cache file for a remote asset. Proceeding without caching this asset. 
Error: ${e}`, + ); + } +} + export function getStaticImageList(): AssetsGlobalStaticImagesList { if (!globalThis?.astroAsset?.staticImages) { return new Map(); @@ -279,11 +334,7 @@ export function getStaticImageList(): AssetsGlobalStaticImagesList { async function loadImage(path: string, env: AssetEnv): Promise { if (isRemotePath(path)) { - const remoteImage = await loadRemoteImage(path); - return { - data: remoteImage.data, - expires: remoteImage.expires, - }; + return await loadRemoteImage(path); } return { diff --git a/packages/astro/src/assets/build/remote.ts b/packages/astro/src/assets/build/remote.ts index 08f8f45191..acd451c48b 100644 --- a/packages/astro/src/assets/build/remote.ts +++ b/packages/astro/src/assets/build/remote.ts @@ -1,6 +1,11 @@ import CachePolicy from 'http-cache-semantics'; -export type RemoteCacheEntry = { data: string; expires: number }; +export type RemoteCacheEntry = { + data: string; + expires: number; + etag?: string; + lastModified?: string; +}; export async function loadRemoteImage(src: string) { const req = new Request(src); @@ -19,6 +24,61 @@ export async function loadRemoteImage(src: string) { return { data: Buffer.from(await res.arrayBuffer()), expires: Date.now() + expires, + etag: res.headers.get('Etag') ?? undefined, + lastModified: res.headers.get('Last-Modified') ?? undefined, + }; +} + +/** + * Revalidate a cached remote asset using its entity-tag or modified date. + * Uses the [If-None-Match](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match) and [If-Modified-Since](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since) + * headers to check with the remote server if the cached version of a remote asset is still up to date. 
+ * The remote server may respond that the cached asset is still up-to-date if the entity-tag or modification time matches (304 Not Modified), or respond with an updated asset (200 OK) + * @param src - URL of the remote asset + * @param revalidationData - an object containing the stored Entity-Tag of the cached asset and/or the Last Modified time + * @returns An ImageData object containing the asset data, a new expiry time, and the asset's etag and Last-Modified date. The data buffer will be empty if the asset was not modified. + */ +export async function revalidateRemoteImage( + src: string, + revalidationData: { etag?: string; lastModified?: string }, +) { + const headers = { + ...(revalidationData.etag && { 'If-None-Match': revalidationData.etag }), + ...(revalidationData.lastModified && { 'If-Modified-Since': revalidationData.lastModified }), + }; + const req = new Request(src, { headers }); + const res = await fetch(req); + + // Asset not modified: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/304 + if (!res.ok && res.status !== 304) { + throw new Error( + `Failed to revalidate cached remote image ${src}. The request did not return a 200 OK / 304 NOT MODIFIED response. (received ${res.status} ${res.statusText})`, + ); + } + + const data = Buffer.from(await res.arrayBuffer()); + + if (res.ok && !data.length) { + // Server did not include body but indicated cache was stale + return await loadRemoteImage(src); + } + + // calculate an expiration date based on the response's TTL + const policy = new CachePolicy( + webToCachePolicyRequest(req), + webToCachePolicyResponse( + res.ok ? res : new Response(null, { status: 200, headers: res.headers }), + ), // 304 responses themselves are not cacheable, so just pretend to get the refreshed TTL + ); + const expires = policy.storable() ? 
policy.timeToLive() : 0; + + return { + data, + expires: Date.now() + expires, + // While servers should respond with the same headers as a 200 response, if they don't we should reuse the stored value + etag: res.headers.get('Etag') ?? (res.ok ? undefined : revalidationData.etag), + lastModified: + res.headers.get('Last-Modified') ?? (res.ok ? undefined : revalidationData.lastModified), }; }