0
Fork 0
mirror of https://github.com/withastro/astro.git synced 2025-01-13 22:11:20 -05:00

feat(assets): Use entity-tags to revalidate cached remote images (#12426)

* feat(assets): Store etag to refresh cached images without a full download

* Separate loading and revalidating functions

* Add changeset

* Updates based on requested changes

* Wording changes, use stale cache on failure to revalidate

* Add If-Modified-Since as cache revalidation method

* Update .changeset/red-poems-pay.md

Co-authored-by: Sarah Rainsberger <5098874+sarah11918@users.noreply.github.com>

---------

Co-authored-by: Matt Kane <m@mk.gg>
Co-authored-by: Sarah Rainsberger <5098874+sarah11918@users.noreply.github.com>
This commit is contained in:
oliverlynch 2024-12-18 21:20:12 +08:00 committed by GitHub
parent ca3ff1504a
commit 3dc02c57e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 144 additions and 26 deletions

View file

@ -0,0 +1,7 @@
---
'astro': minor
---
Improves asset caching of remote images
Astro will now store [entity tags](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag) and the [Last-Modified](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified) date for cached remote images and use them to revalidate the cache when it goes stale.

View file

@ -15,10 +15,10 @@ import { getConfiguredImageService } from '../internal.js';
import type { LocalImageService } from '../services/service.js';
import type { AssetsGlobalStaticImagesList, ImageMetadata, ImageTransform } from '../types.js';
import { isESMImportedImage } from '../utils/imageKind.js';
import { type RemoteCacheEntry, loadRemoteImage } from './remote.js';
import { type RemoteCacheEntry, loadRemoteImage, revalidateRemoteImage } from './remote.js';
interface GenerationDataUncached {
cached: false;
cached: 'miss';
weight: {
before: number;
after: number;
@ -26,7 +26,7 @@ interface GenerationDataUncached {
}
interface GenerationDataCached {
cached: true;
cached: 'revalidated' | 'hit';
}
type GenerationData = GenerationDataUncached | GenerationDataCached;
@ -43,7 +43,12 @@ type AssetEnv = {
assetsFolder: AstroConfig['build']['assets'];
};
type ImageData = { data: Uint8Array; expires: number };
// In-memory representation of a source image together with the cache metadata
// that is persisted alongside it for remote images.
type ImageData = {
// Raw image bytes.
data: Uint8Array;
// Expiry timestamp; for remote images this is `Date.now()` plus the TTL derived from the response's cache headers.
expires: number;
// Value of the `Etag` response header, when the remote server sent one.
etag?: string;
// Value of the `Last-Modified` response header, when the remote server sent one.
lastModified?: string;
};
export async function prepareAssetsGenerationEnv(
pipeline: BuildPipeline,
@ -135,8 +140,11 @@ export async function generateImagesForPath(
const timeEnd = performance.now();
const timeChange = getTimeStat(timeStart, timeEnd);
const timeIncrease = `(+${timeChange})`;
const statsText = generationData.cached
const statsText =
generationData.cached !== 'miss'
? generationData.cached === 'hit'
? `(reused cache entry)`
: `(revalidated cache entry)`
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`;
const count = `(${env.count.current}/${env.count.total})`;
env.logger.info(
@ -156,7 +164,7 @@ export async function generateImagesForPath(
const finalFolderURL = new URL('./', finalFileURL);
await fs.promises.mkdir(finalFolderURL, { recursive: true });
// For remote images, instead of saving the image directly, we save a JSON file with the image data and expiration date from the server
// For remote images, instead of saving the image directly, we save a JSON file with the image data, expiration date, etag and last-modified date from the server
const cacheFile = basename(filepath) + (isLocalImage ? '' : '.json');
const cachedFileURL = new URL(cacheFile, env.assetsCacheDir);
@ -166,7 +174,7 @@ export async function generateImagesForPath(
await fs.promises.copyFile(cachedFileURL, finalFileURL, fs.constants.COPYFILE_FICLONE);
return {
cached: true,
cached: 'hit',
};
} else {
const JSONData = JSON.parse(readFileSync(cachedFileURL, 'utf-8')) as RemoteCacheEntry;
@ -184,11 +192,43 @@ export async function generateImagesForPath(
await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));
return {
cached: true,
cached: 'hit',
};
} else {
await fs.promises.unlink(cachedFileURL);
}
// Try to revalidate the cache
if (JSONData.etag || JSONData.lastModified) {
try {
const revalidatedData = await revalidateRemoteImage(options.src as string, {
etag: JSONData.etag,
lastModified: JSONData.lastModified,
});
if (revalidatedData.data.length) {
// Image cache was stale, update original image to avoid redownload
originalImage = revalidatedData;
} else {
revalidatedData.data = Buffer.from(JSONData.data, 'base64');
// Freshen cache on disk
await writeRemoteCacheFile(cachedFileURL, revalidatedData, env);
await fs.promises.writeFile(finalFileURL, revalidatedData.data);
return { cached: 'revalidated' };
}
} catch (e) {
// Reuse stale cache if revalidation fails
env.logger.warn(
null,
`An error was encountered while revalidating a cached remote asset. Proceeding with stale cache. ${e}`,
);
await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));
return { cached: 'hit' };
}
}
await fs.promises.unlink(cachedFileURL);
}
} catch (e: any) {
if (e.code !== 'ENOENT') {
@ -209,6 +249,8 @@ export async function generateImagesForPath(
let resultData: Partial<ImageData> = {
data: undefined,
expires: originalImage.expires,
etag: originalImage.etag,
lastModified: originalImage.lastModified,
};
const imageService = (await getConfiguredImageService()) as LocalImageService;
@ -239,13 +281,7 @@ export async function generateImagesForPath(
if (isLocalImage) {
await fs.promises.writeFile(cachedFileURL, resultData.data);
} else {
await fs.promises.writeFile(
cachedFileURL,
JSON.stringify({
data: Buffer.from(resultData.data).toString('base64'),
expires: resultData.expires,
}),
);
await writeRemoteCacheFile(cachedFileURL, resultData as ImageData, env);
}
}
} catch (e) {
@ -259,7 +295,7 @@ export async function generateImagesForPath(
}
return {
cached: false,
cached: 'miss',
weight: {
// Divide by 1024 to get size in kilobytes
before: Math.trunc(originalImage.data.byteLength / 1024),
@ -269,6 +305,25 @@ export async function generateImagesForPath(
}
}
/**
 * Serializes a remote image and its cache metadata to a JSON cache file.
 *
 * The image bytes are stored base64-encoded next to the `expires`, `etag` and
 * `lastModified` values. Failures are deliberately non-fatal: any error raised
 * while serializing or writing is logged as a warning and swallowed, so the
 * caller simply continues without a cache entry for this asset.
 *
 * @param cachedFileURL - Location of the JSON cache file to write.
 * @param resultData - Image bytes plus the metadata to persist.
 * @param env - Generation environment; only its logger is used here.
 */
async function writeRemoteCacheFile(cachedFileURL: URL, resultData: ImageData, env: AssetEnv) {
	try {
		const { data, expires, etag, lastModified } = resultData;
		const serializedEntry = JSON.stringify({
			data: Buffer.from(data).toString('base64'),
			expires,
			etag,
			lastModified,
		});
		return await fs.promises.writeFile(cachedFileURL, serializedEntry);
	} catch (writeError) {
		env.logger.warn(
			null,
			`An error was encountered while writing the cache file for a remote asset. Proceeding without caching this asset. Error: ${writeError}`,
		);
	}
}
export function getStaticImageList(): AssetsGlobalStaticImagesList {
if (!globalThis?.astroAsset?.staticImages) {
return new Map();
@ -279,11 +334,7 @@ export function getStaticImageList(): AssetsGlobalStaticImagesList {
async function loadImage(path: string, env: AssetEnv): Promise<ImageData> {
if (isRemotePath(path)) {
const remoteImage = await loadRemoteImage(path);
return {
data: remoteImage.data,
expires: remoteImage.expires,
};
return await loadRemoteImage(path);
}
return {

View file

@ -1,6 +1,11 @@
import CachePolicy from 'http-cache-semantics';
export type RemoteCacheEntry = { data: string; expires: number };
// Shape of the JSON document stored on disk as the cache entry for a remote image.
export type RemoteCacheEntry = {
// Image bytes encoded as a base64 string.
data: string;
// Expiry timestamp recorded when the image was fetched or revalidated.
expires: number;
// Stored `Etag` header value, used for `If-None-Match` revalidation when present.
etag?: string;
// Stored `Last-Modified` header value, used for `If-Modified-Since` revalidation when present.
lastModified?: string;
};
export async function loadRemoteImage(src: string) {
const req = new Request(src);
@ -19,6 +24,61 @@ export async function loadRemoteImage(src: string) {
return {
data: Buffer.from(await res.arrayBuffer()),
expires: Date.now() + expires,
etag: res.headers.get('Etag') ?? undefined,
lastModified: res.headers.get('Last-Modified') ?? undefined,
};
}
/**
 * Checks with the remote server whether a cached remote asset is still current.
 *
 * A conditional request is sent using the stored validators:
 * [If-None-Match](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match) for the entity-tag and
 * [If-Modified-Since](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since) for the modification date.
 * The server either confirms the cached copy (304 Not Modified) or sends a replacement (200 OK).
 *
 * @param src - url to remote asset
 * @param revalidationData - the stored Entity-Tag and/or Last-Modified value of the cached asset
 * @returns The asset data, a new expiry time, and the asset's etag / last-modified values.
 * The data buffer is empty when the server answered 304 Not Modified.
 * @throws Error when the response is neither successful nor 304 Not Modified.
 */
export async function revalidateRemoteImage(
	src: string,
	revalidationData: { etag?: string; lastModified?: string },
) {
	// Only send the validators we actually have.
	const conditionalHeaders: Record<string, string> = {};
	if (revalidationData.etag) {
		conditionalHeaders['If-None-Match'] = revalidationData.etag;
	}
	if (revalidationData.lastModified) {
		conditionalHeaders['If-Modified-Since'] = revalidationData.lastModified;
	}

	const request = new Request(src, { headers: conditionalHeaders });
	const response = await fetch(request);

	// Asset not modified: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/304
	const notModified = response.status === 304;
	if (!response.ok && !notModified) {
		throw new Error(
			`Failed to revalidate cached remote image ${src}. The request did not return a 200 OK / 304 NOT MODIFIED response. (received ${response.status} ${response.statusText})`,
		);
	}

	const body = Buffer.from(await response.arrayBuffer());
	if (response.ok && body.length === 0) {
		// The server says the cache is stale but sent no body: fall back to a plain download.
		return await loadRemoteImage(src);
	}

	// A 304 response is not cacheable by itself, so present its headers as a 200 response
	// to obtain the refreshed TTL from the cache policy.
	const responseForPolicy = response.ok
		? response
		: new Response(null, { status: 200, headers: response.headers });
	const cachePolicy = new CachePolicy(
		webToCachePolicyRequest(request),
		webToCachePolicyResponse(responseForPolicy),
	);
	const ttl = cachePolicy.storable() ? cachePolicy.timeToLive() : 0;

	// Servers should repeat the validators of the 200 response on a 304; when they don't,
	// keep the values we already have stored. On a 200 the old validators no longer apply.
	const fallbackEtag = response.ok ? undefined : revalidationData.etag;
	const fallbackLastModified = response.ok ? undefined : revalidationData.lastModified;

	return {
		data: body,
		expires: Date.now() + ttl,
		etag: response.headers.get('Etag') ?? fallbackEtag,
		lastModified: response.headers.get('Last-Modified') ?? fallbackLastModified,
	};
}