
feat: custom file formats in file content loader (#12047)

* add custom file format support

* add tests

* lint/format

* changeset

* nits

* finish tests

* add nested json test

* requested changes

* update changeset with @sarah11918 suggestions

* typos/formatting

* add map<id, data> yaml test

* fix tests and rebase
Rohan Godha 2024-09-29 14:41:15 -04:00 committed by GitHub
parent 0a1036eef6
commit 21b5e806c5
10 changed files with 408 additions and 29 deletions

View file

@ -0,0 +1,68 @@
---
'astro': minor
---
Adds a new optional `parser` property to the built-in `file()` loader for content collections to support additional file types such as `toml` and `csv`.
The `file()` loader now accepts a second argument that defines a `parser` function. This allows you to specify a custom parser (e.g. `toml.parse` or `csv-parse`) to create a collection from a file's contents. The `file()` loader will automatically detect and parse JSON and YAML files (based on their file extension) with no need for a `parser`.
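For example, a plain JSON file can be loaded with no `parser` argument at all. A minimal sketch (the schema fields here are assumptions for illustration):
```typescript
import { defineCollection, z } from "astro:content"
import { file } from "astro/loaders"

// The `.json` extension is detected automatically, so no `parser` is needed
const dogs = defineCollection({
  loader: file("src/data/dogs.json"),
  schema: z.object({ id: z.string(), breed: z.string() }),
})
```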
This works with any custom file format, including `csv` and `toml`. The following example defines a content collection `dogs` using a `.toml` file.
```toml
[[dogs]]
id = "..."
age = "..."
[[dogs]]
id = "..."
age = "..."
```
After importing a TOML parser, you can load the `dogs` collection into your project by passing both a file path and a `parser` to the `file()` loader.
```typescript
import { defineCollection } from "astro:content"
import { file } from "astro/loaders"
import { parse as parseToml } from "toml"
const dogs = defineCollection({
loader: file("src/data/dogs.toml", { parser: (text) => parseToml(text).dogs }),
schema: /* ... */
})
// it also works with CSVs!
import { parse as parseCsv } from "csv-parse/sync";
const cats = defineCollection({
loader: file("src/data/cats.csv", { parser: (text) => parseCsv(text, { columns: true, skipEmptyLines: true })})
});
```
The `parser` argument also allows you to load a single collection from a nested JSON document. For example, this JSON file contains multiple collections:
```json
{"dogs": [{}], "cats": [{}]}
```
You can separate these collections by passing a custom `parser` to the `file()` loader like so:
```typescript
const dogs = defineCollection({
loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).dogs })
});
const cats = defineCollection({
loader: file("src/data/pets.json", { parser: (text) => JSON.parse(text).cats })
});
```
The `file()` loader also continues to work with maps of `id` to `data`:
```yaml
bubbles:
breed: "Goldfish"
age: 2
finn:
breed: "Betta"
age: 1
```
```typescript
const fish = defineCollection({
loader: file("src/data/fish.yaml"),
schema: z.object({ breed: z.string(), age: z.number() })
});
```
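Entries from these collections can then be queried like any other content collection. A minimal usage sketch, run inside an Astro component or endpoint and assuming the `fish` collection defined above:
```typescript
import { getCollection } from "astro:content"

// Each entry id comes from its YAML key (e.g. "bubbles") and `data` matches the schema
const allFish = await getCollection("fish")
const breeds = allFish.map((entry) => entry.data.breed)
```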

View file

@ -1,25 +1,56 @@
import { promises as fs, existsSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import yaml from 'js-yaml';
import { posixRelative } from '../utils.js';
import type { Loader, LoaderContext } from './types.js';
export interface FileOptions {
/**
* the parsing function to use for this data
* @default JSON.parse or yaml.load, depending on the extension of the file
* */
parser?: (
text: string,
) => Record<string, Record<string, unknown>> | Array<Record<string, unknown>>;
}
/**
* Loads entries from a JSON file. The file must contain an array of objects that contain unique `id` fields, or an object with string keys.
* @todo Add support for other file types, such as YAML, CSV etc.
* @param fileName The path to the JSON file to load, relative to the content directory.
* @param options Additional options for the file loader
*/
export function file(fileName: string): Loader {
export function file(fileName: string, options?: FileOptions): Loader {
if (fileName.includes('*')) {
// TODO: AstroError
throw new Error('Glob patterns are not supported in `file` loader. Use `glob` loader instead.');
}
let parse: ((text: string) => any) | null = null;
const ext = fileName.split('.').at(-1);
if (ext === 'json') {
parse = JSON.parse;
} else if (ext === 'yml' || ext === 'yaml') {
parse = (text) =>
yaml.load(text, {
filename: fileName,
});
}
if (options?.parser) parse = options.parser;
if (parse === null) {
// TODO: AstroError
throw new Error(
`No parser found for file '${fileName}'. Try passing a parser to the \`file\` loader.`,
);
}
async function syncData(filePath: string, { logger, parseData, store, config }: LoaderContext) {
let json: Array<Record<string, unknown>>;
let data: Array<Record<string, unknown>> | Record<string, Record<string, unknown>>;
try {
const data = await fs.readFile(filePath, 'utf-8');
json = JSON.parse(data);
const contents = await fs.readFile(filePath, 'utf-8');
data = parse!(contents);
} catch (error: any) {
logger.error(`Error reading data from ${fileName}`);
logger.debug(error.message);
@ -28,28 +59,28 @@ export function file(fileName: string): Loader {
const normalizedFilePath = posixRelative(fileURLToPath(config.root), filePath);
if (Array.isArray(json)) {
if (json.length === 0) {
if (Array.isArray(data)) {
if (data.length === 0) {
logger.warn(`No items found in ${fileName}`);
}
logger.debug(`Found ${json.length} item array in ${fileName}`);
logger.debug(`Found ${data.length} item array in ${fileName}`);
store.clear();
for (const rawItem of json) {
for (const rawItem of data) {
const id = (rawItem.id ?? rawItem.slug)?.toString();
if (!id) {
logger.error(`Item in ${fileName} is missing an id or slug field.`);
continue;
}
const data = await parseData({ id, data: rawItem, filePath });
store.set({ id, data, filePath: normalizedFilePath });
const parsedData = await parseData({ id, data: rawItem, filePath });
store.set({ id, data: parsedData, filePath: normalizedFilePath });
}
} else if (typeof json === 'object') {
const entries = Object.entries<Record<string, unknown>>(json);
} else if (typeof data === 'object') {
const entries = Object.entries<Record<string, unknown>>(data);
logger.debug(`Found object with ${entries.length} entries in ${fileName}`);
store.clear();
for (const [id, rawItem] of entries) {
const data = await parseData({ id, data: rawItem, filePath });
store.set({ id, data, filePath: normalizedFilePath });
const parsedData = await parseData({ id, data: rawItem, filePath });
store.set({ id, data: parsedData, filePath: normalizedFilePath });
}
} else {
logger.error(`Invalid data in ${fileName}. Must be an array or object.`);

View file

@ -53,11 +53,11 @@ describe('Content Layer', () => {
assert.equal(json.customLoader.length, 5);
});
it('Returns `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('fileLoader'));
assert.ok(Array.isArray(json.fileLoader));
it('Returns json `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('jsonLoader'));
assert.ok(Array.isArray(json.jsonLoader));
const ids = json.fileLoader.map((item) => item.data.id);
const ids = json.jsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'labrador-retriever',
'german-shepherd',
@ -97,6 +97,58 @@ describe('Content Layer', () => {
);
});
it('Returns nested json `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('nestedJsonLoader'));
assert.ok(Array.isArray(json.nestedJsonLoader));
const ids = json.nestedJsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch']);
});
it('Returns yaml `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('yamlLoader'));
assert.ok(Array.isArray(json.yamlLoader));
const ids = json.yamlLoader.map((item) => item.id);
assert.deepEqual(ids, [
'bubbles',
'finn',
'shadow',
'spark',
'splash',
'nemo',
'angel-fish',
'gold-stripe',
'blue-tail',
'bubble-buddy',
]);
});
it('Returns toml `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('tomlLoader'));
assert.ok(Array.isArray(json.tomlLoader));
const ids = json.tomlLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'crown',
'nikes-on-my-feet',
'stars',
'never-let-me-down',
'no-church-in-the-wild',
'family-ties',
'somebody',
'honest',
]);
});
it('Returns nested json `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('nestedJsonLoader'));
assert.ok(Array.isArray(json.nestedJsonLoader));
const ids = json.nestedJsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, ['bluejay', 'robin', 'sparrow', 'cardinal', 'goldfinch']);
});
it('Returns data entry by id', async () => {
assert.ok(json.hasOwnProperty('dataEntry'));
assert.equal(json.dataEntry.filePath?.split(sep).join(posixSep), 'src/data/dogs.json');
@ -276,10 +328,10 @@ describe('Content Layer', () => {
});
it('Returns `file()` loader collection', async () => {
assert.ok(json.hasOwnProperty('fileLoader'));
assert.ok(Array.isArray(json.fileLoader));
assert.ok(json.hasOwnProperty('jsonLoader'));
assert.ok(Array.isArray(json.jsonLoader));
const ids = json.fileLoader.map((item) => item.data.id);
const ids = json.jsonLoader.map((item) => item.data.id);
assert.deepEqual(ids, [
'labrador-retriever',
'german-shepherd',
@ -348,7 +400,7 @@ describe('Content Layer', () => {
it('updates collection when data file is changed', async () => {
const rawJsonResponse = await fixture.fetch('/collections.json');
const initialJson = devalue.parse(await rawJsonResponse.text());
assert.equal(initialJson.fileLoader[0].data.temperament.includes('Bouncy'), false);
assert.equal(initialJson.jsonLoader[0].data.temperament.includes('Bouncy'), false);
await fixture.editFile('/src/data/dogs.json', (prev) => {
const data = JSON.parse(prev);
@ -359,7 +411,7 @@ describe('Content Layer', () => {
await fixture.onNextDataStoreChange();
const updatedJsonResponse = await fixture.fetch('/collections.json');
const updated = devalue.parse(await updatedJsonResponse.text());
assert.ok(updated.fileLoader[0].data.temperament.includes('Bouncy'));
assert.ok(updated.jsonLoader[0].data.temperament.includes('Bouncy'));
await fixture.resetAllFiles();
});
});

View file

@ -4,6 +4,7 @@
"private": true,
"dependencies": {
"astro": "workspace:*",
"@astrojs/mdx": "workspace:*"
"@astrojs/mdx": "workspace:*",
"toml": "^3.0.0"
}
}

View file

@ -1,6 +1,7 @@
import { defineCollection, z, reference } from 'astro:content';
import { file, glob } from 'astro/loaders';
import { loader } from '../loaders/post-loader.js';
import { parse as parseToml } from 'toml';
const blog = defineCollection({
loader: loader({ url: 'https://jsonplaceholder.typicode.com/posts' }),
@ -118,6 +119,27 @@ const cats = defineCollection({
}),
});
const fish = defineCollection({
loader: file('src/data/fish.yaml'),
schema: z.object({
name: z.string(),
breed: z.string(),
age: z.number(),
}),
});
const birds = defineCollection({
loader: file('src/data/birds.json', {
parser: (text) => JSON.parse(text).birds,
}),
schema: z.object({
id: z.string(),
name: z.string(),
breed: z.string(),
age: z.number(),
}),
});
// Absolute paths should also work
const absoluteRoot = new URL('../../content/space', import.meta.url);
@ -198,14 +220,36 @@ const increment = defineCollection({
},
});
const artists = defineCollection({
loader: file('src/data/music.toml', { parser: (text) => parseToml(text).artists }),
schema: z.object({
id: z.string(),
name: z.string(),
genre: z.string().array(),
}),
});
const songs = defineCollection({
loader: file('src/data/music.toml', { parser: (text) => parseToml(text).songs }),
schema: z.object({
id: z.string(),
name: z.string(),
artists: z.array(reference('artists')),
}),
});
export const collections = {
blog,
dogs,
cats,
fish,
birds,
numbers,
spacecraft,
increment,
images,
artists,
songs,
probes,
rodents,
};

View file

@ -0,0 +1,34 @@
{
"birds": [
{
"id": "bluejay",
"name": "Blue Jay",
"breed": "Cyanocitta cristata",
"age": 3
},
{
"id": "robin",
"name": "Robin",
"breed": "Turdus migratorius",
"age": 2
},
{
"id": "sparrow",
"name": "Sparrow",
"breed": "Passer domesticus",
"age": 1
},
{
"id": "cardinal",
"name": "Cardinal",
"breed": "Cardinalis cardinalis",
"age": 4
},
{
"id": "goldfinch",
"name": "Goldfinch",
"breed": "Spinus tristis",
"age": 2
}
]
}

View file

@ -0,0 +1,42 @@
# map of ids to data
bubbles:
name: "Bubbles"
breed: "Goldfish"
age: 2
finn:
name: "Finn"
breed: "Betta"
age: 1
shadow:
name: "Shadow"
breed: "Catfish"
age: 3
spark:
name: "Spark"
breed: "Tetra"
age: 1
splash:
name: "Splash"
breed: "Guppy"
age: 2
nemo:
name: "Nemo"
breed: "Clownfish"
age: 3
angel-fish:
name: "Angel Fish"
breed: "Angelfish"
age: 4
gold-stripe:
name: "Gold Stripe"
breed: "Molly"
age: 1
blue-tail:
name: "Blue Tail"
breed: "Swordtail"
age: 2
bubble-buddy:
name: "Bubble Buddy"
breed: "Betta"
age: 3

View file

@ -0,0 +1,89 @@
[[artists]]
id = "kendrick-lamar"
name = "Kendrick Lamar"
genre = ["Hip-Hop", "Rap"]
[[artists]]
id = "mac-miller"
name = "Mac Miller"
genre = ["Hip-Hop", "Rap"]
[[artists]]
id = "jid"
name = "JID"
genre = ["Hip-Hop", "Rap"]
[[artists]]
id = "yasiin-bey"
name = "Yasiin Bey"
genre = ["Hip-Hop", "Rap"]
[[artists]]
id = "kanye-west"
name = "Kanye West"
genre = ["Hip-Hop", "Rap"]
[[artists]]
id = "jay-z"
name = "JAY-Z"
genre = ["Hip-Hop", "Rap"]
[[artists]]
id = "j-ivy"
name = "J. Ivy"
genre = ["Spoken Word", "Rap"]
[[artists]]
id = "frank-ocean"
name = "Frank Ocean"
genre = ["R&B", "Hip-Hop"]
[[artists]]
id = "the-dream"
name = "The-Dream"
genre = ["R&B", "Hip-Hop"]
[[artists]]
id = "baby-keem"
name = "Baby Keem"
genre = ["Hip-Hop", "Rap"]
[[songs]]
id = "crown"
name = "Crown"
artists = ["kendrick-lamar"]
[[songs]]
id = "nikes-on-my-feet"
name = "Nikes on My Feet"
artists = ["mac-miller"]
[[songs]]
id = "stars"
name = "Stars"
artists = ["jid", "yasiin-bey"]
[[songs]]
id = "never-let-me-down"
name = "Never Let Me Down"
artists = ["kanye-west", "jay-z", "j-ivy"]
[[songs]]
id = "no-church-in-the-wild"
name = "No Church In The Wild"
artists = ["jay-z", "kanye-west", "frank-ocean", "the-dream"]
[[songs]]
id = "family-ties"
name = "family ties"
artists = ["kendrick-lamar", "baby-keem"]
[[songs]]
id = "somebody"
name = "Somebody"
artists = ["jid"]
[[songs]]
id = "honest"
name = "HONEST"
artists = ["baby-keem"]

View file

@ -5,7 +5,7 @@ export async function GET() {
const customLoader = await getCollection('blog', (entry) => {
return entry.data.id < 6;
});
const fileLoader = await getCollection('dogs');
const jsonLoader = await getCollection('dogs');
const dataEntry = await getEntry('dogs', 'beagle');
@ -23,10 +23,17 @@ export async function GET() {
const simpleLoaderObject = await getCollection('rodents')
const probes = await getCollection('probes');
const yamlLoader = await getCollection('fish');
const tomlLoader = await getCollection('songs');
const nestedJsonLoader = await getCollection('birds');
return new Response(
devalue.stringify({
customLoader,
fileLoader,
jsonLoader,
dataEntry,
simpleLoader,
simpleLoaderObject,
@ -35,7 +42,10 @@ export async function GET() {
referencedEntry,
increment,
images,
probes
})
probes,
yamlLoader,
tomlLoader,
nestedJsonLoader,
}),
);
}

View file

@ -2713,6 +2713,9 @@ importers:
astro:
specifier: workspace:*
version: link:../../..
toml:
specifier: ^3.0.0
version: 3.0.0
packages/astro/test/fixtures/content-layer-markdoc:
dependencies:
@ -10327,6 +10330,9 @@ packages:
resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==}
engines: {node: '>=0.6'}
toml@3.0.0:
resolution: {integrity: sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w==}
totalist@3.0.1:
resolution: {integrity: sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ==}
engines: {node: '>=6'}
@ -16512,6 +16518,8 @@ snapshots:
toidentifier@1.0.1: {}
toml@3.0.0: {}
totalist@3.0.1: {}
tough-cookie@4.1.3: