mirror of
https://github.com/immich-app/immich.git
synced 2025-01-21 00:52:43 -05:00
refactor(server): filesystem crawl (#4395)
Co-authored-by: Jonathan Jogenfors <jonathan@jogenfors.se>
This commit is contained in:
parent
9070a361bc
commit
9033e7f179
2 changed files with 205 additions and 212 deletions
|
@ -2,208 +2,204 @@ import { CrawlOptionsDto } from '@app/domain';
|
|||
import mockfs from 'mock-fs';
|
||||
import { FilesystemProvider } from './filesystem.provider';
|
||||
|
||||
interface Test {
|
||||
test: string;
|
||||
options: CrawlOptionsDto;
|
||||
files: Record<string, boolean>;
|
||||
}
|
||||
|
||||
const cwd = process.cwd();
|
||||
|
||||
const tests: Test[] = [
|
||||
{
|
||||
test: 'should return empty when crawling an empty path list',
|
||||
options: {
|
||||
pathsToCrawl: [],
|
||||
},
|
||||
files: {},
|
||||
},
|
||||
{
|
||||
test: 'should crawl a single path',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should exclude by file extension',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
exclusionPatterns: ['**/*.tif'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
'/photos/image.tif': false,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should exclude by file extension without case sensitivity',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
exclusionPatterns: ['**/*.TIF'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
'/photos/image.tif': false,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should exclude by folder',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
exclusionPatterns: ['**/raw/**'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
'/photos/raw/image.jpg': false,
|
||||
'/photos/raw2/image.jpg': true,
|
||||
'/photos/folder/raw/image.jpg': false,
|
||||
'/photos/crawl/image.jpg': true,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should crawl multiple paths',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/', '/images/', '/albums/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image1.jpg': true,
|
||||
'/images/image2.jpg': true,
|
||||
'/albums/image3.jpg': true,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should support globbing paths',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos*'],
|
||||
},
|
||||
files: {
|
||||
'/photos1/image1.jpg': true,
|
||||
'/photos2/image2.jpg': true,
|
||||
'/images/image3.jpg': false,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should crawl a single path without trailing slash',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should crawl a single path',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
'/photos/subfolder/image1.jpg': true,
|
||||
'/photos/subfolder/image2.jpg': true,
|
||||
'/image1.jpg': false,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should filter file extensions',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
'/photos/image.txt': false,
|
||||
'/photos/1': false,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should include photo and video extensions',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/', '/videos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
'/photos/image.jpeg': true,
|
||||
'/photos/image.heic': true,
|
||||
'/photos/image.heif': true,
|
||||
'/photos/image.png': true,
|
||||
'/photos/image.gif': true,
|
||||
'/photos/image.tif': true,
|
||||
'/photos/image.tiff': true,
|
||||
'/photos/image.webp': true,
|
||||
'/photos/image.dng': true,
|
||||
'/photos/image.nef': true,
|
||||
'/videos/video.mp4': true,
|
||||
'/videos/video.mov': true,
|
||||
'/videos/video.webm': true,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should check file extensions without case sensitivity',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/'],
|
||||
},
|
||||
files: {
|
||||
'/photos/image.jpg': true,
|
||||
'/photos/image.Jpg': true,
|
||||
'/photos/image.jpG': true,
|
||||
'/photos/image.JPG': true,
|
||||
'/photos/image.jpEg': true,
|
||||
'/photos/image.TIFF': true,
|
||||
'/photos/image.tif': true,
|
||||
'/photos/image.dng': true,
|
||||
'/photos/image.NEF': true,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should normalize the path',
|
||||
options: {
|
||||
pathsToCrawl: ['/photos/1/../2'],
|
||||
},
|
||||
files: {
|
||||
'/photos/1/image.jpg': false,
|
||||
'/photos/2/image.jpg': true,
|
||||
},
|
||||
},
|
||||
{
|
||||
test: 'should return absolute paths',
|
||||
options: {
|
||||
pathsToCrawl: ['photos'],
|
||||
},
|
||||
files: {
|
||||
[`${cwd}/photos/1.jpg`]: true,
|
||||
[`${cwd}/photos/2.jpg`]: true,
|
||||
[`/photos/3.jpg`]: false,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
describe(FilesystemProvider.name, () => {
|
||||
const sut: FilesystemProvider = new FilesystemProvider();
|
||||
const sut = new FilesystemProvider();
|
||||
|
||||
console.log(process.cwd());
|
||||
|
||||
afterEach(() => {
|
||||
mockfs.restore();
|
||||
});
|
||||
|
||||
describe('crawl', () => {
|
||||
it('should return empty wnen crawling an empty path list', async () => {
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = [];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths).toHaveLength(0);
|
||||
});
|
||||
for (const { test, options, files } of tests) {
|
||||
it(test, async () => {
|
||||
mockfs(Object.fromEntries(Object.keys(files).map((file) => [file, ''])));
|
||||
|
||||
it('should crawl a single path', async () => {
|
||||
mockfs({
|
||||
'/photos/image.jpg': '',
|
||||
const actual = await sut.crawl(options);
|
||||
const expected = Object.entries(files)
|
||||
.filter((entry) => entry[1])
|
||||
.map(([file]) => file);
|
||||
|
||||
expect(actual.sort()).toEqual(expected.sort());
|
||||
});
|
||||
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos/'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
|
||||
});
|
||||
|
||||
it('should exclude by file extension', async () => {
|
||||
mockfs({
|
||||
'/photos/image.jpg': '',
|
||||
'/photos/image.tif': '',
|
||||
});
|
||||
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos/'];
|
||||
options.exclusionPatterns = ['**/*.tif'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
|
||||
});
|
||||
|
||||
it('should exclude by file extension without case sensitivity', async () => {
|
||||
mockfs({
|
||||
'/photos/image.jpg': '',
|
||||
'/photos/image.tif': '',
|
||||
});
|
||||
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos/'];
|
||||
options.exclusionPatterns = ['**/*.TIF'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
|
||||
});
|
||||
|
||||
it('should exclude by folder', async () => {
|
||||
mockfs({
|
||||
'/photos/image.jpg': '',
|
||||
'/photos/raw/image.jpg': '',
|
||||
'/photos/raw2/image.jpg': '',
|
||||
'/photos/folder/raw/image.jpg': '',
|
||||
'/photos/crawl/image.jpg': '',
|
||||
});
|
||||
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos/'];
|
||||
options.exclusionPatterns = ['**/raw/**'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(['/photos/image.jpg', '/photos/raw2/image.jpg', '/photos/crawl/image.jpg'].sort());
|
||||
});
|
||||
|
||||
it('should crawl multiple paths', async () => {
|
||||
mockfs({
|
||||
'/photos/image1.jpg': '',
|
||||
'/images/image2.jpg': '',
|
||||
'/albums/image3.jpg': '',
|
||||
});
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos/', '/images/', '/albums/'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(['/photos/image1.jpg', '/images/image2.jpg', '/albums/image3.jpg'].sort());
|
||||
});
|
||||
|
||||
it('should support globbing paths', async () => {
|
||||
mockfs({
|
||||
'/photos1/image1.jpg': '',
|
||||
'/photos2/image2.jpg': '',
|
||||
'/images/image3.jpg': '',
|
||||
});
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos*'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(['/photos1/image1.jpg', '/photos2/image2.jpg'].sort());
|
||||
});
|
||||
|
||||
it('should crawl a single path without trailing slash', async () => {
|
||||
mockfs({
|
||||
'/photos/image.jpg': '',
|
||||
});
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
|
||||
});
|
||||
|
||||
// TODO: test for hidden paths (not yet implemented)
|
||||
|
||||
it('should crawl a single path', async () => {
|
||||
mockfs({
|
||||
'/photos/image.jpg': '',
|
||||
'/photos/subfolder/image1.jpg': '',
|
||||
'/photos/subfolder/image2.jpg': '',
|
||||
'/image1.jpg': '',
|
||||
});
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos/'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(
|
||||
['/photos/image.jpg', '/photos/subfolder/image1.jpg', '/photos/subfolder/image2.jpg'].sort(),
|
||||
);
|
||||
});
|
||||
|
||||
it('should filter file extensions', async () => {
|
||||
mockfs({
|
||||
'/photos/image.jpg': '',
|
||||
'/photos/image.txt': '',
|
||||
'/photos/1': '',
|
||||
});
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos/'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(['/photos/image.jpg'].sort());
|
||||
});
|
||||
|
||||
it('should include photo and video extensions', async () => {
|
||||
mockfs({
|
||||
'/photos/image.jpg': '',
|
||||
'/photos/image.jpeg': '',
|
||||
'/photos/image.heic': '',
|
||||
'/photos/image.heif': '',
|
||||
'/photos/image.png': '',
|
||||
'/photos/image.gif': '',
|
||||
'/photos/image.tif': '',
|
||||
'/photos/image.tiff': '',
|
||||
'/photos/image.webp': '',
|
||||
'/photos/image.dng': '',
|
||||
'/photos/image.nef': '',
|
||||
'/videos/video.mp4': '',
|
||||
'/videos/video.mov': '',
|
||||
'/videos/video.webm': '',
|
||||
});
|
||||
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos/', '/videos/'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
|
||||
expect(paths.sort()).toEqual(
|
||||
[
|
||||
'/photos/image.jpg',
|
||||
'/photos/image.jpeg',
|
||||
'/photos/image.heic',
|
||||
'/photos/image.heif',
|
||||
'/photos/image.png',
|
||||
'/photos/image.gif',
|
||||
'/photos/image.tif',
|
||||
'/photos/image.tiff',
|
||||
'/photos/image.webp',
|
||||
'/photos/image.dng',
|
||||
'/photos/image.nef',
|
||||
'/videos/video.mp4',
|
||||
'/videos/video.mov',
|
||||
'/videos/video.webm',
|
||||
].sort(),
|
||||
);
|
||||
});
|
||||
|
||||
it('should check file extensions without case sensitivity', async () => {
|
||||
mockfs({
|
||||
'/photos/image.jpg': '',
|
||||
'/photos/image.Jpg': '',
|
||||
'/photos/image.jpG': '',
|
||||
'/photos/image.JPG': '',
|
||||
'/photos/image.jpEg': '',
|
||||
'/photos/image.TIFF': '',
|
||||
'/photos/image.tif': '',
|
||||
'/photos/image.dng': '',
|
||||
'/photos/image.NEF': '',
|
||||
});
|
||||
|
||||
const options = new CrawlOptionsDto();
|
||||
options.pathsToCrawl = ['/photos/'];
|
||||
const paths: string[] = await sut.crawl(options);
|
||||
expect(paths.sort()).toEqual(
|
||||
[
|
||||
'/photos/image.jpg',
|
||||
'/photos/image.Jpg',
|
||||
'/photos/image.jpG',
|
||||
'/photos/image.JPG',
|
||||
'/photos/image.jpEg',
|
||||
'/photos/image.TIFF',
|
||||
'/photos/image.tif',
|
||||
'/photos/image.dng',
|
||||
'/photos/image.NEF',
|
||||
].sort(),
|
||||
);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
mockfs.restore();
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
@ -111,24 +111,21 @@ export class FilesystemProvider implements IStorageRepository {
|
|||
};
|
||||
}
|
||||
|
||||
async crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
|
||||
const pathsToCrawl = crawlOptions.pathsToCrawl;
|
||||
|
||||
let paths: string;
|
||||
crawl(crawlOptions: CrawlOptionsDto): Promise<string[]> {
|
||||
const { pathsToCrawl, exclusionPatterns } = crawlOptions;
|
||||
if (!pathsToCrawl) {
|
||||
// No paths to crawl, return empty list
|
||||
return [];
|
||||
} else if (pathsToCrawl.length === 1) {
|
||||
paths = pathsToCrawl[0];
|
||||
} else {
|
||||
paths = '{' + pathsToCrawl.join(',') + '}';
|
||||
return Promise.resolve([]);
|
||||
}
|
||||
|
||||
paths = paths + '/**/*{' + mimeTypes.getSupportedFileExtensions().join(',') + '}';
|
||||
const base = pathsToCrawl.length === 1 ? pathsToCrawl[0] : `{${pathsToCrawl.join(',')}}`;
|
||||
const extensions = `*{${mimeTypes.getSupportedFileExtensions().join(',')}}`;
|
||||
|
||||
return (await glob(paths, { nocase: true, nodir: true, ignore: crawlOptions.exclusionPatterns })).map((assetPath) =>
|
||||
path.normalize(assetPath),
|
||||
);
|
||||
return glob(`${base}/**/${extensions}`, {
|
||||
absolute: true,
|
||||
nocase: true,
|
||||
nodir: true,
|
||||
ignore: exclusionPatterns,
|
||||
});
|
||||
}
|
||||
|
||||
readdir = readdir;
|
||||
|
|
Loading…
Add table
Reference in a new issue