0
Fork 0
mirror of https://github.com/TryGhost/Ghost.git synced 2025-01-06 22:40:14 -05:00

Optimized collection repository queries (#18194)

refs https://github.com/TryGhost/Arch/issues/86

- Creating bookshelf models for each collection_post relation created a
massive overhead. On a dataset with 500k collections_posts records the
timing was roughly 7s comparing to 810ms after the optimization.
- Optimized memory and performance of collections fetching by querying post
ids only by default
This commit is contained in:
naz 2023-09-19 16:02:14 +08:00 committed by Naz
parent 3a8fac8348
commit 826b5be002
No known key found for this signature in database
3 changed files with 60 additions and 12 deletions

View file

@ -29,7 +29,11 @@ module.exports = {
},
permissions: true,
query(frame) {
return collectionsService.api.getAll(frame.options);
return collectionsService.api.getAll({
filter: frame.options.filter,
limit: frame.options.limit,
page: frame.options.page
});
}
},

View file

@ -28,12 +28,14 @@ module.exports = class BookshelfCollectionsRepository {
async getById(id, options = {}) {
const model = await this.#model.findOne({id}, {
require: false,
withRelated: ['collectionPosts'],
transacting: options.transaction
});
if (!model) {
return null;
}
model.collectionPostIds = await this.#fetchCollectionPostIds(model.id, options);
return this.#modelToCollection(model);
}
@ -44,28 +46,63 @@ module.exports = class BookshelfCollectionsRepository {
async getBySlug(slug, options = {}) {
const model = await this.#model.findOne({slug}, {
require: false,
withRelated: ['collectionPosts'],
transacting: options.transaction
});
if (!model) {
return null;
}
model.collectionPostIds = await this.#fetchCollectionPostIds(model.id, options);
return this.#modelToCollection(model);
}
/**
* NOTE: we are only fetching post_id column here to save memory on
* instances with a large amount of posts
*
* The method could be further optimized to fetch posts for
* multiple collections at a time.
*
* @param {string} collectionId collection to fetch post ids for
* @param {Object} options bookshelf options
*
* @returns {Promise<Array<{post_id: string}>>}
*/
async #fetchCollectionPostIds(collectionId, options = {}) {
const toSelect = options.columns || ['post_id'];
const query = this.#relationModel.query();
if (options.transaction) {
query.transacting(options.transaction);
}
return await query
.select(toSelect)
.where('collection_id', collectionId);
}
/**
* @param {object} [options]
* @param {string} [options.filter]
* @param {string} [options.order]
* @param {string[]} [options.withRelated]
* @param {import('knex').Transaction} [options.transaction]
*/
async getAll(options = {}) {
const models = await this.#model.findAll({
...options,
transacting: options.transaction,
withRelated: ['collectionPosts']
transacting: options.transaction
});
for (const model of models) {
// NOTE: Ideally collection posts would be fetching as a part of findAll query.
// Because bookshelf introduced a massive processing and memory overhead
// we are fetching collection post ids separately using raw knex query
model.collectionPostIds = await this.#fetchCollectionPostIds(model.id, options);
}
return (await Promise.all(models.map(model => this.#modelToCollection(model)))).filter(entity => !!entity);
}
@ -78,6 +115,10 @@ module.exports = class BookshelfCollectionsRepository {
}
try {
// NOTE: collectionPosts are not a part of serialized model
// and are fetched separately to save memory
const posts = model.collectionPostIds;
return await Collection.create({
id: json.id,
slug: json.slug,
@ -86,7 +127,7 @@ module.exports = class BookshelfCollectionsRepository {
filter: filter,
type: json.type,
featureImage: json.feature_image,
posts: json.collectionPosts.map(collectionPost => collectionPost.post_id),
posts: posts.map(collectionPost => collectionPost.post_id),
createdAt: json.created_at,
updatedAt: json.updated_at
});
@ -135,8 +176,7 @@ module.exports = class BookshelfCollectionsRepository {
{id: data.id},
{
require: false,
transacting: options.transaction,
withRelated: ['collectionPosts']
transacting: options.transaction
}
);
@ -175,7 +215,11 @@ module.exports = class BookshelfCollectionsRepository {
if (collection.type === 'manual') {
await this.#relationModel.query().delete().where('collection_id', collection.id).transacting(options.transaction);
} else {
const existingRelations = existing.toJSON().collectionPosts;
const collectionPostsOptions = {
transaction: options.transaction,
columns: ['id', 'post_id']
};
const existingRelations = await this.#fetchCollectionPostIds(collection.id, collectionPostsOptions);
for (const existingRelation of existingRelations) {
const found = collectionPostsRelations.find((thing) => {

View file

@ -117,7 +117,7 @@ describe('Collections API', function () {
});
}, this.skip.bind(this));
const collectionRelatedQueries = queries.filter(query => query.sql.includes('collection'));
assert(collectionRelatedQueries.length === 2);
assert(collectionRelatedQueries.length === 3);
});
it('Can browse Collections and include the posts count', async function () {
@ -572,7 +572,7 @@ describe('Collections API', function () {
}, this.skip.bind(this));
const collectionRelatedQueries = queries.filter(query => query.sql.includes('collection'));
assert.equal(collectionRelatedQueries.length, 8);
assert.equal(collectionRelatedQueries.length, 13);
}
await agent
@ -604,7 +604,7 @@ describe('Collections API', function () {
}, this.skip.bind(this));
const collectionRelatedQueries = queries.filter(query => query.sql.includes('collection'));
assert.equal(collectionRelatedQueries.length, 14);
assert.equal(collectionRelatedQueries.length, 18);
}
await agent