0
Fork 0
mirror of https://github.com/TryGhost/Ghost.git synced 2025-04-08 02:52:39 -05:00

Added caching to the LinkRedirectRepository (#20036)

ref
https://linear.app/tryghost/issue/ENG-851/implement-a-minimal-but-complete-version-of-redirect-caching-to
ref https://app.incident.io/ghost/incidents/55

Often immediately after sending an email, sites receive a large volume
of requests to LinkRedirect endpoints from members clicking on the links in
the email.

We currently don't cache any of these requests in our CDN, because we
also record click events, update the member's `last_seen_at` timestamp,
and send webhooks in response to these clicks, so Ghost needs to handle
each of these requests itself. This means that each of these LinkRedirect requests
hits Ghost, and currently all these requests hit the database to look up
where to redirect the member to.

Each one of these requests can make up to 11 database queries, which can
quickly exhaust Ghost's database connection pool. Even though the
LinkRedirect lookup query is fairly cheap and quick, these queries aren't
prioritized over the "record" queries Ghost needs to handle, so they can
get stuck behind other queries in the queue and eventually time out.

The result is that members are unable to actually reach the destination
of the link they clicked on, instead receiving a 500 error in Ghost, or
it can take a long time (60s+) for the redirect to happen.

This PR uses our existing `adapterManager` to cache the redirect lookups
either in-memory or in Redis (if configured — by default there is no caching). This only removes 1 out of
11 queries per redirect request, so it won't reduce the load on the DB
drastically, but it at least decouples the serving of the LinkRedirect from
the DB so the member can be redirected even if the DB is under heavy
load.

Local load testing results have shown a decrease in response times from
60 seconds to ~50ms for the redirect requests when handling 500 requests
per second, and reduced the 500 error rate to 0.
This commit is contained in:
Chris Raible 2024-04-25 19:17:25 -07:00 committed by GitHub
parent 892b9ab397
commit dcd65bfa4f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 343 additions and 8 deletions

View file

@ -1,27 +1,51 @@
const LinkRedirect = require('@tryghost/link-redirects').LinkRedirect;
const ObjectID = require('bson-objectid').default;
const debug = require('@tryghost/debug')('LinkRedirectRepository');
module.exports = class LinkRedirectRepository {
/** @type {Object} */
#LinkRedirect;
/** @type {Object} */
#urlUtils;
/** @type {Boolean} */
#cacheEnabled;
/** @type {Object} */
#cache;
/**
* @param {object} deps
* @param {object} deps.LinkRedirect Bookshelf Model
* @param {object} deps.LinkRedirect - Bookshelf Model
* @param {object} deps.urlUtils
* @param {object} deps.cacheAdapter - Cache Adapter instance, or null if cache is disabled
* @param {object} deps.EventRegistry
*/
constructor(deps) {
debug('Creating LinkRedirectRepository');
this.#LinkRedirect = deps.LinkRedirect;
this.#urlUtils = deps.urlUtils;
this.#cache = null;
if (deps.cacheAdapter !== null) {
debug('Caching enabled with adapter:', deps.cacheAdapter.constructor.name);
this.#cache = deps.cacheAdapter;
// This is a bit of a blunt instrument, but it's the best we can do for now
// It covers all the cases we would need to invalidate the links cache
// We need to invalidate the cache when:
// - a redirect is edited
// - a site's subdirectory is changed (rare)
// - analytics settings are changed
deps.EventRegistry.on('site.changed', () => {
this.#cache.reset();
});
}
}
/**
* Save a new LinkRedirect to the DB
* @param {InstanceType<LinkRedirect>} linkRedirect
* @returns {Promise<void>}
*/
async save(linkRedirect) {
debug('Saving link redirect', linkRedirect.from.pathname, '->', linkRedirect.to.href);
const model = await this.#LinkRedirect.add({
// Only store the pathname (no support for variable query strings)
from: this.stripSubdirectoryFromPath(linkRedirect.from.pathname),
@ -29,17 +53,31 @@ module.exports = class LinkRedirectRepository {
}, {});
linkRedirect.link_id = ObjectID.createFromHexString(model.id);
if (this.#cache) {
debug('Caching new link redirect', linkRedirect.from.pathname);
this.#cache.set(linkRedirect.from.pathname, this.#serialize(linkRedirect));
}
}
/**
* Trim the leading slash from a URL path
* @param {string} url
* @returns {string} url without leading slash
*/
#trimLeadingSlash(url) {
return url.replace(/^\//, '');
}
/**
* Returns a LinkRedirect object from a model
* @param {object} model - Bookshelf model instance
* @returns {InstanceType<LinkRedirect>} LinkRedirect
*/
fromModel(model) {
// Store if link has been edited
// Note: in some edge cases updated_at is set directly after created_at, sometimes with a second difference, so we need to check for that
const edited = model.get('updated_at')?.getTime() > (model.get('created_at')?.getTime() + 1000);
return new LinkRedirect({
id: model.id,
from: new URL(this.#trimLeadingSlash(model.get('from')), this.#urlUtils.urlFor('home', true)),
@ -48,6 +86,43 @@ module.exports = class LinkRedirectRepository {
});
}
/**
* Create a LinkRedirect object from a JSON object (e.g. from the cache)
* @param {object} serialized
* @param {string} serialized.link_id - string representation of ObjectID
* @param {string} serialized.from - path of the URL
* @param {string} serialized.to - URL to redirect to
* @param {boolean} serialized.edited - whether the link has been edited
* @returns {InstanceType<LinkRedirect>} LinkRedirect
*/
#fromSerialized(serialized) {
return new LinkRedirect({
id: serialized.link_id,
from: new URL(this.#trimLeadingSlash(serialized.from), this.#urlUtils.urlFor('home', true)),
to: new URL(serialized.to),
edited: serialized.edited
});
}
/**
* Serialize a LinkRedirect object to a plain object (e.g. for caching)
* @param {InstanceType<LinkRedirect>} linkRedirect
* @returns {object} - serialized LinkRedirect
*/
#serialize(linkRedirect) {
return {
link_id: linkRedirect.link_id.toHexString(),
from: linkRedirect.from.pathname,
to: linkRedirect.to.href,
edited: linkRedirect.edited
};
}
/**
* Get all LinkRedirects from the DB, with optional filters
* @param {object} options - options passed directly to LinkRedirect.findAll
* @returns {Promise<InstanceType<LinkRedirect>[]>} array of LinkRedirects
*/
async getAll(options) {
const collection = await this.#LinkRedirect.findAll(options);
@ -60,6 +135,11 @@ module.exports = class LinkRedirectRepository {
return result;
}
/**
* Get all LinkRedirect IDs from the DB, with optional filters
* @param {object} options - options passed directly to LinkRedirect.getFilteredCollectionQuery
* @returns {Promise<string[]>} array of LinkRedirect IDs
*/
async getFilteredIds(options) {
const linkRows = await this.#LinkRedirect.getFilteredCollectionQuery(options)
.select('redirects.id')
@ -68,25 +148,44 @@ module.exports = class LinkRedirectRepository {
}
/**
*
* Get a LinkRedirect by its URL
* @param {URL} url
* @returns {Promise<InstanceType<LinkRedirect>|undefined>} linkRedirect
* @returns {Promise<InstanceType<LinkRedirect>|undefined>} LinkRedirect
*/
async getByURL(url) {
debug('Getting link redirect for', url.pathname);
// Strip subdirectory from path
const from = this.stripSubdirectoryFromPath(url.pathname);
const linkRedirect = await this.#LinkRedirect.findOne({
if (this.#cache) {
const cachedLink = await this.#cache.get(from);
// Cache hit, serve from cache
if (cachedLink) {
debug('Cache hit for', from);
return this.#fromSerialized(cachedLink);
}
}
// Cache miss, fetch from the DB
const linkRedirectModel = await this.#LinkRedirect.findOne({
from
}, {});
if (linkRedirect) {
return this.fromModel(linkRedirect);
if (linkRedirectModel) {
const linkRedirect = this.fromModel(linkRedirectModel);
if (this.#cache) {
debug('Cache miss for', from, '. Caching');
// Cache the link
this.#cache.set(from, this.#serialize(linkRedirect));
}
return linkRedirect;
}
}
/**
* Convert root relative URLs to subdirectory relative URLs
* @param {string} path
* @returns {string} path without subdirectory
*/
stripSubdirectoryFromPath(path) {
// Bit weird, but only way to do it with the urlUtils atm

View file

@ -1,5 +1,8 @@
const urlUtils = require('../../../shared/url-utils');
const LinkRedirectRepository = require('./LinkRedirectRepository');
const adapterManager = require('../adapter-manager');
const config = require('../../../shared/config');
const EventRegistry = require('../../lib/common/events');
class LinkRedirectsServiceWrapper {
async init() {
@ -15,7 +18,9 @@ class LinkRedirectsServiceWrapper {
this.linkRedirectRepository = new LinkRedirectRepository({
LinkRedirect: models.Redirect,
urlUtils
urlUtils,
cacheAdapter: config.get('hostSettings:linkRedirectsPublicCache:enabled') ? adapterManager.getAdapter('cache:linkRedirectsPublic') : null,
EventRegistry
});
// Expose the service

View file

@ -0,0 +1,231 @@
const should = require('should');
const sinon = require('sinon');
const ObjectID = require('bson-objectid').default;
const EventEmitter = require('events').EventEmitter;
const LinkRedirect = require('@tryghost/link-redirects').LinkRedirect;
const LinkRedirectRepository = require('../../../../../core/server/services/link-redirection/LinkRedirectRepository');
/**
* Create a stubbed LinkRedirect Bookshelf model for testing, with overridable values
*
* @param {object} [values] - Values to override in the stubbed LinkRedirect model
* @param {string} [values.id] - The ID of the model
* @param {Date} [values.updated_at] - The updated_at date of the model
* @param {Date} [values.created_at] - The created_at date of the model
* @param {string} [values.from] - The from URL path of the model (path only)
* @param {string} [values.to] - The to URL of the model (full URL including protocol, but not a URL object)
* @returns {object} - A stubbed LinkRedirect Bookshelf model
*
*/
function createRedirectModel(values = {}) {
    // Fallback attribute values used when the caller does not override them
    const defaults = {
        updated_at: new Date('2022-10-20T00:00:10.000Z'),
        created_at: new Date('2022-10-20T00:00:00.000Z'),
        from: '/r/1234abcd',
        to: 'https://google.com'
    };

    // Emulate the model's `get(attribute)` accessor with a stub keyed by attribute name
    const get = sinon.stub();
    for (const attribute of ['updated_at', 'created_at', 'from', 'to']) {
        get.withArgs(attribute).returns(values[attribute] || defaults[attribute]);
    }

    const id = values.id || '662194931d0ba6fb37c080ee';
    return {id, get};
}
/**
* Create a LinkRedirectRepository instance with stubbed dependencies
* Optionally override dependencies with custom stubs if needed
*
* @param {object} deps
* @param {object} [deps.LinkRedirect] - Stubbed LinkRedirect Bookshelf model
* @param {object} [deps.urlUtils] - Stubbed URL Utils module
* @param {object} [deps.cacheAdapter] - Stubbed cache adapter, or null if cache is disabled
* @param {object} [deps.EventRegistry] - Stubbed EventRegistry
* @returns {LinkRedirectRepository}
*/
function createLinkRedirectRepository(deps = {}) {
    // Single default model shared by the findOne and findAll stubs
    const defaultModel = createRedirectModel();

    // Default Bookshelf model stub; only built when the caller does not supply one
    const buildModelStub = () => {
        const idRows = [{id: '662194931d0ba6fb37c080ee'}];
        const distinct = sinon.stub().returns(idRows);
        const select = sinon.stub().returns({distinct});

        return {
            findOne: sinon.stub().returns(defaultModel),
            findAll: sinon.stub().returns({models: [defaultModel]}),
            getFilteredCollectionQuery: sinon.stub().returns({select}),
            add: sinon.stub().callsFake(data => createRedirectModel(data))
        };
    };

    // Default urlUtils stub for a site without a subdirectory at https://example.com
    const buildUrlUtilsStub = () => ({
        urlFor: sinon.stub().returns('https://example.com'),
        relativeToAbsolute: sinon.stub().returns(new URL('https://example.com')),
        absoluteToRelative: sinon.stub().returns('/r/1234abcd')
    });

    const dependencies = {
        LinkRedirect: deps.LinkRedirect || buildModelStub(),
        urlUtils: deps.urlUtils || buildUrlUtilsStub(),
        // Caching is disabled unless a cache adapter stub is passed in
        cacheAdapter: deps.cacheAdapter || null,
        EventRegistry: deps.EventRegistry || new EventEmitter()
    };

    return new LinkRedirectRepository(dependencies);
}
describe('UNIT: LinkRedirectRepository class', function () {
let linkRedirectRepository;
afterEach(function () {
sinon.restore();
});
describe('fromModel', function () {
    // Every scenario shares the same created_at; only updated_at (and so `edited`) varies
    const createdAt = '2022-10-20T00:00:00.000Z';
    const scenarios = [
        {
            title: 'should set edited to false if updated_at equals created_at',
            updatedAt: '2022-10-20T00:00:00.000Z',
            expectEdited: false
        },
        {
            title: 'should set edited to false if updated_at is within 1 second of created_at',
            updatedAt: '2022-10-20T00:00:00.999Z',
            expectEdited: false
        },
        {
            title: 'should set edited to true if updated_at is greater than created_at by more than 1 second',
            updatedAt: '2022-10-20T00:00:10.000Z',
            expectEdited: true
        }
    ];

    for (const {title, updatedAt, expectEdited} of scenarios) {
        it(title, function () {
            const model = createRedirectModel({
                updated_at: new Date(updatedAt),
                created_at: new Date(createdAt)
            });
            linkRedirectRepository = createLinkRedirectRepository();

            const linkRedirect = linkRedirectRepository.fromModel(model);

            should(linkRedirect.from.href).equal('https://example.com/r/1234abcd');
            should(linkRedirect.to.href).equal('https://google.com/');
            if (expectEdited) {
                should(linkRedirect.edited).be.true();
            } else {
                should(linkRedirect.edited).be.false();
            }
            should(ObjectID.isValid(linkRedirect.link_id)).be.true();
        });
    }
});
describe('getAll', function () {
    it('should return an array of LinkRedirect instances', async function () {
        linkRedirectRepository = createLinkRedirectRepository();

        const results = await linkRedirectRepository.getAll({});

        // The default findAll stub exposes exactly one model
        should(results).be.an.Array();
        should(results.length).equal(1);

        const [onlyRedirect] = results;
        should(onlyRedirect.from.href).equal('https://example.com/r/1234abcd');
        should(onlyRedirect.to.href).equal('https://google.com/');
        should(onlyRedirect.edited).be.true();
        should(ObjectID.isValid(onlyRedirect.link_id)).be.true();
    });
});
describe('getFilteredIds', function () {
    it('should return an array of link ids', async function () {
        linkRedirectRepository = createLinkRedirectRepository();

        const ids = await linkRedirectRepository.getFilteredIds({});

        // The default query stub yields a single row with this id
        should(ids).be.an.Array();
        should(ids.length).equal(1);

        const [firstId] = ids;
        should(firstId).equal('662194931d0ba6fb37c080ee');
    });
});
describe('getByURL', function () {
    it('should return a LinkRedirect instance', async function () {
        const url = new URL('https://example.com/r/1234abcd');
        linkRedirectRepository = createLinkRedirectRepository();

        const result = await linkRedirectRepository.getByURL(url);

        should(result).be.an.Object();
        should(result.from.href).equal(url.href);
        should(result.to.href).equal('https://google.com/');
    });

    it('should return a LinkRedirect instance from cache if enabled and key exists', async function () {
        const url = new URL('https://example.com/r/1234abcd');
        // Stand-in for the DB lookup so we can prove it is skipped on a cache hit
        const findOne = sinon.stub();
        const cacheAdapterStub = {
            get: sinon.stub().returns({
                link_id: '662194931d0ba6fb37c080ee',
                // Cached entries hold the path of the URL (the documented serialized shape), not a full URL
                from: '/r/1234abcd',
                to: 'https://google.com',
                edited: true
            }),
            reset: sinon.stub()
        };
        linkRedirectRepository = createLinkRedirectRepository({
            LinkRedirect: {findOne},
            cacheAdapter: cacheAdapterStub
        });

        const result = await linkRedirectRepository.getByURL(url);

        should(result).be.an.Object();
        should(result.from.href).equal('https://example.com/r/1234abcd');
        should(result.to.href).equal('https://google.com/');
        should(result.edited).be.true();
        should(ObjectID.isValid(result.link_id)).be.true();
        // A cache hit must be served without touching the DB
        should(cacheAdapterStub.get.calledOnce).be.true();
        should(findOne.called).be.false();
    });

    it('should return a LinkRedirect instance from the DB if cache is enabled and key does not exist', async function () {
        const url = new URL('https://example.com/r/1234abcd');
        const findOne = sinon.stub().returns(createRedirectModel());
        const cacheAdapterStub = {
            get: sinon.stub().returns(null),
            set: sinon.stub(),
            reset: sinon.stub()
        };
        linkRedirectRepository = createLinkRedirectRepository({
            LinkRedirect: {findOne},
            cacheAdapter: cacheAdapterStub
        });

        const result = await linkRedirectRepository.getByURL(url);

        should(result).be.an.Object();
        should(result.from.href).equal('https://example.com/r/1234abcd');
        should(result.to.href).equal('https://google.com/');
        should(result.edited).be.true();
        should(ObjectID.isValid(result.link_id)).be.true();
        // A cache miss falls through to the DB and then populates the cache
        should(findOne.calledOnce).be.true();
        should(cacheAdapterStub.set.calledOnce).be.true();
    });
});
describe('caching', function () {
    it('should add a new link redirect to the cache on save', async function () {
        const set = sinon.stub();
        linkRedirectRepository = createLinkRedirectRepository({
            cacheAdapter: {set}
        });

        const from = new URL('https://example.com/r/1234abcd');
        const to = new URL('https://google.com');
        const newRedirect = new LinkRedirect({from, to});

        await linkRedirectRepository.save(newRedirect);

        // Saving should write the new redirect to the cache exactly once
        should(set.calledOnce).be.true();
    });

    it('should clear cache on site.changed event', function () {
        const reset = sinon.stub();
        const EventRegistry = new EventEmitter();
        linkRedirectRepository = createLinkRedirectRepository({
            cacheAdapter: {reset},
            EventRegistry
        });

        // The repository subscribes to this event when it is constructed with a cache adapter
        EventRegistry.emit('site.changed');

        should(reset.calledOnce).be.true();
    });
});
});