0
Fork 0
mirror of https://github.com/TryGhost/Ghost.git synced 2025-03-11 02:12:21 -05:00

RSS Refactor with cache

refs #5091, refs #2263

- Move rss handling out of the frontend controller and into its own module
- Separate the code into logical blocks
- Wrap the generation code in an in-memory cache to prevent it being regenerated on every request
This commit is contained in:
Hannah Wolfe 2015-04-07 17:34:50 +01:00
parent 8f5960e1c5
commit 8d1e729f30
6 changed files with 266 additions and 214 deletions

View file

@ -42,6 +42,11 @@ function createUrl(urlPath, absolute, secure) {
output += ghostConfig.paths.subdir;
}
// Remove double subdirectory
if (urlPath.indexOf(ghostConfig.paths.subdir) === 0) {
urlPath = urlPath.replace(ghostConfig.paths.subdir, '');
}
// append the path, always starts and ends with a slash
output += urlPath;

View file

@ -5,21 +5,18 @@
/*global require, module */
var moment = require('moment'),
RSS = require('rss'),
rss = require('../data/xml/rss'),
_ = require('lodash'),
url = require('url'),
Promise = require('bluebird'),
api = require('../api'),
config = require('../config'),
filters = require('../filters'),
template = require('../helpers/template'),
errors = require('../errors'),
cheerio = require('cheerio'),
downsize = require('downsize'),
routeMatch = require('path-match')(),
frontendControllers,
staticPostPermalink,
staticPostPermalink;
// Cache static post permalink regex
staticPostPermalink = routeMatch('/:slug/:edit?');
@ -426,190 +423,7 @@ frontendControllers = {
return handleError(next)(err);
});
},
// Legacy in-controller RSS handler (the removed side of this diff; the controller now
// delegates via the `rss: rss` entry that follows this method).
//
// Express-style handler (req, res, next). It serves the RSS feed for the index, a tag or an
// author, chosen by inspecting req.route.path for the configured route keywords.
// - Redirects to the feed's base URL when the page parameter is not a number, is below 1,
//   or is an explicit page 1 on a paginated route.
// - Redirects to the last page when the requested page exceeds the number of pages.
// - Otherwise builds the feed from the blog settings and the browsed posts and sends it
//   as 'text/xml; charset=UTF-8'.
// Rejections of the settings/posts lookups are passed to handleError(next).
rss: function (req, res, next) {
// True when the matched route pattern contains a ':page' parameter
function isPaginated() {
return req.route.path.indexOf(':page') !== -1;
}
// True when the matched route pattern contains the configured tag keyword as a path segment
function isTag() {
return req.route.path.indexOf('/' + config.routeKeywords.tag + '/') !== -1;
}
// True when the matched route pattern contains the configured author keyword as a path segment
function isAuthor() {
return req.route.path.indexOf('/' + config.routeKeywords.author + '/') !== -1;
}
// Initialize RSS
// A missing page parameter means page 1; anything else is parsed as a base-10 integer
var pageParam = req.params.page !== undefined ? parseInt(req.params.page, 10) : 1,
slugParam = req.params.slug,
baseUrl = config.paths.subdir;
// baseUrl is the (subdirectory-prefixed) path of page 1 of this feed; used for redirects
if (isTag()) {
baseUrl += '/' + config.routeKeywords.tag + '/' + slugParam + '/rss/';
} else if (isAuthor()) {
baseUrl += '/' + config.routeKeywords.author + '/' + slugParam + '/rss/';
} else {
baseUrl += '/rss/';
}
// No negative pages, or page 1
if (isNaN(pageParam) || pageParam < 1 || (pageParam === 1 && isPaginated())) {
return res.redirect(baseUrl);
}
// Read the three settings in parallel; results arrive in the same order as requested
return Promise.all([
api.settings.read('title'),
api.settings.read('description'),
api.settings.read('permalinks')
]).then(function (result) {
var options = {};
if (pageParam) { options.page = pageParam; }
if (isTag()) { options.tag = slugParam; }
if (isAuthor()) { options.author = slugParam; }
options.include = 'author,tags,fields';
return api.posts.browse(options).then(function (page) {
var title = result[0].settings[0].value,
description = result[1].settings[0].value,
permalinks = result[2].settings[0],
majorMinor = /^(\d+\.)?(\d+)/,
trimmedVersion = res.locals.version,
siteUrl = config.urlFor('home', {secure: req.secure}, true),
feedUrl = config.urlFor('rss', {secure: req.secure}, true),
maxPage = page.meta.pagination.pages,
feed;
// Reduce the version to the part matched by majorMinor; '?' when no version is set
trimmedVersion = trimmedVersion ? trimmedVersion.match(majorMinor)[0] : '?';
// For tag/author feeds, prefix the title with the tag/author name and point feedUrl at that feed
if (isTag()) {
if (page.meta.filters.tags) {
title = page.meta.filters.tags[0].name + ' - ' + title;
feedUrl = siteUrl + config.routeKeywords.tag + '/' + page.meta.filters.tags[0].slug + '/rss/';
}
}
if (isAuthor()) {
if (page.meta.filters.author) {
title = page.meta.filters.author.name + ' - ' + title;
feedUrl = siteUrl + config.routeKeywords.author + '/' + page.meta.filters.author.slug + '/rss/';
}
}
feed = new RSS({
title: title,
description: description,
generator: 'Ghost ' + trimmedVersion,
feed_url: feedUrl,
site_url: siteUrl,
ttl: '60',
custom_namespaces: {
content: 'http://purl.org/rss/1.0/modules/content/',
media: 'http://search.yahoo.com/mrss/'
}
});
// If page is greater than number of pages we have, redirect to last page
if (pageParam > maxPage) {
return res.redirect(baseUrl + maxPage + '/');
}
setReqCtx(req, page.posts);
setResponseContext(req, res);
// NOTE(review): this promise chain is not returned from the enclosing callback, so a
// rejection inside it does not appear to reach the outer .catch(handleError(next)) - confirm.
filters.doFilter('prePostsRender', page.posts).then(function (posts) {
posts.forEach(function (post) {
var item = {
title: post.title,
guid: post.uuid,
url: config.urlFor('post', {post: post, permalinks: permalinks}, true),
date: post.published_at,
categories: _.pluck(post.tags, 'name'),
author: post.author ? post.author.name : null,
custom_elements: []
},
htmlContent = cheerio.load(post.html, {decodeEntities: false}),
image;
// convert relative resource urls to absolute
['href', 'src'].forEach(function (attributeName) {
htmlContent('[' + attributeName + ']').each(function (ix, el) {
var baseUrl,
attributeValue,
parsed;
el = htmlContent(el);
attributeValue = el.attr(attributeName);
// if URL is absolute move on to the next element
try {
parsed = url.parse(attributeValue);
if (parsed.protocol) {
return;
}
} catch (e) {
return;
}
// compose an absolute URL
// if the relative URL begins with a '/' use the blog URL (including sub-directory)
// as the base URL, otherwise use the post's URL.
baseUrl = attributeValue[0] === '/' ? siteUrl : item.url;
// prevent double subdirectories
if (attributeValue.indexOf(config.paths.subdir) === 0) {
attributeValue = attributeValue.replace(config.paths.subdir, '');
}
// prevent double slashes
if (baseUrl.slice(-1) === '/' && attributeValue[0] === '/') {
attributeValue = attributeValue.substr(1);
}
attributeValue = baseUrl + attributeValue;
el.attr(attributeName, attributeValue);
});
});
// The description is taken before the cover image is injected into the content below
item.description = post.meta_description || downsize(htmlContent.html(), {words: 50});
if (post.image) {
image = config.urlFor('image', {image: post.image}, true);
// Add a media content tag
item.custom_elements.push({
'media:content': {
_attr: {
url: image,
medium: 'image'
}
}
});
// Also add the image to the content, because not all readers support media:content
htmlContent('p').first().before('<img src="' + image + '" />');
// The 'img' selector matches every image in the content, not only the one just inserted
htmlContent('img').attr('alt', post.title);
}
item.custom_elements.push({
'content:encoded': {
_cdata: htmlContent.html()
}
});
feed.item(item);
});
}).then(function () {
res.set('Content-Type', 'text/xml; charset=UTF-8');
res.send(feed.xml());
});
});
}).catch(handleError(next));
}
rss: rss
};
module.exports = frontendControllers;

View file

@ -0,0 +1,235 @@
var _ = require('lodash'),
Promise = require('bluebird'),
cheerio = require('cheerio'),
crypto = require('crypto'),
downsize = require('downsize'),
RSS = require('rss'),
url = require('url'),
config = require('../../../config'),
api = require('../../../api'),
filters = require('../../../filters'),
generate,
generateFeed,
getFeedXml,
feedCache = {};
/**
 * Reports whether the route pattern matched by this request has a `:page` parameter.
 *
 * @param {Object} req - request object; `req.route.path` is the matched route pattern
 * @returns {boolean} true when the pattern contains ':page'
 */
function isPaginated(req) {
    var routePattern = req.route.path,
        pageTokenAt = routePattern.indexOf(':page');

    return pageTokenAt > -1;
}
/**
 * Reports whether the matched route pattern belongs to a tag channel, i.e. contains the
 * configured tag route keyword as a full path segment ('/<keyword>/').
 *
 * @param {Object} req - request object; `req.route.path` is the matched route pattern
 * @returns {boolean} true when the pattern contains the tag segment
 */
function isTag(req) {
    var routePattern = req.route.path,
        tagSegment = '/' + config.routeKeywords.tag + '/';

    return routePattern.indexOf(tagSegment) > -1;
}
/**
 * Reports whether the matched route pattern belongs to an author channel, i.e. contains the
 * configured author route keyword as a full path segment ('/<keyword>/').
 *
 * @param {Object} req - request object; `req.route.path` is the matched route pattern
 * @returns {boolean} true when the pattern contains the author segment
 */
function isAuthor(req) {
    var routePattern = req.route.path,
        authorSegment = '/' + config.routeKeywords.author + '/';

    return routePattern.indexOf(authorSegment) > -1;
}
/**
 * Builds a rejection handler that forwards the error to the given `next` callback.
 * Only the error itself is passed on; any further arguments are dropped.
 *
 * @param {Function} next - callback to receive the error
 * @returns {Function} handler taking an error and returning whatever `next` returns
 */
function handleError(next) {
    function forwardToNext(err) {
        return next(err);
    }

    return forwardToNext;
}
/**
 * Assembles the options object handed to the posts browse call for this feed request.
 *
 * @param {Object} req - request object, used to detect tag / author routes
 * @param {number} pageParam - requested page; omitted from the result when falsy
 * @param {string} slugParam - tag or author slug taken from the route
 * @returns {Object} options with optional `page`, `tag`, `author` and a fixed `include`
 */
function getOptions(req, pageParam, slugParam) {
    var query = {},
        // checked in this order; the slug is applied to whichever channel type matches
        channels = [['tag', isTag], ['author', isAuthor]];

    if (pageParam) {
        query.page = pageParam;
    }

    channels.forEach(function (channel) {
        var optionName = channel[0],
            matchesRoute = channel[1];

        if (matchesRoute(req)) {
            query[optionName] = slugParam;
        }
    });

    query.include = 'author,tags,fields';

    return query;
}
/**
 * Fetches everything needed to build a feed: the blog title, description and permalinks
 * settings plus the page of posts selected by `options`, all requested in parallel.
 *
 * For tag and author feeds the title is prefixed with "<name> - ", using the tag / author
 * that the posts result reports under `meta.filters`. The prefix is skipped when the result
 * carries no such filter information.
 *
 * @param {Object} options - posts browse options (see getOptions); `tag` / `author` select the channel
 * @returns {Promise<Object>} resolves with `{title, description, permalinks, results}`
 */
function getData(options) {
    var ops = {
        title: api.settings.read('title'),
        description: api.settings.read('description'),
        permalinks: api.settings.read('permalinks'),
        results: api.posts.browse(options)
    };

    return Promise.props(ops).then(function (result) {
        var meta = result.results.meta || {},
            appliedFilters = meta.filters || {},
            // getOptions sets `tag` (singular); `tags` is still honoured for older callers
            isTagFeed = options.tag || options.tags,
            titleStart = '';

        if (isTagFeed && appliedFilters.tags && appliedFilters.tags[0]) {
            titleStart = appliedFilters.tags[0].name + ' - ';
        } else if (!isTagFeed && options.author && appliedFilters.author) {
            titleStart = appliedFilters.author.name + ' - ';
        }

        return {
            title: titleStart + result.title.settings[0].value,
            description: result.description.settings[0].value,
            permalinks: result.permalinks.settings[0],
            results: result.results
        };
    });
}
/**
 * Builds the site-relative path of the first page of the feed being requested:
 * `<subdir>/rss/` for the index, `<subdir>/<keyword>/<slug>/rss/` for tag and author feeds.
 *
 * @param {Object} req - request object, used to detect tag / author routes
 * @param {string} slugParam - tag or author slug taken from the route
 * @returns {string} the feed's base path, including the configured subdirectory
 */
function getBaseUrl(req, slugParam) {
    var subdir = config.paths.subdir,
        channelPath = '';

    if (isTag(req)) {
        channelPath = '/' + config.routeKeywords.tag + '/' + slugParam;
    } else if (isAuthor(req)) {
        channelPath = '/' + config.routeKeywords.author + '/' + slugParam;
    }

    return subdir + channelPath + '/rss/';
}
/**
 * Removes the blog subdirectory from the front of a root-relative path, but only when it
 * matches a whole path segment ('/blog/x' or '/blog', never '/blogging/x').
 *
 * @param {string} path - root-relative URL taken from the content
 * @param {string} subdir - configured subdirectory ('' when the blog lives at the root)
 * @returns {string} the path without the leading subdirectory, or the path unchanged
 */
function stripSubdir(path, subdir) {
    var rest;

    if (!subdir || path.indexOf(subdir) !== 0) {
        return path;
    }

    rest = path.slice(subdir.length);

    if (rest === '' || subdir.slice(-1) === '/' || rest[0] === '/' || rest[0] === '?' || rest[0] === '#') {
        return rest;
    }

    return path;
}

/**
 * Loads a post's HTML and rewrites every relative `href` / `src` attribute to an absolute
 * URL, so that links and images keep working inside a feed reader.
 *
 * URLs that already carry a protocol, and protocol-relative URLs ('//host/path'), are left
 * untouched. Root-relative URLs ('/x') are resolved against the blog URL; everything else
 * is appended to the post's own URL.
 *
 * @param {string} html - the post's rendered HTML
 * @param {string} siteUrl - absolute blog URL, including any subdirectory
 * @param {string} itemUrl - absolute URL of the post the HTML belongs to
 * @returns {Object} the loaded cheerio document with the attributes rewritten in place
 */
function processUrls(html, siteUrl, itemUrl) {
    var htmlContent = cheerio.load(html, {decodeEntities: false}),
        subdir = config.paths.subdir;

    // convert relative resource urls to absolute
    ['href', 'src'].forEach(function (attributeName) {
        htmlContent('[' + attributeName + ']').each(function (ix, el) {
            var baseUrl,
                attributeValue,
                parsed;

            el = htmlContent(el);
            attributeValue = el.attr(attributeName);

            // if URL is absolute move on to the next element
            try {
                parsed = url.parse(attributeValue);

                if (parsed.protocol) {
                    return;
                }
            } catch (e) {
                // not something we can parse as a URL; leave the attribute as it is
                return;
            }

            // protocol-relative URLs already name their host; prefixing the blog URL would break them
            if (attributeValue.lastIndexOf('//', 0) === 0) {
                return;
            }

            // compose an absolute URL
            // if the relative URL begins with a '/' use the blog URL (including sub-directory)
            // as the base URL, otherwise use the post's URL.
            baseUrl = attributeValue[0] === '/' ? siteUrl : itemUrl;

            // prevent double subdirectories (siteUrl already contains the subdirectory)
            attributeValue = stripSubdir(attributeValue, subdir);

            // prevent double slashes
            if (baseUrl.slice(-1) === '/' && attributeValue[0] === '/') {
                attributeValue = attributeValue.substr(1);
            }

            el.attr(attributeName, baseUrl + attributeValue);
        });
    });

    return htmlContent;
}
/**
 * Returns the feed XML for `path`, regenerating it only when the feed data has changed.
 *
 * One cache entry is kept per path. An entry is reused while the md5 digest of the
 * serialised data (used purely for change detection) matches the one it was built from.
 * If generation fails, the entry is evicted so that a rejected result is never served
 * from the cache; the next request simply tries again.
 *
 * @param {string} path - cache key for this feed
 * @param {Object} data - feed data as passed to generateFeed
 * @returns {Promise<string>} whatever generateFeed returned for this data (a promise for the XML)
 */
getFeedXml = function (path, data) {
    var dataHash = crypto.createHash('md5').update(JSON.stringify(data)).digest('hex'),
        entry = feedCache[path];

    if (!entry || entry.hash !== dataHash) {
        // We need to regenerate
        entry = {
            hash: dataHash,
            xml: generateFeed(data)
        };
        feedCache[path] = entry;

        // Don't keep a failed generation around. The caller still receives the original
        // rejection; this handler only clears the entry (unless it was replaced meanwhile).
        Promise.resolve(entry.xml).catch(function () {
            if (feedCache[path] === entry) {
                delete feedCache[path];
            }
        });
    }

    return entry.xml;
};
/**
 * Builds the RSS feed for a page of posts and resolves with its XML.
 *
 * Each post becomes one feed item whose full content (with URLs made absolute by
 * processUrls) is carried in a `content:encoded` element. Posts with a cover image also get
 * a `media:content` element, and the image is injected into the content itself. The
 * finished feed is passed through the 'rss.feed' filter before being serialised.
 *
 * @param {Object} data - feed data: title, description, version, feedUrl, siteUrl, permalinks
 *                        and `results.posts`
 * @returns {Promise<string>} resolves with the feed XML
 */
generateFeed = function (data) {
    var channelOptions = {
            title: data.title,
            description: data.description,
            generator: 'Ghost ' + data.version,
            feed_url: data.feedUrl,
            site_url: data.siteUrl,
            ttl: '60',
            custom_namespaces: {
                content: 'http://purl.org/rss/1.0/modules/content/',
                media: 'http://search.yahoo.com/mrss/'
            }
        },
        feed = new RSS(channelOptions);

    // Turns a single post into a feed item and appends it to the feed
    function addPost(post) {
        var postUrl = config.urlFor('post', {post: post, permalinks: data.permalinks}, true),
            $content = processUrls(post.html, data.siteUrl, postUrl),
            extras = [],
            entry,
            coverUrl;

        // The fallback description is cut from the content before the cover image is injected
        entry = {
            title: post.title,
            description: post.meta_description || downsize($content.html(), {words: 50}),
            guid: post.uuid,
            url: postUrl,
            date: post.published_at,
            categories: _.pluck(post.tags, 'name'),
            author: post.author ? post.author.name : null,
            custom_elements: extras
        };

        if (post.image) {
            coverUrl = config.urlFor('image', {image: post.image}, true);

            // Add a media content tag
            extras.push({
                'media:content': {
                    _attr: {
                        url: coverUrl,
                        medium: 'image'
                    }
                }
            });

            // Also add the image to the content, because not all readers support media:content
            $content('p').first().before('<img src="' + coverUrl + '" />');
            $content('img').attr('alt', post.title);
        }

        extras.push({
            'content:encoded': {
                _cdata: $content.html()
            }
        });

        feed.item(entry);
    }

    data.results.posts.forEach(function (post) {
        addPost(post);
    });

    return filters.doFilter('rss.feed', feed).then(function (filteredFeed) {
        return filteredFeed.xml();
    });
};
/**
 * Express-style handler that serves the RSS feed for the index, a tag or an author.
 *
 * - An unparsable page, a page below 1, or an explicit page 1 on a paginated route is
 *   redirected to the feed's base URL.
 * - A page beyond the last one is redirected to the last page.
 * - Otherwise the feed XML is obtained through getFeedXml (keyed by the matched route
 *   pattern) and sent as 'text/xml; charset=UTF-8'.
 * Failures while loading data or generating the feed are handed to `next`.
 *
 * @param {Object} req - request; reads `params.page`, `params.slug`, `route.path`, `secure`
 * @param {Object} res - response; reads `locals.safeVersion`
 * @param {Function} next - receives any error
 * @returns {*} the result of the redirect, or a promise for the response being sent
 */
generate = function (req, res, next) {
    // Initialize RSS
    var requestedPage = req.params.page,
        slug = req.params.slug,
        pageNumber = requestedPage !== undefined ? parseInt(requestedPage, 10) : 1,
        feedPath = getBaseUrl(req, slug),
        browseOptions = getOptions(req, pageNumber, slug),
        isBadPage = isNaN(pageNumber) || pageNumber < 1;

    function sendXml(feedXml) {
        res.set('Content-Type', 'text/xml; charset=UTF-8');
        res.send(feedXml);
    }

    function respond(data) {
        var lastPage = data.results.meta.pagination.pages;

        // If page is greater than number of pages we have, redirect to last page
        if (pageNumber > lastPage) {
            return res.redirect(feedPath + lastPage + '/');
        }

        // Everything the generator needs travels on `data`, so it is also part of the cache hash
        data.version = res.locals.safeVersion;
        data.siteUrl = config.urlFor('home', {secure: req.secure}, true);
        data.feedUrl = config.urlFor({relativeUrl: feedPath, secure: req.secure}, true);

        return getFeedXml(req.route.path, data).then(sendXml);
    }

    // No negative pages, or page 1
    if (isBadPage || (pageNumber === 1 && isPaginated(req))) {
        return res.redirect(feedPath);
    }

    return getData(browseOptions).then(respond).catch(handleError(next));
};
module.exports = generate;

View file

@ -261,8 +261,7 @@ ghost_head = function (options) {
var self = this,
useStructuredData = !config.isPrivacyDisabled('useStructuredData'),
head = [],
majorMinor = /^(\d+\.)?(\d+)/,
trimmedVersion = this.version,
safeVersion = this.safeVersion,
ops = [],
structuredData,
schema,
@ -270,8 +269,6 @@ ghost_head = function (options) {
context = self.context[0],
contextObject = self[context] || blog;
trimmedVersion = trimmedVersion ? trimmedVersion.match(majorMinor)[0] : '?';
// Push Async calls to an array of promises
ops.push(urlHelper.call(self, {hash: {absolute: true}}));
ops.push(meta_description.call(self, options));
@ -312,7 +309,7 @@ ghost_head = function (options) {
finaliseSchema(schema, head);
}
head.push('<meta name="generator" content="Ghost ' + trimmedVersion + '" />');
head.push('<meta name="generator" content="Ghost ' + safeVersion + '" />');
head.push('<link rel="alternate" type="application/rss+xml" title="' +
title + '" href="' + config.urlFor('rss', null, true) + '" />');
}).then(function () {

View file

@ -36,6 +36,7 @@ function ghostLocals(req, res, next) {
// Make sure we have a locals value.
res.locals = res.locals || {};
res.locals.version = config.ghostVersion;
res.locals.safeVersion = config.ghostVersion.match(/^(\d+\.)?(\d+)/)[0];
// relative path from the URL
res.locals.relativeUrl = req.path;

View file

@ -55,7 +55,7 @@ describe('{{ghost_head}} helper', function () {
it('returns meta tag string on paginated index page without structured data and schema', function (done) {
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/page/2/', context: ['paged', 'index']},
{safeVersion: '0.3', relativeUrl: '/page/2/', context: ['paged', 'index']},
{data: {root: {context: ['paged', 'index']}}}
).then(function (rendered) {
should.exist(rendered);
@ -71,7 +71,7 @@ describe('{{ghost_head}} helper', function () {
it('returns structured data on first index page', function (done) {
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/', context: ['home', 'index']},
{safeVersion: '0.3', relativeUrl: '/', context: ['home', 'index']},
{data: {root: {context: ['home', 'index']}}}
).then(function (rendered) {
should.exist(rendered);
@ -110,7 +110,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/tag/tagtitle/', tag: tag, context: ['tag']},
{safeVersion: '0.3', relativeUrl: '/tag/tagtitle/', tag: tag, context: ['tag']},
{data: {root: {context: ['tag']}}}
).then(function (rendered) {
should.exist(rendered);
@ -151,7 +151,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/tag/tagtitle/', tag: tag, context: ['tag']},
{safeVersion: '0.3', relativeUrl: '/tag/tagtitle/', tag: tag, context: ['tag']},
{data: {root: {context: ['tag']}}}
).then(function (rendered) {
should.exist(rendered);
@ -191,7 +191,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/tag/tagtitle/', tag: tag, context: ['tag']},
{safeVersion: '0.3', relativeUrl: '/tag/tagtitle/', tag: tag, context: ['tag']},
{data: {root: {context: ['tag']}}}
).then(function (rendered) {
should.exist(rendered);
@ -212,7 +212,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/tag/tagtitle/page/2/', tag: tag, context: ['paged', 'tag']},
{safeVersion: '0.3', relativeUrl: '/tag/tagtitle/page/2/', tag: tag, context: ['paged', 'tag']},
{data: {root: {context: ['tag']}}}
).then(function (rendered) {
should.exist(rendered);
@ -237,7 +237,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/author/AuthorName/', author: author, context: ['author']},
{safeVersion: '0.3', relativeUrl: '/author/AuthorName/', author: author, context: ['author']},
{data: {root: {context: ['author']}}}
).then(function (rendered) {
should.exist(rendered);
@ -279,7 +279,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/author/AuthorName/page/2/', author: author, context: ['paged', 'author']},
{safeVersion: '0.3', relativeUrl: '/author/AuthorName/page/2/', author: author, context: ['paged', 'author']},
{data: {root: {context: ['paged', 'author']}}}
).then(function (rendered) {
should.exist(rendered);
@ -293,9 +293,9 @@ describe('{{ghost_head}} helper', function () {
}).catch(done);
});
it('returns meta tag string even if version is invalid', function (done) {
it('returns meta tag string even if safeVersion is invalid', function (done) {
helpers.ghost_head.call(
{version: '0.9', context: []},
{safeVersion: '0.9', context: []},
{data: {root: {context: []}}}
).then(function (rendered) {
should.exist(rendered);
@ -326,7 +326,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{relativeUrl: '/post/', version: '0.3.0', context: ['post'], post: post},
{relativeUrl: '/post/', safeVersion: '0.3', context: ['post'], post: post},
{data: {root: {context: ['post']}}}
).then(function (rendered) {
var re1 = new RegExp('<meta property="article:published_time" content="' + post.published_at),
@ -398,7 +398,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{relativeUrl: '/post/', version: '0.3.0', context: ['post'], post: post},
{relativeUrl: '/post/', safeVersion: '0.3', context: ['post'], post: post},
{data: {root: {context: ['post']}}}
).then(function (rendered) {
var re1 = new RegExp('<meta property="article:published_time" content="' + post.published_at),
@ -469,7 +469,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{relativeUrl: '/post/', version: '0.3.0', context: ['post'], post: post},
{relativeUrl: '/post/', safeVersion: '0.3', context: ['post'], post: post},
{data: {root: {context: ['post']}}}).then(function (rendered) {
var re1 = new RegExp('<meta property="article:published_time" content="' + post.published_at),
re2 = new RegExp('<meta property="article:modified_time" content="' + post.updated_at),
@ -537,7 +537,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{relativeUrl: '/post/', version: '0.3.0', context: ['post'], post: post},
{relativeUrl: '/post/', safeVersion: '0.3', context: ['post'], post: post},
{data: {root: {context: ['post']}}}
).then(function (rendered) {
var re1 = new RegExp('<meta property="article:published_time" content="' + post.published_at),
@ -589,7 +589,7 @@ describe('{{ghost_head}} helper', function () {
it('returns canonical URL', function (done) {
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/about/', context: ['page']},
{safeVersion: '0.3', relativeUrl: '/about/', context: ['page']},
{data: {root: {context: ['page']}}}
).then(function (rendered) {
should.exist(rendered);
@ -605,7 +605,7 @@ describe('{{ghost_head}} helper', function () {
it('returns next & prev URL correctly for middle page', function (done) {
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/page/3/', context: ['paged', 'index'], pagination: {next: '4', prev: '2'}},
{safeVersion: '0.3', relativeUrl: '/page/3/', context: ['paged', 'index'], pagination: {next: '4', prev: '2'}},
{data: {root: {context: ['index', 'paged'], pagination: {total: 4, page: 3, next: 4, prev: 2}}}}
).then(function (rendered) {
should.exist(rendered);
@ -623,7 +623,7 @@ describe('{{ghost_head}} helper', function () {
it('returns next & prev URL correctly for second page', function (done) {
helpers.ghost_head.call(
{version: '0.3.0', relativeUrl: '/page/2/', context: ['paged', 'index'], pagination: {next: '3', prev: '1'}},
{safeVersion: '0.3', relativeUrl: '/page/2/', context: ['paged', 'index'], pagination: {next: '3', prev: '1'}},
{data: {root: {context: ['index', 'paged'], pagination: {total: 3, page: 2, next: 3, prev: 1}}}}
).then(function (rendered) {
should.exist(rendered);
@ -657,7 +657,7 @@ describe('{{ghost_head}} helper', function () {
it('returns correct rss url with subdirectory', function (done) {
helpers.ghost_head.call(
{version: '0.3.0', context: ['paged', 'index']},
{safeVersion: '0.3', context: ['paged', 'index']},
{data: {root: {context: []}}}
).then(function (rendered) {
should.exist(rendered);
@ -717,7 +717,7 @@ describe('{{ghost_head}} helper', function () {
};
helpers.ghost_head.call(
{relativeUrl: '/post/', version: '0.3.0', context: ['post'], post: post},
{relativeUrl: '/post/', safeVersion: '0.3', context: ['post'], post: post},
{data: {root: {context: ['post']}}}
).then(function (rendered) {
should.exist(rendered);
@ -758,7 +758,7 @@ describe('{{ghost_head}} helper', function () {
it('returns meta tag plus injected code', function (done) {
helpers.ghost_head.call(
{version: '0.3.0', context: ['paged', 'index'], post: false},
{safeVersion: '0.3', context: ['paged', 'index'], post: false},
{data: {root: {context: []}}}
).then(function (rendered) {
should.exist(rendered);