From 98d0954e32ce230ff6e12c465a5eadf1378e54c4 Mon Sep 17 00:00:00 2001 From: Mo Valipour Date: Fri, 12 Sep 2014 15:40:34 +0100 Subject: [PATCH] Fix URL replacement in RSS feed mucking with content closes #3983 - removed naive regex implementation - added cheerio to project dependencies - used cheerio to parse RSS content - use attribute getter/setter to replace href/src attribute urls with the resolved version --- core/server/controllers/frontend.js | 27 +++++++++++++-------------- package.json | 1 + 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/core/server/controllers/frontend.js b/core/server/controllers/frontend.js index 497f870d0f..8723ca414f 100644 --- a/core/server/controllers/frontend.js +++ b/core/server/controllers/frontend.js @@ -14,6 +14,7 @@ var moment = require('moment'), filters = require('../../server/filters'), template = require('../helpers/template'), errors = require('../errors'), + cheerio = require('cheerio'), frontendControllers, staticPostPermalink, @@ -485,23 +486,21 @@ frontendControllers = { categories: _.pluck(post.tags, 'name'), author: post.author ? post.author.name : null }, - content = post.html; + htmlContent = cheerio.load(post.html, { decodeEntities: false }); - //set img src to absolute url - content = content.replace(/src=["|'|\s]?([\w\/\?\$\.\+\-;%:@&=,_]+)["|'|\s]?/gi, function (match, p1) { - /*jslint unparam:true*/ - p1 = url.resolve(siteUrl, p1); - return "src='" + p1 + "' "; + // convert relative resource urls to absolute + ['href', 'src'].forEach(function (attributeName) { + htmlContent('[' + attributeName + ']').each(function (ix, el) { + el = htmlContent(el); + + var attributeValue = el.attr(attributeName); + attributeValue = url.resolve(siteUrl, attributeValue); + + el.attr(attributeName, attributeValue); + }); }); - //set a href to absolute url - content = content.replace(/href=["|'|\s]?([\w\/\?\$\.\+\-;%:@&=,_]+)["|'|\s]?/gi, function (match, p1) { - /*jslint unparam:true*/ - p1 = url.resolve(siteUrl, p1); - return "href='" + p1 + "' "; - }); - - item.description = content; + item.description = htmlContent.html(); feed.item(item); }); }).then(function () { diff --git a/package.json b/package.json index 874f6e4b95..0285dbb267 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "body-parser": "1.6.3", "bookshelf": "0.7.6", "busboy": "0.2.3", + "cheerio": "0.17.0", "colors": "0.6.2", "compression": "^1.0.2", "connect": "3.0.0-rc.1",