0
Fork 0
mirror of https://github.com/TryGhost/Ghost.git synced 2025-01-06 22:40:14 -05:00

🐛 Fixed HTML escaping when using outbound link tagging (#16380)

fixes https://github.com/TryGhost/Team/issues/2666

- Somehow occurrences of `&map_` got replaced with `↦`
- Disables escaping &, ', " and other HTML characters when not needed
(escaping is already handled by mobiledoc/lexical)
- Bumps unit test coverage of link replacer to 100%
This commit is contained in:
Simon Backx 2023-03-08 16:30:54 +01:00 committed by GitHub
parent 00cda68125
commit 4184b279d2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 116 additions and 80 deletions

View file

@ -21,7 +21,7 @@ Hopefully you don't find it a bore.",
"feature_image_caption": null,
"featured": false,
"frontmatter": null,
"html": "<!--kg-card-begin: markdown--><h1>Static page test is what this is for.</h1><p>Hopefully you don&apos;t find it a bore.</p><!--kg-card-end: markdown-->",
"html": "<!--kg-card-begin: markdown--><h1>Static page test is what this is for.</h1><p>Hopefully you don't find it a bore.</p><!--kg-card-end: markdown-->",
"id": "618ba1ffbe2896088840a6e9",
"meta_description": null,
"meta_title": null,
@ -48,7 +48,7 @@ exports[`Pages Content API Can request page 2: [headers] 1`] = `
Object {
"access-control-allow-origin": "*",
"cache-control": "public, max-age=0",
"content-length": "1088",
"content-length": "1083",
"content-type": "application/json; charset=utf-8",
"content-version": StringMatching /v\\\\d\\+\\\\\\.\\\\d\\+/,
"etag": StringMatching /\\(\\?:W\\\\/\\)\\?"\\(\\?:\\[ !#-\\\\x7E\\\\x80-\\\\xFF\\]\\*\\|\\\\r\\\\n\\[\\\\t \\]\\|\\\\\\\\\\.\\)\\*"/,
@ -90,7 +90,7 @@ Tip: If you're reading any post or page on your site and you notice something yo
"feature_image_caption": null,
"featured": false,
"frontmatter": null,
"html": "<p>Unlike posts, pages in Ghost don&apos;t appear in the main feed. They&apos;re separate, individual pages which only show up when you link to them. Great for content which is important, but separate from your usual posts.</p><p>An about page is a great example of one you might want to set up early on so people can find out more about you, and what you do. Why should people subscribe to your site and become a member? Details help!</p><blockquote><strong>Tip: </strong>If you&apos;re reading any post or page on your site and you notice something you want to edit, you can add <code>/edit</code> to the end of the URL &#x2013; and you&apos;ll be taken directly to the Ghost editor.</blockquote><p>Now tell the world what your site is all about.</p>",
"html": "<p>Unlike posts, pages in Ghost don't appear in the main feed. They're separate, individual pages which only show up when you link to them. Great for content which is important, but separate from your usual posts.</p><p>An about page is a great example of one you might want to set up early on so people can find out more about you, and what you do. Why should people subscribe to your site and become a member? Details help!</p><blockquote><strong>Tip: </strong>If you're reading any post or page on your site and you notice something you want to edit, you can add <code>/edit</code> to the end of the URL and you'll be taken directly to the Ghost editor.</blockquote><p>Now tell the world what your site is all about.</p>",
"id": "6194d3ce51e2700162531a78",
"meta_description": null,
"meta_title": null,
@ -134,7 +134,7 @@ If you prefer to use a contact form, almost all of the great embedded form servi
"feature_image_caption": null,
"featured": false,
"frontmatter": null,
"html": "<p>If you want to set up a contact page for people to be able to reach out to you, the simplest way is to set up a simple page like this and list the different ways people can reach out to you.</p><h3 id=\\"for-example-heres-how-to-reach-us\\">For example, here&apos;s how to reach us!</h3><ul><li><a href=\\"https://twitter.com/ghost?ref=ghost\\">@Ghost</a> on Twitter</li><li><a href=\\"https://www.facebook.com/ghost\\">@Ghost</a> on Facebook</li><li><a href=\\"https://instagram.com/ghost?ref=ghost\\">@Ghost</a> on Instagram</li></ul><p>If you prefer to use a contact form, almost all of the great embedded form services work great with Ghost and are easy to set up:</p><figure class=\\"kg-card kg-image-card\\"><a href=\\"https://ghost.org/integrations/?tag=forms&amp;ref=ghost\\"><img src=\\"https://static.ghost.org/v4.0.0/images/integrations.png\\" class=\\"kg-image\\" alt loading=\\"lazy\\" width=\\"2944\\" height=\\"1716\\"></a></figure>",
"html": "<p>If you want to set up a contact page for people to be able to reach out to you, the simplest way is to set up a simple page like this and list the different ways people can reach out to you.</p><h3 id=\\"for-example-heres-how-to-reach-us\\">For example, here's how to reach us!</h3><ul><li><a href=\\"https://twitter.com/ghost?ref=ghost\\">@Ghost</a> on Twitter</li><li><a href=\\"https://www.facebook.com/ghost\\">@Ghost</a> on Facebook</li><li><a href=\\"https://instagram.com/ghost?ref=ghost\\">@Ghost</a> on Instagram</li></ul><p>If you prefer to use a contact form, almost all of the great embedded form services work great with Ghost and are easy to set up:</p><figure class=\\"kg-card kg-image-card\\"><a href=\\"https://ghost.org/integrations/?tag=forms&ref=ghost\\"><img src=\\"https://static.ghost.org/v4.0.0/images/integrations.png\\" class=\\"kg-image\\" alt loading=\\"lazy\\" width=\\"2944\\" height=\\"1716\\"></a></figure>",
"id": "6194d3ce51e2700162531a79",
"meta_description": null,
"meta_title": null,
@ -174,7 +174,7 @@ Ghost is a non-profit organization, and we give away all our intellectual proper
"feature_image_caption": null,
"featured": false,
"frontmatter": null,
"html": "<p>Oh hey, you clicked every link of our starter content and even clicked this small link in the footer! If you like Ghost and you&apos;re enjoying the product so far, we&apos;d hugely appreciate your support in any way you care to show it.</p><p>Ghost is a non-profit organization, and we give away all our intellectual property as open source software. If you believe in what we do, there are a number of ways you can give us a hand, and we hugely appreciate all of them:</p><ul><li>Contribute code via <a href=\\"https://github.com/tryghost?ref=ghost\\">GitHub</a></li><li>Contribute financially via <a href=\\"https://github.com/sponsors/TryGhost?ref=ghost\\">GitHub Sponsors</a></li><li>Contribute financially via <a href=\\"https://opencollective.com/ghost?ref=ghost\\">Open Collective</a></li><li>Contribute reviews via <strong>writing a blog post</strong></li><li>Contribute good vibes via <strong>telling your friends</strong> about us</li></ul><p>Thanks for checking us out!</p>",
"html": "<p>Oh hey, you clicked every link of our starter content and even clicked this small link in the footer! If you like Ghost and you're enjoying the product so far, we'd hugely appreciate your support in any way you care to show it.</p><p>Ghost is a non-profit organization, and we give away all our intellectual property as open source software. If you believe in what we do, there are a number of ways you can give us a hand, and we hugely appreciate all of them:</p><ul><li>Contribute code via <a href=\\"https://github.com/tryghost?ref=ghost\\">GitHub</a></li><li>Contribute financially via <a href=\\"https://github.com/sponsors/TryGhost?ref=ghost\\">GitHub Sponsors</a></li><li>Contribute financially via <a href=\\"https://opencollective.com/ghost?ref=ghost\\">Open Collective</a></li><li>Contribute reviews via <strong>writing a blog post</strong></li><li>Contribute good vibes via <strong>telling your friends</strong> about us</li></ul><p>Thanks for checking us out!</p>",
"id": "6194d3ce51e2700162531a7b",
"meta_description": null,
"meta_title": null,
@ -211,7 +211,7 @@ You can integrate any products, services, ads or integrations with Ghost yoursel
"feature_image_caption": null,
"featured": false,
"frontmatter": null,
"html": "<p>Wondering how Ghost fares when it comes to privacy and GDPR rules? Good news: Ghost does not use any tracking cookies of any kind.</p><p>You can integrate any products, services, ads or integrations with Ghost yourself if you want to, but it&apos;s always a good idea to disclose how subscriber data will be used by putting together a privacy page.</p>",
"html": "<p>Wondering how Ghost fares when it comes to privacy and GDPR rules? Good news: Ghost does not use any tracking cookies of any kind.</p><p>You can integrate any products, services, ads or integrations with Ghost yourself if you want to, but it's always a good idea to disclose how subscriber data will be used by putting together a privacy page.</p>",
"id": "6194d3ce51e2700162531a7a",
"meta_description": null,
"meta_title": null,
@ -248,7 +248,7 @@ Hopefully you don't find it a bore.",
"feature_image_caption": null,
"featured": false,
"frontmatter": null,
"html": "<!--kg-card-begin: markdown--><h1>Static page test is what this is for.</h1><p>Hopefully you don&apos;t find it a bore.</p><!--kg-card-end: markdown-->",
"html": "<!--kg-card-begin: markdown--><h1>Static page test is what this is for.</h1><p>Hopefully you don't find it a bore.</p><!--kg-card-end: markdown-->",
"id": "618ba1ffbe2896088840a6e9",
"meta_description": null,
"meta_title": null,
@ -275,7 +275,7 @@ exports[`Pages Content API Can request pages 2: [headers] 1`] = `
Object {
"access-control-allow-origin": "*",
"cache-control": "public, max-age=0",
"content-length": "9263",
"content-length": "9209",
"content-type": "application/json; charset=utf-8",
"content-version": StringMatching /v\\\\d\\+\\\\\\.\\\\d\\+/,
"etag": StringMatching /\\(\\?:W\\\\/\\)\\?"\\(\\?:\\[ !#-\\\\x7E\\\\x80-\\\\xFF\\]\\*\\|\\\\r\\\\n\\[\\\\t \\]\\|\\\\\\\\\\.\\)\\*"/,

File diff suppressed because one or more lines are too long

View file

@ -8,7 +8,14 @@ class LinkReplacer {
async replace(html, replaceLink) {
const cheerio = require('cheerio');
try {
const $ = cheerio.load(html);
const $ = cheerio.load(html, {
xml: {
// This makes sure we use the faster and less destructive htmlparser2 parser
xmlMode: false
},
// Do not replace &, ', " and others with HTML entities (is bugged because it replaces &map_ with something weird (&#x21A6;))
decodeEntities: false
}, false);
for (const el of $('a').toArray()) {
const href = $(el).attr('href');

View file

@ -7,7 +7,7 @@
"main": "index.js",
"scripts": {
"dev": "echo \"Implement me!\"",
"test:unit": "NODE_ENV=testing c8 --all --check-coverage --reporter text --reporter cobertura mocha './test/**/*.test.js'",
"test:unit": "NODE_ENV=testing c8 --all --check-coverage --100 --reporter text --reporter cobertura mocha './test/**/*.test.js'",
"test": "yarn test:unit",
"lint:code": "eslint *.js lib/ --ext .js --cache",
"lint": "yarn lint:code && yarn lint:test",

View file

@ -1,7 +1,13 @@
const assert = require('assert');
const linkReplacer = require('../lib/LinkReplacer');
const cheerio = require('cheerio');
const sinon = require('sinon');
describe('LinkReplacementService', function () {
afterEach(function () {
sinon.restore();
});
it('exported', function () {
assert.equal(require('../index'), linkReplacer);
});
@ -15,6 +21,21 @@ describe('LinkReplacementService', function () {
assert.equal(replaced, expected);
});
it('Doesn\'t break weird &map', async function () {
// Refs https://github.com/TryGhost/Team/issues/2666: somehow this gets replaced with https://example.com/test.jpg?test=true↦id=de76 if decoding entities is enabled
const html = '<img src="https://example.com/test.jpg?test=true&map_id=test">';
const expected = '<img src="https://example.com/test.jpg?test=true&map_id=test">';
const replaced = await linkReplacer.replace(html, () => new URL('https://google.com/test-dir?test-query'));
assert.equal(replaced, expected);
});
it('Does not escape HTML characters', async function () {
const html = 'This is a test & this \'should\' not "be" escaped';
const replaced = await linkReplacer.replace(html, () => new URL('https://google.com/test-dir?test-query'));
assert.equal(replaced, html);
});
it('Can replace to string', async function () {
const html = '<a href="http://localhost:2368/dir/path">link</a>';
const expected = '<a href="#valid-string">link</a>';
@ -30,5 +51,13 @@ describe('LinkReplacementService', function () {
const replaced = await linkReplacer.replace(html, () => 'valid');
assert.equal(replaced, expected);
});
it('Ignores cheerio errors', async function () {
sinon.stub(cheerio, 'load').throws(new Error('test'));
const html = '<a href="http://localhost:2368/dir/path">link</a>';
const replaced = await linkReplacer.replace(html, () => 'valid');
assert.equal(replaced, html);
});
});
});