From 2659e5aa40605a161a67062224ef58d69f6c11be Mon Sep 17 00:00:00 2001 From: Daniel Lockyer Date: Tue, 7 May 2024 17:07:41 +0200 Subject: [PATCH] Added handling for parsing errors with user-submitted HTML fix https://linear.app/tryghost/issue/SLO-87/cannot-read-properties-of-undefined-reading-createimpl-an-unexpected refs https://github.com/jsdom/jsdom/issues/3709 - in the event we are given some HTML to parse, and that fails, we currently return a HTTP 500 because it's unhandled - the instance we saw was due to `` crashing jsdom, we've opened an issue for that - in terms of handling the error gracefully, we can surround the code in a try-catch and return a more suitable error. I've gone for a ValidationError for now - you could debate whether a different one is more appropriate - also added Sentry error capturing so we're not blind to these, ultimately we should make sure the parser can handle all user-submitted data --- .../utils/serializers/input/pages.js | 34 ++++++++++++++++-- .../utils/serializers/input/posts.js | 32 +++++++++++++++-- .../utils/serializers/input/pages.test.js | 35 +++++++++++++++++++ .../utils/serializers/input/posts.test.js | 35 +++++++++++++++++++ 4 files changed, 131 insertions(+), 5 deletions(-) diff --git a/ghost/core/core/server/api/endpoints/utils/serializers/input/pages.js b/ghost/core/core/server/api/endpoints/utils/serializers/input/pages.js index 06b4681e69..52ec3626b3 100644 --- a/ghost/core/core/server/api/endpoints/utils/serializers/input/pages.js +++ b/ghost/core/core/server/api/endpoints/utils/serializers/input/pages.js @@ -1,12 +1,20 @@ const _ = require('lodash'); const debug = require('@tryghost/debug')('api:endpoints:utils:serializers:input:pages'); -const mobiledoc = require('../../../../../lib/mobiledoc'); +const {ValidationError} = require('@tryghost/errors'); +const tpl = require('@tryghost/tpl'); const url = require('./utils/url'); const slugFilterOrder = require('./utils/slug-filter-order'); const localUtils = require('../../index'); +const mobiledoc = require('../../../../../lib/mobiledoc'); const postsMetaSchema = require('../../../../../data/schema').tables.posts_meta; const clean = require('./utils/clean'); const lexical = require('../../../../../lib/lexical'); +const sentry = require('../../../../../../shared/sentry'); + +const messages = { + failedHtmlToMobiledoc: 'Failed to convert HTML to Mobiledoc', + failedHtmlToLexical: 'Failed to convert HTML to Lexical' +}; function removeSourceFormats(frame) { if (frame.options.formats?.includes('mobiledoc') || frame.options.formats?.includes('lexical')) { @@ -136,7 +144,17 @@ module.exports = { if (process.env.CI) { console.time('htmlToMobiledocConverter (page)'); // eslint-disable-line no-console } - frame.data.pages[0].mobiledoc = JSON.stringify(mobiledoc.htmlToMobiledocConverter(html)); + + try { + frame.data.pages[0].mobiledoc = JSON.stringify(mobiledoc.htmlToMobiledocConverter(html)); + } catch (err) { + sentry.captureException(err); + throw new ValidationError({ + message: tpl(messages.failedHtmlToMobiledoc), + err + }); + } + if (process.env.CI) { console.timeEnd('htmlToMobiledocConverter (page)'); // eslint-disable-line no-console } @@ -146,7 +164,17 @@ module.exports = { if (process.env.CI) { console.time('htmlToLexicalConverter (page)'); // eslint-disable-line no-console } - frame.data.pages[0].lexical = JSON.stringify(lexical.htmlToLexicalConverter(html)); + + try { + frame.data.pages[0].lexical = JSON.stringify(lexical.htmlToLexicalConverter(html)); + } catch (err) { + sentry.captureException(err); + throw new ValidationError({ + message: tpl(messages.failedHtmlToLexical), + err + }); + } + if (process.env.CI) { console.timeEnd('htmlToLexicalConverter (page)'); // eslint-disable-line no-console } diff --git a/ghost/core/core/server/api/endpoints/utils/serializers/input/posts.js b/ghost/core/core/server/api/endpoints/utils/serializers/input/posts.js index faf968afff..66767b9514 100644 --- a/ghost/core/core/server/api/endpoints/utils/serializers/input/posts.js +++ b/ghost/core/core/server/api/endpoints/utils/serializers/input/posts.js @@ -1,5 +1,7 @@ const _ = require('lodash'); const debug = require('@tryghost/debug')('api:endpoints:utils:serializers:input:posts'); +const {ValidationError} = require('@tryghost/errors'); +const tpl = require('@tryghost/tpl'); const url = require('./utils/url'); const slugFilterOrder = require('./utils/slug-filter-order'); const localUtils = require('../../index'); @@ -7,6 +9,12 @@ const mobiledoc = require('../../../../../lib/mobiledoc'); const postsMetaSchema = require('../../../../../data/schema').tables.posts_meta; const clean = require('./utils/clean'); const lexical = require('../../../../../lib/lexical'); +const sentry = require('../../../../../../shared/sentry'); + +const messages = { + failedHtmlToMobiledoc: 'Failed to convert HTML to Mobiledoc', + failedHtmlToLexical: 'Failed to convert HTML to Lexical' +}; function removeSourceFormats(frame) { if (frame.options.formats?.includes('mobiledoc') || frame.options.formats?.includes('lexical')) { @@ -170,7 +178,17 @@ module.exports = { if (process.env.CI) { console.time('htmlToMobiledocConverter (post)'); // eslint-disable-line no-console } - frame.data.posts[0].mobiledoc = JSON.stringify(mobiledoc.htmlToMobiledocConverter(html)); + + try { + frame.data.posts[0].mobiledoc = JSON.stringify(mobiledoc.htmlToMobiledocConverter(html)); + } catch (err) { + sentry.captureException(err); + throw new ValidationError({ + message: tpl(messages.failedHtmlToMobiledoc), + err + }); + } + if (process.env.CI) { console.timeEnd('htmlToMobiledocConverter (post)'); // eslint-disable-line no-console } @@ -180,7 +198,17 @@ module.exports = { if (process.env.CI) { console.time('htmlToLexicalConverter (post)'); // eslint-disable-line no-console } - frame.data.posts[0].lexical = JSON.stringify(lexical.htmlToLexicalConverter(html)); + + try { + frame.data.posts[0].lexical = JSON.stringify(lexical.htmlToLexicalConverter(html)); + } catch (err) { + sentry.captureException(err); + throw new ValidationError({ + message: tpl(messages.failedHtmlToLexical), + err + }); + } + if (process.env.CI) { console.timeEnd('htmlToLexicalConverter (post)'); // eslint-disable-line no-console } diff --git a/ghost/core/test/unit/api/canary/utils/serializers/input/pages.test.js b/ghost/core/test/unit/api/canary/utils/serializers/input/pages.test.js index a46ea36a20..6a30fd39ca 100644 --- a/ghost/core/test/unit/api/canary/utils/serializers/input/pages.test.js +++ b/ghost/core/test/unit/api/canary/utils/serializers/input/pages.test.js @@ -1,7 +1,14 @@ const should = require('should'); +const sinon = require('sinon'); const serializers = require('../../../../../../../core/server/api/endpoints/utils/serializers'); +const mobiledocLib = require('@tryghost/html-to-mobiledoc'); + describe('Unit: endpoints/utils/serializers/input/pages', function () { + afterEach(function () { + sinon.restore(); + }); + describe('browse', function () { it('default', function () { const apiConfig = {}; @@ -179,6 +186,34 @@ describe('Unit: endpoints/utils/serializers/input/pages', function () { frame.data.pages[0].tags.should.eql([{slug: 'slug1', name: 'hey'}, {slug: 'slug2'}]); }); + it('throws error if HTML conversion fails', function () { + // JSDOM require is sometimes very slow on CI causing random timeouts + this.timeout(4000); + + const frame = { + options: { + source: 'html' + }, + data: { + posts: [ + { + id: 'id1', + html: '' + } + ] + } + }; + + sinon.stub(mobiledocLib, 'toMobiledoc').throws(new Error('Some error')); + + try { + serializers.input.posts.edit({}, frame); + should.fail('Error expected'); + } catch (err) { + err.message.should.eql('Failed to convert HTML to Mobiledoc'); + } + }); + describe('Ensure relations format', function () { it('relations is array of objects', function () { const apiConfig = {}; diff --git a/ghost/core/test/unit/api/canary/utils/serializers/input/posts.test.js b/ghost/core/test/unit/api/canary/utils/serializers/input/posts.test.js index 4da0c801ee..5d89afbf7f 100644 --- a/ghost/core/test/unit/api/canary/utils/serializers/input/posts.test.js +++ b/ghost/core/test/unit/api/canary/utils/serializers/input/posts.test.js @@ -1,7 +1,14 @@ const should = require('should'); +const sinon = require('sinon'); const serializers = require('../../../../../../../core/server/api/endpoints/utils/serializers'); +const mobiledocLib = require('@tryghost/html-to-mobiledoc'); + describe('Unit: endpoints/utils/serializers/input/posts', function () { + afterEach(function () { + sinon.restore(); + }); + describe('browse', function () { it('default', function () { const apiConfig = {}; @@ -288,6 +295,34 @@ describe('Unit: endpoints/utils/serializers/input/posts', function () { let postData = frame.data.posts[0]; postData.lexical.should.equal('{"root":{"children":[{"children":[{"detail":0,"format":0,"mode":"normal","style":"","text":"this is great feature","type":"extended-text","version":1}],"direction":null,"format":"","indent":0,"type":"paragraph","version":1},{"type":"html","version":1,"html":"
My Custom HTML
"},{"children":[{"detail":0,"format":0,"mode":"normal","style":"","text":"custom html preserved!","type":"extended-text","version":1}],"direction":null,"format":"","indent":0,"type":"paragraph","version":1}],"direction":null,"format":"","indent":0,"type":"root","version":1}}'); }); + + it('throws error when HTML conversion fails', function () { + // JSDOM require is sometimes very slow on CI causing random timeouts + this.timeout(4000); + + const frame = { + options: { + source: 'html' + }, + data: { + posts: [ + { + id: 'id1', + html: '' + } + ] + } + }; + + sinon.stub(mobiledocLib, 'toMobiledoc').throws(new Error('Some error')); + + try { + serializers.input.posts.edit({}, frame); + should.fail('Error expected'); + } catch (err) { + err.message.should.eql('Failed to convert HTML to Mobiledoc'); + } + }); }); it('tags relation is stripped of unknown properties', function () {