From 957f51e6778ae625bb07e2c42e1a60a1173a8019 Mon Sep 17 00:00:00 2001 From: Ryan McCarvill Date: Wed, 24 May 2017 02:15:32 +1200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9D=20Allow=20unbalanced=20HTML=20in?= =?UTF-8?q?=20markdown=20card.=20(#8320)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit no issue The simpledom interpreter that the Mobiledoc DOM renderer uses does not allow for unbalanced or incorrect HTML such as that which is entered by a user. This PR adds a step where the HTML is sanitised and balanced before being passed to simpledom. - use latest jsdom (+pin version), update yarn.lock, add comments - don't use node-4 incompatible shorthand method definition - grab content rather than document content - update markdown card specs to match markdown-it behaviour - revert to jsdom 9.12.0 for node 4.x support, close window to free memory - moved 3rd party libs into render function --- .../apps/default-cards/cards/markdown.js | 41 ++++++- .../apps/default-cards/tests/markdown_spec.js | 14 +++ package.json | 1 + yarn.lock | 111 +++++++++++++++++- 4 files changed, 157 insertions(+), 10 deletions(-) diff --git a/core/server/apps/default-cards/cards/markdown.js b/core/server/apps/default-cards/cards/markdown.js index ac204b4c04..bff789b32a 100644 --- a/core/server/apps/default-cards/cards/markdown.js +++ b/core/server/apps/default-cards/cards/markdown.js @@ -1,16 +1,45 @@ -var SimpleDom = require('simple-dom'), - tokenizer = require('simple-html-tokenizer').tokenize, - markdownConverter = require('../../../utils/markdown-converter'), - parser; +var markdownConverter = require('../../../utils/markdown-converter'); module.exports = { name: 'card-markdown', type: 'dom', - render(opts) { + render: function (opts) { + var SimpleDom = require('simple-dom'), + tokenizer = require('simple-html-tokenizer').tokenize, + jsdom = require('jsdom').jsdom, + html, doc, parser, sanitizedHTML; + + // markdown can be autosaved at any point 
by the client, even when + // writing HTML so you can end up with unbalanced HTML elements + // + // mobiledoc uses simple-dom to build a DOM object. simple-dom is + // purposefully very basic and only designed to handle valid HTML, + // if it's fed unbalanced or invalid HTML it will throw an error. + // + // to work around the possibility of having invalid HTML we first + // pass the HTML through jsdom which seeks to fully emulate the + // WHATWG DOM/HTML standards including the ability to handle + // unbalanced HTML in the same way a browser does + html = markdownConverter.render(opts.payload.markdown || ''); + doc = jsdom(html, { + features: { + FetchExternalResources: false, + ProcessExternalResources: false + } + }); + + // grab the rendered + sanitized body HTML + sanitizedHTML = doc.body.innerHTML; + + // free up memory by closing the jsdom "window" + doc.defaultView.close(); + parser = new SimpleDom.HTMLParser(tokenizer, opts.env.dom, SimpleDom.voidMap); + + // generate a new SimpleDom object from the sanitized HTML return parser.parse('' + '
' - + markdownConverter.render(opts.payload.markdown || '') + + sanitizedHTML + '
' ); } diff --git a/core/server/apps/default-cards/tests/markdown_spec.js b/core/server/apps/default-cards/tests/markdown_spec.js index 0bea0f9b88..1060da6f16 100644 --- a/core/server/apps/default-cards/tests/markdown_spec.js +++ b/core/server/apps/default-cards/tests/markdown_spec.js @@ -17,4 +17,18 @@ describe('Markdown card', function () { var serializer = new SimpleDom.HTMLSerializer([]); serializer.serialize(card.render(opts)).should.match('

HEADING

\n\n
'); }); + + it('Accepts invalid HTML in markdown', function () { + opts = { + env: { + dom: new SimpleDom.Document() + }, + payload: { + markdown: '#HEADING\r\n

Heading 2>' + } + }; + + var serializer = new SimpleDom.HTMLSerializer([]); + serializer.serialize(card.render(opts)).should.match('

HEADING

\n

Heading 2>

'); + }); }); diff --git a/package.json b/package.json index 56e80c5185..a9f3c69fbb 100644 --- a/package.json +++ b/package.json @@ -58,6 +58,7 @@ "image-size": "0.5.2", "intl": "1.2.5", "intl-messageformat": "1.3.0", + "jsdom": "9.12.0", "jsonpath": "0.2.11", "knex": "0.12.9", "knex-migrator": "2.0.16", diff --git a/yarn.lock b/yarn.lock index e6d27064d9..b362335ddf 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10,6 +10,10 @@ JSONSelect@0.4.0: version "4.0.2" resolved "https://registry.yarnpkg.com/JSV/-/JSV-4.0.2.tgz#d077f6825571f82132f9dffaed587b4029feff57" +abab@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/abab/-/abab-1.0.3.tgz#b81de5f7274ec4e756d797cd834f303642724e5d" + abbrev@1, abbrev@1.0.x: version "1.0.9" resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.0.9.tgz#91b4792588a7738c25f35dd6f63752a2f8776135" @@ -21,6 +25,16 @@ accepts@~1.3.3: mime-types "~2.1.11" negotiator "0.6.1" +acorn-globals@^3.1.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/acorn-globals/-/acorn-globals-3.1.0.tgz#fd8270f71fbb4996b004fa880ee5d46573a731bf" + dependencies: + acorn "^4.0.4" + +acorn@^4.0.4: + version "4.0.11" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-4.0.11.tgz#edcda3bd937e7556410d42ed5860f67399c794c0" + addressparser@~0.3.2: version "0.3.2" resolved "https://registry.yarnpkg.com/addressparser/-/addressparser-0.3.2.tgz#59873f35e8fcf6c7361c10239261d76e15348bb2" @@ -142,6 +156,10 @@ array-differ@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/array-differ/-/array-differ-1.0.0.tgz#eff52e3758249d33be402b8bb8e564bb2b5d4031" +array-equal@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/array-equal/-/array-equal-1.0.0.tgz#8c2a5ef2472fd9ea742b04c77a75093ba2757c93" + array-find-index@^1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/array-find-index/-/array-find-index-1.0.2.tgz#df010aa1287e164bbda6f9723b0a96a1ec4187a1" @@ -775,6 +793,10 @@ content-disposition@0.5.2: version "0.5.2" 
resolved "https://registry.yarnpkg.com/content-disposition/-/content-disposition-0.5.2.tgz#0cf68bb9ddf5f2be7961c3a85178cb85dba78cb4" +content-type-parser@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/content-type-parser/-/content-type-parser-1.0.1.tgz#c3e56988c53c65127fb46d4032a3a900246fdc94" + content-type@^1.0.2, content-type@~1.0.1, content-type@~1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/content-type/-/content-type-1.0.2.tgz#b7d113aee7a8dd27bd21133c4dc2529df1721eed" @@ -902,6 +924,16 @@ csso@~2.3.1: clap "^1.0.9" source-map "^0.5.3" +cssom@0.3.x, "cssom@>= 0.3.2 < 0.4.0": + version "0.3.2" + resolved "https://registry.yarnpkg.com/cssom/-/cssom-0.3.2.tgz#b8036170c79f07a90ff2f16e22284027a243848b" + +"cssstyle@>= 0.2.37 < 0.3.0": + version "0.2.37" + resolved "https://registry.yarnpkg.com/cssstyle/-/cssstyle-0.2.37.tgz#541097234cb2513c83ceed3acddc27ff27987d54" + dependencies: + cssom "0.3.x" + cst@^0.4.3: version "0.4.9" resolved "https://registry.yarnpkg.com/cst/-/cst-0.4.9.tgz#51af14213bf5f8e8e715966ac645e1e2a56c6834" @@ -1216,7 +1248,7 @@ escodegen@0.0.21: optionalDependencies: source-map ">= 0.1.2" -escodegen@1.8.x: +escodegen@1.8.x, escodegen@^1.6.1: version "1.8.1" resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-1.8.1.tgz#5a5b53af4693110bebb0867aa3430dd3b70a1018" dependencies: @@ -2253,6 +2285,12 @@ html-comment-regex@^1.1.0: version "1.1.1" resolved "https://registry.yarnpkg.com/html-comment-regex/-/html-comment-regex-1.1.1.tgz#668b93776eaae55ebde8f3ad464b307a4963625e" +html-encoding-sniffer@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/html-encoding-sniffer/-/html-encoding-sniffer-1.0.1.tgz#79bf7a785ea495fe66165e734153f363ff5437da" + dependencies: + whatwg-encoding "^1.0.1" + html-to-text@3.2.0: version "3.2.0" resolved "https://registry.yarnpkg.com/html-to-text/-/html-to-text-3.2.0.tgz#0dfa5d27ff816b07281c79eaf60d408744ac6d89" @@ -2736,6 +2774,30 @@ jsdoctypeparser@~1.2.0: 
dependencies: lodash "^3.7.0" +jsdom@9.12.0: + version "9.12.0" + resolved "https://registry.yarnpkg.com/jsdom/-/jsdom-9.12.0.tgz#e8c546fffcb06c00d4833ca84410fed7f8a097d4" + dependencies: + abab "^1.0.3" + acorn "^4.0.4" + acorn-globals "^3.1.0" + array-equal "^1.0.0" + content-type-parser "^1.0.1" + cssom ">= 0.3.2 < 0.4.0" + cssstyle ">= 0.2.37 < 0.3.0" + escodegen "^1.6.1" + html-encoding-sniffer "^1.0.1" + nwmatcher ">= 1.3.9 < 2.0.0" + parse5 "^1.5.1" + request "^2.79.0" + sax "^1.2.1" + symbol-tree "^3.2.1" + tough-cookie "^2.3.2" + webidl-conversions "^4.0.0" + whatwg-encoding "^1.0.1" + whatwg-url "^4.3.0" + xml-name-validator "^2.0.1" + jshint-stylish@2.2.1: version "2.2.1" resolved "https://registry.yarnpkg.com/jshint-stylish/-/jshint-stylish-2.2.1.tgz#242082a2c035ae03fd81044e0570cc4208cf6e61" @@ -3719,6 +3781,10 @@ number-is-nan@^1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/number-is-nan/-/number-is-nan-1.0.1.tgz#097b602b53422a522c1afb8790318336941a011d" +"nwmatcher@>= 1.3.9 < 2.0.0": + version "1.3.9" + resolved "https://registry.yarnpkg.com/nwmatcher/-/nwmatcher-1.3.9.tgz#8bab486ff7fa3dfd086656bbe8b17116d3692d2a" + oauth-sign@~0.8.1: version "0.8.2" resolved "https://registry.yarnpkg.com/oauth-sign/-/oauth-sign-0.8.2.tgz#46a6ab7f0aead8deae9ec0565780b7d4efeb9d43" @@ -3860,6 +3926,10 @@ parse-passwd@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/parse-passwd/-/parse-passwd-1.0.0.tgz#6d5b934a456993b23d37f40a382d6f1666a8e5c6" +parse5@^1.5.1: + version "1.5.1" + resolved "https://registry.yarnpkg.com/parse5/-/parse5-1.5.1.tgz#9b7f3b0de32be78dc2401b17573ccaf0f6f59d94" + parseurl@~1.3.0, parseurl@~1.3.1: version "1.3.1" resolved "https://registry.yarnpkg.com/parseurl/-/parseurl-1.3.1.tgz#c8ab8c9223ba34888aa64a297b28853bec18da56" @@ -4552,7 +4622,7 @@ request@2.75.x: tough-cookie "~2.3.0" tunnel-agent "~0.4.1" -request@^2.65.0, request@^2.81.0: +request@^2.65.0, request@^2.79.0, request@^2.81.0: version "2.81.0" resolved 
"https://registry.yarnpkg.com/request/-/request-2.81.0.tgz#c6928946a0e06c5f8d6f8a9333469ffda46298a0" dependencies: @@ -4674,7 +4744,7 @@ sax@0.4.2: version "0.4.2" resolved "https://registry.yarnpkg.com/sax/-/sax-0.4.2.tgz#39f3b601733d6bec97105b242a2a40fd6978ac3c" -sax@>=0.6.0, sax@~1.2.1: +sax@>=0.6.0, sax@^1.2.1, sax@~1.2.1: version "1.2.2" resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.2.tgz#fd8631a23bc7826bef5d871bdb87378c95647828" @@ -5090,6 +5160,10 @@ svgo@^0.7.0: sax "~1.2.1" whet.extend "~0.9.9" +symbol-tree@^3.2.1: + version "3.2.2" + resolved "https://registry.yarnpkg.com/symbol-tree/-/symbol-tree-3.2.2.tgz#ae27db38f660a7ae2e1c3b7d1bc290819b8519e6" + tar-pack@^3.4.0: version "3.4.0" resolved "https://registry.yarnpkg.com/tar-pack/-/tar-pack-3.4.0.tgz#23be2d7f671a8339376cbdb0b8fe3fdebf317984" @@ -5192,12 +5266,16 @@ to-single-quotes@^2.0.0: version "2.0.1" resolved "https://registry.yarnpkg.com/to-single-quotes/-/to-single-quotes-2.0.1.tgz#7cc29151f0f5f2c41946f119f5932fe554170125" -tough-cookie@~2.3.0: +tough-cookie@^2.3.2, tough-cookie@~2.3.0: version "2.3.2" resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.3.2.tgz#f081f76e4c85720e6c37a5faced737150d84072a" dependencies: punycode "^1.4.1" +tr46@~0.0.3: + version "0.0.3" + resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" + trim-newlines@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-1.0.0.tgz#5887966bb582a4503a41eb524f7d35011815a613" @@ -5450,6 +5528,14 @@ watchr@~2.3.3: dependencies: bal-util "~1.18.0" +webidl-conversions@^3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-3.0.1.tgz#24534275e2a7bc6be7bc86611cc16ae0a5654871" + +webidl-conversions@^4.0.0: + version "4.0.1" + resolved "https://registry.yarnpkg.com/webidl-conversions/-/webidl-conversions-4.0.1.tgz#8015a17ab83e7e1b311638486ace81da6ce206a0" + 
websocket-driver@>=0.5.1: version "0.6.5" resolved "https://registry.yarnpkg.com/websocket-driver/-/websocket-driver-0.6.5.tgz#5cb2556ceb85f4373c6d8238aa691c8454e13a36" @@ -5460,6 +5546,19 @@ websocket-extensions@>=0.1.1: version "0.1.1" resolved "https://registry.yarnpkg.com/websocket-extensions/-/websocket-extensions-0.1.1.tgz#76899499c184b6ef754377c2dbb0cd6cb55d29e7" +whatwg-encoding@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/whatwg-encoding/-/whatwg-encoding-1.0.1.tgz#3c6c451a198ee7aec55b1ec61d0920c67801a5f4" + dependencies: + iconv-lite "0.4.13" + +whatwg-url@^4.3.0: + version "4.8.0" + resolved "https://registry.yarnpkg.com/whatwg-url/-/whatwg-url-4.8.0.tgz#d2981aa9148c1e00a41c5a6131166ab4683bbcc0" + dependencies: + tr46 "~0.0.3" + webidl-conversions "^3.0.0" + whet.extend@~0.9.9: version "0.9.9" resolved "https://registry.yarnpkg.com/whet.extend/-/whet.extend-0.9.9.tgz#f877d5bf648c97e5aa542fadc16d6a259b9c11a1" @@ -5532,6 +5631,10 @@ xhr@^2.0.1: parse-headers "^2.0.0" xtend "^4.0.0" +xml-name-validator@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/xml-name-validator/-/xml-name-validator-2.0.1.tgz#4d8b8f1eccd3419aa362061becef515e1e559635" + xml-parse-from-string@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/xml-parse-from-string/-/xml-parse-from-string-1.0.0.tgz#feba5809f3cd2d17d2e4239fa810cd0319fc5da5"