// Patterns used by isIpOrLocalhost(), compiled once instead of on every call.
const IPV4_REGEX = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
const IPV6_REGEX = /:/; // fqdns will not have colons
const HTTP_REGEX = /^https?:/i;
// matches `localhost` itself and any name underneath it (e.g. `foo.localhost`)
const LOCALHOST_REGEX = /(?:^|\.)localhost$/;

// `type` values accepted from a fetched oembed response
const OEMBED_TYPES = ['photo', 'video', 'link', 'rich'];

// the only fields that are ever copied out of a fetched oembed response
const OEMBED_FIELDS = [
    'type',
    'version',
    'html',
    'url',
    'title',
    'width',
    'height',
    'author_name',
    'author_url',
    'provider_name',
    'provider_url',
    'thumbnail_url',
    'thumbnail_width',
    'thumbnail_height'
];

/**
 * Build the rejection used whenever no usable oembed data can be produced.
 *
 * @param {string} url - the URL that could not be embedded, exposed as the error `context`
 * @returns {Promise<never>} promise rejected with a `ValidationError`
 */
function unknownProvider(url) {
    return Promise.reject(new common.errors.ValidationError({
        message: common.i18n.t('errors.api.oembed.unknownProvider'),
        context: url
    }));
}

/**
 * Fetch oembed data for a URL that matched a provider in the oembed list.
 *
 * @param {string} url - URL as returned by `findUrlWithProvider`
 * @returns {Promise<Object>} result of `extract(url)`; a failure is re-thrown
 *   as an `InternalServerError` carrying the original error message
 */
function knownProvider(url) {
    return extract(url).catch((err) => {
        return Promise.reject(new common.errors.InternalServerError({
            message: err.message
        }));
    });
}

/**
 * Decide whether a URL must NOT be fetched because it could point at an
 * internal endpoint.
 *
 * @param {string} url - absolute URL to check
 * @returns {boolean} `true` when the URL cannot be parsed, is not http(s), or
 *   its host is an IPv4/IPv6 literal, `localhost` or a `*.localhost` name;
 *   `false` otherwise
 */
function isIpOrLocalhost(url) {
    let parsed;

    try {
        parsed = new URL(url);
    } catch (e) {
        // anything that can't be parsed can't be shown to be safe
        return true;
    }

    if (!HTTP_REGEX.test(parsed.protocol)) {
        return true;
    }

    // a trailing dot is only the DNS root label: `localhost.` and `127.0.0.1.`
    // name the same hosts as `localhost` and `127.0.0.1`, so drop it before matching
    const hostname = parsed.hostname.replace(/\.+$/, '');

    return LOCALHOST_REGEX.test(hostname) || IPV4_REGEX.test(hostname) || IPV6_REGEX.test(hostname);
}

/**
 * Validate a fetched oembed response and reduce it to the known fields.
 *
 * @param {*} body - parsed JSON body of the oembed response
 * @returns {Object|undefined} object holding only the known oembed fields, or
 *   `undefined` when the body is not an acceptable oembed response
 */
function extractOembedFields(body) {
    if (!body || typeof body !== 'object') {
        return;
    }

    // `type` and `version` are required and `type` must be one we recognise
    if (!body.type || !body.version || !OEMBED_TYPES.includes(body.type)) {
        return;
    }

    // never pass the response body through, only copy recognised fields
    const oembed = _.pick(body, OEMBED_FIELDS);

    // ensure we have required data for certain types
    if (oembed.type === 'photo' && !oembed.url) {
        return;
    }
    if ((oembed.type === 'video' || oembed.type === 'rich') && (!oembed.html || !oembed.width || !oembed.height)) {
        return;
    }

    return oembed;
}

/**
 * Resolve oembed data for an arbitrary URL.
 *
 * The URL is first checked against the known provider list. Otherwise the page
 * is fetched (following redirects, e.g. shortened URLs) and either the final
 * URL is matched against the provider list or the oembed endpoint advertised
 * by the page's `link[type="application/json+oembed"]` element is fetched.
 *
 * @param {string} _url - URL supplied by the API client
 * @returns {Promise<Object|undefined>} oembed data, or `undefined` when the
 *   page offers no usable oembed data. Rejects with a `ValidationError` when
 *   the URL (or the advertised oembed URL) is not allowed or the page can't be
 *   parsed, with an `InternalServerError` when a known provider fails, and
 *   with the request error when the page itself can't be fetched.
 */
function fetchOembedData(_url) {
    // only http(s) URLs that are not an IP address or localhost may be
    // fetched, to avoid potential access to internal network endpoints
    if (isIpOrLocalhost(_url)) {
        // pass the url along so the error context names the rejected URL
        return unknownProvider(_url);
    }

    // check against known oembed list
    let {url, provider} = findUrlWithProvider(_url);
    if (provider) {
        return knownProvider(url);
    }

    // url not in oembed list so fetch it in case it's a redirect or the page
    // advertises its own oembed endpoint
    return request(url, {
        method: 'GET',
        timeout: 2 * 1000,
        followRedirect: true,
        headers: {
            'user-agent': 'Ghost(https://github.com/TryGhost/Ghost)'
        }
    }).then((response) => {
        // url changed after fetch, see if we were redirected to a known oembed
        if (response.url !== url) {
            ({url, provider} = findUrlWithProvider(response.url));
            if (provider) {
                return knownProvider(url);
            }
        }

        // look for an advertised oembed endpoint in the fetched page
        let oembedUrl;
        try {
            oembedUrl = cheerio('link[type="application/json+oembed"]', response.body).attr('href');
        } catch (e) {
            return unknownProvider(url);
        }

        if (!oembedUrl) {
            return;
        }

        // make sure the linked url is not an ip address or localhost
        if (isIpOrLocalhost(oembedUrl)) {
            return unknownProvider(oembedUrl);
        }

        // NOTE(review): only the advertised URL itself is validated; whether
        // `request` follows redirects for this call is not visible here - confirm
        return request(oembedUrl, {
            method: 'GET',
            json: true
        }).then(({body}) => {
            return extractOembedFields(body);
        }).catch(() => {
            // deliberate best-effort: a failed or malformed oembed fetch means
            // "no data" rather than an error
        });
    });
}
/*
 * Remainder of the patch, kept verbatim below.
 *
 * 1. oembed.js, `module.exports` hunk: the old inline `query({data})`
 *    implementation (lines marked `-`) is replaced by `query({data: {url}})`,
 *    which calls `fetchOembedData(url)`, resolves with its result when that is
 *    truthy and otherwise returns `unknownProvider(url)`. Any rejection of
 *    that chain is likewise answered with `unknownProvider(url)`.
 *
 * 2. New file test/regression/api/v2/admin/oembed_spec.js: a mocha suite that
 *    starts Ghost and authenticates a supertest agent in `before`, mocks
 *    outgoing HTTP with nock and calls the `oembed/?url=...` endpoint.
 *    - known provider (youtube mock): expects 200 and `res.body.html` to exist
 *    - unknown provider, valid `link` response: expects 200 and both mocks hit
 *    - response without `type`, with unknown `type`, photo without `url`,
 *      video without `html`: expects 422
 *    - video response with an extra `unknown` field: expects 200 and a body
 *      containing only version/type/html/width/height
 *    - oembed mock on an IPv4 host, an IPv6 host or localhost: expects 422 with
 *      the page mock consumed and the oembed mock NOT consumed
 *
 * NOTE(review): in this copy the page fixtures are empty strings (`''`) and
 * some titles and `html` literals look truncated - presumably markup was
 * stripped when the patch was extracted; confirm against the real spec file.
 */
docName: 'oembed', @@ -41,56 +164,11 @@ module.exports = { 'url' ], options: [], - query({data}) { - let {url} = data; - - function unknownProvider() { - return Promise.reject(new common.errors.ValidationError({ - message: common.i18n.t('errors.api.oembed.unknownProvider'), - context: url - })); - } - - function knownProvider(url) { - return extract(url).catch((err) => { - return Promise.reject(new common.errors.InternalServerError({ - message: err.message - })); - }); - } - - let provider; - ({url, provider} = findUrlWithProvider(url)); - - if (provider) { - return knownProvider(url); - } - - // see if the URL is a redirect to cater for shortened urls - return request(url, { - method: 'GET', - timeout: 2 * 1000, - followRedirect: true - }).then((response) => { - if (response.url !== url) { - ({url, provider} = findUrlWithProvider(response.url)); - return provider ? knownProvider(url) : unknownProvider(); - } - - const oembedUrl = getOembedUrlFromHTML(response.body); - - if (!oembedUrl) { - return unknownProvider(); - } - - return request(oembedUrl, { - method: 'GET', - json: true - }).then((response) => { - return response.body; - }); + query({data: {url}}) { + return fetchOembedData(url).then((response) => { + return response || unknownProvider(url); }).catch(() => { - return unknownProvider(); + return unknownProvider(url); }); } } diff --git a/test/regression/api/v2/admin/oembed_spec.js b/test/regression/api/v2/admin/oembed_spec.js new file mode 100644 index 0000000000..35948eb197 --- /dev/null +++ b/test/regression/api/v2/admin/oembed_spec.js @@ -0,0 +1,333 @@ +const nock = require('nock'); +const should = require('should'); +const supertest = require('supertest'); +const testUtils = require('../../../../utils/index'); +const config = require('../../../../../core/server/config/index'); +const localUtils = require('./utils'); + +const ghost = testUtils.startGhost; + +describe('Oembed API (v2)', function () { + let ghostServer, request; + + before(function () { 
+ return ghost() + .then((_ghostServer) => { + ghostServer = _ghostServer; + request = supertest.agent(config.get('url')); + }) + .then(() => { + return localUtils.doAuth(request); + }); + }); + + it('can fetch an embed', function (done) { + let requestMock = nock('https://www.youtube.com') + .get('/oembed') + .query(true) + .reply(200, { + html: '', + thumbnail_width: 480, + width: 480, + author_url: 'https://www.youtube.com/user/gorillaz', + height: 270, + thumbnail_height: 360, + provider_name: 'YouTube', + title: 'Gorillaz - Humility (Official Video)', + provider_url: 'https://www.youtube.com/', + author_name: 'Gorillaz', + version: '1.0', + thumbnail_url: 'https://i.ytimg.com/vi/E5yFcdPAGv0/hqdefault.jpg', + type: 'video' + }); + + request.get(localUtils.API.getApiQuery('oembed/?url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DE5yFcdPAGv0')) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect(200) + .end(function (err, res) { + if (err) { + return done(err); + } + + requestMock.isDone().should.be.true(); + should.exist(res.body.html); + done(); + }); + }); + + describe('with unknown provider', function () { + it('fetches url and follows ', function (done) { + const pageMock = nock('http://test.com') + .get('/') + .reply(200, ''); + + const oembedMock = nock('http://test.com') + .get('/oembed') + .reply(200, { + version: '1.0', + type: 'link' + }); + + const url = encodeURIComponent('http://test.com/'); + request.get(localUtils.API.getApiQuery(`oembed/?url=${url}`)) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect(200) + .end(function (err, res) { + if (err) { + return done(err); + } + pageMock.isDone().should.be.true(); + oembedMock.isDone().should.be.true(); + done(); + }); + }); + + it('rejects invalid oembed responses', function (done) { + const pageMock = nock('http://test.com') + 
.get('/') + .reply(200, ''); + + const oembedMock = nock('http://test.com') + .get('/oembed') + .reply(200, { + version: '1.0', + html: 'test' + }); + + const url = encodeURIComponent('http://test.com/'); + request.get(localUtils.API.getApiQuery(`oembed/?url=${url}`)) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect(422) + .end(function (err, res) { + if (err) { + return done(err); + } + pageMock.isDone().should.be.true(); + oembedMock.isDone().should.be.true(); + done(); + }); + }); + + it('rejects unknown oembed types', function (done) { + const pageMock = nock('http://test.com') + .get('/') + .reply(200, ''); + + const oembedMock = nock('http://test.com') + .get('/oembed') + .reply(200, { + version: '1.0', + type: 'unknown' + }); + + const url = encodeURIComponent('http://test.com/'); + request.get(localUtils.API.getApiQuery(`oembed/?url=${url}`)) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect(422) + .end(function (err, res) { + if (err) { + return done(err); + } + pageMock.isDone().should.be.true(); + oembedMock.isDone().should.be.true(); + done(); + }); + }); + + it('rejects invalid photo responses', function (done) { + const pageMock = nock('http://test.com') + .get('/') + .reply(200, ''); + + const oembedMock = nock('http://test.com') + .get('/oembed') + .reply(200, { + // no `url` field + version: '1.0', + type: 'photo', + thumbnail_url: 'https://test.com/thumbnail.jpg' + }); + + const url = encodeURIComponent('http://test.com/'); + request.get(localUtils.API.getApiQuery(`oembed/?url=${url}`)) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect(422) + .end(function (err, res) { + if (err) { + return done(err); + } + pageMock.isDone().should.be.true(); + oembedMock.isDone().should.be.true(); + 
done(); + }); + }); + + it('rejects invalid video responses', function (done) { + const pageMock = nock('http://test.com') + .get('/') + .reply(200, ''); + + const oembedMock = nock('http://test.com') + .get('/oembed') + .reply(200, { + // no `html` field + version: '1.0', + type: 'video', + thumbnail_url: 'https://test.com/thumbnail.jpg' + }); + + const url = encodeURIComponent('http://test.com/'); + request.get(localUtils.API.getApiQuery(`oembed/?url=${url}`)) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect(422) + .end(function (err, res) { + if (err) { + return done(err); + } + pageMock.isDone().should.be.true(); + oembedMock.isDone().should.be.true(); + done(); + }); + }); + + it('strips unknown response fields', function (done) { + const pageMock = nock('http://test.com') + .get('/') + .reply(200, ''); + + const oembedMock = nock('http://test.com') + .get('/oembed') + .reply(200, { + version: '1.0', + type: 'video', + html: '

Test

', + width: 200, + height: 100, + unknown: 'test' + }); + + const url = encodeURIComponent('http://test.com'); + request.get(localUtils.API.getApiQuery(`oembed/?url=${url}`)) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect(200) + .end(function (err, res) { + if (err) { + return done(err); + } + pageMock.isDone().should.be.true(); + oembedMock.isDone().should.be.true(); + + res.body.should.deepEqual({ + version: '1.0', + type: 'video', + html: '

Test

', + width: 200, + height: 100 + }); + should.not.exist(res.body.unknown); + + done(); + }); + }); + + it('skips fetching IPv4 addresses', function (done) { + const pageMock = nock('http://test.com') + .get('/') + .reply(200, ''); + + const oembedMock = nock('http://192.168.0.1') + .get('/oembed') + .reply(200, { + version: '1.0', + type: 'link' + }); + + const url = encodeURIComponent('http://test.com/'); + request.get(localUtils.API.getApiQuery(`oembed/?url=${url}`)) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect(422) + .end(function (err, res) { + if (err) { + return done(err); + } + pageMock.isDone().should.be.true(); + oembedMock.isDone().should.be.false(); + done(); + }); + }); + + it('skips fetching IPv6 addresses', function (done) { + const pageMock = nock('http://test.com') + .get('/') + .reply(200, ''); + + const oembedMock = nock('http://[2607:f0d0:1002:51::4]:9999') + .get('/oembed') + .reply(200, { + version: '1.0', + type: 'link' + }); + + const url = encodeURIComponent('http://test.com/'); + request.get(localUtils.API.getApiQuery(`oembed/?url=${url}`)) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) + .expect('Cache-Control', testUtils.cacheRules.private) + .expect(422) + .end(function (err, res) { + if (err) { + return done(err); + } + pageMock.isDone().should.be.true(); + oembedMock.isDone().should.be.false(); + done(); + }); + }); + + it('skips fetching localhost', function (done) { + const pageMock = nock('http://test.com') + .get('/') + .reply(200, ''); + + const oembedMock = nock('http://localhost:9999') + .get('/oembed') + .reply(200, { + // no `html` field + version: '1.0', + type: 'video', + thumbnail_url: 'https://test.com/thumbnail.jpg' + }); + + const url = encodeURIComponent('http://test.com/'); + request.get(localUtils.API.getApiQuery(`oembed/?url=${url}`)) + .set('Origin', config.get('url')) + .expect('Content-Type', /json/) 
+ .expect('Cache-Control', testUtils.cacheRules.private) + .expect(422) + .end(function (err, res) { + if (err) { + return done(err); + } + pageMock.isDone().should.be.true(); + oembedMock.isDone().should.be.false(); + done(); + }); + }); + }); +});