diff --git a/core/server/api/canary/oembed.js b/core/server/api/canary/oembed.js
index 9eff037849..c730cf72cd 100644
--- a/core/server/api/canary/oembed.js
+++ b/core/server/api/canary/oembed.js
@@ -1,233 +1,4 @@
-const errors = require('@tryghost/errors');
-const {extract, hasProvider} = require('oembed-parser');
-const Promise = require('bluebird');
-const cheerio = require('cheerio');
-const _ = require('lodash');
-const {CookieJar} = require('tough-cookie');
-const config = require('../../../shared/config');
-const {i18n} = require('../../lib/common');
-const externalRequest = require('../../lib/request-external');
-
-async function fetchBookmarkData(url) {
- const metascraper = require('metascraper')([
- require('metascraper-url')(),
- require('metascraper-title')(),
- require('metascraper-description')(),
- require('metascraper-author')(),
- require('metascraper-publisher')(),
- require('metascraper-image')(),
- require('metascraper-logo-favicon')(),
- require('metascraper-logo')()
- ]);
-
- let scraperResponse;
-
- try {
- const cookieJar = new CookieJar();
- const response = await externalRequest(url, {cookieJar});
- const html = response.body;
- scraperResponse = await metascraper({html, url});
- } catch (err) {
- return Promise.reject(err);
- }
-
- const metadata = Object.assign({}, scraperResponse, {
- thumbnail: scraperResponse.image,
- icon: scraperResponse.logo
- });
- // We want to use standard naming for image and logo
- delete metadata.image;
- delete metadata.logo;
-
- if (metadata.title) {
- return Promise.resolve({
- type: 'bookmark',
- url,
- metadata
- });
- }
-
- return Promise.reject(new errors.ValidationError({
- message: i18n.t('errors.api.oembed.insufficientMetadata'),
- context: url
- }));
-}
-
-const findUrlWithProvider = (url) => {
- let provider;
-
- // build up a list of URL variations to test against because the oembed
- // providers list is not always up to date with scheme or www vs non-www
- let baseUrl = url.replace(/^\/\/|^https?:\/\/(?:www\.)?/, '');
- let testUrls = [
- `http://${baseUrl}`,
- `https://${baseUrl}`,
- `http://www.${baseUrl}`,
- `https://www.${baseUrl}`
- ];
-
- for (let testUrl of testUrls) {
- provider = hasProvider(testUrl);
- if (provider) {
- url = testUrl;
- break;
- }
- }
-
- return {url, provider};
-};
-
-function unknownProvider(url) {
- return Promise.reject(new errors.ValidationError({
- message: i18n.t('errors.api.oembed.unknownProvider'),
- context: url
- }));
-}
-
-function knownProvider(url) {
- return extract(url, {maxwidth: 1280}).catch((err) => {
- return Promise.reject(new errors.InternalServerError({
- message: err.message
- }));
- });
-}
-
-function isIpOrLocalhost(url) {
- try {
- const IPV4_REGEX = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
- const IPV6_REGEX = /:/; // fqdns will not have colons
- const HTTP_REGEX = /^https?:/i;
-
- const siteUrl = new URL(config.get('url'));
- const {protocol, hostname, host} = new URL(url);
-
- // allow requests to Ghost's own url through
- if (siteUrl.host === host) {
- return false;
- }
-
- if (!HTTP_REGEX.test(protocol) || hostname === 'localhost' || IPV4_REGEX.test(hostname) || IPV6_REGEX.test(hostname)) {
- return true;
- }
-
- return false;
- } catch (e) {
- return true;
- }
-}
-
-function fetchOembedData(_url, cardType) {
- // parse the url then validate the protocol and host to make sure it's
- // http(s) and not an IP address or localhost to avoid potential access to
- // internal network endpoints
- if (isIpOrLocalhost(_url)) {
- return unknownProvider();
- }
-
- // check against known oembed list
- let {url, provider} = findUrlWithProvider(_url);
- if (provider) {
- return knownProvider(url);
- }
-
- // url not in oembed list so fetch it in case it's a redirect or has a
- // element
- const cookieJar = new CookieJar();
- return externalRequest(url, {
- method: 'GET',
- timeout: 2 * 1000,
- followRedirect: true,
- cookieJar
- }).then((pageResponse) => {
- // url changed after fetch, see if we were redirected to a known oembed
- if (pageResponse.url !== url) {
- ({url, provider} = findUrlWithProvider(pageResponse.url));
- if (provider) {
- return knownProvider(url);
- }
- }
-
- // check for element
- let oembedUrl;
- try {
- oembedUrl = cheerio('link[type="application/json+oembed"]', pageResponse.body).attr('href');
- } catch (e) {
- return unknownProvider(url);
- }
-
- if (oembedUrl) {
- // make sure the linked url is not an ip address or localhost
- if (isIpOrLocalhost(oembedUrl)) {
- return unknownProvider(oembedUrl);
- }
-
- // for standard WP oembed's we want to insert a bookmark card rather than their blockquote+script
- // which breaks in the editor and most Ghost themes. Only fallback if card type was not explicitly chosen
- if (!cardType && oembedUrl.match(/wp-json\/oembed/)) {
- return;
- }
-
- // fetch oembed response from embedded rel="alternate" url
- return externalRequest(oembedUrl, {
- method: 'GET',
- json: true,
- timeout: 2 * 1000,
- followRedirect: true,
- cookieJar
- }).then((oembedResponse) => {
- // validate the fetched json against the oembed spec to avoid
- // leaking non-oembed responses
- const body = oembedResponse.body;
- const hasRequiredFields = body.type && body.version;
- const hasValidType = ['photo', 'video', 'link', 'rich'].includes(body.type);
-
- if (hasRequiredFields && hasValidType) {
- // extract known oembed fields from the response to limit leaking of unrecognised data
- const knownFields = [
- 'type',
- 'version',
- 'html',
- 'url',
- 'title',
- 'width',
- 'height',
- 'author_name',
- 'author_url',
- 'provider_name',
- 'provider_url',
- 'thumbnail_url',
- 'thumbnail_width',
- 'thumbnail_height'
- ];
- const oembed = _.pick(body, knownFields);
-
- // ensure we have required data for certain types
- if (oembed.type === 'photo' && !oembed.url) {
- return;
- }
- if ((oembed.type === 'video' || oembed.type === 'rich') && (!oembed.html || !oembed.width || !oembed.height)) {
- return;
- }
-
- // return the extracted object, don't pass through the response body
- return oembed;
- }
- }).catch(() => {});
- }
- });
-}
-
-function errorHandler(url) {
- return function (err) {
- // allow specific validation errors through for better error messages
- if (errors.utils.isIgnitionError(err) && err.errorType === 'ValidationError') {
- return Promise.reject(err);
- }
-
- // default to unknown provider to avoid leaking any app specifics
- return unknownProvider(url);
- };
-}
+const {fetchBookmarkData, fetchOembedData, errorHandler, unknownProvider} = require('../../services/oembed');
module.exports = {
docName: 'oembed',
diff --git a/core/server/services/oembed.js b/core/server/services/oembed.js
new file mode 100644
index 0000000000..5a3293f8e2
--- /dev/null
+++ b/core/server/services/oembed.js
@@ -0,0 +1,237 @@
+const Promise = require('bluebird');
+const errors = require('@tryghost/errors');
+const {extract, hasProvider} = require('oembed-parser');
+const cheerio = require('cheerio');
+const _ = require('lodash');
+const {CookieJar} = require('tough-cookie');
+const config = require('../../shared/config');
+const externalRequest = require('../lib/request-external');
+const {i18n} = require('../lib/common');
+
+function unknownProvider(url) {
+ return Promise.reject(new errors.ValidationError({
+ message: i18n.t('errors.api.oembed.unknownProvider'),
+ context: url
+ }));
+}
+
+function knownProvider(url) {
+ return extract(url, {maxwidth: 1280}).catch((err) => {
+ return Promise.reject(new errors.InternalServerError({
+ message: err.message
+ }));
+ });
+}
+
+function errorHandler(url) {
+ return function (err) {
+ // allow specific validation errors through for better error messages
+ if (errors.utils.isIgnitionError(err) && err.errorType === 'ValidationError') {
+ return Promise.reject(err);
+ }
+
+ // default to unknown provider to avoid leaking any app specifics
+ return unknownProvider(url);
+ };
+}
+
+async function fetchBookmarkData(url) {
+ const metascraper = require('metascraper')([
+ require('metascraper-url')(),
+ require('metascraper-title')(),
+ require('metascraper-description')(),
+ require('metascraper-author')(),
+ require('metascraper-publisher')(),
+ require('metascraper-image')(),
+ require('metascraper-logo-favicon')(),
+ require('metascraper-logo')()
+ ]);
+
+ let scraperResponse;
+
+ try {
+ const cookieJar = new CookieJar();
+ const response = await externalRequest(url, {cookieJar});
+ const html = response.body;
+ scraperResponse = await metascraper({html, url});
+ } catch (err) {
+ return Promise.reject(err);
+ }
+
+ const metadata = Object.assign({}, scraperResponse, {
+ thumbnail: scraperResponse.image,
+ icon: scraperResponse.logo
+ });
+ // We want to use standard naming for image and logo
+ delete metadata.image;
+ delete metadata.logo;
+
+ if (metadata.title) {
+ return Promise.resolve({
+ type: 'bookmark',
+ url,
+ metadata
+ });
+ }
+
+ return Promise.reject(new errors.ValidationError({
+ message: i18n.t('errors.api.oembed.insufficientMetadata'),
+ context: url
+ }));
+}
+
+const findUrlWithProvider = (url) => {
+ let provider;
+
+ // build up a list of URL variations to test against because the oembed
+ // providers list is not always up to date with scheme or www vs non-www
+ let baseUrl = url.replace(/^\/\/|^https?:\/\/(?:www\.)?/, '');
+ let testUrls = [
+ `http://${baseUrl}`,
+ `https://${baseUrl}`,
+ `http://www.${baseUrl}`,
+ `https://www.${baseUrl}`
+ ];
+
+ for (let testUrl of testUrls) {
+ provider = hasProvider(testUrl);
+ if (provider) {
+ url = testUrl;
+ break;
+ }
+ }
+
+ return {url, provider};
+};
+
+function isIpOrLocalhost(url) {
+ try {
+ const IPV4_REGEX = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
+ const IPV6_REGEX = /:/; // fqdns will not have colons
+ const HTTP_REGEX = /^https?:/i;
+
+ const siteUrl = new URL(config.get('url'));
+ const {protocol, hostname, host} = new URL(url);
+
+ // allow requests to Ghost's own url through
+ if (siteUrl.host === host) {
+ return false;
+ }
+
+ if (!HTTP_REGEX.test(protocol) || hostname === 'localhost' || IPV4_REGEX.test(hostname) || IPV6_REGEX.test(hostname)) {
+ return true;
+ }
+
+ return false;
+ } catch (e) {
+ return true;
+ }
+}
+
+function fetchOembedData(_url, cardType) {
+ // parse the url then validate the protocol and host to make sure it's
+ // http(s) and not an IP address or localhost to avoid potential access to
+ // internal network endpoints
+ if (isIpOrLocalhost(_url)) {
+ return unknownProvider();
+ }
+
+ // check against known oembed list
+ let {url, provider} = findUrlWithProvider(_url);
+ if (provider) {
+ return knownProvider(url);
+ }
+
+ // url not in oembed list so fetch it in case it's a redirect or has a
+ // element
+ const cookieJar = new CookieJar();
+ return externalRequest(url, {
+ method: 'GET',
+ timeout: 2 * 1000,
+ followRedirect: true,
+ cookieJar
+ }).then((pageResponse) => {
+ // url changed after fetch, see if we were redirected to a known oembed
+ if (pageResponse.url !== url) {
+ ({url, provider} = findUrlWithProvider(pageResponse.url));
+ if (provider) {
+ return knownProvider(url);
+ }
+ }
+
+ // check for element
+ let oembedUrl;
+ try {
+ oembedUrl = cheerio('link[type="application/json+oembed"]', pageResponse.body).attr('href');
+ } catch (e) {
+ return unknownProvider(url);
+ }
+
+ if (oembedUrl) {
+ // make sure the linked url is not an ip address or localhost
+ if (isIpOrLocalhost(oembedUrl)) {
+ return unknownProvider(oembedUrl);
+ }
+
+ // for standard WP oembed's we want to insert a bookmark card rather than their blockquote+script
+ // which breaks in the editor and most Ghost themes. Only fallback if card type was not explicitly chosen
+ if (!cardType && oembedUrl.match(/wp-json\/oembed/)) {
+ return;
+ }
+
+ // fetch oembed response from embedded rel="alternate" url
+ return externalRequest(oembedUrl, {
+ method: 'GET',
+ json: true,
+ timeout: 2 * 1000,
+ followRedirect: true,
+ cookieJar
+ }).then((oembedResponse) => {
+ // validate the fetched json against the oembed spec to avoid
+ // leaking non-oembed responses
+ const body = oembedResponse.body;
+ const hasRequiredFields = body.type && body.version;
+ const hasValidType = ['photo', 'video', 'link', 'rich'].includes(body.type);
+
+ if (hasRequiredFields && hasValidType) {
+ // extract known oembed fields from the response to limit leaking of unrecognised data
+ const knownFields = [
+ 'type',
+ 'version',
+ 'html',
+ 'url',
+ 'title',
+ 'width',
+ 'height',
+ 'author_name',
+ 'author_url',
+ 'provider_name',
+ 'provider_url',
+ 'thumbnail_url',
+ 'thumbnail_width',
+ 'thumbnail_height'
+ ];
+ const oembed = _.pick(body, knownFields);
+
+ // ensure we have required data for certain types
+ if (oembed.type === 'photo' && !oembed.url) {
+ return;
+ }
+ if ((oembed.type === 'video' || oembed.type === 'rich') && (!oembed.html || !oembed.width || !oembed.height)) {
+ return;
+ }
+
+ // return the extracted object, don't pass through the response body
+ return oembed;
+ }
+ }).catch(() => {});
+ }
+ });
+}
+
+module.exports = {
+ fetchBookmarkData,
+ fetchOembedData,
+ errorHandler,
+ unknownProvider
+};
\ No newline at end of file