From fbdabce086ddb51cc31df9e799eb4977372f9a65 Mon Sep 17 00:00:00 2001 From: Hannah Wolfe Date: Sat, 20 Dec 2014 23:48:13 +0000 Subject: [PATCH] Add markdown file handler to importer closes #4691 - adds a file handler for markdown file (.md and .markdown) - handles titles and featured images - gets status, date, and slug from the filename - has a test suite --- .../server/data/importer/handlers/markdown.js | 112 +++++++++ core/server/data/importer/index.js | 20 +- core/test/unit/importer_spec.js | 214 ++++++++++++++++-- .../import/deleted-2014-12-19-test-1.md | 1 + .../import/draft-2014-12-19-test-1.md | 1 + .../import/draft-2014-12-19-test-2.md | 3 + .../import/draft-2014-12-19-test-3.md | 5 + .../import/published-2014-12-19-test-1.md | 3 + .../Roon-Export/published/test.md | 0 9 files changed, 332 insertions(+), 27 deletions(-) create mode 100644 core/server/data/importer/handlers/markdown.js create mode 100644 core/test/utils/fixtures/import/deleted-2014-12-19-test-1.md create mode 100644 core/test/utils/fixtures/import/draft-2014-12-19-test-1.md create mode 100644 core/test/utils/fixtures/import/draft-2014-12-19-test-2.md create mode 100644 core/test/utils/fixtures/import/draft-2014-12-19-test-3.md create mode 100644 core/test/utils/fixtures/import/published-2014-12-19-test-1.md create mode 100644 core/test/utils/fixtures/import/zips/zip-old-roon-export/Roon-Export/published/test.md diff --git a/core/server/data/importer/handlers/markdown.js b/core/server/data/importer/handlers/markdown.js new file mode 100644 index 0000000000..a298e2433c --- /dev/null +++ b/core/server/data/importer/handlers/markdown.js @@ -0,0 +1,112 @@ +var _ = require('lodash'), + Promise = require('bluebird'), + fs = require('fs-extra'), + moment = require('moment'), + + featuredImageRegex = /^(!\[]\(([^)]*?)\)\s+)(?=#)/, + titleRegex = /^#\s?([\w\W]*?)(?=\n)/, + statusRegex = /(published|draft)-/, // no empty alternative: it would match any bare hyphen + dateRegex = /(\d{4}-\d{2}-\d{2})-/, + + processDateTime, + processFileName, +
processMarkdownFile, + MarkdownHandler; + +// Takes a date from the filename in y-m-d-h-m form, and converts it into a millisecond timestamp ready to import +processDateTime = function (post, datetime) { + var format = 'YYYY-MM-DD-HH-mm'; + datetime = moment(datetime, format).valueOf(); + + if (post.status && post.status === 'published') { + post.published_at = datetime; + } else { + post.created_at = datetime; + } + + return post; +}; + +processFileName = function (filename) { + var post = {}, + name = filename.replace(/\.[^.\/\\]+$/, ''), // drop only the final extension so dotted names survive + match; + + // Parse out the status + match = name.match(statusRegex); + if (match) { + post.status = match[1]; + name = name.replace(match[0], ''); + } + + // Parse out the date + match = name.match(dateRegex); + if (match) { + name = name.replace(match[0], ''); + // Default to middle of the day + post = processDateTime(post, match[1] + '-12-00'); + } + + post.slug = name; + post.title = name; + + return post; +}; + +processMarkdownFile = function (filename, content) { + var post = processFileName(filename), + match; + + content = content.replace(/\r\n/gm, '\n'); + + // parse out any image which appears before the title + match = content.match(featuredImageRegex); + if (match) { + content = content.replace(match[1], ''); + post.image = match[2]; + } + + // try to parse out a heading 1 for the title + match = content.match(titleRegex); + if (match) { + content = content.replace(titleRegex, ''); + post.title = match[1]; + } + + content = content.replace(/^\n+/, ''); + + post.markdown = content; + + return post; +}; + +MarkdownHandler = { + type: 'data', + extensions: ['.md', '.markdown'], + types: ['application/octet-stream', 'text/plain'], + directories: [], + + loadFile: function (files, startDir) { + /*jshint unused:false */ + var startDirRegex = startDir ?
new RegExp('^' + startDir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '/') : new RegExp(''), + posts = [], + ops = []; + + _.each(files, function (file, index) { + ops.push(Promise.promisify(fs.readFile)(file.path).then(function (content) { + // normalize the file name + file.name = file.name.replace(startDirRegex, ''); + // don't include deleted posts; store by input index so result order does not depend on read timing + if (!/^deleted/.test(file.name)) { + posts[index] = processMarkdownFile(file.name, content.toString()); + } + })); + }); + + return Promise.all(ops).then(function () { + return {meta: {}, data: {posts: _.compact(posts)}}; + }); + } +}; + +module.exports = MarkdownHandler; diff --git a/core/server/data/importer/index.js b/core/server/data/importer/index.js index 02bbb4e685..a9fbd17db8 100644 --- a/core/server/data/importer/index.js +++ b/core/server/data/importer/index.js @@ -9,10 +9,11 @@ var _ = require('lodash'), uuid = require('node-uuid'), extract = require('extract-zip'), errors = require('../../errors'), - ImageHandler = require('./handlers/image'), - JSONHandler = require('./handlers/json'), - ImageImporter = require('./importers/image'), - DataImporter = require('./importers/data'), + ImageHandler = require('./handlers/image'), + JSONHandler = require('./handlers/json'), + MarkdownHandler = require('./handlers/markdown'), + ImageImporter = require('./importers/image'), + DataImporter = require('./importers/data'), // Glob levels ROOT_ONLY = 0, @@ -29,7 +30,7 @@ defaults = { function ImportManager() { this.importers = [ImageImporter, DataImporter]; - this.handlers = [ImageHandler, JSONHandler]; + this.handlers = [ImageHandler, JSONHandler, MarkdownHandler]; // Keep track of files to cleanup at the end this.filesToDelete = []; } @@ -139,7 +140,16 @@ _.extend(ImportManager.prototype, { ), dirMatches = glob.sync( this.getDirectoryGlob(this.getDirectories(), ROOT_OR_SINGLE_DIR), {cwd: directory} + ), + oldRoonMatches = glob.sync(this.getDirectoryGlob(['drafts', 'published', 'deleted'], ROOT_OR_SINGLE_DIR), + {cwd: directory}); + + // This is a temporary extra message for
the old format roon export which doesn't work with Ghost + if (oldRoonMatches.length > 0) { + throw new errors.UnsupportedMediaTypeError( + 'Your zip file looks like an old format Roon export, please re-export your Roon blog and try again.' ); + } // If this folder contains importable files or a content or images directory if (extMatchesBase.length > 0 || (dirMatches.length > 0 && extMatchesAll.length > 0)) { diff --git a/core/test/unit/importer_spec.js b/core/test/unit/importer_spec.js index 9bd51c1c58..7a4c41d73b 100644 --- a/core/test/unit/importer_spec.js +++ b/core/test/unit/importer_spec.js @@ -5,16 +5,18 @@ var should = require('should'), Promise = require('bluebird'), _ = require('lodash'), testUtils = require('../utils'), + moment = require('moment'), config = require('../../server/config'), path = require('path'), errors = require('../../server/errors'), // Stuff we are testing - ImportManager = require('../../server/data/importer'), - JSONHandler = require('../../server/data/importer/handlers/json'), - ImageHandler = require('../../server/data/importer/handlers/image'), - DataImporter = require('../../server/data/importer/importers/data'), - ImageImporter = require('../../server/data/importer/importers/image'), + ImportManager = require('../../server/data/importer'), + JSONHandler = require('../../server/data/importer/handlers/json'), + ImageHandler = require('../../server/data/importer/handlers/image'), + MarkdownHandler = require('../../server/data/importer/handlers/markdown'), + DataImporter = require('../../server/data/importer/importers/data'), + ImageImporter = require('../../server/data/importer/importers/image'), storage = require('../../server/storage'), sandbox = sinon.sandbox.create(); @@ -29,7 +31,7 @@ describe('Importer', function () { describe('ImportManager', function () { it('has the correct interface', function () { - ImportManager.handlers.should.be.instanceof(Array).and.have.lengthOf(2); + 
ImportManager.handlers.should.be.instanceof(Array).and.have.lengthOf(3); ImportManager.importers.should.be.instanceof(Array).and.have.lengthOf(2); ImportManager.loadFile.should.be.instanceof(Function); ImportManager.preProcess.should.be.instanceof(Function); @@ -38,18 +40,20 @@ describe('Importer', function () { }); it('gets the correct extensions', function () { - ImportManager.getExtensions().should.be.instanceof(Array).and.have.lengthOf(8); + ImportManager.getExtensions().should.be.instanceof(Array).and.have.lengthOf(10); ImportManager.getExtensions().should.containEql('.json'); ImportManager.getExtensions().should.containEql('.zip'); ImportManager.getExtensions().should.containEql('.jpg'); + ImportManager.getExtensions().should.containEql('.md'); }); it('gets the correct types', function () { - ImportManager.getTypes().should.be.instanceof(Array).and.have.lengthOf(8); + ImportManager.getTypes().should.be.instanceof(Array).and.have.lengthOf(10); ImportManager.getTypes().should.containEql('application/octet-stream'); ImportManager.getTypes().should.containEql('application/json'); ImportManager.getTypes().should.containEql('application/zip'); ImportManager.getTypes().should.containEql('application/x-zip-compressed'); + ImportManager.getTypes().should.containEql('text/plain'); }); it('gets the correct directories', function () { @@ -59,18 +63,30 @@ describe('Importer', function () { }); it('globs extensions correctly', function () { - ImportManager.getGlobPattern(ImportManager.getExtensions()).should.equal('+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.zip)'); - ImportManager.getGlobPattern(ImportManager.getDirectories()).should.equal('+(images|content)'); - ImportManager.getGlobPattern(JSONHandler.extensions).should.equal('+(.json)'); - ImportManager.getGlobPattern(ImageHandler.extensions).should.equal('+(.jpg|.jpeg|.gif|.png|.svg|.svgz)'); - ImportManager.getExtensionGlob(ImportManager.getExtensions()).should.equal('*+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.zip)'); - 
ImportManager.getDirectoryGlob(ImportManager.getDirectories()).should.equal('+(images|content)'); - ImportManager.getExtensionGlob(ImportManager.getExtensions(), 0).should.equal('*+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.zip)'); - ImportManager.getDirectoryGlob(ImportManager.getDirectories(), 0).should.equal('+(images|content)'); - ImportManager.getExtensionGlob(ImportManager.getExtensions(), 1).should.equal('{*/*,*}+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.zip)'); - ImportManager.getDirectoryGlob(ImportManager.getDirectories(), 1).should.equal('{*/,}+(images|content)'); - ImportManager.getExtensionGlob(ImportManager.getExtensions(), 2).should.equal('**/*+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.zip)'); - ImportManager.getDirectoryGlob(ImportManager.getDirectories(), 2).should.equal('**/+(images|content)'); + ImportManager.getGlobPattern(ImportManager.getExtensions()) + .should.equal('+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.md|.markdown|.zip)'); + ImportManager.getGlobPattern(ImportManager.getDirectories()) + .should.equal('+(images|content)'); + ImportManager.getGlobPattern(JSONHandler.extensions) + .should.equal('+(.json)'); + ImportManager.getGlobPattern(ImageHandler.extensions) + .should.equal('+(.jpg|.jpeg|.gif|.png|.svg|.svgz)'); + ImportManager.getExtensionGlob(ImportManager.getExtensions()) + .should.equal('*+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.md|.markdown|.zip)'); + ImportManager.getDirectoryGlob(ImportManager.getDirectories()) + .should.equal('+(images|content)'); + ImportManager.getExtensionGlob(ImportManager.getExtensions(), 0) + .should.equal('*+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.md|.markdown|.zip)'); + ImportManager.getDirectoryGlob(ImportManager.getDirectories(), 0) + .should.equal('+(images|content)'); + ImportManager.getExtensionGlob(ImportManager.getExtensions(), 1) + .should.equal('{*/*,*}+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.md|.markdown|.zip)'); + ImportManager.getDirectoryGlob(ImportManager.getDirectories(), 1) + 
.should.equal('{*/,}+(images|content)'); + ImportManager.getExtensionGlob(ImportManager.getExtensions(), 2) + .should.equal('**/*+(.jpg|.jpeg|.gif|.png|.svg|.svgz|.json|.md|.markdown|.zip)'); + ImportManager.getDirectoryGlob(ImportManager.getDirectories(), 2) + .should.equal('**/+(images|content)'); }); // Step 1 of importing is loadFile @@ -109,18 +125,21 @@ describe('Importer', function () { baseDirSpy = sandbox.stub(ImportManager, 'getBaseDirectory').returns(), getFileSpy = sandbox.stub(ImportManager, 'getFilesFromZip'), jsonSpy = sandbox.stub(JSONHandler, 'loadFile').returns(Promise.resolve({posts: []})), - imageSpy = sandbox.stub(ImageHandler, 'loadFile'); + imageSpy = sandbox.stub(ImageHandler, 'loadFile'), + mdSpy = sandbox.stub(MarkdownHandler, 'loadFile'); getFileSpy.withArgs(JSONHandler).returns(['/tmp/dir/myFile.json']); getFileSpy.withArgs(ImageHandler).returns([]); + getFileSpy.withArgs(MarkdownHandler).returns([]); ImportManager.processZip(testZip).then(function (zipResult) { extractSpy.calledOnce.should.be.true; validSpy.calledOnce.should.be.true; baseDirSpy.calledOnce.should.be.true; - getFileSpy.calledTwice.should.be.true; + getFileSpy.calledThrice.should.be.true; jsonSpy.calledOnce.should.be.true; imageSpy.called.should.be.false; + mdSpy.called.should.be.false; ImportManager.processFile(testFile, '.json').then(function (fileResult) { jsonSpy.calledTwice.should.be.true; @@ -164,6 +183,15 @@ describe('Importer', function () { ImportManager.isValidZip.bind(ImportManager, testDir).should.throw(errors.UnsupportedMediaTypeError); }); + + it('shows a special error for old Roon exports', function () { + var testDir = path.resolve('core/test/utils/fixtures/import/zips/zip-old-roon-export'), + msg = 'Your zip file looks like an old format Roon export, ' + + 'please re-export your Roon blog and try again.'; + + ImportManager.isValidZip.bind(ImportManager, testDir).should.throw(errors.UnsupportedMediaTypeError); + ImportManager.isValidZip.bind(ImportManager, 
testDir).should.throw(msg); + }); }); describe('Get Base Dir', function () { @@ -456,6 +484,148 @@ describe('Importer', function () { storeSpy.lastCall.args[1].targetDir.should.match(/(\/|\\)content(\/|\\)images$/); storeSpy.lastCall.args[1].newPath.should.eql('/content/images/puppy.jpg'); + done(); + }); + }); + }); + + describe('MarkdownHandler', function () { + it('has the correct interface', function () { + MarkdownHandler.type.should.eql('data'); + MarkdownHandler.extensions.should.be.instanceof(Array).and.have.lengthOf(2); + MarkdownHandler.extensions.should.containEql('.md'); + MarkdownHandler.extensions.should.containEql('.markdown'); + MarkdownHandler.types.should.be.instanceof(Array).and.have.lengthOf(2); + MarkdownHandler.types.should.containEql('application/octet-stream'); + MarkdownHandler.types.should.containEql('text/plain'); + MarkdownHandler.loadFile.should.be.instanceof(Function); + }); + + it('does convert a markdown file into a post object', function (done) { + var filename = 'draft-2014-12-19-test-1.md', + file = [{ + path: testUtils.fixtures.getImportFixturePath(filename), + name: filename + }]; + + MarkdownHandler.loadFile(file).then(function (result) { + result.data.posts[0].markdown.should.eql('You\'re live! Nice.'); + result.data.posts[0].status.should.eql('draft'); + result.data.posts[0].slug.should.eql('test-1'); + result.data.posts[0].title.should.eql('test-1'); + result.data.posts[0].created_at.should.eql(1418990400000); + moment(result.data.posts[0].created_at).format('DD MM YY HH:mm').should.eql('19 12 14 12:00'); + result.data.posts[0].should.not.have.property('image'); + + done(); + }); + }); + + it('can parse a title from a markdown file', function (done) { + var filename = 'draft-2014-12-19-test-2.md', + file = [{ + path: testUtils.fixtures.getImportFixturePath(filename), + name: filename + }]; + + MarkdownHandler.loadFile(file).then(function (result) { + result.data.posts[0].markdown.should.eql('You\'re live! 
Nice.'); + result.data.posts[0].status.should.eql('draft'); + result.data.posts[0].slug.should.eql('test-2'); + result.data.posts[0].title.should.eql('Welcome to Ghost'); + result.data.posts[0].created_at.should.eql(1418990400000); + result.data.posts[0].should.not.have.property('image'); + + done(); + }); + }); + + it('can parse a featured image from a markdown file if there is a title', function (done) { + var filename = 'draft-2014-12-19-test-3.md', + file = [{ + path: testUtils.fixtures.getImportFixturePath(filename), + name: filename + }]; + + MarkdownHandler.loadFile(file).then(function (result) { + result.data.posts[0].markdown.should.eql('You\'re live! Nice.'); + result.data.posts[0].status.should.eql('draft'); + result.data.posts[0].slug.should.eql('test-3'); + result.data.posts[0].title.should.eql('Welcome to Ghost'); + result.data.posts[0].created_at.should.eql(1418990400000); + result.data.posts[0].image.should.eql('/images/kitten.jpg'); + + done(); + }); + }); + + it('can import a published post', function (done) { + var filename = 'published-2014-12-19-test-1.md', + file = [{ + path: testUtils.fixtures.getImportFixturePath(filename), + name: filename + }]; + + MarkdownHandler.loadFile(file).then(function (result) { + result.data.posts[0].markdown.should.eql('You\'re live! 
Nice.'); + result.data.posts[0].status.should.eql('published'); + result.data.posts[0].slug.should.eql('test-1'); + result.data.posts[0].title.should.eql('Welcome to Ghost'); + result.data.posts[0].published_at.should.eql(1418990400000); + moment(result.data.posts[0].published_at).format('DD MM YY HH:mm').should.eql('19 12 14 12:00'); + result.data.posts[0].should.not.have.property('image'); + + done(); + }); + }); + + it('does not import deleted posts', function (done) { + var filename = 'deleted-2014-12-19-test-1.md', + file = [{ + path: testUtils.fixtures.getImportFixturePath(filename), + name: filename + }]; + + MarkdownHandler.loadFile(file).then(function (result) { + result.data.posts.should.be.empty; + + done(); + }); + }); + + it('can import multiple files', function (done) { + var files = [{ + path: testUtils.fixtures.getImportFixturePath('deleted-2014-12-19-test-1.md'), + name: 'deleted-2014-12-19-test-1.md' + }, { + path: testUtils.fixtures.getImportFixturePath('published-2014-12-19-test-1.md'), + name: 'published-2014-12-19-test-1.md' + }, { + path: testUtils.fixtures.getImportFixturePath('draft-2014-12-19-test-3.md'), + name: 'draft-2014-12-19-test-3.md' + }]; + + MarkdownHandler.loadFile(files).then(function (result) { + // deleted-2014-12-19-test-1.md + // doesn't get imported ;) + + // published-2014-12-19-test-1.md + result.data.posts[0].markdown.should.eql('You\'re live! Nice.'); + result.data.posts[0].status.should.eql('published'); + result.data.posts[0].slug.should.eql('test-1'); + result.data.posts[0].title.should.eql('Welcome to Ghost'); + result.data.posts[0].published_at.should.eql(1418990400000); + moment(result.data.posts[0].published_at).format('DD MM YY HH:mm').should.eql('19 12 14 12:00'); + result.data.posts[0].should.not.have.property('image'); + + // draft-2014-12-19-test-3.md + result.data.posts[1].markdown.should.eql('You\'re live! 
Nice.'); + result.data.posts[1].status.should.eql('draft'); + result.data.posts[1].slug.should.eql('test-3'); + result.data.posts[1].title.should.eql('Welcome to Ghost'); + result.data.posts[1].created_at.should.eql(1418990400000); + result.data.posts[1].image.should.eql('/images/kitten.jpg'); + done(); }).catch(done); }); diff --git a/core/test/utils/fixtures/import/deleted-2014-12-19-test-1.md b/core/test/utils/fixtures/import/deleted-2014-12-19-test-1.md new file mode 100644 index 0000000000..80744278f0 --- /dev/null +++ b/core/test/utils/fixtures/import/deleted-2014-12-19-test-1.md @@ -0,0 +1 @@ +You're live! Nice. \ No newline at end of file diff --git a/core/test/utils/fixtures/import/draft-2014-12-19-test-1.md b/core/test/utils/fixtures/import/draft-2014-12-19-test-1.md new file mode 100644 index 0000000000..80744278f0 --- /dev/null +++ b/core/test/utils/fixtures/import/draft-2014-12-19-test-1.md @@ -0,0 +1 @@ +You're live! Nice. \ No newline at end of file diff --git a/core/test/utils/fixtures/import/draft-2014-12-19-test-2.md b/core/test/utils/fixtures/import/draft-2014-12-19-test-2.md new file mode 100644 index 0000000000..84ba78a150 --- /dev/null +++ b/core/test/utils/fixtures/import/draft-2014-12-19-test-2.md @@ -0,0 +1,3 @@ +# Welcome to Ghost + +You're live! Nice. \ No newline at end of file diff --git a/core/test/utils/fixtures/import/draft-2014-12-19-test-3.md b/core/test/utils/fixtures/import/draft-2014-12-19-test-3.md new file mode 100644 index 0000000000..c1b5f5a9dd --- /dev/null +++ b/core/test/utils/fixtures/import/draft-2014-12-19-test-3.md @@ -0,0 +1,5 @@ +![](/images/kitten.jpg) + +# Welcome to Ghost + +You're live! Nice. 
\ No newline at end of file diff --git a/core/test/utils/fixtures/import/published-2014-12-19-test-1.md b/core/test/utils/fixtures/import/published-2014-12-19-test-1.md new file mode 100644 index 0000000000..63ada3fd7d --- /dev/null +++ b/core/test/utils/fixtures/import/published-2014-12-19-test-1.md @@ -0,0 +1,3 @@ +#Welcome to Ghost + +You're live! Nice. \ No newline at end of file diff --git a/core/test/utils/fixtures/import/zips/zip-old-roon-export/Roon-Export/published/test.md b/core/test/utils/fixtures/import/zips/zip-old-roon-export/Roon-Export/published/test.md new file mode 100644 index 0000000000..e69de29bb2