0
Fork 0
mirror of https://github.com/fastmail/Squire.git synced 2024-12-31 11:54:03 -05:00
Squire/source/Clean.js
Neil Jenkins 7fc3ab55c3 Better cleanTree fn.
- Better checks to precisely trim white space only at beginning and end of block
- Don't keep the contents of <head> or <style> tags if inserted into body.
- Always keep styles to preserve fidelity on paste.

Fixes #95.
2015-06-19 14:20:19 +07:00

322 lines
10 KiB
JavaScript

/*jshint strict:false, undef:false, unused:false */
var allowedBlock = /^(?:A(?:DDRESS|RTICLE|SIDE|UDIO)|BLOCKQUOTE|CAPTION|D(?:[DLT]|IV)|F(?:IGURE|OOTER)|H[1-6]|HEADER|L(?:ABEL|EGEND|I)|O(?:L|UTPUT)|P(?:RE)?|SECTION|T(?:ABLE|BODY|D|FOOT|H|HEAD|R)|UL)$/;
var fontSizes = {
1: 10,
2: 13,
3: 16,
4: 18,
5: 24,
6: 32,
7: 48
};
var spanToSemantic = {
backgroundColor: {
regexp: notWS,
replace: function ( doc, colour ) {
return createElement( doc, 'SPAN', {
'class': 'highlight',
style: 'background-color: ' + colour
});
}
},
color: {
regexp: notWS,
replace: function ( doc, colour ) {
return createElement( doc, 'SPAN', {
'class': 'colour',
style: 'color:' + colour
});
}
},
fontWeight: {
regexp: /^bold/i,
replace: function ( doc ) {
return createElement( doc, 'B' );
}
},
fontStyle: {
regexp: /^italic/i,
replace: function ( doc ) {
return createElement( doc, 'I' );
}
},
fontFamily: {
regexp: notWS,
replace: function ( doc, family ) {
return createElement( doc, 'SPAN', {
'class': 'font',
style: 'font-family:' + family
});
}
},
fontSize: {
regexp: notWS,
replace: function ( doc, size ) {
return createElement( doc, 'SPAN', {
'class': 'size',
style: 'font-size:' + size
});
}
}
};
var replaceWithTag = function ( tag ) {
return function ( node, parent ) {
var el = createElement( node.ownerDocument, tag );
parent.replaceChild( el, node );
el.appendChild( empty( node ) );
return el;
};
};
var stylesRewriters = {
SPAN: function ( span, parent ) {
var style = span.style,
doc = span.ownerDocument,
attr, converter, css, newTreeBottom, newTreeTop, el;
for ( attr in spanToSemantic ) {
converter = spanToSemantic[ attr ];
css = style[ attr ];
if ( css && converter.regexp.test( css ) ) {
el = converter.replace( doc, css );
if ( newTreeBottom ) {
newTreeBottom.appendChild( el );
}
newTreeBottom = el;
if ( !newTreeTop ) {
newTreeTop = el;
}
}
}
if ( newTreeTop ) {
newTreeBottom.appendChild( empty( span ) );
parent.replaceChild( newTreeTop, span );
}
return newTreeBottom || span;
},
STRONG: replaceWithTag( 'B' ),
EM: replaceWithTag( 'I' ),
STRIKE: replaceWithTag( 'S' ),
FONT: function ( node, parent ) {
var face = node.face,
size = node.size,
colour = node.color,
doc = node.ownerDocument,
fontSpan, sizeSpan, colourSpan,
newTreeBottom, newTreeTop;
if ( face ) {
fontSpan = createElement( doc, 'SPAN', {
'class': 'font',
style: 'font-family:' + face
});
newTreeTop = fontSpan;
newTreeBottom = fontSpan;
}
if ( size ) {
sizeSpan = createElement( doc, 'SPAN', {
'class': 'size',
style: 'font-size:' + fontSizes[ size ] + 'px'
});
if ( !newTreeTop ) {
newTreeTop = sizeSpan;
}
if ( newTreeBottom ) {
newTreeBottom.appendChild( sizeSpan );
}
newTreeBottom = sizeSpan;
}
if ( colour && /^#?([\dA-F]{3}){1,2}$/i.test( colour ) ) {
if ( colour.charAt( 0 ) !== '#' ) {
colour = '#' + colour;
}
colourSpan = createElement( doc, 'SPAN', {
'class': 'colour',
style: 'color:' + colour
});
if ( !newTreeTop ) {
newTreeTop = colourSpan;
}
if ( newTreeBottom ) {
newTreeBottom.appendChild( colourSpan );
}
newTreeBottom = colourSpan;
}
if ( !newTreeTop ) {
newTreeTop = newTreeBottom = createElement( doc, 'SPAN' );
}
parent.replaceChild( newTreeTop, node );
newTreeBottom.appendChild( empty( node ) );
return newTreeBottom;
},
TT: function ( node, parent ) {
var el = createElement( node.ownerDocument, 'SPAN', {
'class': 'font',
style: 'font-family:menlo,consolas,"courier new",monospace'
});
parent.replaceChild( el, node );
el.appendChild( empty( node ) );
return el;
}
};
/*
Two purposes:
1. Remove nodes we don't want, such as weird <o:p> tags, comment nodes
and whitespace nodes.
2. Convert inline tags into our preferred format.
*/
var cleanTree = function cleanTree ( node ) {
var children = node.childNodes,
blockParent, i, l, child, nodeName, nodeType, rewriter, childLength,
startsWithWS, endsWithWS, data;
blockParent = node;
while ( isInline( blockParent ) ) {
blockParent = blockParent.parentNode;
}
if ( !isBlock( blockParent ) ) {
blockParent = null;
}
contentWalker.root = blockParent;
for ( i = 0, l = children.length; i < l; i += 1 ) {
child = children[i];
nodeName = child.nodeName;
nodeType = child.nodeType;
rewriter = stylesRewriters[ nodeName ];
if ( nodeType === ELEMENT_NODE ) {
childLength = child.childNodes.length;
if ( rewriter ) {
child = rewriter( child, node );
} else if ( !allowedBlock.test( nodeName ) &&
!isInline( child ) ) {
i -= 1;
if ( nodeName === 'HEAD' || nodeName === 'STYLE' ) {
node.removeChild( child );
l -= 1;
} else {
l += childLength - 1;
node.replaceChild( empty( child ), child );
}
continue;
}
if ( childLength ) {
cleanTree( child );
}
} else {
if ( nodeType === TEXT_NODE ) {
data = child.data;
// Use \s instead of notWS, because we want to remove nodes
// which are just nbsp, in order to cleanup <div>nbsp<br></div>
// construct.
startsWithWS = /\s/.test( data.charAt( 0 ) );
endsWithWS = /\s/.test( data.charAt( data.length - 1 ) );
if ( !startsWithWS && !endsWithWS ) {
continue;
}
if ( startsWithWS ) {
contentWalker.currentNode = child;
if ( !blockParent || !contentWalker.previousNode() ) {
data = data.replace( /^\s+/g, '' );
}
}
if ( endsWithWS ) {
contentWalker.currentNode = child;
if ( !blockParent || !contentWalker.nextNode() ) {
data = data.replace( /\s+$/g, '' );
}
}
if ( data ) {
child.data = data;
continue;
}
}
node.removeChild( child );
i -= 1;
l -= 1;
}
}
return node;
};
// ---
var removeEmptyInlines = function removeEmptyInlines ( root ) {
var children = root.childNodes,
l = children.length,
child;
while ( l-- ) {
child = children[l];
if ( child.nodeType === ELEMENT_NODE && !isLeaf( child ) ) {
removeEmptyInlines( child );
if ( isInline( child ) && !child.firstChild ) {
root.removeChild( child );
}
} else if ( child.nodeType === TEXT_NODE && !child.data ) {
root.removeChild( child );
}
}
};
// ---
var notWSTextNode = function ( node ) {
return node.nodeType === ELEMENT_NODE ?
node.nodeName === 'BR' :
notWS.test( node.data );
};
var isLineBreak = function ( br ) {
var block = br.parentNode,
walker;
while ( isInline( block ) ) {
block = block.parentNode;
}
walker = new TreeWalker(
block, SHOW_ELEMENT|SHOW_TEXT, notWSTextNode );
walker.currentNode = br;
return !!walker.nextNode();
};
// <br> elements are treated specially, and differently depending on the
// browser, when in rich text editor mode. When adding HTML from external
// sources, we must remove them, replacing the ones that actually affect
// line breaks by wrapping the inline text in a <div>. Browsers that want <br>
// elements at the end of each block will then have them added back in a later
// fixCursor method call.
var cleanupBRs = function ( root ) {
var brs = root.querySelectorAll( 'BR' ),
brBreaksLine = [],
l = brs.length,
i, br, parent;
// Must calculate whether the <br> breaks a line first, because if we
// have two <br>s next to each other, after the first one is converted
// to a block split, the second will be at the end of a block and
// therefore seem to not be a line break. But in its original context it
// was, so we should also convert it to a block split.
for ( i = 0; i < l; i += 1 ) {
brBreaksLine[i] = isLineBreak( brs[i] );
}
while ( l-- ) {
br = brs[l];
// Cleanup may have removed it
parent = br.parentNode;
if ( !parent ) { continue; }
// If it doesn't break a line, just remove it; it's not doing
// anything useful. We'll add it back later if required by the
// browser. If it breaks a line, wrap the content in div tags
// and replace the brs.
if ( !brBreaksLine[l] ) {
detach( br );
} else if ( !isInline( parent ) ) {
fixContainer( parent );
}
}
};