0
Fork 0
mirror of https://github.com/fastmail/Squire.git synced 2024-12-22 15:23:29 -05:00
Squire/source/Clean.js

353 lines
12 KiB
JavaScript

/*jshint strict:false, undef:false, unused:false */
var linkRegExp = /\b((?:(?:ht|f)tps?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,}\/)(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))|([\w\-.%+]+@(?:[\w\-]+\.)+[A-Z]{2,}\b)/i;
var addLinks = function ( frag ) {
var doc = frag.ownerDocument,
walker = new TreeWalker( frag, SHOW_TEXT,
function ( node ) {
return !getNearest( node, 'A' );
}, false ),
node, data, parent, match, index, endIndex, child;
while ( node = walker.nextNode() ) {
data = node.data;
parent = node.parentNode;
while ( match = linkRegExp.exec( data ) ) {
index = match.index;
endIndex = index + match[0].length;
if ( index ) {
child = doc.createTextNode( data.slice( 0, index ) );
parent.insertBefore( child, node );
}
child = doc.createElement( 'A' );
child.textContent = data.slice( index, endIndex );
child.href = match[1] ?
/^(?:ht|f)tps?:/.test( match[1] ) ?
match[1] :
'http://' + match[1] :
'mailto:' + match[2];
parent.insertBefore( child, node );
node.data = data = data.slice( endIndex );
}
}
};
var allowedBlock = /^(?:A(?:DDRESS|RTICLE|SIDE|UDIO)|BLOCKQUOTE|CAPTION|D(?:[DLT]|IV)|F(?:IGURE|OOTER)|H[1-6]|HEADER|L(?:ABEL|EGEND|I)|O(?:L|UTPUT)|P(?:RE)?|SECTION|T(?:ABLE|BODY|D|FOOT|H|HEAD|R)|UL)$/;
var fontSizes = {
1: 10,
2: 13,
3: 16,
4: 18,
5: 24,
6: 32,
7: 48
};
var spanToSemantic = {
backgroundColor: {
regexp: notWS,
replace: function ( doc, colour ) {
return createElement( doc, 'SPAN', {
'class': 'highlight',
style: 'background-color: ' + colour
});
}
},
color: {
regexp: notWS,
replace: function ( doc, colour ) {
return createElement( doc, 'SPAN', {
'class': 'colour',
style: 'color:' + colour
});
}
},
fontWeight: {
regexp: /^bold/i,
replace: function ( doc ) {
return createElement( doc, 'B' );
}
},
fontStyle: {
regexp: /^italic/i,
replace: function ( doc ) {
return createElement( doc, 'I' );
}
},
fontFamily: {
regexp: notWS,
replace: function ( doc, family ) {
return createElement( doc, 'SPAN', {
'class': 'font',
style: 'font-family:' + family
});
}
},
fontSize: {
regexp: notWS,
replace: function ( doc, size ) {
return createElement( doc, 'SPAN', {
'class': 'size',
style: 'font-size:' + size
});
}
}
};
var replaceWithTag = function ( tag ) {
return function ( node, parent ) {
var el = createElement( node.ownerDocument, tag );
parent.replaceChild( el, node );
el.appendChild( empty( node ) );
return el;
};
};
var stylesRewriters = {
SPAN: function ( span, parent ) {
var style = span.style,
doc = span.ownerDocument,
attr, converter, css, newTreeBottom, newTreeTop, el;
for ( attr in spanToSemantic ) {
converter = spanToSemantic[ attr ];
css = style[ attr ];
if ( css && converter.regexp.test( css ) ) {
el = converter.replace( doc, css );
if ( newTreeBottom ) {
newTreeBottom.appendChild( el );
}
newTreeBottom = el;
if ( !newTreeTop ) {
newTreeTop = el;
}
}
}
if ( newTreeTop ) {
newTreeBottom.appendChild( empty( span ) );
parent.replaceChild( newTreeTop, span );
}
return newTreeBottom || span;
},
STRONG: replaceWithTag( 'B' ),
EM: replaceWithTag( 'I' ),
STRIKE: replaceWithTag( 'S' ),
FONT: function ( node, parent ) {
var face = node.face,
size = node.size,
colour = node.color,
doc = node.ownerDocument,
fontSpan, sizeSpan, colourSpan,
newTreeBottom, newTreeTop;
if ( face ) {
fontSpan = createElement( doc, 'SPAN', {
'class': 'font',
style: 'font-family:' + face
});
newTreeTop = fontSpan;
newTreeBottom = fontSpan;
}
if ( size ) {
sizeSpan = createElement( doc, 'SPAN', {
'class': 'size',
style: 'font-size:' + fontSizes[ size ] + 'px'
});
if ( !newTreeTop ) {
newTreeTop = sizeSpan;
}
if ( newTreeBottom ) {
newTreeBottom.appendChild( sizeSpan );
}
newTreeBottom = sizeSpan;
}
if ( colour && /^#?([\dA-F]{3}){1,2}$/i.test( colour ) ) {
if ( colour.charAt( 0 ) !== '#' ) {
colour = '#' + colour;
}
colourSpan = createElement( doc, 'SPAN', {
'class': 'colour',
style: 'color:' + colour
});
if ( !newTreeTop ) {
newTreeTop = colourSpan;
}
if ( newTreeBottom ) {
newTreeBottom.appendChild( colourSpan );
}
newTreeBottom = colourSpan;
}
if ( !newTreeTop ) {
newTreeTop = newTreeBottom = createElement( doc, 'SPAN' );
}
parent.replaceChild( newTreeTop, node );
newTreeBottom.appendChild( empty( node ) );
return newTreeBottom;
},
TT: function ( node, parent ) {
var el = createElement( node.ownerDocument, 'SPAN', {
'class': 'font',
style: 'font-family:menlo,consolas,"courier new",monospace'
});
parent.replaceChild( el, node );
el.appendChild( empty( node ) );
return el;
}
};
var removeEmptyInlines = function ( root ) {
var children = root.childNodes,
l = children.length,
child;
while ( l-- ) {
child = children[l];
if ( child.nodeType === ELEMENT_NODE && !isLeaf( child ) ) {
removeEmptyInlines( child );
if ( isInline( child ) && !child.firstChild ) {
root.removeChild( child );
}
} else if ( child.nodeType === TEXT_NODE && !child.data ) {
root.removeChild( child );
}
}
};
/*
Two purposes:
1. Remove nodes we don't want, such as weird <o:p> tags, comment nodes
and whitespace nodes.
2. Convert inline tags into our preferred format.
*/
var cleanTree = function ( node, allowStyles ) {
var children = node.childNodes,
i, l, child, nodeName, nodeType, rewriter, childLength,
data, j, ll;
for ( i = 0, l = children.length; i < l; i += 1 ) {
child = children[i];
nodeName = child.nodeName;
nodeType = child.nodeType;
rewriter = stylesRewriters[ nodeName ];
if ( nodeType === ELEMENT_NODE ) {
childLength = child.childNodes.length;
if ( rewriter ) {
child = rewriter( child, node );
} else if ( !allowedBlock.test( nodeName ) &&
!isInline( child ) ) {
i -= 1;
l += childLength - 1;
node.replaceChild( empty( child ), child );
continue;
} else if ( !allowStyles && child.style.cssText ) {
child.removeAttribute( 'style' );
}
if ( childLength ) {
cleanTree( child, allowStyles );
}
} else {
if ( nodeType === TEXT_NODE ) {
data = child.data;
// Use \S instead of notWS, because we want to remove nodes
// which are just nbsp, in order to cleanup <div>nbsp<br></div>
// construct.
if ( /\S/.test( data ) ) {
// If the parent node is inline, don't trim this node as
// it probably isn't at the end of the block.
if ( isInline( node ) ) {
continue;
}
j = 0;
ll = data.length;
if ( !i || !isInline( children[ i - 1 ] ) ) {
while ( j < ll && !notWS.test( data.charAt( j ) ) ) {
j += 1;
}
if ( j ) {
child.data = data = data.slice( j );
ll -= j;
}
}
if ( i + 1 === l || !isInline( children[ i + 1 ] ) ) {
j = ll;
while ( j > 0 && !notWS.test( data.charAt( j - 1 ) ) ) {
j -= 1;
}
if ( j < ll ) {
child.data = data.slice( 0, j );
}
}
continue;
}
// If we have just white space, it may still be important if it
// separates two inline nodes, e.g. "<a>link</a> <a>link</a>".
else if ( i && i + 1 < l &&
isInline( children[ i - 1 ] ) &&
isInline( children[ i + 1 ] ) ) {
child.data = ' ';
continue;
}
}
node.removeChild( child );
i -= 1;
l -= 1;
}
}
return node;
};
var notWSTextNode = function ( node ) {
return node.nodeType === ELEMENT_NODE ?
node.nodeName === 'BR' :
notWS.test( node.data );
};
var isLineBreak = function ( br ) {
var block = br.parentNode,
walker;
while ( isInline( block ) ) {
block = block.parentNode;
}
walker = new TreeWalker(
block, SHOW_ELEMENT|SHOW_TEXT, notWSTextNode );
walker.currentNode = br;
return !!walker.nextNode();
};
// <br> elements are treated specially, and differently depending on the
// browser, when in rich text editor mode. When adding HTML from external
// sources, we must remove them, replacing the ones that actually affect
// line breaks with a split of the block element containing it (and wrapping
// any not inside a block). Browsers that want <br> elements at the end of
// each block will then have them added back in a later fixCursor method
// call.
var cleanupBRs = function ( root ) {
var brs = root.querySelectorAll( 'BR' ),
brBreaksLine = [],
l = brs.length,
i, br, parent;
// Must calculate whether the <br> breaks a line first, because if we
// have two <br>s next to each other, after the first one is converted
// to a block split, the second will be at the end of a block and
// therefore seem to not be a line break. But in its original context it
// was, so we should also convert it to a block split.
for ( i = 0; i < l; i += 1 ) {
brBreaksLine[i] = isLineBreak( brs[i] );
}
while ( l-- ) {
br = brs[l];
// Cleanup may have removed it
parent = br.parentNode;
if ( !parent ) { continue; }
// If it doesn't break a line, just remove it; it's not doing
// anything useful. We'll add it back later if required by the
// browser. If it breaks a line, wrap the content in div tags
// and replace the brs.
if ( !brBreaksLine[l] ) {
detach( br );
} else if ( !isInline( parent ) ) {
fixContainer( parent );
}
}
};