0
Fork 0
mirror of https://github.com/fastmail/Squire.git synced 2025-01-18 04:32:28 -05:00

Improve link detection regexp.

Fixes the pathological handling of unmatched brackets, which could hang the
browser. Adds support for mailto: query params. Removes support for nested
parentheses in URLs, as these are rare. Adds comment with formatted version of
regex to make it easier to modify in future.
This commit is contained in:
Neil Jenkins 2020-01-28 11:08:15 +11:00
parent f8a5b1ee19
commit 9916a4c300
3 changed files with 103 additions and 3 deletions

View file

@ -4359,7 +4359,57 @@ proto.insertImage = function ( src, attributes ) {
return img;
};
proto.linkRegExp = /\b((?:(?:ht|f)tps?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,}\/)(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))|([\w\-.%+]+@(?:[\w\-]+\.)+[A-Z]{2,}\b)(?:\?[^&?\s]+=[^&?\s]+(?:&[^&?\s]+=[^&?\s]+)*)?/i;
/*
const linkRegExp = new RegExp(
// Only look on boundaries
'\\b(?:' +
// Capture group 1: URLs
'(' +
// Add links to URLS
// Starts with:
'(?:' +
// http(s):// or ftp://
'(?:ht|f)tps?:\\/\\/' +
// or
'|' +
// www.
'www\\d{0,3}[.]' +
// or
'|' +
// foo90.com/
'[a-z0-9][a-z0-9.\\-]*[.][a-z]{2,}\\/' +
')' +
// Then we get one or more:
'(?:' +
// Run of non-spaces, non ()<>
'[^\\s()<>]+' +
// or
'|' +
// balanced parentheses (one level deep only)
'\\([^\\s()<>]+\\)' +
')+' +
// And we finish with
'(?:' +
// Not a space or punctuation character
'[^\\s?&`!()\\[\\]{};:\'".,<>«»“”‘’]' +
// or
'|' +
// Balanced parentheses.
'\\([^\\s()<>]+\\)' +
')' +
// Capture group 2: Emails
')|(' +
// Add links to emails
'[\\w\\-.%+]+@(?:[\\w\\-]+\\.)+[a-z]{2,}\\b' +
// Allow query parameters in the mailto: style
'(?:' +
'[?][^&?\\s]+=[^\\s?&`!()\\[\\]{};:\'".,<>«»“”‘’]+' +
'(?:&[^&?\\s]+=[^\\s?&`!()\\[\\]{};:\'".,<>«»“”‘’]+)*' +
')?' +
'))', 'i' );
*/
proto.linkRegExp = /\b(?:((?:(?:ht|f)tps?:\/\/|www\d{0,3}[.]|[a-z0-9][a-z0-9.\-]*[.][a-z]{2,}\/)(?:[^\s()<>]+|\([^\s()<>]+\))+(?:[^\s?&`!()\[\]{};:'".,<>«»“”‘’]|\([^\s()<>]+\)))|([\w\-.%+]+@(?:[\w\-]+\.)+[a-z]{2,}\b(?:[?][^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+(?:&[^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+)*)?))/i;
var addLinks = function ( frag, root, self ) {
var doc = frag.ownerDocument;

File diff suppressed because one or more lines are too long

View file

@ -1789,7 +1789,57 @@ proto.insertImage = function ( src, attributes ) {
return img;
};
proto.linkRegExp = /\b((?:(?:ht|f)tps?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,}\/)(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))|([\w\-.%+]+@(?:[\w\-]+\.)+[A-Z]{2,}\b)(?:\?[^&?\s]+=[^&?\s]+(?:&[^&?\s]+=[^&?\s]+)*)?/i;
/*
const linkRegExp = new RegExp(
// Only look on boundaries
'\\b(?:' +
// Capture group 1: URLs
'(' +
// Add links to URLS
// Starts with:
'(?:' +
// http(s):// or ftp://
'(?:ht|f)tps?:\\/\\/' +
// or
'|' +
// www.
'www\\d{0,3}[.]' +
// or
'|' +
// foo90.com/
'[a-z0-9][a-z0-9.\\-]*[.][a-z]{2,}\\/' +
')' +
// Then we get one or more:
'(?:' +
// Run of non-spaces, non ()<>
'[^\\s()<>]+' +
// or
'|' +
// balanced parentheses (one level deep only)
'\\([^\\s()<>]+\\)' +
')+' +
// And we finish with
'(?:' +
// Not a space or punctuation character
'[^\\s?&`!()\\[\\]{};:\'".,<>«»“”‘’]' +
// or
'|' +
// Balanced parentheses.
'\\([^\\s()<>]+\\)' +
')' +
// Capture group 2: Emails
')|(' +
// Add links to emails
'[\\w\\-.%+]+@(?:[\\w\\-]+\\.)+[a-z]{2,}\\b' +
// Allow query parameters in the mailto: style
'(?:' +
'[?][^&?\\s]+=[^\\s?&`!()\\[\\]{};:\'".,<>«»“”‘’]+' +
'(?:&[^&?\\s]+=[^\\s?&`!()\\[\\]{};:\'".,<>«»“”‘’]+)*' +
')?' +
'))', 'i' );
*/
proto.linkRegExp = /\b(?:((?:(?:ht|f)tps?:\/\/|www\d{0,3}[.]|[a-z0-9][a-z0-9.\-]*[.][a-z]{2,}\/)(?:[^\s()<>]+|\([^\s()<>]+\))+(?:[^\s?&`!()\[\]{};:'".,<>«»“”‘’]|\([^\s()<>]+\)))|([\w\-.%+]+@(?:[\w\-]+\.)+[a-z]{2,}\b(?:[?][^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+(?:&[^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+)*)?))/i;
var addLinks = function ( frag, root, self ) {
var doc = frag.ownerDocument;