utils: Simplify URL regex to only support one layer of parentheses

The author of the original URL-matching regex warns[0] that the pattern may
cause certain regex engines to lock up with certain input, namely patterns
that contain parentheses. It turns out SpiderMonkey is affected, but rather
than switching to the author's improved version (that is still crazy), sim-
plify the pattern a bit by removing support for nested parentheses in URLs.
Even a single pair of parentheses is extremely rare, so this is unlikely to
make a noticeable difference (other than not locking up SpiderMonkey of
course) ...

[0] http://daringfireball.net/2010/07/improved_regex_for_matching_urls
This commit is contained in:
Florian Müllner 2018-02-27 13:20:02 +01:00 committed by Florian Müllner
parent cb4252e888
commit 5cc42b18b0
2 changed files with 6 additions and 2 deletions

View File

@ -17,7 +17,7 @@ const Params = imports.misc.params;
var SCROLL_TIME = 0.1; var SCROLL_TIME = 0.1;
// http://daringfireball.net/2010/07/improved_regex_for_matching_urls // http://daringfireball.net/2010/07/improved_regex_for_matching_urls
const _balancedParens = '\\((?:[^\\s()<>]+|(?:\\(?:[^\\s()<>]+\\)))*\\)'; const _balancedParens = '\\([^\\s()<>]+\\)';
const _leadingJunk = '[\\s`(\\[{\'\\"<\u00AB\u201C\u2018]'; const _leadingJunk = '[\\s`(\\[{\'\\"<\u00AB\u201C\u2018]';
const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]'; const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]';

View File

@ -38,6 +38,10 @@ const tests = [
output: [ { url: 'http://www.gnome.org:99/port', pos: 10 } ] }, output: [ { url: 'http://www.gnome.org:99/port', pos: 10 } ] },
{ input: 'This is an ftp://www.gnome.org/ test.', { input: 'This is an ftp://www.gnome.org/ test.',
output: [ { url: 'ftp://www.gnome.org/', pos: 11 } ] }, output: [ { url: 'ftp://www.gnome.org/', pos: 11 } ] },
{ input: 'https://www.gnome.org/(some_url,_with_very_unusual_characters)',
output: [ { url: 'https://www.gnome.org/(some_url,_with_very_unusual_characters)', pos: 0 } ] },
{ input: 'https://www.gnome.org/(some_url_with_unbalanced_parenthesis',
output: [ { url: 'https://www.gnome.org/', pos: 0 } ] },
{ input: 'Visit http://www.gnome.org/ and http://developer.gnome.org', { input: 'Visit http://www.gnome.org/ and http://developer.gnome.org',
output: [ { url: 'http://www.gnome.org/', pos: 6 }, output: [ { url: 'http://www.gnome.org/', pos: 6 },