utils: Simplify URL regex to only support one layer of parentheses
The author of the original URL-matching regex warns[0] that the pattern may cause certain regex engines to lock up with certain input, namely patterns that contain parentheses. It turns out SpiderMonkey is affected, but rather than switching to the author's improved version (that is still crazy), simplify the pattern a bit by removing support for nested parentheses in URLs. Even a single pair of parentheses is extremely rare, so this is unlikely to make a noticeable difference (other than not locking up SpiderMonkey of course) ... [0] http://daringfireball.net/2010/07/improved_regex_for_matching_urls
This commit is contained in:
parent
cb4252e888
commit
5cc42b18b0
@ -17,7 +17,7 @@ const Params = imports.misc.params;
|
||||
var SCROLL_TIME = 0.1;
|
||||
|
||||
// http://daringfireball.net/2010/07/improved_regex_for_matching_urls
|
||||
const _balancedParens = '\\((?:[^\\s()<>]+|(?:\\(?:[^\\s()<>]+\\)))*\\)';
|
||||
// Regex fragment: a literal '(' followed by one or more characters that are
// not whitespace, parentheses or angle brackets, then a literal ')'.
// Because the inner character class excludes '(' and ')', only a single,
// non-nested pair of parentheses can match.
const _balancedParens = '\\([^\\s()<>]+\\)';
|
||||
// Regex character class matching one whitespace character, backtick,
// opening bracket ('(', '[', '{'), single or double quote, '<', or one of
// the opening quotation marks U+00AB, U+201C, U+2018.
// NOTE(review): presumably the characters allowed directly before a URL;
// the consuming pattern is not visible here — confirm.
const _leadingJunk = '[\\s`(\\[{\'\\"<\u00AB\u201C\u2018]';
|
||||
// Negated regex character class: matches any single character that is NOT
// whitespace, a backtick, '!', a bracket/brace/parenthesis, ';', ':', a
// quote, '.', ',', '<', '>', '?', or one of the typographic quotation marks
// U+00AB, U+00BB, U+201C, U+201D, U+2018, U+2019.
// NOTE(review): presumably used to keep trailing punctuation out of a
// matched URL; the consuming pattern is not visible here — confirm.
const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]';
|
||||
|
||||
|
@ -38,6 +38,10 @@ const tests = [
|
||||
output: [ { url: 'http://www.gnome.org:99/port', pos: 10 } ] },
|
||||
{ input: 'This is an ftp://www.gnome.org/ test.',
|
||||
output: [ { url: 'ftp://www.gnome.org/', pos: 11 } ] },
|
||||
{ input: 'https://www.gnome.org/(some_url,_with_very_unusual_characters)',
|
||||
output: [ { url: 'https://www.gnome.org/(some_url,_with_very_unusual_characters)', pos: 0 } ] },
|
||||
{ input: 'https://www.gnome.org/(some_url_with_unbalanced_parenthesis',
|
||||
output: [ { url: 'https://www.gnome.org/', pos: 0 } ] },
|
||||
|
||||
{ input: 'Visit http://www.gnome.org/ and http://developer.gnome.org',
|
||||
output: [ { url: 'http://www.gnome.org/', pos: 6 },
|
||||
|
Loading…
Reference in New Issue
Block a user