Florian Müllner 5cc42b18b0 utils: Simplify URL regex to only support one layer of parentheses
The author of the original URL-matching regex warns[0] that the pattern may
cause certain regex engines to lock up with certain input, namely patterns
that contain parentheses. It turns out SpiderMonkey is affected, but rather
than switching to the author's improved version (that is still crazy),
simplify the pattern a bit by removing support for nested parentheses in URLs.
Even a single pair of parentheses is extremely rare, so this is unlikely to
make a noticeable difference (other than not locking up SpiderMonkey of
course) ...

[0] http://daringfireball.net/2010/07/improved_regex_for_matching_urls
2018-04-13 18:15:44 +00:00

76 lines
3.0 KiB
JavaScript

// -*- mode: js; js-indent-level: 4; indent-tabs-mode: nil -*-
// Test cases for MessageTray URLification
// Modules are pulled in through the global `imports` object: the JsUnit
// assertion helpers and the UI environment module.
const JsUnit = imports.jsUnit;
const Environment = imports.ui.environment;
// The environment is initialised before misc.util is imported below
// (presumably util relies on that setup — TODO confirm).
Environment.init();
// Module providing findUrls(), the function exercised by this test.
const Util = imports.misc.util;
// Table of URLification test cases.
//
// Each entry has:
//   input:  the text handed to Util.findUrls()
//   output: the expected matches, in order of appearance; every match is
//           { url, pos } where `pos` is the index in `input` at which
//           `url` starts. An empty list means no URL must be detected.
const tests = [
    // No URL at all
    { input: 'This is a test',
      output: [] },

    // URL at different positions within the text
    { input: 'This is http://www.gnome.org a test',
      output: [ { url: 'http://www.gnome.org', pos: 8 } ] },
    { input: 'This is http://www.gnome.org',
      output: [ { url: 'http://www.gnome.org', pos: 8 } ] },
    { input: 'http://www.gnome.org a test',
      output: [ { url: 'http://www.gnome.org', pos: 0 } ] },
    { input: 'http://www.gnome.org',
      output: [ { url: 'http://www.gnome.org', pos: 0 } ] },

    // Surrounding punctuation must not become part of the URL
    { input: 'Go to http://www.gnome.org.',
      output: [ { url: 'http://www.gnome.org', pos: 6 } ] },
    { input: 'Go to http://www.gnome.org/.',
      output: [ { url: 'http://www.gnome.org/', pos: 6 } ] },
    { input: '(Go to http://www.gnome.org!)',
      output: [ { url: 'http://www.gnome.org', pos: 7 } ] },
    { input: 'Use GNOME (http://www.gnome.org).',
      output: [ { url: 'http://www.gnome.org', pos: 11 } ] },

    // Paths, scheme-less URLs, ports and other schemes
    { input: 'This is a http://www.gnome.org/path test.',
      output: [ { url: 'http://www.gnome.org/path', pos: 10 } ] },
    { input: 'This is a www.gnome.org scheme-less test.',
      output: [ { url: 'www.gnome.org', pos: 10 } ] },
    { input: 'This is a www.gnome.org/scheme-less test.',
      output: [ { url: 'www.gnome.org/scheme-less', pos: 10 } ] },
    { input: 'This is a http://www.gnome.org:99/port test.',
      output: [ { url: 'http://www.gnome.org:99/port', pos: 10 } ] },
    { input: 'This is an ftp://www.gnome.org/ test.',
      output: [ { url: 'ftp://www.gnome.org/', pos: 11 } ] },

    // Parentheses inside the URL: a balanced pair is kept, an unbalanced
    // opening parenthesis ends the URL
    { input: 'https://www.gnome.org/(some_url,_with_very_unusual_characters)',
      output: [ { url: 'https://www.gnome.org/(some_url,_with_very_unusual_characters)', pos: 0 } ] },
    { input: 'https://www.gnome.org/(some_url_with_unbalanced_parenthesis',
      output: [ { url: 'https://www.gnome.org/', pos: 0 } ] },

    // Several URLs in one text
    { input: 'Visit http://www.gnome.org/ and http://developer.gnome.org',
      output: [ { url: 'http://www.gnome.org/', pos: 6 },
                { url: 'http://developer.gnome.org', pos: 32 } ] },

    // Look-alikes that must not be detected as URLs
    { input: 'This is not.a.domain test.',
      output: [ ] },
    { input: 'This is not:a.url test.',
      output: [ ] },
    { input: 'This is not:/a.url/ test.',
      output: [ ] },
    { input: 'This is not@a.url/ test.',
      output: [ ] },
    { input: 'This is surely@not.a/url test.',
      output: [ ] }
];
// Run every test case through Util.findUrls() and compare the returned
// matches with the expected list: first the number of matches, then the
// url and pos of each individual match.
for (let i = 0; i < tests.length; i++) {
    let expected = tests[i].output;
    let match = Util.findUrls(tests[i].input);

    // JsUnit.assertEquals() takes (comment, expected, actual); passing the
    // expected value first keeps the "Expected ... but was ..." failure
    // message the right way round.
    JsUnit.assertEquals('Test ' + i + ' match length',
                        expected.length, match.length);

    // The length assertion above guarantees expected[j] exists here.
    for (let j = 0; j < match.length; j++) {
        JsUnit.assertEquals('Test ' + i + ', match ' + j + ' url',
                            expected[j].url, match[j].url);
        JsUnit.assertEquals('Test ' + i + ', match ' + j + ' position',
                            expected[j].pos, match[j].pos);
    }
}