findUrl: document the URL-matching regex

Explode the regex onto multiple lines, and add comments explaining the
pieces. Also, change ()s to (?:)s (non-capturing groups) where
appropriate, and replace the UTF-8 characters with \u escapes so that
they actually work.

https://bugzilla.gnome.org/show_bug.cgi?id=636252
This commit is contained in:
Dan Winship 2011-04-13 09:18:00 -04:00
parent 9ae8d90be4
commit 92a85071bc

View File

@ -7,8 +7,30 @@ const Shell = imports.gi.Shell;
const Main = imports.ui.main; const Main = imports.ui.main;
/* http://daringfireball.net/2010/07/improved_regex_for_matching_urls */ // http://daringfireball.net/2010/07/improved_regex_for_matching_urls
const _urlRegexp = new RegExp('\\b(([a-z][\\w-]+:(/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)([^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'\\".,<>?«»“”‘’]))', 'gi'); const _balancedParens = '\\((?:[^\\s()<>]+|(?:\\(?:[^\\s()<>]+\\)))*\\)';
const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]';
const _urlRegexp = new RegExp(
'\\b(' +
'(?:' +
'[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])' + // scheme:data
'|' +
'www\\d{0,3}[.]' + // www.
'|' +
'[a-z0-9.\\-]+[.][a-z]{2,4}/' + // foo.xx/
')' +
'(?:' + // one or more:
'[^\\s()<>]+' + // run of non-space non-()
'|' + // or
_balancedParens + // balanced parens
')+' +
'(?:' + // end with:
_balancedParens + // balanced parens
'|' + // or
_notTrailingJunk + // last non-junk char
')' +
')', 'gi');
// findUrls: // findUrls:
// @str: string to find URLs in // @str: string to find URLs in