From 92a85071bc910f9082b76b0345975b2773b4267e Mon Sep 17 00:00:00 2001
From: Dan Winship
Date: Wed, 13 Apr 2011 09:18:00 -0400
Subject: [PATCH] findUrl: document the URL-matching regex

Explode the regex onto multiple lines, and add comments explaining the
pieces. Also, change ()s to (?:)s (non-capturing groups) where
appropriate, and replace the UTF-8 characters with \u escapes so that
they actually work.

https://bugzilla.gnome.org/show_bug.cgi?id=636252
---
 js/misc/util.js | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/js/misc/util.js b/js/misc/util.js
index fc0b54ae2..7c1485836 100644
--- a/js/misc/util.js
+++ b/js/misc/util.js
@@ -7,8 +7,34 @@ const Shell = imports.gi.Shell;
 
 const Main = imports.ui.main;
 
-/* http://daringfireball.net/2010/07/improved_regex_for_matching_urls */
-const _urlRegexp = new RegExp('\\b(([a-z][\\w-]+:(/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)([^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'\\".,<>?«»“”‘’]))', 'gi');
+// http://daringfireball.net/2010/07/improved_regex_for_matching_urls
+// Parenthesized run of URL characters, optionally containing one more
+// level of parenthesized text: "(foo)" or "(foo(bar))".
+const _balancedParens = '\\((?:[^\\s()<>]+|(?:\\([^\\s()<>]+\\)))*\\)';
+// Any single character that can end a URL, i.e. not whitespace and not
+// punctuation (or typographic quotes) that tends to follow one.
+const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]';
+
+const _urlRegexp = new RegExp(
+    '\\b(' +
+        '(?:' +
+            '[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])' + // scheme:data
+            '|' +
+            'www\\d{0,3}[.]' +                    // www.
+            '|' +
+            '[a-z0-9.\\-]+[.][a-z]{2,4}/' +       // foo.xx/
+        ')' +
+        '(?:' +                                   // one or more:
+            '[^\\s()<>]+' +                       // run of non-space non-()
+            '|' +                                 // or
+            _balancedParens +                     // balanced parens
+        ')+' +
+        '(?:' +                                   // end with:
+            _balancedParens +                     // balanced parens
+            '|' +                                 // or
+            _notTrailingJunk +                    // last non-junk char
+        ')' +
+    ')', 'gi');
 
 // findUrls:
 // @str: string to find URLs in