findUrls: be pickier about what can precede a URL
findUrls() was seeing strings like "You have 1 new message in foo@example.com/Inbox" and finding the URL "[http://]example.com/Inbox". Require that URLs either start at the start of the string, or be preceded by whitespace or an open paren/quote/etc. (Since JS doesn't have look-behind assertions like Perl does, we have to actually match the URL-preceding character in the regex, and then adjust the result findUrls() returns accordingly.) https://bugzilla.gnome.org/show_bug.cgi?id=636252
This commit is contained in:
parent
563221698c
commit
e2898bea5c
@ -9,10 +9,12 @@ const Main = imports.ui.main;
|
||||
|
||||
// http://daringfireball.net/2010/07/improved_regex_for_matching_urls
|
||||
// Regex source fragment matching a parenthesised run with at most one level
// of nested parentheses, e.g. "(foo)", "(foo(bar)baz)". No whitespace, and no
// "<" or ">", may appear inside.
//
// The nested alternative must begin with an escaped literal "(" -- writing
// "\\(?:" here would instead mean "optional '(' followed by a literal ':'".
// The first alternative matches a single character (no inner "+") so that
// the outer "*" cannot split the same run of characters in many different
// ways, which would make failed matches backtrack exponentially.
const _balancedParens = '\\((?:[^\\s()<>]|\\([^\\s()<>]+\\))*\\)';
|
||||
// Regex character class matching ONE character that may immediately precede
// a URL: whitespace, a backtick, an opening bracket ( [ {, a straight quote
// ' or ", "<", or an opening typographic quote (U+00AB, U+201C, U+2018).
// It is matched as a real (consuming) group in _urlRegexp rather than as a
// look-behind, so code using the match has to skip over this character.
const _leadingJunk = '[\\s`(\\[{\'\\"<\u00AB\u201C\u2018]';
|
||||
// Negated regex character class matching ONE character that is not
// whitespace, a backtick, any bracket ( ) [ ] { } < >, a straight quote,
// a typographic quote (U+00AB, U+00BB, U+201C, U+201D, U+2018, U+2019),
// or one of the punctuation marks ! ; : . , ?
// NOTE(review): presumably used for the final character of a URL so that
// trailing punctuation is not swallowed -- the use site is not visible here.
const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]';
|
||||
|
||||
const _urlRegexp = new RegExp(
|
||||
'\\b(' +
|
||||
'(^|' + _leadingJunk + ')' +
|
||||
'(' +
|
||||
'(?:' +
|
||||
'[a-z][\\w-]+://' + // scheme://
|
||||
'|' +
|
||||
@ -43,7 +45,7 @@ const _urlRegexp = new RegExp(
|
||||
/**
 * Find every URL in a string.
 *
 * _urlRegexp captures the character preceding the URL (or the empty string
 * when the URL starts at the beginning of the input) as group 1 and the URL
 * itself as group 2, because the leading character has to be consumed by the
 * match instead of being checked with a look-behind assertion.
 *
 * NOTE(review): the loop relies on _urlRegexp.exec() advancing between calls,
 * i.e. on the regex being created with the 'g' flag -- confirm at the
 * definition, which is not fully visible here.
 *
 * @param {string} str - text to scan
 * @returns {Array<{url: string, pos: number}>} one entry per URL found, where
 *     `pos` is the index in `str` at which the URL itself (not the preceding
 *     character) starts
 */
function findUrls(str) {
    let res = [], match;
    while ((match = _urlRegexp.exec(str))) {
        // match.index points at the leading character (group 1), so skip
        // over it to report where the URL proper begins.
        res.push({ url: match[2], pos: match.index + match[1].length });
    }
    return res;
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user