findUrls: be pickier about what can precede a URL
findUrls() was seeing strings like "You have 1 new message in foo@example.com/Inbox" and finding the URL "[http://]example.com/Inbox". Require that a URL either start at the beginning of the string or be preceded by whitespace or an opening paren/quote/etc. (Since JS doesn't have look-behind assertions like Perl does, we have to actually match the URL-preceding character in the regex, and then adjust the position and text that findUrls() returns accordingly.) Bug: https://bugzilla.gnome.org/show_bug.cgi?id=636252
This commit is contained in:
parent
563221698c
commit
e2898bea5c
@ -9,10 +9,12 @@ const Main = imports.ui.main;
|
|||||||
|
|
||||||
// http://daringfireball.net/2010/07/improved_regex_for_matching_urls
|
// http://daringfireball.net/2010/07/improved_regex_for_matching_urls
|
||||||
const _balancedParens = '\\((?:[^\\s()<>]+|(?:\\(?:[^\\s()<>]+\\)))*\\)';
|
const _balancedParens = '\\((?:[^\\s()<>]+|(?:\\(?:[^\\s()<>]+\\)))*\\)';
|
||||||
|
const _leadingJunk = '[\\s`(\\[{\'\\"<\u00AB\u201C\u2018]';
|
||||||
const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]';
|
const _notTrailingJunk = '[^\\s`!()\\[\\]{};:\'\\".,<>?\u00AB\u00BB\u201C\u201D\u2018\u2019]';
|
||||||
|
|
||||||
const _urlRegexp = new RegExp(
|
const _urlRegexp = new RegExp(
|
||||||
'\\b(' +
|
'(^|' + _leadingJunk + ')' +
|
||||||
|
'(' +
|
||||||
'(?:' +
|
'(?:' +
|
||||||
'[a-z][\\w-]+://' + // scheme://
|
'[a-z][\\w-]+://' + // scheme://
|
||||||
'|' +
|
'|' +
|
||||||
@ -43,7 +45,7 @@ const _urlRegexp = new RegExp(
|
|||||||
function findUrls(str) {
|
function findUrls(str) {
|
||||||
let res = [], match;
|
let res = [], match;
|
||||||
while ((match = _urlRegexp.exec(str)))
|
while ((match = _urlRegexp.exec(str)))
|
||||||
res.push({ url: match[0], pos: match.index });
|
res.push({ url: match[2], pos: match.index + match[1].length });
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user