Sort out the mess that is JavaScript's \b - Fixes vector-im/vector-web#189

\b is *the worst*. From MDN:

Note: JavaScript's regular expression engine defines a specific set of
characters to be "word" characters. Any character not in that set is considered
a word break. This set of characters is fairly limited: it consists solely of
the Roman alphabet in both upper- and lower-case, decimal digits, and the
underscore character. Accented characters, such as "é" or "ü" are,
unfortunately, treated as word breaks.

We fix this by matching on the start of the line or a whitespace character
instead, but then need to tweak the replace() code, since that bluntly replaces
the entire match (which now includes the whitespace). It all works now and I can
happily tab-complete non-ASCII names.
This commit is contained in:
Kegan Dougal 2015-12-22 17:38:24 +00:00
parent 452c265e6a
commit 62c378a619

View file

@ -20,8 +20,14 @@ const KEY_TAB = 9;
const KEY_SHIFT = 16; const KEY_SHIFT = 16;
const KEY_WINDOWS = 91; const KEY_WINDOWS = 91;
// word boundary -> 1 or more non-whitespace chars (group) -> end of line // NB: DO NOT USE \b its "words" are roman alphabet only!
const MATCH_REGEX = /\b(\S+)$/; //
// Capturing group containing the start
// of line or a whitespace char
// \_______________ __________Capturing group of 1 or more non-whitespace chars
// _|__ _|_ followed by the end of line
// / \/ \
const MATCH_REGEX = /(^|\s)(\S+)$/;
class TabComplete { class TabComplete {
@ -239,8 +245,22 @@ class TabComplete {
} }
_replaceWith(newVal, includeSuffix) { _replaceWith(newVal, includeSuffix) {
// The regex to replace the input matches a character of whitespace AND
// the partial word. If we just use string.replace() with the regex it will
// replace the partial word AND the character of whitespace. We want to
// preserve whatever that character is (\n, \t, etc) so find out what it is now.
var boundaryChar;
var res = MATCH_REGEX.exec(this.originalText);
if (res) {
boundaryChar = res[1]; // the first captured group
}
if (boundaryChar === undefined) {
console.warn("Failed to find boundary char on text: '%s'", this.originalText);
boundaryChar = "";
}
var replacementText = ( var replacementText = (
newVal + ( boundaryChar + newVal + (
includeSuffix ? includeSuffix ?
(this.isFirstWord ? this.opts.startingWordSuffix : this.opts.wordSuffix) : (this.isFirstWord ? this.opts.startingWordSuffix : this.opts.wordSuffix) :
"" ""
@ -258,16 +278,17 @@ class TabComplete {
this.matchedList = []; this.matchedList = [];
return; return;
} }
var [ ,group] = res; // ES6 destructuring; ignore first element // ES6 destructuring; ignore first element (the complete match)
this.isFirstWord = group.length === this.originalText.length; var [ , boundaryGroup, partialGroup] = res;
this.isFirstWord = partialGroup.length === this.originalText.length;
this.matchedList = [ this.matchedList = [
new Entry(group) // first entry is always the original partial new Entry(partialGroup) // first entry is always the original partial
]; ];
// find matching entries in the set of entries given to us // find matching entries in the set of entries given to us
this.list.forEach((entry) => { this.list.forEach((entry) => {
if (entry.text.toLowerCase().indexOf(group.toLowerCase()) === 0) { if (entry.text.toLowerCase().indexOf(partialGroup.toLowerCase()) === 0) {
this.matchedList.push(entry); this.matchedList.push(entry);
} }
}); });