From 62c378a619e9c95fbac5d2cd7c2fbf156c7d93eb Mon Sep 17 00:00:00 2001
From: Kegan Dougal <kegan@matrix.org>
Date: Tue, 22 Dec 2015 17:38:24 +0000
Subject: [PATCH] Sort out the mess that is Javascript's \b - Fixes
 vector-im/vector-web#189
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

\b is *the worst*. From MDN:

Note: JavaScript's regular expression engine defines a specific set of
characters to be "word" characters. Any character not in that set is considered
a word break. This set of characters is fairly limited: it consists solely of
the Roman alphabet in both upper- and lower-case, decimal digits, and the
underscore character. Accented characters, such as "é" or "ü" are,
unfortunately, treated as word breaks.

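We fix this by matching on start-of-line or a whitespace character instead of
\b. Because replace() bluntly replaces the entire match (which now includes
that whitespace character), the replace() code is tweaked to capture the
boundary character and put it back in front of the completed word. It all
works now and I can happily tab-complete non-ASCII names.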
---
 src/TabComplete.js | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/src/TabComplete.js b/src/TabComplete.js
index e806833242..64c4479efb 100644
--- a/src/TabComplete.js
+++ b/src/TabComplete.js
@@ -20,8 +20,14 @@ const KEY_TAB = 9;
 const KEY_SHIFT = 16;
 const KEY_WINDOWS = 91;
 
-// word boundary -> 1 or more non-whitespace chars (group) -> end of line
-const MATCH_REGEX = /\b(\S+)$/;
+// NB: DO NOT USE \b - its "word" characters are only [A-Za-z0-9_], so accented letters count as word breaks!
+//
+// Capturing group containing the start
+// of line or a whitespace char
+//     \_______________       __________Capturing group of 1 or more non-whitespace chars
+//                    _|__  _|_         followed by the end of line
+//                   /    \/   \
+const MATCH_REGEX = /(^|\s)(\S+)$/;
 
 class TabComplete {
 
@@ -239,8 +245,22 @@ class TabComplete {
     }
 
     _replaceWith(newVal, includeSuffix) {
+        // The regex to replace the input matches a character of whitespace AND
+        // the partial word. If we just use string.replace() with the regex it will
+        // replace the partial word AND the character of whitespace. We want to
+        // preserve whatever that character is (\n, \t, etc) so find out what it is now.
+        var boundaryChar;
+        var res = MATCH_REGEX.exec(this.originalText);
+        if (res) {
+            boundaryChar = res[1]; // the first captured group
+        }
+        if (boundaryChar === undefined) {
+            console.warn("Failed to find boundary char on text: '%s'", this.originalText);
+            boundaryChar = "";
+        }
+
         var replacementText = (
-            newVal + (
+            boundaryChar + newVal + (
                 includeSuffix ?
                     (this.isFirstWord ? this.opts.startingWordSuffix : this.opts.wordSuffix) :
                     ""
@@ -258,16 +278,17 @@ class TabComplete {
             this.matchedList = [];
             return;
         }
-        var [ ,group] = res; // ES6 destructuring; ignore first element
-        this.isFirstWord = group.length === this.originalText.length;
+        // ES6 destructuring; ignore first element (the complete match)
+        var [ , boundaryGroup, partialGroup] = res;
+        this.isFirstWord = partialGroup.length === this.originalText.length;
 
         this.matchedList = [
-            new Entry(group) // first entry is always the original partial
+            new Entry(partialGroup) // first entry is always the original partial
         ];
 
         // find matching entries in the set of entries given to us
         this.list.forEach((entry) => {
-            if (entry.text.toLowerCase().indexOf(group.toLowerCase()) === 0) {
+            if (entry.text.toLowerCase().indexOf(partialGroup.toLowerCase()) === 0) {
                 this.matchedList.push(entry);
             }
         });