Descend into P & DIV elements while parsing a message.

Also split on newlines so that every newline is represented by a NewlinePart.
2019-05-22 13:00:39 +02:00 · 2019-05-22 13:00:39 +02:00 · 723086e4d7
commit 723086e4d7
parent 5f5a2f7140
1 changed file with 88 additions and 31 deletions
--- a/src/editor/deserialize.js
+++ b/src/editor/deserialize.js
@ -18,54 +18,111 @@ limitations under the License.
 import { MATRIXTO_URL_PATTERN } from '../linkify-matrix';
 import { PlainPart, UserPillPart, RoomPillPart, NewlinePart } from "./parts";

-function parseHtmlMessage(html, room) {
 const REGEX_MATRIXTO = new RegExp(MATRIXTO_URL_PATTERN);
-    // no nodes from parsing here should be inserted in the document,
-    // as scripts in event handlers, etc would be executed then.
-    // we're only taking text, so that is fine
-    const nodes = Array.from(new DOMParser().parseFromString(html, "text/html").body.childNodes);
-    const parts = nodes.map(n => {
-        switch (n.nodeType) {
-            case Node.TEXT_NODE:
-                return new PlainPart(n.nodeValue);
-            case Node.ELEMENT_NODE:
-                switch (n.nodeName) {
-                    case "MX-REPLY":
-                        return null;
-                    case "A": {
-                        const {href} = n;
+
+function parseLink(a, parts, room) {
+    const {href} = a;
    const pillMatch = REGEX_MATRIXTO.exec(href) || [];
    const resourceId = pillMatch[1]; // The room/user ID
    const prefix = pillMatch[2]; // The first character of prefix
    switch (prefix) {
-                            case "@": return new UserPillPart(resourceId, n.textContent, room.getMember(resourceId));
-                            case "#": return new RoomPillPart(resourceId);
+        case "@":
+            parts.push(new UserPillPart(
+                resourceId,
+                a.textContent,
+                room.getMember(resourceId),
+            ));
+            break;
+        case "#":
+            parts.push(new RoomPillPart(resourceId));
+            break;
        default: {
-                                if (href === n.textContent) {
-                                    return new PlainPart(n.textContent);
+            if (href === a.textContent) {
+                    parts.push(new PlainPart(a.textContent));
            } else {
-                                    return new PlainPart(`[${n.textContent}](${href})`);
+                    parts.push(new PlainPart(`[${a.textContent}](${href})`));
+            }
+            break;
        }
    }
 }
+
+function parseHtmlMessage(html, room) {
+    // no nodes from parsing here should be inserted in the document,
+    // as scripts in event handlers, etc would be executed then.
+    // we're only taking text, so that is fine
+    const root = new DOMParser().parseFromString(html, "text/html").body;
+    let n = root.firstChild;
+    const parts = [];
+    let isFirstNode = true;
+    while (n && n !== root) {
+        switch (n.nodeType) {
+            case Node.TEXT_NODE:
+                // the plainpart doesn't accept \n and will cause
+                // a newlinepart to be created.
+                if (n.nodeValue !== "\n") {
+                    parts.push(new PlainPart(n.nodeValue));
+                }
+                break;
+            case Node.ELEMENT_NODE:
+                switch (n.nodeName) {
+                    case "DIV":
+                    case "P": {
+                        // block element should cause line break if not first
+                        if (!isFirstNode) {
+                            parts.push(new NewlinePart("\n"));
+                        }
+                        // decend into paragraph or div
+                        if (n.firstChild) {
+                            n = n.firstChild;
+                            continue;
+                        } else {
+                            break;
+                        }
+                    }
+                    case "A": {
+                        parseLink(n, parts, room);
+                        break;
                    }
                    case "BR":
-                        return new NewlinePart("\n");
+                        parts.push(new NewlinePart("\n"));
+                        break;
                    case "EM":
-                        return new PlainPart(`*${n.textContent}*`);
+                        parts.push(new PlainPart(`*${n.textContent}*`));
+                        break;
                    case "STRONG":
-                        return new PlainPart(`**${n.textContent}**`);
-                    case "PRE":
-                        return new PlainPart(`\`\`\`\n${n.textContent}\`\`\``);
+                        parts.push(new PlainPart(`**${n.textContent}**`));
+                        break;
+                    case "PRE": {
+                        // block element should cause line break if not first
+                        if (!isFirstNode) {
+                            parts.push(new NewlinePart("\n"));
+                        }
+                        const preLines = `\`\`\`\n${n.textContent}\`\`\``.split("\n");
+                        preLines.forEach((l, i) => {
+                            parts.push(new PlainPart(l));
+                            if (i < preLines.length - 1) {
+                                parts.push(new NewlinePart("\n"));
+                            }
+                        });
+                        break;
+                    }
                    case "CODE":
-                        return new PlainPart(`\`${n.textContent}\``);
+                        parts.push(new PlainPart(`\`${n.textContent}\``));
+                        break;
                    default:
-                        return new PlainPart(n.textContent);
+                        parts.push(new PlainPart(n.textContent));
+                        break;
                }
-            default:
-                return null;
+                break;
+        }
+        // go up if we can't go next
+        if (!n.nextSibling) {
+            n = n.parentElement;
+        }
+        n = n.nextSibling;
+        isFirstNode = false;
    }
-    }).filter(p => !!p);
    return parts;
 }