Merge pull request #3040 from matrix-org/bwindels/editor-remaining-md

Fix: better HTML > MD conversion for editing, including lists and quotes
2019-05-31 09:21:43 +00:00 · 2019-05-31 09:21:43 +00:00 · 2d8c523642
commit 2d8c523642
parent 5593b499d9 78fbea307c
2 changed files with 165 additions and 90 deletions
--- a/src/editor/deserialize.js
+++ b/src/editor/deserialize.js
@ -17,32 +17,125 @@ limitations under the License.
 import { MATRIXTO_URL_PATTERN } from '../linkify-matrix';
 import { PlainPart, UserPillPart, RoomPillPart, NewlinePart } from "./parts";
 import { walkDOMDepthFirst } from "./dom";
 const REGEX_MATRIXTO = new RegExp(MATRIXTO_URL_PATTERN);
-function parseLink(a, parts, room) {
+function parseLink(a, room) {
    // Converts an anchor element into a single part.
    // The old version pushed that part onto `parts`; the new version returns it
    // to the caller instead.
    const {href} = a;
    // exec() gives null when the pattern doesn't match; falling back to an empty
    // array leaves both lookups below undefined, which selects the default branch
    const pillMatch = REGEX_MATRIXTO.exec(href) || [];
    const resourceId = pillMatch[1]; // The room/user ID
    const prefix = pillMatch[2]; // The first character of prefix
    switch (prefix) {
        case "@":
            // user pill: id, the link text, and the member looked up in the room
-            parts.push(new UserPillPart(
+            return new UserPillPart(
                resourceId,
                a.textContent,
                room.getMember(resourceId),
-            ));
+            );
            // NOTE(review): unreachable in the new version, the statement above returns
            break;
        case "#":
-            parts.push(new RoomPillPart(resourceId));
+            return new RoomPillPart(resourceId);
            // NOTE(review): unreachable in the new version, the statement above returns
            break;
        default: {
            // a link whose text is its own URL stays as plain text,
            // anything else is written as a markdown link: [text](href)
            if (href === a.textContent) {
-                    parts.push(new PlainPart(a.textContent));
+                return new PlainPart(a.textContent);
            } else {
-                    parts.push(new PlainPart(`[${a.textContent}](${href})`));
+                return new PlainPart(`[${a.textContent}](${href})`);
            }
-            break;
+        }
    }
 }
 // Converts a PRE element into the parts of a fenced markdown code block.
 //
 // Only the element's textContent is used, so markup inside the block is dropped.
 // The text is wrapped in ``` fences and split at "\n": every line becomes a
 // PlainPart and consecutive lines are separated by a NewlinePart.
 //
 // @param n the PRE element (anything exposing `textContent` works)
 // @returns an array of parts, starting and ending with a ``` PlainPart
 function parseCodeBlock(n) {
    const text = n.textContent;
    // the closing fence only ends the block when it starts a line of its own,
    // so terminate the last line of code when the markup didn't do so itself
    const body = (text === "" || text.endsWith("\n")) ? text : text + "\n";
    const preLines = ("```\n" + body + "```").split("\n");
    const parts = [];
    preLines.forEach((l, i) => {
        parts.push(new PlainPart(l));
        // no newline after the closing fence
        if (i < preLines.length - 1) {
            parts.push(new NewlinePart("\n"));
        }
    });
    return parts;
 }
 // Maps one element onto the part(s) that represent it as markdown-ish text.
 //
 // @param n the element to convert
 // @param room only forwarded to parseLink
 // @returns a single part, an array of parts (for PRE), or undefined when the
 //          element produces nothing itself because its children get visited
 function parseElement(n, room) {
    const tag = n.nodeName;
    if (tag === "A") {
        return parseLink(n, room);
    }
    if (tag === "BR") {
        return new NewlinePart("\n");
    }
    if (tag === "EM") {
        return new PlainPart(`*${n.textContent}*`);
    }
    if (tag === "STRONG") {
        return new PlainPart(`**${n.textContent}**`);
    }
    if (tag === "PRE") {
        return parseCodeBlock(n);
    }
    if (tag === "CODE") {
        return new PlainPart(`\`${n.textContent}\``);
    }
    if (tag === "DEL") {
        return new PlainPart(`<del>${n.textContent}</del>`);
    }
    if (tag === "LI") {
        // only the list marker is emitted here, chosen by the parent's node name
        const inOrderedList = n.parentElement.nodeName === "OL";
        return new PlainPart(inOrderedList ? ` 1. ` : ` - `);
    }
    // block nodes we descend into must not be textified as a whole,
    // their children are converted one by one instead
    if (checkDecendInto(n)) {
        return undefined;
    }
    return new PlainPart(n.textContent);
 }
 // Tells whether the children of `node` should be visited.
 // Returns false for PRE, and whatever checkBlockNode says for any other node.
 function checkDecendInto(node) {
    // a code block is textified as a whole in parseCodeBlock, because markup
    // inside it should not be preserved; there is nothing to gain from
    // descending into it
    if (node.nodeName === "PRE") {
        return false;
    }
    return checkBlockNode(node);
 }
 // Tells whether `node` is one of the node names treated as a block:
 // PRE, BLOCKQUOTE, DIV, P, UL, OL or LI. The comparison is case-sensitive.
 function checkBlockNode(node) {
    const blockNodeNames = ["PRE", "BLOCKQUOTE", "DIV", "P", "UL", "OL", "LI"];
    return blockNodeNames.includes(node.nodeName);
 }
 // Tells whether a node should be skipped entirely during conversion.
 // Anything that is neither a text node nor an element is always ignored.
 function checkIgnored(n) {
    switch (n.nodeType) {
        case Node.TEXT_NODE:
            // riot adds "\n" text nodes in a lot of places,
            // those carry no content and should be ignored
            return n.nodeValue === "\n";
        case Node.ELEMENT_NODE:
            return n.nodeName === "MX-REPLY";
        default:
            return true;
    }
 }
 // Inserts a "> " PlainPart at the start of every quoted line, modifying
 // `parts` in place. Nothing is returned.
 //
 // @param isFirstNode when true, a marker is also put in front of the first part
 // @param parts the parts of the quoted content; entries whose `type` is
 //              "newline" mark the line breaks
 function prefixQuoteLines(isFirstNode, parts) {
    const QUOTE_MARKER = "> ";
    // when nothing precedes the quote there is no newline part that the first
    // line's marker could follow, so that marker is added explicitly
    if (isFirstNode) {
        parts.unshift(new PlainPart(QUOTE_MARKER));
    }
    // walk backwards: an insertion then never shifts a position still to be visited
    for (let idx = parts.length - 1; idx >= 0; idx -= 1) {
        if (parts[idx].type === "newline") {
            parts.splice(idx + 1, 0, new PlainPart(QUOTE_MARKER));
        }
    }
 }
@ -51,83 +144,64 @@ function parseHtmlMessage(html, room) {
    // no nodes from parsing here should be inserted in the document,
    // as scripts in event handlers, etc would be executed then.
    // we're only taking text, so that is fine
-    const root = new DOMParser().parseFromString(html, "text/html").body;
+    const rootNode = new DOMParser().parseFromString(html, "text/html").body;
    let n = root.firstChild;
    const parts = [];
-    let isFirstNode = true;
+    let lastNode;
-    while (n && n !== root) {
+    let inQuote = false;
-        switch (n.nodeType) {
+
-            case Node.TEXT_NODE:
+    function onNodeEnter(n) {
-                // the plainpart doesn't accept \n and will cause
+        if (checkIgnored(n)) {
-                // a newlinepart to be created.
+            return false;
                if (n.nodeValue !== "\n") {
                    parts.push(new PlainPart(n.nodeValue));
                }
                break;
            case Node.ELEMENT_NODE:
                switch (n.nodeName) {
                    case "MX-REPLY":
                        break;
                    case "DIV":
                    case "P": {
                        // block element should cause line break if not first
                        if (!isFirstNode) {
                            parts.push(new NewlinePart("\n"));
                        }
                        // decend into paragraph or div
                        if (n.firstChild) {
                            n = n.firstChild;
                            continue;
                        } else {
                            break;
                        }
                    }
                    case "A": {
                        parseLink(n, parts, room);
                        break;
                    }
                    case "BR":
                        parts.push(new NewlinePart("\n"));
                        break;
                    case "EM":
                        parts.push(new PlainPart(`*${n.textContent}*`));
                        break;
                    case "STRONG":
                        parts.push(new PlainPart(`**${n.textContent}**`));
                        break;
                    case "PRE": {
                        // block element should cause line break if not first
                        if (!isFirstNode) {
                            parts.push(new NewlinePart("\n"));
                        }
                        const preLines = `\`\`\`\n${n.textContent}\`\`\``.split("\n");
                        preLines.forEach((l, i) => {
                            parts.push(new PlainPart(l));
                            if (i < preLines.length - 1) {
                                parts.push(new NewlinePart("\n"));
                            }
                        });
                        break;
                    }
                    case "CODE":
                        parts.push(new PlainPart(`\`${n.textContent}\``));
                        break;
                    case "DEL":
                        parts.push(new PlainPart(`<del>${n.textContent}</del>`));
                        break;
                    default:
                        parts.push(new PlainPart(n.textContent));
                        break;
                }
                break;
        }
-        // go up if we can't go next
+        if (n.nodeName === "BLOCKQUOTE") {
-        if (!n.nextSibling) {
+            inQuote = true;
            n = n.parentElement;
        }
-        n = n.nextSibling;
+
-        isFirstNode = false;
+        const newParts = [];
        if (lastNode && (checkBlockNode(lastNode) || checkBlockNode(n))) {
            newParts.push(new NewlinePart("\n"));
        }
        if (n.nodeType === Node.TEXT_NODE) {
            newParts.push(new PlainPart(n.nodeValue));
        } else if (n.nodeType === Node.ELEMENT_NODE) {
            const parseResult = parseElement(n, room);
            if (parseResult) {
                if (Array.isArray(parseResult)) {
                    newParts.push(...parseResult);
                } else {
                    newParts.push(parseResult);
                }
            }
        }
        if (newParts.length && inQuote) {
            const isFirstPart = parts.length === 0;
            prefixQuoteLines(isFirstPart, newParts);
        }
        parts.push(...newParts);
        // extra newline after quote, only if there something behind it...
        if (lastNode && lastNode.nodeName === "BLOCKQUOTE") {
            parts.push(new NewlinePart("\n"));
        }
        lastNode = null;
        return checkDecendInto(n);
    }
    // Leave callback handed to walkDOMDepthFirst.
    // For every node that isn't ignored it records the node as `lastNode`,
    // and leaving a BLOCKQUOTE also switches quote mode off again.
    function onNodeLeave(n) {
        if (!checkIgnored(n)) {
            if (n.nodeName === "BLOCKQUOTE") {
                inQuote = false;
            }
            lastNode = n;
        }
    }
    walkDOMDepthFirst(rootNode, onNodeEnter, onNodeLeave);
    return parts;
 }
--- a/src/editor/dom.js
+++ b/src/editor/dom.js
@ -15,22 +15,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
-function walkDOMDepthFirst(editor, enterNodeCallback, leaveNodeCallback) {
+export function walkDOMDepthFirst(rootNode, enterNodeCallback, leaveNodeCallback) {
-    let node = editor.firstChild;
+    let node = rootNode.firstChild;
-    while (node && node !== editor) {
+    while (node && node !== rootNode) {
-        enterNodeCallback(node);
+        const shouldDecend = enterNodeCallback(node);
-        if (node.firstChild) {
+        if (shouldDecend && node.firstChild) {
            node = node.firstChild;
        } else if (node.nextSibling) {
            node = node.nextSibling;
        } else {
-            while (!node.nextSibling && node !== editor) {
+            while (!node.nextSibling && node !== rootNode) {
                node = node.parentElement;
-                if (node !== editor) {
+                if (node !== rootNode) {
                    leaveNodeCallback(node);
                }
            }
-            if (node !== editor) {
+            if (node !== rootNode) {
                node = node.nextSibling;
            }
        }
@ -62,6 +62,7 @@ export function getCaretOffsetAndText(editor, sel) {
            }
            text += nodeText;
        }
        return true;
    }
    function leaveNodeCallback(node) {