Fix a variety of issues with HTML → Markdown conversion (#8004)

* Fix a variety of issues with HTML → Markdown conversion
  Signed-off-by: Robin Townsend <robin@robin.town>
* Fix lint
  Signed-off-by: Robin Townsend <robin@robin.town>
* Fix @room pill formatting not being applied to link text
  Signed-off-by: Robin Townsend <robin@robin.town>
2022-03-09 07:43:05 -05:00 · 2022-03-09 07:43:05 -05:00 · c10ac9e4a0
commit c10ac9e4a0
parent 65691202f7
3 changed files with 422 additions and 260 deletions
--- a/src/editor/deserialize.ts
+++ b/src/editor/deserialize.ts
@ -17,190 +17,110 @@ limitations under the License.
 import { MatrixEvent } from "matrix-js-sdk/src/models/event";
 import { walkDOMDepthFirst } from "./dom";
 import { checkBlockNode } from "../HtmlUtils";
 import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks";
 import { Part, PartCreator, Type } from "./parts";
 import SdkConfig from "../SdkConfig";
 import { textToHtmlRainbow } from "../utils/colour";
-function parseAtRoomMentions(text: string, partCreator: PartCreator): Part[] {
+const LIST_TYPES = ["UL", "OL", "LI"];
 // Escapes all markup in the given text by putting a backslash in front of
 // every character Markdown would otherwise treat as formatting.
 //
 // Escaped wherever they occur: backslash, *, _, [, ], backtick and <.
 // Escaped only as the first character of a line: > (which would start a quote).
 //
 // Returns the escaped copy; the input string is not modified.
 function escape(text: string): string {
    // The `m` flag lets `^` match after every line break rather than only at the
    // very start of the string, so a `>` opening a later line of a multi-line
    // text is escaped as well instead of turning into a blockquote
    return text.replace(/[\\*_[\]`<]|^>/gm, match => `\\${match}`);
 }
 // Returns how many backticks make up the longest unbroken run of backticks in
 // the given text (0 when it contains none). Callers use this to choose a code
 // fence that is longer than anything inside the code itself.
 function longestBacktickSequence(text: string): number {
    // Every maximal run of consecutive backticks, or nothing if there are none
    const runs: string[] = text.match(/`+/g) ?? [];
    return runs.reduce((longest, run) => Math.max(longest, run.length), 0);
 }
 // Tells whether the direct parent of the node is one of the list elements
 // named in LIST_TYPES. A node without a parent is never a list child.
 function isListChild(n: Node): boolean {
    const parentTag = n.parentNode?.nodeName;
    return LIST_TYPES.some(listTag => listTag === parentTag);
 }
 function parseAtRoomMentions(text: string, pc: PartCreator): Part[] {
    const ATROOM = "@room";
    const parts: Part[] = [];
    text.split(ATROOM).forEach((textPart, i, arr) => {
        if (textPart.length) {
-            parts.push(...partCreator.plainWithEmoji(textPart));
+            parts.push(...pc.plainWithEmoji(escape(textPart)));
        }
        // it's safe to never append @room after the last textPart
        // as split will report an empty string at the end if
        // `text` ended in @room.
        const isLast = i === arr.length - 1;
        if (!isLast) {
-            parts.push(partCreator.atRoomPill(ATROOM));
+            parts.push(pc.atRoomPill(ATROOM));
        }
    });
    return parts;
 }
-function parseLink(a: HTMLAnchorElement, partCreator: PartCreator): Part[] {
+function parseLink(n: Node, pc: PartCreator): Part[] {
-    const { href } = a;
+    const { href } = n as HTMLAnchorElement;
    const resourceId = getPrimaryPermalinkEntity(href); // The room/user ID
-    const prefix = resourceId ? resourceId[0] : undefined; // First character of ID
+
-    switch (prefix) {
+    switch (resourceId?.[0]) {
-        case "@":
+        case "@": return [pc.userPill(n.textContent, resourceId)];
-            return [partCreator.userPill(a.textContent, resourceId)];
+        case "#": return [pc.roomPill(resourceId)];
-        case "#":
+    }
-            return [partCreator.roomPill(resourceId)];
+
-        default: {
+    const children = Array.from(n.childNodes);
-            if (href === a.textContent) {
+    if (href === n.textContent && children.every(c => c.nodeType === Node.TEXT_NODE)) {
-                return partCreator.plainWithEmoji(a.textContent);
+        return parseAtRoomMentions(n.textContent, pc);
-            } else {
+    } else {
-                return partCreator.plainWithEmoji(`[${a.textContent.replace(/[[\\\]]/g, c => "\\" + c)}](${href})`);
+        return [pc.plain("["), ...parseChildren(n, pc), pc.plain(`](${href})`)];
            }
        }
    }
 }
-function parseImage(img: HTMLImageElement, partCreator: PartCreator): Part[] {
+function parseImage(n: Node, pc: PartCreator): Part[] {
-    const { src } = img;
+    const { alt, src } = n as HTMLImageElement;
-    return partCreator.plainWithEmoji(`![${img.alt.replace(/[[\\\]]/g, c => "\\" + c)}](${src})`);
+    return pc.plainWithEmoji(`![${escape(alt)}](${src})`);
 }
-function parseCodeBlock(n: HTMLElement, partCreator: PartCreator): Part[] {
+function parseCodeBlock(n: Node, pc: PartCreator): Part[] {
    const parts: Part[] = [];
    let language = "";
-    if (n.firstChild && n.firstChild.nodeName === "CODE") {
+    if (n.firstChild?.nodeName === "CODE") {
-        for (const className of (<HTMLElement>n.firstChild).classList) {
+        for (const className of (n.firstChild as HTMLElement).classList) {
            if (className.startsWith("language-") && !className.startsWith("language-_")) {
                language = className.substr("language-".length);
                break;
            }
        }
    }
-    const preLines = ("```" + language + "\n" + n.textContent + "```").split("\n");
+
-    preLines.forEach((l, i) => {
+    const text = n.textContent.replace(/\n$/, "");
-        parts.push(...partCreator.plainWithEmoji(l));
+    // Escape backticks by using even more backticks for the fence if necessary
-        if (i < preLines.length - 1) {
+    const fence = "`".repeat(Math.max(3, longestBacktickSequence(text) + 1));
-            parts.push(partCreator.newline());
+    const parts: Part[] = [...pc.plainWithEmoji(fence + language), pc.newline()];
-        }
+
    text.split("\n").forEach(line => {
        parts.push(...pc.plainWithEmoji(line));
        parts.push(pc.newline());
    });
    parts.push(pc.plain(fence));
    return parts;
 }
-function parseHeader(el: HTMLElement, partCreator: PartCreator): Part {
+function parseHeader(n: Node, pc: PartCreator): Part[] {
-    const depth = parseInt(el.nodeName.substr(1), 10);
+    const depth = parseInt(n.nodeName.substr(1), 10);
-    return partCreator.plain("#".repeat(depth) + " ");
+    const prefix = pc.plain("#".repeat(depth) + " ");
-}
+    return [prefix, ...parseChildren(n, pc)];
 // Mutable bookkeeping that parseElement reads and updates while the DOM of one
 // message is being walked
 interface IState {
    // Stack with one entry per currently open <ol>: the number the next <li> of
    // that list will be given. Pushed on entering an OL, popped on leaving it.
    listIndex: number[];
    // How many lists (<ul>/<ol>) are currently open; starts out unset and is
    // incremented on entering a list, decremented on leaving it
    listDepth?: number;
 }
 // Produces the parts an element contributes at the moment the DOM walk enters
 // it (its opening markup, or its whole text for elements that are textified).
 //
 //   n           - the element being entered
 //   partCreator - creator used for every part that is produced
 //   lastNode    - node recorded by the walk before this one, if any
 //   state       - list bookkeeping; updated here when entering OL/UL/LI
 //
 // Returns a single part or an array of parts. Several paths below `break` or
 // fall off the end of the switch without returning, in which case the result
 // is undefined and the element contributes nothing by itself.
 function parseElement(
    n: HTMLElement,
    partCreator: PartCreator,
    lastNode: Node | undefined,
    state: IState,
 ): Part | Part[] {
    switch (n.nodeName) {
        case "H1":
        case "H2":
        case "H3":
        case "H4":
        case "H5":
        case "H6":
            return parseHeader(n, partCreator);
        case "A":
            return parseLink(<HTMLAnchorElement>n, partCreator);
        case "IMG":
            return parseImage(<HTMLImageElement>n, partCreator);
        case "BR":
            return partCreator.newline();
        case "HR":
            // the newline arrangement here is quite specific otherwise it may be misconstrued as marking the previous
            // text line as a header instead of acting as a horizontal rule.
            return [
                partCreator.newline(),
                partCreator.plain("---"),
                partCreator.newline(),
            ];
        // The inline formatting elements below are flattened to their text content
        // wrapped in the matching Markdown or HTML markers
        case "EM":
            return partCreator.plainWithEmoji(`_${n.textContent}_`);
        case "STRONG":
            return partCreator.plainWithEmoji(`**${n.textContent}**`);
        case "PRE":
            return parseCodeBlock(n, partCreator);
        case "CODE":
            return partCreator.plainWithEmoji(`\`${n.textContent}\``);
        case "DEL":
            return partCreator.plainWithEmoji(`<del>${n.textContent}</del>`);
        case "SUB":
            return partCreator.plainWithEmoji(`<sub>${n.textContent}</sub>`);
        case "SUP":
            return partCreator.plainWithEmoji(`<sup>${n.textContent}</sup>`);
        case "U":
            return partCreator.plainWithEmoji(`<u>${n.textContent}</u>`);
        case "LI": {
            // Only the item marker is produced here: indentation plus "- " or "N. "
            const BASE_INDENT = 4;
            // listDepth was incremented when the enclosing list was entered, so
            // items of an outermost list get depth 0 and therefore no indentation
            const depth = state.listDepth - 1;
            const indent = " ".repeat(BASE_INDENT * depth);
            if (n.parentElement.nodeName === "OL") {
                // The markdown parser doesn't do nested indexed lists at all, but this supports it anyway.
                // Use the number on top of the stack, then advance it for the next item
                const index = state.listIndex[state.listIndex.length - 1];
                state.listIndex[state.listIndex.length - 1] += 1;
                return partCreator.plain(`${indent}${index}. `);
            } else {
                return partCreator.plain(`${indent}- `);
            }
        }
        case "P": {
            // A paragraph only contributes a line break, and only when lastNode is set
            if (lastNode) {
                return partCreator.newline();
            }
            break;
        }
        case "DIV":
        case "SPAN": {
            // math nodes are translated back into delimited latex strings
            if (n.hasAttribute("data-mx-maths")) {
                // SPAN uses the configured inline delimiters and DIV the display ones,
                // with \( \) and \[ \] as the respective defaults
                const delimLeft = (n.nodeName == "SPAN") ?
                    ((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['left'] || "\\(" :
                    ((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['left'] || "\\[";
                const delimRight = (n.nodeName == "SPAN") ?
                    ((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['right'] || "\\)" :
                    ((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['right'] || "\\]";
                const tex = n.getAttribute("data-mx-maths");
                return partCreator.plainWithEmoji(delimLeft + tex + delimRight);
            } else if (!checkDescendInto(n)) {
                return partCreator.plainWithEmoji(n.textContent);
            }
            break;
        }
        case "OL":
            // Entering an ordered list: push its first number (the `start` value, or 1 when that is falsy)
            state.listIndex.push((<HTMLOListElement>n).start || 1);
            /* falls through */
        case "UL":
            // Entering any list: one level deeper
            state.listDepth = (state.listDepth || 0) + 1;
            /* falls through */
        default:
            // don't textify block nodes we'll descend into
            if (!checkDescendInto(n)) {
                return partCreator.plainWithEmoji(n.textContent);
            }
    }
 }
 // Decides whether the DOM walk should go on to visit the children of a node
 function checkDescendInto(node) {
    // parseCodeBlock turns a whole PRE into text on its own and deliberately
    // drops any markup inside it, so there is nothing to visit beneath it
    if (node.nodeName === "PRE") {
        return false;
    }
    // Everything else defers to the block-node check
    return checkBlockNode(node);
 }
 function checkIgnored(n) {
@ -214,144 +134,169 @@ function checkIgnored(n) {
    return true;
 }
-const QUOTE_LINE_PREFIX = "> ";
+function prefixLines(parts: Part[], prefix: string, pc: PartCreator) {
-function prefixQuoteLines(isFirstNode, parts, partCreator) {
+    parts.unshift(pc.plain(prefix));
-    // a newline (to append a > to) wouldn't be added to parts for the first line
+    for (let i = 0; i < parts.length; i++) {
    // if there was no content before the BLOCKQUOTE, so handle that
    if (isFirstNode) {
        parts.splice(0, 0, partCreator.plain(QUOTE_LINE_PREFIX));
    }
    for (let i = 0; i < parts.length; i += 1) {
        if (parts[i].type === Type.Newline) {
-            parts.splice(i + 1, 0, partCreator.plain(QUOTE_LINE_PREFIX));
+            parts.splice(i + 1, 0, pc.plain(prefix));
            i += 1;
        }
    }
 }
-function parseHtmlMessage(html: string, partCreator: PartCreator, isQuotedMessage: boolean): Part[] {
+function parseChildren(n: Node, pc: PartCreator, mkListItem?: (li: Node) => Part[]): Part[] {
    let prev;
    return Array.from(n.childNodes).flatMap(c => {
        const parsed = parseNode(c, pc, mkListItem);
        if (parsed.length && prev && (checkBlockNode(prev) || checkBlockNode(c))) {
            if (isListChild(c)) {
                // Use tighter spacing within lists
                parsed.unshift(pc.newline());
            } else {
                parsed.unshift(pc.newline(), pc.newline());
            }
        }
        if (parsed.length) prev = c;
        return parsed;
    });
 }
 // Converts one DOM node, together with everything beneath it, into the editor
 // parts that spell out the equivalent Markdown.
 //
 //   n          - node to convert
 //   pc         - creator used for every part that is produced
 //   mkListItem - if given, builds the parts for an <li>; UL and OL supply it
 //                through parseChildren so items get their bullet or number
 //
 // Returns [] for nodes rejected by checkIgnored. Nodes without a dedicated case
 // below simply contribute the parts of their children.
 function parseNode(n: Node, pc: PartCreator, mkListItem?: (li: Node) => Part[]): Part[] {
    if (checkIgnored(n)) return [];
    switch (n.nodeType) {
        case Node.TEXT_NODE:
            return parseAtRoomMentions(n.nodeValue, pc);
        case Node.ELEMENT_NODE:
            switch (n.nodeName) {
                case "H1":
                case "H2":
                case "H3":
                case "H4":
                case "H5":
                case "H6":
                    return parseHeader(n, pc);
                case "A":
                    return parseLink(n, pc);
                case "IMG":
                    return parseImage(n, pc);
                case "BR":
                    return [pc.newline()];
                case "HR":
                    return [pc.plain("---")];
                // Inline formatting keeps its children, so nested formatting survives
                case "EM":
                    return [pc.plain("_"), ...parseChildren(n, pc), pc.plain("_")];
                case "STRONG":
                    return [pc.plain("**"), ...parseChildren(n, pc), pc.plain("**")];
                case "DEL":
                    return [pc.plain("<del>"), ...parseChildren(n, pc), pc.plain("</del>")];
                case "SUB":
                    return [pc.plain("<sub>"), ...parseChildren(n, pc), pc.plain("</sub>")];
                case "SUP":
                    return [pc.plain("<sup>"), ...parseChildren(n, pc), pc.plain("</sup>")];
                case "U":
                    return [pc.plain("<u>"), ...parseChildren(n, pc), pc.plain("</u>")];
                case "PRE":
                    return parseCodeBlock(n, pc);
                case "CODE": {
                    const text = n.textContent ?? "";
                    // Escape backticks by using multiple backticks for the fence if necessary
                    const fence = "`".repeat(longestBacktickSequence(text) + 1);
                    // A backtick at either edge of the content would fuse with the fence and
                    // change its length, so keep them apart with a space on each side;
                    // CommonMark removes one such pair of spaces again when parsing a code span
                    const pad = (text.startsWith("`") || text.endsWith("`")) ? " " : "";
                    return pc.plainWithEmoji(`${fence}${pad}${text}${pad}${fence}`);
                }
                case "BLOCKQUOTE": {
                    const parts = parseChildren(n, pc);
                    prefixLines(parts, "> ", pc);
                    return parts;
                }
                case "LI":
                    // An <li> outside of any list has no builder and is treated as a plain container
                    return mkListItem?.(n) ?? parseChildren(n, pc);
                case "UL": {
                    const parts = parseChildren(n, pc, li => [pc.plain("- "), ...parseChildren(li, pc)]);
                    // A list nested inside another list is indented one level
                    if (isListChild(n)) {
                        prefixLines(parts, "    ", pc);
                    }
                    return parts;
                }
                case "OL": {
                    // Number the items from the list's `start` attribute instead of always
                    // from 1. Markdown list markers cannot be negative, so fall back to 1
                    // when the value is negative or unavailable.
                    const { start } = n as HTMLOListElement;
                    let counter = start >= 0 ? start : 1;
                    const parts = parseChildren(n, pc, li => {
                        const item = [pc.plain(`${counter}. `), ...parseChildren(li, pc)];
                        counter++;
                        return item;
                    });
                    // A list nested inside another list is indented one level
                    if (isListChild(n)) {
                        prefixLines(parts, "    ", pc);
                    }
                    return parts;
                }
                case "DIV":
                case "SPAN":
                    // Math nodes are translated back into delimited latex strings
                    if ((n as Element).hasAttribute("data-mx-maths")) {
                        const delims = SdkConfig.get().latex_maths_delims;
                        const delimLeft = (n.nodeName === "SPAN") ?
                            delims?.inline?.left ?? "\\(" :
                            delims?.display?.left ?? "\\[";
                        const delimRight = (n.nodeName === "SPAN") ?
                            delims?.inline?.right ?? "\\)" :
                            delims?.display?.right ?? "\\]";
                        const tex = (n as Element).getAttribute("data-mx-maths");
                        return pc.plainWithEmoji(`${delimLeft}${tex}${delimRight}`);
                    }
                    // Ordinary DIV/SPAN: leave the switch and parse the children below
            }
    }
    // Anything not handled above is transparent: only its children matter
    return parseChildren(n, pc);
 }
 function parseHtmlMessage(html: string, pc: PartCreator, isQuotedMessage: boolean): Part[] {
    // no nodes from parsing here should be inserted in the document,
    // as scripts in event handlers, etc would be executed then.
    // we're only taking text, so that is fine
-    const rootNode = new DOMParser().parseFromString(html, "text/html").body;
+    const parts = parseNode(new DOMParser().parseFromString(html, "text/html").body, pc);
-    const parts: Part[] = [];
+    if (isQuotedMessage) {
-    let lastNode: Node;
+        prefixLines(parts, "> ", pc);
    let inQuote = isQuotedMessage;
    const state: IState = {
        listIndex: [],
    };
    function onNodeEnter(n: Node) {
        if (checkIgnored(n)) {
            return false;
        }
        if (n.nodeName === "BLOCKQUOTE") {
            inQuote = true;
        }
        const newParts: Part[] = [];
        if (lastNode && (checkBlockNode(lastNode) || checkBlockNode(n))) {
            newParts.push(partCreator.newline());
        }
        if (n.nodeType === Node.TEXT_NODE) {
            let { nodeValue } = n;
            // Sometimes commonmark adds a newline at the end of the list item text
            if (n.parentNode.nodeName === "LI") {
                nodeValue = nodeValue.trimEnd();
            }
            newParts.push(...parseAtRoomMentions(nodeValue, partCreator));
            const grandParent = n.parentNode.parentNode;
            const isTight = n.parentNode.nodeName !== "P" || grandParent?.nodeName !== "LI";
            if (!isTight) {
                newParts.push(partCreator.newline());
            }
        } else if (n.nodeType === Node.ELEMENT_NODE) {
            const parseResult = parseElement(n as HTMLElement, partCreator, lastNode, state);
            if (parseResult) {
                if (Array.isArray(parseResult)) {
                    newParts.push(...parseResult);
                } else {
                    newParts.push(parseResult);
                }
            }
        }
        if (newParts.length && inQuote) {
            const isFirstPart = parts.length === 0;
            prefixQuoteLines(isFirstPart, newParts, partCreator);
        }
        parts.push(...newParts);
        const descend = checkDescendInto(n);
        // when not descending (like for PRE), onNodeLeave won't be called to set lastNode
        // so do that here.
        lastNode = descend ? null : n;
        return descend;
    }
    function onNodeLeave(n: Node) {
        if (checkIgnored(n)) {
            return;
        }
        switch (n.nodeName) {
            case "BLOCKQUOTE":
                inQuote = false;
                break;
            case "OL":
                state.listIndex.pop();
                /* falls through */
            case "UL":
                state.listDepth -= 1;
                break;
        }
        lastNode = n;
    }
    walkDOMDepthFirst(rootNode, onNodeEnter, onNodeLeave);
    return parts;
 }
-export function parsePlainTextMessage(body: string, partCreator: PartCreator, isQuotedMessage?: boolean): Part[] {
+export function parsePlainTextMessage(body: string, pc: PartCreator, isQuotedMessage?: boolean): Part[] {
    const lines = body.split(/\r\n|\r|\n/g); // split on any new-line combination not just \n, collapses \r\n
    return lines.reduce((parts, line, i) => {
        if (isQuotedMessage) {
-            parts.push(partCreator.plain(QUOTE_LINE_PREFIX));
+            parts.push(pc.plain("> "));
        }
-        parts.push(...parseAtRoomMentions(line, partCreator));
+        parts.push(...parseAtRoomMentions(line, pc));
        const isLast = i === lines.length - 1;
        if (!isLast) {
-            parts.push(partCreator.newline());
+            parts.push(pc.newline());
        }
        return parts;
    }, [] as Part[]);
 }
-export function parseEvent(event: MatrixEvent, partCreator: PartCreator, { isQuotedMessage = false } = {}) {
+export function parseEvent(event: MatrixEvent, pc: PartCreator, { isQuotedMessage = false } = {}) {
    const content = event.getContent();
    let parts: Part[];
    const isEmote = content.msgtype === "m.emote";
    let isRainbow = false;
    if (content.format === "org.matrix.custom.html") {
-        parts = parseHtmlMessage(content.formatted_body || "", partCreator, isQuotedMessage);
+        parts = parseHtmlMessage(content.formatted_body || "", pc, isQuotedMessage);
        if (content.body && content.formatted_body && textToHtmlRainbow(content.body) === content.formatted_body) {
            isRainbow = true;
        }
    } else {
-        parts = parsePlainTextMessage(content.body || "", partCreator, isQuotedMessage);
+        parts = parsePlainTextMessage(content.body || "", pc, isQuotedMessage);
    }
    if (isEmote && isRainbow) {
-        parts.unshift(partCreator.plain("/rainbowme "));
+        parts.unshift(pc.plain("/rainbowme "));
    } else if (isRainbow) {
-        parts.unshift(partCreator.plain("/rainbow "));
+        parts.unshift(pc.plain("/rainbow "));
    } else if (isEmote) {
-        parts.unshift(partCreator.plain("/me "));
+        parts.unshift(pc.plain("/me "));
    }
    return parts;
--- a/test/editor/snapshots/deserialize-test.js.snap
+++ b/test/editor/snapshots/deserialize-test.js.snap
@ -0,0 +1,178 @@
 // Jest Snapshot v1, https://goo.gl/fbAQLP
 exports[`editor/deserialize html messages escapes angle brackets 1`] = `
 Array [
  Object {
    "text": "\\\\> \\\\\\\\<del>no formatting here\\\\\\\\</del>",
    "type": "plain",
  },
 ]
 `;
 exports[`editor/deserialize html messages escapes asterisks 1`] = `
 Array [
  Object {
    "text": "\\\\*hello\\\\*",
    "type": "plain",
  },
 ]
 `;
 exports[`editor/deserialize html messages escapes backslashes 1`] = `
 Array [
  Object {
    "text": "C:\\\\\\\\My Documents",
    "type": "plain",
  },
 ]
 `;
 exports[`editor/deserialize html messages escapes backticks in code blocks 1`] = `
 Array [
  Object {
    "text": "\`\`this → \` is a backtick\`\`",
    "type": "plain",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "\`\`\`\`",
    "type": "plain",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "and here are 3 of them:",
    "type": "plain",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "\`\`\`",
    "type": "plain",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "\`\`\`\`",
    "type": "plain",
  },
 ]
 `;
 exports[`editor/deserialize html messages escapes backticks outside of code blocks 1`] = `
 Array [
  Object {
    "text": "some \\\\\`backticks\\\\\`",
    "type": "plain",
  },
 ]
 `;
 exports[`editor/deserialize html messages escapes square brackets 1`] = `
 Array [
  Object {
    "text": "\\\\[not an actual link\\\\](https://example.org)",
    "type": "plain",
  },
 ]
 `;
 exports[`editor/deserialize html messages escapes underscores 1`] = `
 Array [
  Object {
    "text": "\\\\_\\\\_emphasis\\\\_\\\\_",
    "type": "plain",
  },
 ]
 `;
 exports[`editor/deserialize html messages preserves nested formatting 1`] = `
 Array [
  Object {
    "text": "a<sub>b_c**d<u>e</u>**_</sub>",
    "type": "plain",
  },
 ]
 `;
 exports[`editor/deserialize html messages preserves nested quotes 1`] = `
 Array [
  Object {
    "text": "> foo",
    "type": "plain",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "> ",
    "type": "plain",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "> > bar",
    "type": "plain",
  },
 ]
 `;
 exports[`editor/deserialize html messages surrounds lists with newlines 1`] = `
 Array [
  Object {
    "text": "foo",
    "type": "plain",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "- bar",
    "type": "plain",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "
 ",
    "type": "newline",
  },
  Object {
    "text": "baz",
    "type": "plain",
  },
 ]
 `;
--- a/test/editor/deserialize-test.js
+++ b/test/editor/deserialize-test.js
@ -237,18 +237,6 @@ describe('editor/deserialize', function() {
            expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
            expect(parts[4]).toStrictEqual({ type: "plain", text: "3. Finish" });
        });
        it('non tight lists', () => {
            const html = "<ol><li><p>Start</p></li><li><p>Continue</p></li><li><p>Finish</p></li></ol>";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts.length).toBe(8);
            expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Start" });
            expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" });
            expect(parts[2]).toStrictEqual({ type: "newline", text: "\n" });
            expect(parts[3]).toStrictEqual({ type: "plain", text: "2. Continue" });
            expect(parts[4]).toStrictEqual({ type: "newline", text: "\n" });
            expect(parts[5]).toStrictEqual({ type: "newline", text: "\n" });
            expect(parts[6]).toStrictEqual({ type: "plain", text: "3. Finish" });
        });
        it('nested unordered lists', () => {
            const html = "<ul><li>Oak<ul><li>Spruce<ul><li>Birch</li></ul></li></ul></li></ul>";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
@ -269,13 +257,13 @@ describe('editor/deserialize', function() {
            expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
            expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
        });
-        it('nested tight lists', () => {
+        it('nested lists', () => {
            const html = "<ol><li>Oak\n<ol><li>Spruce\n<ol><li>Birch</li></ol></li></ol></li></ol>";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts.length).toBe(5);
-            expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak" });
+            expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak\n" });
            expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" });
-            expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce` });
+            expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce\n` });
            expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
            expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
        });
@ -291,5 +279,56 @@ describe('editor/deserialize', function() {
            expect(parts.length).toBe(1);
            expect(parts[0]).toStrictEqual({ type: "plain", text: "/me says _DON'T SHOUT_!" });
        });
        it('preserves nested quotes', () => {
            const html = "<blockquote>foo<blockquote>bar</blockquote></blockquote>";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
        it('surrounds lists with newlines', () => {
            const html = "foo<ul><li>bar</li></ul>baz";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
        it('preserves nested formatting', () => {
            const html = "a<sub>b<em>c<strong>d<u>e</u></strong></em></sub>";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
        it('escapes backticks in code blocks', () => {
            const html = "<p><code>this → ` is a backtick</code></p>" +
                "<pre><code>and here are 3 of them:\n```</code></pre>";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
        it('escapes backticks outside of code blocks', () => {
            const html = "some `backticks`";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
        it('escapes backslashes', () => {
            const html = "C:\\My Documents";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
        it('escapes asterisks', () => {
            const html = "*hello*";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
        it('escapes underscores', () => {
            const html = "__emphasis__";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
        it('escapes square brackets', () => {
            const html = "[not an actual link](https://example.org)";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
        it('escapes angle brackets', () => {
            const html = "> \\<del>no formatting here\\</del>";
            const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
            expect(parts).toMatchSnapshot();
        });
    });
 });