Fix a variety of issues with HTML → Markdown conversion (#8004)
* Fix a variety of issues with HTML → Markdown conversion
  Signed-off-by: Robin Townsend <robin@robin.town>
* Fix lint
  Signed-off-by: Robin Townsend <robin@robin.town>
* Fix @room pill formatting not being applied to link text
  Signed-off-by: Robin Townsend <robin@robin.town>
This commit is contained in:
parent
65691202f7
commit
c10ac9e4a0
3 changed files with 422 additions and 260 deletions
|
@ -17,190 +17,110 @@ limitations under the License.
|
||||||
|
|
||||||
import { MatrixEvent } from "matrix-js-sdk/src/models/event";
|
import { MatrixEvent } from "matrix-js-sdk/src/models/event";
|
||||||
|
|
||||||
import { walkDOMDepthFirst } from "./dom";
|
|
||||||
import { checkBlockNode } from "../HtmlUtils";
|
import { checkBlockNode } from "../HtmlUtils";
|
||||||
import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks";
|
import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks";
|
||||||
import { Part, PartCreator, Type } from "./parts";
|
import { Part, PartCreator, Type } from "./parts";
|
||||||
import SdkConfig from "../SdkConfig";
|
import SdkConfig from "../SdkConfig";
|
||||||
import { textToHtmlRainbow } from "../utils/colour";
|
import { textToHtmlRainbow } from "../utils/colour";
|
||||||
|
|
||||||
function parseAtRoomMentions(text: string, partCreator: PartCreator): Part[] {
|
const LIST_TYPES = ["UL", "OL", "LI"];
|
||||||
|
|
||||||
|
/**
 * Backslash-escapes Markdown markup in the given text so that it is read back
 * literally rather than as formatting.
 *
 * The escaped characters are: backslash, asterisk, underscore, both square
 * brackets, backtick and `<`, wherever they occur, plus a `>` that is the
 * first character of a line.
 *
 * @param text The literal text to escape
 * @returns The text with a backslash inserted before every matched character
 */
function escape(text: string): string {
    // The `m` flag makes `^` match at the start of every line, not only at the
    // start of the string, so a ">" opening a later line of a multi-line text
    // is escaped too instead of being turned into a block quote.
    return text.replace(/[\\*_[\]`<]|^>/gm, match => `\\${match}`);
}
|
||||||
|
|
||||||
|
/**
 * Measures the longest run of consecutive backticks in the given text.
 * Callers use the result to pick a code fence long enough that the text
 * cannot close it early.
 *
 * @param text The text to scan
 * @returns The length of the longest backtick run, or 0 if there is none
 */
function longestBacktickSequence(text: string): number {
    // Each match is one maximal run of backticks
    const runs = text.match(/`+/g);
    if (runs === null) return 0;

    return runs.reduce((longest, run) => Math.max(longest, run.length), 0);
}
|
||||||
|
|
||||||
|
/**
 * Tells whether the given node's parent is one of the element types named in
 * LIST_TYPES.
 *
 * @param n The node whose parent is inspected
 * @returns true if the parent's node name is in LIST_TYPES; false otherwise,
 *     including when the node has no parent
 */
function isListChild(n: Node): boolean {
    const parent = n.parentNode;
    if (!parent) return false;

    return LIST_TYPES.includes(parent.nodeName);
}
|
||||||
|
|
||||||
|
function parseAtRoomMentions(text: string, pc: PartCreator): Part[] {
|
||||||
const ATROOM = "@room";
|
const ATROOM = "@room";
|
||||||
const parts: Part[] = [];
|
const parts: Part[] = [];
|
||||||
text.split(ATROOM).forEach((textPart, i, arr) => {
|
text.split(ATROOM).forEach((textPart, i, arr) => {
|
||||||
if (textPart.length) {
|
if (textPart.length) {
|
||||||
parts.push(...partCreator.plainWithEmoji(textPart));
|
parts.push(...pc.plainWithEmoji(escape(textPart)));
|
||||||
}
|
}
|
||||||
// it's safe to never append @room after the last textPart
|
// it's safe to never append @room after the last textPart
|
||||||
// as split will report an empty string at the end if
|
// as split will report an empty string at the end if
|
||||||
// `text` ended in @room.
|
// `text` ended in @room.
|
||||||
const isLast = i === arr.length - 1;
|
const isLast = i === arr.length - 1;
|
||||||
if (!isLast) {
|
if (!isLast) {
|
||||||
parts.push(partCreator.atRoomPill(ATROOM));
|
parts.push(pc.atRoomPill(ATROOM));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return parts;
|
return parts;
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseLink(a: HTMLAnchorElement, partCreator: PartCreator): Part[] {
|
function parseLink(n: Node, pc: PartCreator): Part[] {
|
||||||
const { href } = a;
|
const { href } = n as HTMLAnchorElement;
|
||||||
const resourceId = getPrimaryPermalinkEntity(href); // The room/user ID
|
const resourceId = getPrimaryPermalinkEntity(href); // The room/user ID
|
||||||
const prefix = resourceId ? resourceId[0] : undefined; // First character of ID
|
|
||||||
switch (prefix) {
|
switch (resourceId?.[0]) {
|
||||||
case "@":
|
case "@": return [pc.userPill(n.textContent, resourceId)];
|
||||||
return [partCreator.userPill(a.textContent, resourceId)];
|
case "#": return [pc.roomPill(resourceId)];
|
||||||
case "#":
|
}
|
||||||
return [partCreator.roomPill(resourceId)];
|
|
||||||
default: {
|
const children = Array.from(n.childNodes);
|
||||||
if (href === a.textContent) {
|
if (href === n.textContent && children.every(c => c.nodeType === Node.TEXT_NODE)) {
|
||||||
return partCreator.plainWithEmoji(a.textContent);
|
return parseAtRoomMentions(n.textContent, pc);
|
||||||
} else {
|
} else {
|
||||||
return partCreator.plainWithEmoji(`[${a.textContent.replace(/[[\\\]]/g, c => "\\" + c)}](${href})`);
|
return [pc.plain("["), ...parseChildren(n, pc), pc.plain(`](${href})`)];
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseImage(img: HTMLImageElement, partCreator: PartCreator): Part[] {
|
function parseImage(n: Node, pc: PartCreator): Part[] {
|
||||||
const { src } = img;
|
const { alt, src } = n as HTMLImageElement;
|
||||||
return partCreator.plainWithEmoji(`![${img.alt.replace(/[[\\\]]/g, c => "\\" + c)}](${src})`);
|
return pc.plainWithEmoji(`![${escape(alt)}](${src})`);
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseCodeBlock(n: HTMLElement, partCreator: PartCreator): Part[] {
|
function parseCodeBlock(n: Node, pc: PartCreator): Part[] {
|
||||||
const parts: Part[] = [];
|
|
||||||
let language = "";
|
let language = "";
|
||||||
if (n.firstChild && n.firstChild.nodeName === "CODE") {
|
if (n.firstChild?.nodeName === "CODE") {
|
||||||
for (const className of (<HTMLElement>n.firstChild).classList) {
|
for (const className of (n.firstChild as HTMLElement).classList) {
|
||||||
if (className.startsWith("language-") && !className.startsWith("language-_")) {
|
if (className.startsWith("language-") && !className.startsWith("language-_")) {
|
||||||
language = className.substr("language-".length);
|
language = className.substr("language-".length);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const preLines = ("```" + language + "\n" + n.textContent + "```").split("\n");
|
|
||||||
preLines.forEach((l, i) => {
|
const text = n.textContent.replace(/\n$/, "");
|
||||||
parts.push(...partCreator.plainWithEmoji(l));
|
// Escape backticks by using even more backticks for the fence if necessary
|
||||||
if (i < preLines.length - 1) {
|
const fence = "`".repeat(Math.max(3, longestBacktickSequence(text) + 1));
|
||||||
parts.push(partCreator.newline());
|
const parts: Part[] = [...pc.plainWithEmoji(fence + language), pc.newline()];
|
||||||
}
|
|
||||||
|
text.split("\n").forEach(line => {
|
||||||
|
parts.push(...pc.plainWithEmoji(line));
|
||||||
|
parts.push(pc.newline());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
parts.push(pc.plain(fence));
|
||||||
return parts;
|
return parts;
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseHeader(el: HTMLElement, partCreator: PartCreator): Part {
|
function parseHeader(n: Node, pc: PartCreator): Part[] {
|
||||||
const depth = parseInt(el.nodeName.substr(1), 10);
|
const depth = parseInt(n.nodeName.substr(1), 10);
|
||||||
return partCreator.plain("#".repeat(depth) + " ");
|
const prefix = pc.plain("#".repeat(depth) + " ");
|
||||||
}
|
return [prefix, ...parseChildren(n, pc)];
|
||||||
|
|
||||||
interface IState {
|
|
||||||
listIndex: number[];
|
|
||||||
listDepth?: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
function parseElement(
|
|
||||||
n: HTMLElement,
|
|
||||||
partCreator: PartCreator,
|
|
||||||
lastNode: Node | undefined,
|
|
||||||
state: IState,
|
|
||||||
): Part | Part[] {
|
|
||||||
switch (n.nodeName) {
|
|
||||||
case "H1":
|
|
||||||
case "H2":
|
|
||||||
case "H3":
|
|
||||||
case "H4":
|
|
||||||
case "H5":
|
|
||||||
case "H6":
|
|
||||||
return parseHeader(n, partCreator);
|
|
||||||
case "A":
|
|
||||||
return parseLink(<HTMLAnchorElement>n, partCreator);
|
|
||||||
case "IMG":
|
|
||||||
return parseImage(<HTMLImageElement>n, partCreator);
|
|
||||||
case "BR":
|
|
||||||
return partCreator.newline();
|
|
||||||
case "HR":
|
|
||||||
// the newline arrangement here is quite specific otherwise it may be misconstrued as marking the previous
|
|
||||||
// text line as a header instead of acting as a horizontal rule.
|
|
||||||
return [
|
|
||||||
partCreator.newline(),
|
|
||||||
partCreator.plain("---"),
|
|
||||||
partCreator.newline(),
|
|
||||||
];
|
|
||||||
case "EM":
|
|
||||||
return partCreator.plainWithEmoji(`_${n.textContent}_`);
|
|
||||||
case "STRONG":
|
|
||||||
return partCreator.plainWithEmoji(`**${n.textContent}**`);
|
|
||||||
case "PRE":
|
|
||||||
return parseCodeBlock(n, partCreator);
|
|
||||||
case "CODE":
|
|
||||||
return partCreator.plainWithEmoji(`\`${n.textContent}\``);
|
|
||||||
case "DEL":
|
|
||||||
return partCreator.plainWithEmoji(`<del>${n.textContent}</del>`);
|
|
||||||
case "SUB":
|
|
||||||
return partCreator.plainWithEmoji(`<sub>${n.textContent}</sub>`);
|
|
||||||
case "SUP":
|
|
||||||
return partCreator.plainWithEmoji(`<sup>${n.textContent}</sup>`);
|
|
||||||
case "U":
|
|
||||||
return partCreator.plainWithEmoji(`<u>${n.textContent}</u>`);
|
|
||||||
case "LI": {
|
|
||||||
const BASE_INDENT = 4;
|
|
||||||
const depth = state.listDepth - 1;
|
|
||||||
const indent = " ".repeat(BASE_INDENT * depth);
|
|
||||||
if (n.parentElement.nodeName === "OL") {
|
|
||||||
// The markdown parser doesn't do nested indexed lists at all, but this supports it anyway.
|
|
||||||
const index = state.listIndex[state.listIndex.length - 1];
|
|
||||||
state.listIndex[state.listIndex.length - 1] += 1;
|
|
||||||
return partCreator.plain(`${indent}${index}. `);
|
|
||||||
} else {
|
|
||||||
return partCreator.plain(`${indent}- `);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case "P": {
|
|
||||||
if (lastNode) {
|
|
||||||
return partCreator.newline();
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case "DIV":
|
|
||||||
case "SPAN": {
|
|
||||||
// math nodes are translated back into delimited latex strings
|
|
||||||
if (n.hasAttribute("data-mx-maths")) {
|
|
||||||
const delimLeft = (n.nodeName == "SPAN") ?
|
|
||||||
((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['left'] || "\\(" :
|
|
||||||
((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['left'] || "\\[";
|
|
||||||
const delimRight = (n.nodeName == "SPAN") ?
|
|
||||||
((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['right'] || "\\)" :
|
|
||||||
((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['right'] || "\\]";
|
|
||||||
const tex = n.getAttribute("data-mx-maths");
|
|
||||||
return partCreator.plainWithEmoji(delimLeft + tex + delimRight);
|
|
||||||
} else if (!checkDescendInto(n)) {
|
|
||||||
return partCreator.plainWithEmoji(n.textContent);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case "OL":
|
|
||||||
state.listIndex.push((<HTMLOListElement>n).start || 1);
|
|
||||||
/* falls through */
|
|
||||||
case "UL":
|
|
||||||
state.listDepth = (state.listDepth || 0) + 1;
|
|
||||||
/* falls through */
|
|
||||||
default:
|
|
||||||
// don't textify block nodes we'll descend into
|
|
||||||
if (!checkDescendInto(n)) {
|
|
||||||
return partCreator.plainWithEmoji(n.textContent);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function checkDescendInto(node) {
|
|
||||||
switch (node.nodeName) {
|
|
||||||
case "PRE":
|
|
||||||
// a code block is textified in parseCodeBlock
|
|
||||||
// as we don't want to preserve markup in it,
|
|
||||||
// so no need to descend into it
|
|
||||||
return false;
|
|
||||||
default:
|
|
||||||
return checkBlockNode(node);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function checkIgnored(n) {
|
function checkIgnored(n) {
|
||||||
|
@ -214,144 +134,169 @@ function checkIgnored(n) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const QUOTE_LINE_PREFIX = "> ";
|
function prefixLines(parts: Part[], prefix: string, pc: PartCreator) {
|
||||||
function prefixQuoteLines(isFirstNode, parts, partCreator) {
|
parts.unshift(pc.plain(prefix));
|
||||||
// a newline (to append a > to) wouldn't be added to parts for the first line
|
for (let i = 0; i < parts.length; i++) {
|
||||||
// if there was no content before the BLOCKQUOTE, so handle that
|
|
||||||
if (isFirstNode) {
|
|
||||||
parts.splice(0, 0, partCreator.plain(QUOTE_LINE_PREFIX));
|
|
||||||
}
|
|
||||||
for (let i = 0; i < parts.length; i += 1) {
|
|
||||||
if (parts[i].type === Type.Newline) {
|
if (parts[i].type === Type.Newline) {
|
||||||
parts.splice(i + 1, 0, partCreator.plain(QUOTE_LINE_PREFIX));
|
parts.splice(i + 1, 0, pc.plain(prefix));
|
||||||
i += 1;
|
i += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function parseHtmlMessage(html: string, partCreator: PartCreator, isQuotedMessage: boolean): Part[] {
|
/**
 * Parses each child of the given node with parseNode and joins the results
 * into a single list of parts.
 *
 * Children that produce no parts are skipped entirely. Between two children
 * that both produced parts, newline parts are inserted when checkBlockNode
 * reports either of them as a block node: one newline if the later child is a
 * list child (see isListChild), two otherwise.
 *
 * @param n The node whose children are parsed
 * @param pc The part creator used to build newline parts
 * @param mkListItem Optional list item renderer, handed on to parseNode for
 *     every child
 * @returns The parts for all children, in document order
 */
function parseChildren(n: Node, pc: PartCreator, mkListItem?: (li: Node) => Part[]): Part[] {
    const collected: Part[] = [];
    // The most recent child that actually contributed parts
    let lastProductive: Node | undefined;

    for (const child of Array.from(n.childNodes)) {
        const childParts = parseNode(child, pc, mkListItem);
        if (childParts.length === 0) continue;

        const needsSeparator = lastProductive !== undefined &&
            (checkBlockNode(lastProductive) || checkBlockNode(child));
        if (needsSeparator) {
            // Use tighter spacing within lists
            const tight = isListChild(child);
            collected.push(pc.newline());
            if (!tight) collected.push(pc.newline());
        }

        for (const part of childParts) collected.push(part);
        lastProductive = child;
    }

    return collected;
}
|
||||||
|
|
||||||
|
/**
 * Converts a DOM node, together with its descendants, into editor parts whose
 * text is Markdown (with inline HTML for the tags handled below that have no
 * Markdown form).
 *
 * Nodes rejected by checkIgnored produce no parts. Text nodes go through
 * parseAtRoomMentions. Elements are dispatched on their node name; any node
 * without a dedicated case is replaced by the parts of its children.
 *
 * @param n The node to convert
 * @param pc The part creator used to build the parts
 * @param mkListItem Optional renderer for LI elements. UL and OL supply one
 *     for their own items so that each item gets its bullet or number; when it
 *     is absent, or returns null/undefined, an LI is rendered as just its
 *     children.
 * @returns The parts representing the node
 */
function parseNode(n: Node, pc: PartCreator, mkListItem?: (li: Node) => Part[]): Part[] {
    if (checkIgnored(n)) return [];

    switch (n.nodeType) {
        case Node.TEXT_NODE:
            return parseAtRoomMentions(n.nodeValue, pc);
        case Node.ELEMENT_NODE:
            switch (n.nodeName) {
                case "H1":
                case "H2":
                case "H3":
                case "H4":
                case "H5":
                case "H6":
                    return parseHeader(n, pc);
                case "A":
                    return parseLink(n, pc);
                case "IMG":
                    return parseImage(n, pc);
                case "BR":
                    return [pc.newline()];
                case "HR":
                    return [pc.plain("---")];
                case "EM":
                    return [pc.plain("_"), ...parseChildren(n, pc), pc.plain("_")];
                case "STRONG":
                    return [pc.plain("**"), ...parseChildren(n, pc), pc.plain("**")];
                case "DEL":
                    return [pc.plain("<del>"), ...parseChildren(n, pc), pc.plain("</del>")];
                case "SUB":
                    return [pc.plain("<sub>"), ...parseChildren(n, pc), pc.plain("</sub>")];
                case "SUP":
                    return [pc.plain("<sup>"), ...parseChildren(n, pc), pc.plain("</sup>")];
                case "U":
                    return [pc.plain("<u>"), ...parseChildren(n, pc), pc.plain("</u>")];
                case "PRE":
                    return parseCodeBlock(n, pc);
                case "CODE": {
                    const code = n.textContent ?? "";
                    // Escape backticks by using multiple backticks for the fence if necessary
                    const fence = "`".repeat(longestBacktickSequence(code) + 1);
                    // A backtick touching the fence would lengthen it and break the
                    // code span, so separate them with a space. CommonMark strips one
                    // leading and one trailing space from a code span's content.
                    const pad = code.startsWith("`") || code.endsWith("`") ? " " : "";
                    return pc.plainWithEmoji(`${fence}${pad}${code}${pad}${fence}`);
                }
                case "BLOCKQUOTE": {
                    const parts = parseChildren(n, pc);
                    prefixLines(parts, "> ", pc);
                    return parts;
                }
                case "LI":
                    return mkListItem?.(n) ?? parseChildren(n, pc);
                case "UL": {
                    const parts = parseChildren(n, pc, li => [pc.plain("- "), ...parseChildren(li, pc)]);
                    if (isListChild(n)) {
                        // Indent nested lists by four spaces per level
                        prefixLines(parts, "    ", pc);
                    }
                    return parts;
                }
                case "OL": {
                    // Continue numbering from the list's `start` value rather than
                    // always restarting at 1
                    let counter = (n as HTMLOListElement).start ?? 1;
                    const parts = parseChildren(n, pc, li => {
                        const parts = [pc.plain(`${counter}. `), ...parseChildren(li, pc)];
                        counter++;
                        return parts;
                    });
                    if (isListChild(n)) {
                        // Indent nested lists by four spaces per level
                        prefixLines(parts, "    ", pc);
                    }
                    return parts;
                }
                case "DIV":
                case "SPAN":
                    // Math nodes are translated back into delimited latex strings
                    if ((n as Element).hasAttribute("data-mx-maths")) {
                        const delims = SdkConfig.get().latex_maths_delims;
                        const delimLeft = (n.nodeName === "SPAN") ?
                            delims?.inline?.left ?? "\\(" :
                            delims?.display?.left ?? "\\[";
                        const delimRight = (n.nodeName === "SPAN") ?
                            delims?.inline?.right ?? "\\)" :
                            delims?.display?.right ?? "\\]";
                        const tex = (n as Element).getAttribute("data-mx-maths");

                        return pc.plainWithEmoji(`${delimLeft}${tex}${delimRight}`);
                    }
            }
    }

    // Anything not handled above is represented by its children alone
    return parseChildren(n, pc);
}
|
||||||
|
|
||||||
|
function parseHtmlMessage(html: string, pc: PartCreator, isQuotedMessage: boolean): Part[] {
|
||||||
// no nodes from parsing here should be inserted in the document,
|
// no nodes from parsing here should be inserted in the document,
|
||||||
// as scripts in event handlers, etc would be executed then.
|
// as scripts in event handlers, etc would be executed then.
|
||||||
// we're only taking text, so that is fine
|
// we're only taking text, so that is fine
|
||||||
const rootNode = new DOMParser().parseFromString(html, "text/html").body;
|
const parts = parseNode(new DOMParser().parseFromString(html, "text/html").body, pc);
|
||||||
const parts: Part[] = [];
|
if (isQuotedMessage) {
|
||||||
let lastNode: Node;
|
prefixLines(parts, "> ", pc);
|
||||||
let inQuote = isQuotedMessage;
|
|
||||||
const state: IState = {
|
|
||||||
listIndex: [],
|
|
||||||
};
|
|
||||||
|
|
||||||
function onNodeEnter(n: Node) {
|
|
||||||
if (checkIgnored(n)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (n.nodeName === "BLOCKQUOTE") {
|
|
||||||
inQuote = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const newParts: Part[] = [];
|
|
||||||
if (lastNode && (checkBlockNode(lastNode) || checkBlockNode(n))) {
|
|
||||||
newParts.push(partCreator.newline());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (n.nodeType === Node.TEXT_NODE) {
|
|
||||||
let { nodeValue } = n;
|
|
||||||
|
|
||||||
// Sometimes commonmark adds a newline at the end of the list item text
|
|
||||||
if (n.parentNode.nodeName === "LI") {
|
|
||||||
nodeValue = nodeValue.trimEnd();
|
|
||||||
}
|
|
||||||
newParts.push(...parseAtRoomMentions(nodeValue, partCreator));
|
|
||||||
|
|
||||||
const grandParent = n.parentNode.parentNode;
|
|
||||||
const isTight = n.parentNode.nodeName !== "P" || grandParent?.nodeName !== "LI";
|
|
||||||
if (!isTight) {
|
|
||||||
newParts.push(partCreator.newline());
|
|
||||||
}
|
|
||||||
} else if (n.nodeType === Node.ELEMENT_NODE) {
|
|
||||||
const parseResult = parseElement(n as HTMLElement, partCreator, lastNode, state);
|
|
||||||
if (parseResult) {
|
|
||||||
if (Array.isArray(parseResult)) {
|
|
||||||
newParts.push(...parseResult);
|
|
||||||
} else {
|
|
||||||
newParts.push(parseResult);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (newParts.length && inQuote) {
|
|
||||||
const isFirstPart = parts.length === 0;
|
|
||||||
prefixQuoteLines(isFirstPart, newParts, partCreator);
|
|
||||||
}
|
|
||||||
|
|
||||||
parts.push(...newParts);
|
|
||||||
|
|
||||||
const descend = checkDescendInto(n);
|
|
||||||
// when not descending (like for PRE), onNodeLeave won't be called to set lastNode
|
|
||||||
// so do that here.
|
|
||||||
lastNode = descend ? null : n;
|
|
||||||
return descend;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function onNodeLeave(n: Node) {
|
|
||||||
if (checkIgnored(n)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
switch (n.nodeName) {
|
|
||||||
case "BLOCKQUOTE":
|
|
||||||
inQuote = false;
|
|
||||||
break;
|
|
||||||
case "OL":
|
|
||||||
state.listIndex.pop();
|
|
||||||
/* falls through */
|
|
||||||
case "UL":
|
|
||||||
state.listDepth -= 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
lastNode = n;
|
|
||||||
}
|
|
||||||
|
|
||||||
walkDOMDepthFirst(rootNode, onNodeEnter, onNodeLeave);
|
|
||||||
|
|
||||||
return parts;
|
return parts;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function parsePlainTextMessage(body: string, partCreator: PartCreator, isQuotedMessage?: boolean): Part[] {
|
export function parsePlainTextMessage(body: string, pc: PartCreator, isQuotedMessage?: boolean): Part[] {
|
||||||
const lines = body.split(/\r\n|\r|\n/g); // split on any new-line combination not just \n, collapses \r\n
|
const lines = body.split(/\r\n|\r|\n/g); // split on any new-line combination not just \n, collapses \r\n
|
||||||
return lines.reduce((parts, line, i) => {
|
return lines.reduce((parts, line, i) => {
|
||||||
if (isQuotedMessage) {
|
if (isQuotedMessage) {
|
||||||
parts.push(partCreator.plain(QUOTE_LINE_PREFIX));
|
parts.push(pc.plain("> "));
|
||||||
}
|
}
|
||||||
parts.push(...parseAtRoomMentions(line, partCreator));
|
parts.push(...parseAtRoomMentions(line, pc));
|
||||||
const isLast = i === lines.length - 1;
|
const isLast = i === lines.length - 1;
|
||||||
if (!isLast) {
|
if (!isLast) {
|
||||||
parts.push(partCreator.newline());
|
parts.push(pc.newline());
|
||||||
}
|
}
|
||||||
return parts;
|
return parts;
|
||||||
}, [] as Part[]);
|
}, [] as Part[]);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function parseEvent(event: MatrixEvent, partCreator: PartCreator, { isQuotedMessage = false } = {}) {
|
export function parseEvent(event: MatrixEvent, pc: PartCreator, { isQuotedMessage = false } = {}) {
|
||||||
const content = event.getContent();
|
const content = event.getContent();
|
||||||
let parts: Part[];
|
let parts: Part[];
|
||||||
const isEmote = content.msgtype === "m.emote";
|
const isEmote = content.msgtype === "m.emote";
|
||||||
let isRainbow = false;
|
let isRainbow = false;
|
||||||
|
|
||||||
if (content.format === "org.matrix.custom.html") {
|
if (content.format === "org.matrix.custom.html") {
|
||||||
parts = parseHtmlMessage(content.formatted_body || "", partCreator, isQuotedMessage);
|
parts = parseHtmlMessage(content.formatted_body || "", pc, isQuotedMessage);
|
||||||
if (content.body && content.formatted_body && textToHtmlRainbow(content.body) === content.formatted_body) {
|
if (content.body && content.formatted_body && textToHtmlRainbow(content.body) === content.formatted_body) {
|
||||||
isRainbow = true;
|
isRainbow = true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
parts = parsePlainTextMessage(content.body || "", partCreator, isQuotedMessage);
|
parts = parsePlainTextMessage(content.body || "", pc, isQuotedMessage);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isEmote && isRainbow) {
|
if (isEmote && isRainbow) {
|
||||||
parts.unshift(partCreator.plain("/rainbowme "));
|
parts.unshift(pc.plain("/rainbowme "));
|
||||||
} else if (isRainbow) {
|
} else if (isRainbow) {
|
||||||
parts.unshift(partCreator.plain("/rainbow "));
|
parts.unshift(pc.plain("/rainbow "));
|
||||||
} else if (isEmote) {
|
} else if (isEmote) {
|
||||||
parts.unshift(partCreator.plain("/me "));
|
parts.unshift(pc.plain("/me "));
|
||||||
}
|
}
|
||||||
|
|
||||||
return parts;
|
return parts;
|
||||||
|
|
178
test/editor/__snapshots__/deserialize-test.js.snap
Normal file
178
test/editor/__snapshots__/deserialize-test.js.snap
Normal file
|
@ -0,0 +1,178 @@
|
||||||
|
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages escapes angle brackets 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "\\\\> \\\\\\\\<del>no formatting here\\\\\\\\</del>",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages escapes asterisks 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "\\\\*hello\\\\*",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages escapes backslashes 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "C:\\\\\\\\My Documents",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages escapes backticks in code blocks 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "\`\`this → \` is a backtick\`\`",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "\`\`\`\`",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "and here are 3 of them:",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "\`\`\`",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "\`\`\`\`",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages escapes backticks outside of code blocks 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "some \\\\\`backticks\\\\\`",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages escapes square brackets 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "\\\\[not an actual link\\\\](https://example.org)",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages escapes underscores 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "\\\\_\\\\_emphasis\\\\_\\\\_",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages preserves nested formatting 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "a<sub>b_c**d<u>e</u>**_</sub>",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages preserves nested quotes 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "> foo",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "> ",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "> > bar",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`editor/deserialize html messages surrounds lists with newlines 1`] = `
|
||||||
|
Array [
|
||||||
|
Object {
|
||||||
|
"text": "foo",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "- bar",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "
|
||||||
|
",
|
||||||
|
"type": "newline",
|
||||||
|
},
|
||||||
|
Object {
|
||||||
|
"text": "baz",
|
||||||
|
"type": "plain",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
`;
|
|
@ -237,18 +237,6 @@ describe('editor/deserialize', function() {
|
||||||
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
|
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
|
||||||
expect(parts[4]).toStrictEqual({ type: "plain", text: "3. Finish" });
|
expect(parts[4]).toStrictEqual({ type: "plain", text: "3. Finish" });
|
||||||
});
|
});
|
||||||
it('non tight lists', () => {
|
|
||||||
const html = "<ol><li><p>Start</p></li><li><p>Continue</p></li><li><p>Finish</p></li></ol>";
|
|
||||||
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
|
||||||
expect(parts.length).toBe(8);
|
|
||||||
expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Start" });
|
|
||||||
expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" });
|
|
||||||
expect(parts[2]).toStrictEqual({ type: "newline", text: "\n" });
|
|
||||||
expect(parts[3]).toStrictEqual({ type: "plain", text: "2. Continue" });
|
|
||||||
expect(parts[4]).toStrictEqual({ type: "newline", text: "\n" });
|
|
||||||
expect(parts[5]).toStrictEqual({ type: "newline", text: "\n" });
|
|
||||||
expect(parts[6]).toStrictEqual({ type: "plain", text: "3. Finish" });
|
|
||||||
});
|
|
||||||
it('nested unordered lists', () => {
|
it('nested unordered lists', () => {
|
||||||
const html = "<ul><li>Oak<ul><li>Spruce<ul><li>Birch</li></ul></li></ul></li></ul>";
|
const html = "<ul><li>Oak<ul><li>Spruce<ul><li>Birch</li></ul></li></ul></li></ul>";
|
||||||
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
@ -269,13 +257,13 @@ describe('editor/deserialize', function() {
|
||||||
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
|
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
|
||||||
expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
|
expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
|
||||||
});
|
});
|
||||||
it('nested tight lists', () => {
|
it('nested lists', () => {
|
||||||
const html = "<ol><li>Oak\n<ol><li>Spruce\n<ol><li>Birch</li></ol></li></ol></li></ol>";
|
const html = "<ol><li>Oak\n<ol><li>Spruce\n<ol><li>Birch</li></ol></li></ol></li></ol>";
|
||||||
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
expect(parts.length).toBe(5);
|
expect(parts.length).toBe(5);
|
||||||
expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak" });
|
expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak\n" });
|
||||||
expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" });
|
expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" });
|
||||||
expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce` });
|
expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce\n` });
|
||||||
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
|
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
|
||||||
expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
|
expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
|
||||||
});
|
});
|
||||||
|
@ -291,5 +279,56 @@ describe('editor/deserialize', function() {
|
||||||
expect(parts.length).toBe(1);
|
expect(parts.length).toBe(1);
|
||||||
expect(parts[0]).toStrictEqual({ type: "plain", text: "/me says _DON'T SHOUT_!" });
|
expect(parts[0]).toStrictEqual({ type: "plain", text: "/me says _DON'T SHOUT_!" });
|
||||||
});
|
});
|
||||||
|
it('preserves nested quotes', () => {
|
||||||
|
const html = "<blockquote>foo<blockquote>bar</blockquote></blockquote>";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
it('surrounds lists with newlines', () => {
|
||||||
|
const html = "foo<ul><li>bar</li></ul>baz";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
it('preserves nested formatting', () => {
|
||||||
|
const html = "a<sub>b<em>c<strong>d<u>e</u></strong></em></sub>";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
it('escapes backticks in code blocks', () => {
|
||||||
|
const html = "<p><code>this → ` is a backtick</code></p>" +
|
||||||
|
"<pre><code>and here are 3 of them:\n```</code></pre>";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
it('escapes backticks outside of code blocks', () => {
|
||||||
|
const html = "some `backticks`";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
it('escapes backslashes', () => {
|
||||||
|
const html = "C:\\My Documents";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
it('escapes asterisks', () => {
|
||||||
|
const html = "*hello*";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
it('escapes underscores', () => {
|
||||||
|
const html = "__emphasis__";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
it('escapes square brackets', () => {
|
||||||
|
const html = "[not an actual link](https://example.org)";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
|
it('escapes angle brackets', () => {
|
||||||
|
const html = "> \\<del>no formatting here\\</del>";
|
||||||
|
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
|
||||||
|
expect(parts).toMatchSnapshot();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in a new issue