2019-05-07 15:31:37 +00:00
|
|
|
/*
|
|
|
|
Copyright 2019 New Vector Ltd
|
2020-04-14 23:49:08 +00:00
|
|
|
Copyright 2019, 2020 The Matrix.org Foundation C.I.C.
|
2019-05-07 15:31:37 +00:00
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2020-04-14 23:49:08 +00:00
|
|
|
import { MatrixEvent } from "matrix-js-sdk/src/models/event";
|
|
|
|
|
2019-05-29 12:46:15 +00:00
|
|
|
import { walkDOMDepthFirst } from "./dom";
|
2019-07-23 07:12:24 +00:00
|
|
|
import { checkBlockNode } from "../HtmlUtils";
|
2020-04-14 23:49:08 +00:00
|
|
|
import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks";
|
|
|
|
import { PartCreator } from "./parts";
|
2019-05-22 11:00:39 +00:00
|
|
|
|
2020-04-14 23:49:08 +00:00
|
|
|
/**
 * Splits a run of text on every literal "@room" and converts the result
 * into parts: the text between occurrences becomes plain parts, and each
 * occurrence itself becomes an at-room pill part.
 *
 * @param text the text to scan for "@room"
 * @param partCreator factory used to build the plain and at-room pill parts
 * @returns the created parts in order; empty text segments produce no part
 */
function parseAtRoomMentions(text: string, partCreator: PartCreator) {
    const ATROOM = "@room";
    const segments = text.split(ATROOM);
    const lastIndex = segments.length - 1;
    const parts = [];
    for (let idx = 0; idx <= lastIndex; idx += 1) {
        const segment = segments[idx];
        if (segment.length) {
            parts.push(partCreator.plain(segment));
        }
        // A separator only ever sits between two segments, so a pill follows
        // every segment except the final one. split() reports a trailing
        // empty string when `text` ends in @room, which keeps this correct.
        if (idx !== lastIndex) {
            parts.push(partCreator.atRoomPill(ATROOM));
        }
    }
    return parts;
}
|
|
|
|
|
2020-04-14 23:49:08 +00:00
|
|
|
/**
 * Converts an anchor element into a single part.
 *
 * The link target is passed to getPrimaryPermalinkEntity; the first
 * character of what it returns decides the result:
 *  - "@" produces a user pill labelled with the anchor text,
 *  - "#" produces a room pill,
 *  - anything else produces plain text: the bare URL when the anchor text
 *    equals the href, otherwise a markdown link `[text](href)` in which
 *    `[`, `]` and `\` inside the text are backslash-escaped.
 *
 * @param a the anchor element to convert
 * @param partCreator factory used to build the resulting part
 * @returns the part representing the link
 */
function parseLink(a: HTMLAnchorElement, partCreator: PartCreator) {
    const {href} = a;
    const resourceId = getPrimaryPermalinkEntity(href); // The room/user ID
    const sigil = resourceId ? resourceId[0] : undefined; // First character of ID

    if (sigil === "@") {
        return partCreator.userPill(a.textContent, resourceId);
    }
    if (sigil === "#") {
        return partCreator.roomPill(resourceId);
    }
    if (href === a.textContent) {
        return partCreator.plain(a.textContent);
    }
    const escapedLabel = a.textContent.replace(/[[\\\]]/g, c => "\\" + c);
    return partCreator.plain(`[${escapedLabel}](${href})`);
}
|
|
|
|
|
2020-04-14 23:49:08 +00:00
|
|
|
/**
 * Converts a <pre> element into the parts of a markdown fenced code block.
 *
 * The language is taken from the first `language-*` class of a leading
 * <code> child, if there is one. The markup inside the block is discarded;
 * only its text content is kept.
 *
 * The closing fence is always emitted on a line of its own, even when the
 * code text does not end in a newline. The fence is also made longer than
 * any run of backticks inside the code, so the code cannot terminate the
 * block early.
 *
 * @param n the <pre> element
 * @param partCreator factory used to build plain and newline parts
 * @returns one plain part per line, separated by newline parts
 */
function parseCodeBlock(n: HTMLElement, partCreator: PartCreator) {
    let language = "";
    const firstChild = n.firstChild;
    if (firstChild && firstChild.nodeName === "CODE") {
        const languageClass = Array.from((<HTMLElement>firstChild).classList)
            .find(className => className.startsWith("language-"));
        if (languageClass) {
            language = languageClass.slice("language-".length);
        }
    }

    const text = n.textContent || "";
    // Without a trailing newline the closing fence would be glued onto the
    // last line of code, which is not a valid closing fence.
    const code = text === "" || text.endsWith("\n") ? text : text + "\n";
    // A fence is only closed by a backtick run at least as long as itself,
    // so use one more backtick than the longest run found in the code.
    const longestRun = (code.match(/`+/g) || [])
        .reduce((longest, run) => Math.max(longest, run.length), 0);
    const fence = "`".repeat(Math.max(3, longestRun + 1));

    const preLines = (fence + language + "\n" + code + fence).split("\n");
    const parts = [];
    preLines.forEach((line, i) => {
        parts.push(partCreator.plain(line));
        if (i < preLines.length - 1) {
            parts.push(partCreator.newline());
        }
    });
    return parts;
}
|
|
|
|
|
2020-04-14 23:49:08 +00:00
|
|
|
/**
 * Produces the markdown prefix for a heading element: one "#" per heading
 * level (read from the digit in the node name, e.g. "H3" gives 3),
 * followed by a single space.
 *
 * @param el the H1..H6 element
 * @param partCreator factory used to build the plain part
 * @returns a plain part holding the heading prefix
 */
function parseHeader(el: HTMLElement, partCreator: PartCreator) {
    const level = parseInt(el.nodeName.slice(1), 10);
    const marker = `${"#".repeat(level)} `;
    return partCreator.plain(marker);
}
|
|
|
|
|
2020-04-14 23:49:08 +00:00
|
|
|
/**
 * Mutable list bookkeeping shared between the element parser and the DOM
 * walk callbacks while one HTML message is being converted.
 */
interface IState {
    /**
     * Stack with one running item number per currently open <ol>: pushed
     * when an <ol> is entered, popped when it is left. The last entry is
     * read and then incremented for every <li> whose parent is an <ol>.
     */
    listIndex: Array<number>;
    /**
     * How many <ol>/<ul> elements are currently open. Left unset until the
     * first list is entered.
     */
    listDepth?: number;
}
|
|
|
|
|
|
|
|
/**
 * Converts a single element into the part(s) that represent it, updating
 * the list bookkeeping in `state` as lists are entered.
 *
 * Inline formatting elements are flattened to their text content wrapped
 * in the matching markdown (or `<del>`) syntax. Elements that will be
 * descended into by the caller (see checkDescendInto) yield nothing here,
 * as their children are visited separately.
 *
 * @param n the element to convert
 * @param partCreator factory used to build parts
 * @param lastNode the node handled just before `n`, if any; only used to
 *     decide whether a <p> needs a leading newline
 * @param state list state; `listIndex` and `listDepth` are mutated when an
 *     <ol>/<ul> is entered and when an ordered <li> is numbered
 * @returns a part, an array of parts (code blocks), or undefined when the
 *     element itself contributes nothing
 */
function parseElement(n: HTMLElement, partCreator: PartCreator, lastNode: HTMLElement | undefined, state: IState) {
    switch (n.nodeName) {
        case "H1":
        case "H2":
        case "H3":
        case "H4":
        case "H5":
        case "H6":
            return parseHeader(n, partCreator);
        case "A":
            return parseLink(<HTMLAnchorElement>n, partCreator);
        case "BR":
            return partCreator.newline();
        case "EM":
            return partCreator.plain(`_${n.textContent}_`);
        case "STRONG":
            return partCreator.plain(`**${n.textContent}**`);
        case "PRE":
            return parseCodeBlock(n, partCreator);
        case "CODE":
            return partCreator.plain(`\`${n.textContent}\``);
        case "DEL":
            return partCreator.plain(`<del>${n.textContent}</del>`);
        case "LI": {
            // An <li> can show up outside any list (the depth is then unset
            // or back at 0). Clamp the count, as String.prototype.repeat
            // throws a RangeError for negative counts.
            const indent = " ".repeat(Math.max((state.listDepth || 0) - 1, 0));
            const parent = n.parentElement;
            if (parent && parent.nodeName === "OL") {
                // The markdown parser doesn't do nested indexed lists at all, but this supports it anyway.
                const index = state.listIndex[state.listIndex.length - 1];
                state.listIndex[state.listIndex.length - 1] += 1;
                return partCreator.plain(`${indent}${index}. `);
            } else {
                return partCreator.plain(`${indent}- `);
            }
        }
        case "P": {
            // only separate a paragraph from content that came before it
            if (lastNode) {
                return partCreator.newline();
            }
            break;
        }
        case "OL":
            // start numbering at the list's `start` value, defaulting to 1
            state.listIndex.push((<HTMLOListElement>n).start || 1);
            /* falls through */
        case "UL":
            state.listDepth = (state.listDepth || 0) + 1;
            /* falls through */
        default:
            // don't textify block nodes we'll descend into
            if (!checkDescendInto(n)) {
                return partCreator.plain(n.textContent);
            }
    }
}
|
|
|
|
|
2019-10-13 11:10:11 +00:00
|
|
|
/**
 * Tells whether the children of `node` should be visited individually.
 *
 * A <pre> is never descended into: parseCodeBlock textifies it as a whole,
 * since markup inside a code block is not preserved. Every other node is
 * descended into exactly when checkBlockNode reports it as a block node.
 *
 * @param node the node about to be visited
 * @returns false for PRE, otherwise the result of checkBlockNode(node)
 */
function checkDescendInto(node) {
    return node.nodeName !== "PRE" && checkBlockNode(node);
}
|
|
|
|
|
|
|
|
/**
 * Tells whether a node should be skipped entirely during deserialisation.
 *
 * @param n the node to check
 * @returns true for text nodes consisting of exactly one "\n", for
 *     MX-REPLY elements, and for any node that is neither a text node nor
 *     an element; false otherwise
 */
function checkIgnored(n) {
    switch (n.nodeType) {
        case Node.TEXT_NODE:
            // riot adds \n text nodes in a lot of places,
            // which should be ignored
            return n.nodeValue === "\n";
        case Node.ELEMENT_NODE:
            return n.nodeName === "MX-REPLY";
        default:
            return true;
    }
}
|
|
|
|
|
2019-08-20 10:34:35 +00:00
|
|
|
// Text placed at the start of every line of quoted content.
const QUOTE_LINE_PREFIX = "> ";

/**
 * Inserts a quote prefix part at the start of every line in `parts`.
 * `parts` is modified in place; nothing is returned.
 *
 * A prefix is added after every part of type "newline". When
 * `isFirstNode` is set, one is also added at the very front: in that case
 * no newline part precedes the first line of the quote, so it would
 * otherwise go unprefixed.
 *
 * @param isFirstNode whether `parts` begins the whole message
 * @param parts the parts to prefix, mutated in place
 * @param partCreator factory used to build the prefix parts
 */
function prefixQuoteLines(isFirstNode, parts, partCreator) {
    const prefixed = [];
    if (isFirstNode) {
        prefixed.push(partCreator.plain(QUOTE_LINE_PREFIX));
    }
    for (const part of parts) {
        prefixed.push(part);
        if (part.type === "newline") {
            prefixed.push(partCreator.plain(QUOTE_LINE_PREFIX));
        }
    }
    // write the result back into the caller's array
    parts.length = 0;
    for (const part of prefixed) {
        parts.push(part);
    }
}
|
|
|
|
|
2020-04-14 23:49:08 +00:00
|
|
|
/**
 * Converts the HTML body of a message into editor parts.
 *
 * The HTML is parsed into a detached document and walked depth first. Text
 * nodes become plain / at-room parts, elements are handled by
 * parseElement, and a newline part is inserted between block-level nodes.
 * Lines that belong to a quote are prefixed through prefixQuoteLines.
 *
 * A node counts as quoted when the whole message is quoted
 * (`isQuotedMessage`) or when it is, or sits inside, a <blockquote>. This
 * is derived from the node's ancestors rather than from a flag toggled on
 * enter/leave. Content following a blockquote in a quoted message, or
 * following a nested blockquote, therefore keeps its prefix, and the
 * result does not depend on onNodeLeave being called.
 *
 * @param html the HTML to convert
 * @param partCreator factory used to build parts
 * @param isQuotedMessage whether every line of the message is to be quoted
 * @returns the parts representing the message
 */
function parseHtmlMessage(html: string, partCreator: PartCreator, isQuotedMessage: boolean) {
    // no nodes from parsing here should be inserted in the document,
    // as scripts in event handlers, etc would be executed then.
    // we're only taking text, so that is fine
    const rootNode = new DOMParser().parseFromString(html, "text/html").body;
    const parts = [];
    let lastNode;
    const state: IState = {
        listIndex: [],
    };

    // whether `n` is a BLOCKQUOTE or has one among its ancestors below the root
    function isWithinBlockquote(n: Node): boolean {
        for (let el: Node | null = n; el && el !== rootNode; el = el.parentNode) {
            if (el.nodeName === "BLOCKQUOTE") {
                return true;
            }
        }
        return false;
    }

    function onNodeEnter(n) {
        if (checkIgnored(n)) {
            return false;
        }
        const inQuote = isQuotedMessage || isWithinBlockquote(n);

        const newParts = [];
        // block nodes start on their own line, and end the line before them
        if (lastNode && (checkBlockNode(lastNode) || checkBlockNode(n))) {
            newParts.push(partCreator.newline());
        }

        if (n.nodeType === Node.TEXT_NODE) {
            newParts.push(...parseAtRoomMentions(n.nodeValue, partCreator));
        } else if (n.nodeType === Node.ELEMENT_NODE) {
            const parseResult = parseElement(n, partCreator, lastNode, state);
            if (parseResult) {
                if (Array.isArray(parseResult)) {
                    newParts.push(...parseResult);
                } else {
                    newParts.push(parseResult);
                }
            }
        }

        if (newParts.length && inQuote) {
            const isFirstPart = parts.length === 0;
            prefixQuoteLines(isFirstPart, newParts, partCreator);
        }

        parts.push(...newParts);

        const descend = checkDescendInto(n);
        // when not descending (like for PRE), onNodeLeave won't be called to set lastNode
        // so do that here.
        lastNode = descend ? null : n;
        return descend;
    }

    function onNodeLeave(n) {
        if (checkIgnored(n)) {
            return;
        }
        switch (n.nodeName) {
            case "OL":
                state.listIndex.pop();
                /* falls through */
            case "UL":
                state.listDepth -= 1;
                break;
        }
        lastNode = n;
    }

    walkDOMDepthFirst(rootNode, onNodeEnter, onNodeLeave);

    return parts;
}
|
|
|
|
|
2020-04-14 23:49:08 +00:00
|
|
|
/**
 * Converts a plain-text message body into editor parts.
 *
 * The body is split into lines on "\r\n", "\r" or "\n". Each line is
 * scanned for "@room" mentions, and the lines are joined by newline parts.
 * When `isQuotedMessage` is set, every line is preceded by a quote prefix
 * part.
 *
 * @param body the plain-text body
 * @param partCreator factory used to build parts
 * @param isQuotedMessage whether every line is to be quoted
 * @returns the parts representing the message
 */
export function parsePlainTextMessage(body: string, partCreator: PartCreator, isQuotedMessage: boolean) {
    // split on any new-line combination not just \n, collapses \r\n
    const lines = body.split(/\r\n|\r|\n/g);
    const lastLineNo = lines.length - 1;
    const parts = [];
    lines.forEach((line, lineNo) => {
        if (isQuotedMessage) {
            parts.push(partCreator.plain(QUOTE_LINE_PREFIX));
        }
        parts.push(...parseAtRoomMentions(line, partCreator));
        // newline parts separate lines, so none follows the final line
        if (lineNo !== lastLineNo) {
            parts.push(partCreator.newline());
        }
    });
    return parts;
}
|
|
|
|
|
2020-04-14 23:49:08 +00:00
|
|
|
/**
 * Converts the content of a message event into editor parts.
 *
 * Content whose `format` is "org.matrix.custom.html" is parsed from
 * `formatted_body` as HTML; anything else is parsed from `body` as plain
 * text. A missing body is treated as the empty string. For "m.emote"
 * messages a plain "/me " part is put in front of the result.
 *
 * @param event the event whose content is converted
 * @param partCreator factory used to build parts
 * @param options `isQuotedMessage` (default false): whether the message is
 *     to be rendered as a quote
 * @returns the parts representing the event
 */
export function parseEvent(event: MatrixEvent, partCreator: PartCreator, {isQuotedMessage = false} = {}) {
    const content = event.getContent();
    const isHtml = content.format === "org.matrix.custom.html";
    const parts = isHtml
        ? parseHtmlMessage(content.formatted_body || "", partCreator, isQuotedMessage)
        : parsePlainTextMessage(content.body || "", partCreator, isQuotedMessage);
    if (content.msgtype === "m.emote") {
        parts.unshift(partCreator.plain("/me "));
    }
    return parts;
}
|