/*
Copyright 2016 OpenMarket Ltd
Copyright 2021 The Matrix.org Foundation C.I.C.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
|
2021-01-27 13:06:59 +00:00
|
|
|
import * as commonmark from 'commonmark';
|
2021-07-01 19:33:25 +00:00
|
|
|
import { escape } from "lodash";
|
2017-02-02 14:17:07 +00:00
|
|
|
|
2020-10-10 08:12:53 +00:00
|
|
|
// Raw HTML tag names that are passed through as markup when they appear
// without attributes; other raw HTML is escaped by the HTML renderer.
const ALLOWED_HTML_TAGS: readonly string[] = ['sub', 'sup', 'del', 'u'];

// These types of node are definitely text
const TEXT_NODES: readonly string[] = ['text', 'softbreak', 'linebreak', 'paragraph', 'document'];
|
|
|
|
|
2021-07-01 20:31:17 +00:00
|
|
|
// As far as @types/commonmark is concerned, these are not public, so add them.
// Only the per-node-type hooks that this file overrides are declared here.
interface CommonmarkHtmlRendererInternal extends commonmark.HtmlRenderer {
    /** Hook for `paragraph` nodes; receives the node and the walker's `entering` flag. */
    paragraph: (node: commonmark.Node, entering: boolean) => void;
    /** Hook for `link` nodes; receives the node and the walker's `entering` flag. */
    link: (node: commonmark.Node, entering: boolean) => void;
    /** Hook for raw inline HTML nodes. */
    html_inline: (node: commonmark.Node) => void; // eslint-disable-line camelcase
    /** Hook for raw block-level HTML nodes. */
    html_block: (node: commonmark.Node) => void; // eslint-disable-line camelcase
}
|
|
|
|
|
|
|
|
/**
 * Decides whether a raw HTML node may be emitted as real markup.
 *
 * Two shapes are accepted:
 *  - the maths wrappers: `<div data-mx-maths="...">` / `<span data-mx-maths="...">`
 *    and their closing tags `</div>` / `</span>`;
 *  - an opening or closing tag whose whole content is one of ALLOWED_HTML_TAGS.
 *
 * @param node - the node whose `literal` holds the raw HTML text
 * @returns true if the literal is one of the accepted tags, false otherwise
 *     (including when the node has no literal at all)
 */
function isAllowedHtmlTag(node: commonmark.Node): boolean {
    const literal = node.literal;
    if (literal == null) {
        // Bail out explicitly instead of letting RegExp.exec coerce a
        // missing literal into the string "null".
        return false;
    }

    if (/^<((div|span) data-mx-maths="[^"]*"|\/(div|span))>$/.test(literal)) {
        return true;
    }

    // The capture is everything between the angle brackets, so a tag that
    // carries attributes never equals an allow-listed name and is rejected.
    const tag = /^<\/?(.*)>$/.exec(literal)?.[1];
    return tag !== undefined && ALLOWED_HTML_TAGS.includes(tag);
}
|
|
|
|
|
|
|
|
/**
 * Returns true if the parse output containing the node
 * comprises multiple block level elements (ie. lines),
 * or false if it is only a single line.
 *
 * @param node - any node in the parse tree
 * @returns whether the root of the tree has more than one child
 */
function isMultiLine(node: commonmark.Node): boolean {
    // Climb to the root of the tree the node belongs to.
    let root = node;
    while (root.parent) {
        root = root.parent;
    }
    // Exactly one top-level child means first and last are the same node.
    return root.firstChild !== root.lastChild;
}
|
2016-09-22 16:18:12 +00:00
|
|
|
|
|
|
|
/**
 * Class that wraps commonmark, adding the ability to see whether
 * a given message actually uses any markdown syntax or whether
 * it's plain text.
 */
export default class Markdown {
    private readonly input: string;
    private readonly parsed: commonmark.Node;

    /**
     * Parses the message once; every other method works on that parse tree.
     *
     * @param input - the raw message text
     */
    constructor(input: string) {
        this.input = input;

        const parser = new commonmark.Parser();
        this.parsed = parser.parse(this.input);
    }

    /**
     * Walks the parse tree looking for anything that needs HTML to render.
     *
     * @returns true if every node is a plain text node (see TEXT_NODES) or
     *     raw HTML that is not allowed (and so will be shown as text);
     *     false as soon as any other node type, or an allowed HTML tag,
     *     is encountered.
     */
    isPlainText(): boolean {
        const walker = this.parsed.walker();

        let step: commonmark.NodeWalkingStep | null;
        while ((step = walker.next())) {
            const node = step.node;
            if (TEXT_NODES.includes(node.type)) {
                // definitely text
                continue;
            }
            if (node.type === 'html_inline' || node.type === 'html_block') {
                // if it's an allowed html tag, we need to render it and therefore
                // we will need to use HTML. If it's not allowed, it's not HTML since
                // we'll just be treating it as text.
                if (isAllowedHtmlTag(node)) {
                    return false;
                }
                continue;
            }
            // Any other node type is markdown formatting.
            return false;
        }
        return true;
    }

    /**
     * Renders the parsed message to HTML.
     *
     * @param opts - rendering options
     * @param opts.externalLinks - when true, every link gets
     *     `target="_blank"` and `rel="noreferrer noopener"`
     * @returns the rendered HTML string
     */
    toHTML({ externalLinks = false } = {}): string {
        const renderer = new commonmark.HtmlRenderer({
            safe: false,

            // Set soft breaks to hard HTML breaks: commonmark
            // puts softbreaks in for multiple lines in a blockquote,
            // so if these are just newline characters then the
            // block quote ends up all on one line
            // (https://github.com/vector-im/element-web/issues/3154)
            softbreak: '<br />',
        }) as CommonmarkHtmlRendererInternal;

        // Trying to strip out the wrapping <p/> causes a lot more complication
        // than it's worth, i think. For instance, this code will go and strip
        // out any <p/> tag (no matter where it is in the tree) which doesn't
        // contain \n's.
        // On the flip side, <p/>s are quite opinionated and restricted on where
        // you can nest them.
        //
        // Let's try sending with <p/>s anyway for now, though.
        const realParagraph = renderer.paragraph;

        renderer.paragraph = function(node: commonmark.Node, entering: boolean) {
            // If there is only one top level node, just return the
            // bare text: it's a single line of text and so should be
            // 'inline', rather than unnecessarily wrapped in its own
            // p tag. If, however, we have multiple nodes, each gets
            // its own p tag to keep them as separate paragraphs.
            // However, if it's a blockquote, adds a p tag anyway
            // in order to avoid deviation to commonmark and unexpected
            // results when parsing the formatted HTML.
            if (node.parent?.type === 'block_quote' || isMultiLine(node)) {
                realParagraph.call(this, node, entering);
            }
        };

        renderer.link = function(node, entering) {
            const attrs = this.attrs(node);
            if (entering) {
                attrs.push(['href', this.esc(node.destination)]);
                if (node.title) {
                    attrs.push(['title', this.esc(node.title)]);
                }
                // Modified link behaviour to treat them all as external and
                // thus opening in a new tab.
                if (externalLinks) {
                    attrs.push(['target', '_blank']);
                    attrs.push(['rel', 'noreferrer noopener']);
                }
                this.tag('a', attrs);
            } else {
                this.tag('/a');
            }
        };

        renderer.html_inline = function(node: commonmark.Node) {
            // Allowed tags are emitted verbatim; any other raw HTML is
            // escaped so that it is displayed as text.
            if (isAllowedHtmlTag(node)) {
                this.lit(node.literal);
            } else {
                this.lit(escape(node.literal));
            }
        };

        renderer.html_block = function(node: commonmark.Node) {
            // Block-level HTML gets the same allow/escape treatment as inline
            // HTML. No line breaks are added around the block here.
            renderer.html_inline(node);
        };

        return renderer.render(this.parsed);
    }

    /*
     * Render the markdown message to plain text. That is, essentially
     * just remove any backslashes escaping what would otherwise be
     * markdown syntax
     * (to fix https://github.com/vector-im/element-web/issues/2870).
     *
     * N.B. this does **NOT** render arbitrary MD to plain text - only MD
     * which has no formatting.  Otherwise it emits HTML(!).
     */
    toPlaintext(): string {
        const renderer = new commonmark.HtmlRenderer({ safe: false }) as CommonmarkHtmlRendererInternal;

        renderer.paragraph = function(node: commonmark.Node, entering: boolean) {
            // as with toHTML, only append lines to paragraphs if there are
            // multiple paragraphs; the blank line goes after every paragraph
            // that has a following sibling.
            if (!entering && node.next && isMultiLine(node)) {
                this.lit('\n\n');
            }
        };

        renderer.html_block = function(node: commonmark.Node) {
            // Raw HTML blocks are emitted unescaped, followed by a blank line
            // when more content follows in a multi-line message.
            this.lit(node.literal);
            if (isMultiLine(node) && node.next) this.lit('\n\n');
        };

        return renderer.render(this.parsed);
    }
}
|