Switch from cheerio to DOMParser (#10929)
* Add tests around feature_latex_maths * Switch from cheerio to DOMParser * strict * Iterate
This commit is contained in:
parent
151b0efe73
commit
72d1bd910a
7 changed files with 98 additions and 99 deletions
|
@ -68,7 +68,6 @@
|
|||
"@testing-library/react-hooks": "^8.0.1",
|
||||
"await-lock": "^2.1.0",
|
||||
"blurhash": "^1.1.3",
|
||||
"cheerio": "^1.0.0-rc.9",
|
||||
"classnames": "^2.2.6",
|
||||
"commonmark": "^0.30.0",
|
||||
"counterpart": "^0.18.6",
|
||||
|
|
|
@ -19,7 +19,6 @@ limitations under the License.
|
|||
|
||||
import React, { LegacyRef, ReactElement, ReactNode } from "react";
|
||||
import sanitizeHtml from "sanitize-html";
|
||||
import { load as cheerio } from "cheerio";
|
||||
import classNames from "classnames";
|
||||
import EMOJIBASE_REGEX from "emojibase-regex";
|
||||
import { merge, split } from "lodash";
|
||||
|
@ -549,27 +548,19 @@ export function bodyToHtml(content: IContent, highlights: Optional<string[]>, op
|
|||
}
|
||||
|
||||
safeBody = sanitizeHtml(formattedBody!, sanitizeParams);
|
||||
const phtml = cheerio(safeBody, {
|
||||
// @ts-ignore: The `_useHtmlParser2` internal option is the
|
||||
// simplest way to both parse and render using `htmlparser2`.
|
||||
_useHtmlParser2: true,
|
||||
decodeEntities: false,
|
||||
});
|
||||
const isPlainText = phtml.html() === phtml.root().text();
|
||||
const phtml = new DOMParser().parseFromString(safeBody, "text/html");
|
||||
const isPlainText = phtml.body.innerHTML === phtml.body.textContent;
|
||||
isHtmlMessage = !isPlainText;
|
||||
|
||||
if (isHtmlMessage && SettingsStore.getValue("feature_latex_maths")) {
|
||||
// @ts-ignore - The types for `replaceWith` wrongly expect
|
||||
// Cheerio instance to be returned.
|
||||
phtml('div, span[data-mx-maths!=""]').replaceWith(function (i, e) {
|
||||
return katex.renderToString(decode(phtml(e).attr("data-mx-maths")), {
|
||||
[...phtml.querySelectorAll<HTMLElement>("div, span[data-mx-maths]")].forEach((e) => {
|
||||
e.outerHTML = katex.renderToString(decode(e.getAttribute("data-mx-maths")), {
|
||||
throwOnError: false,
|
||||
// @ts-ignore - `e` can be an Element, not just a Node
|
||||
displayMode: e.name == "div",
|
||||
displayMode: e.tagName == "DIV",
|
||||
output: "htmlAndMathml",
|
||||
});
|
||||
});
|
||||
safeBody = phtml.html();
|
||||
safeBody = phtml.body.innerHTML;
|
||||
}
|
||||
} else if (highlighter) {
|
||||
safeBody = highlighter.applyHighlights(escapeHtml(plainBody), safeHighlights!).join("");
|
||||
|
|
|
@ -16,7 +16,6 @@ limitations under the License.
|
|||
*/
|
||||
|
||||
import { encode } from "html-entities";
|
||||
import { load as cheerio } from "cheerio";
|
||||
import escapeHtml from "escape-html";
|
||||
|
||||
import Markdown from "../Markdown";
|
||||
|
@ -133,8 +132,7 @@ export function htmlSerializeFromMdIfNeeded(md: string, { forceHTML = false } =
|
|||
});
|
||||
});
|
||||
|
||||
// make sure div tags always start on a new line, otherwise it will confuse
|
||||
// the markdown parser
|
||||
// make sure div tags always start on a new line, otherwise it will confuse the markdown parser
|
||||
md = md.replace(/(.)<div/g, function (m, p1) {
|
||||
return `${p1}\n<div`;
|
||||
});
|
||||
|
@ -143,39 +141,29 @@ export function htmlSerializeFromMdIfNeeded(md: string, { forceHTML = false } =
|
|||
const parser = new Markdown(md);
|
||||
if (!parser.isPlainText() || forceHTML) {
|
||||
// feed Markdown output to HTML parser
|
||||
const phtml = cheerio(parser.toHTML(), {
|
||||
// @ts-ignore: The `_useHtmlParser2` internal option is the
|
||||
// simplest way to both parse and render using `htmlparser2`.
|
||||
_useHtmlParser2: true,
|
||||
decodeEntities: false,
|
||||
});
|
||||
const phtml = new DOMParser().parseFromString(parser.toHTML(), "text/html");
|
||||
|
||||
if (SettingsStore.getValue("feature_latex_maths")) {
|
||||
// original Markdown without LaTeX replacements
|
||||
const parserOrig = new Markdown(orig);
|
||||
const phtmlOrig = cheerio(parserOrig.toHTML(), {
|
||||
// @ts-ignore: The `_useHtmlParser2` internal option is the
|
||||
// simplest way to both parse and render using `htmlparser2`.
|
||||
_useHtmlParser2: true,
|
||||
decodeEntities: false,
|
||||
});
|
||||
const phtmlOrig = new DOMParser().parseFromString(parserOrig.toHTML(), "text/html");
|
||||
|
||||
// since maths delimiters are handled before Markdown,
|
||||
// code blocks could contain mangled content.
|
||||
// replace code blocks with original content
|
||||
phtmlOrig("code").each(function (i) {
|
||||
phtml("code").eq(i).text(phtmlOrig("code").eq(i).text());
|
||||
[...phtmlOrig.getElementsByTagName("code")].forEach((e, i) => {
|
||||
phtml.getElementsByTagName("code").item(i)!.textContent = e.textContent;
|
||||
});
|
||||
|
||||
// add fallback output for latex math, which should not be interpreted as markdown
|
||||
phtml("div, span").each(function (i, e) {
|
||||
const tex = phtml(e).attr("data-mx-maths");
|
||||
[...phtml.querySelectorAll("div, span")].forEach((e, i) => {
|
||||
const tex = e.getAttribute("data-mx-maths");
|
||||
if (tex) {
|
||||
phtml(e).html(`<code>${tex}</code>`);
|
||||
e.innerHTML = `<code>${tex}</code>`;
|
||||
}
|
||||
});
|
||||
}
|
||||
return phtml.html();
|
||||
return phtml.body.innerHTML;
|
||||
}
|
||||
// ensure removal of escape backslashes in non-Markdown messages
|
||||
if (md.indexOf("\\") > -1) {
|
||||
|
|
|
@ -131,4 +131,40 @@ describe("bodyToHtml", () => {
|
|||
|
||||
expect(asFragment()).toMatchSnapshot();
|
||||
});
|
||||
|
||||
describe("feature_latex_maths", () => {
|
||||
beforeEach(() => {
|
||||
jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths");
|
||||
});
|
||||
|
||||
it("should render inline katex", () => {
|
||||
const html = getHtml({
|
||||
body: "hello \\xi world",
|
||||
msgtype: "m.text",
|
||||
formatted_body: 'hello <span data-mx-maths="\\xi"><code>\\xi</code></span> world',
|
||||
format: "org.matrix.custom.html",
|
||||
});
|
||||
expect(html).toMatchSnapshot();
|
||||
});
|
||||
|
||||
it("should render block katex", () => {
|
||||
const html = getHtml({
|
||||
body: "hello \\xi world",
|
||||
msgtype: "m.text",
|
||||
formatted_body: '<p>hello</p><div data-mx-maths="\\xi"><code>\\xi</code></div><p>world</p>',
|
||||
format: "org.matrix.custom.html",
|
||||
});
|
||||
expect(html).toMatchSnapshot();
|
||||
});
|
||||
|
||||
it("should not mangle code blocks", () => {
|
||||
const html = getHtml({
|
||||
body: "hello \\xi world",
|
||||
msgtype: "m.text",
|
||||
formatted_body: "<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>",
|
||||
format: "org.matrix.custom.html",
|
||||
});
|
||||
expect(html).toMatchSnapshot();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -15,3 +15,9 @@ exports[`bodyToHtml should generate big emoji for an emoji-only reply to a messa
|
|||
</span>
|
||||
</DocumentFragment>
|
||||
`;
|
||||
|
||||
exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;
|
||||
|
||||
exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hello</p><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span></span><p>world</p>"`;
|
||||
|
||||
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;
|
||||
|
|
|
@ -17,6 +17,7 @@ limitations under the License.
|
|||
import EditorModel from "../../src/editor/model";
|
||||
import { htmlSerializeIfNeeded } from "../../src/editor/serialize";
|
||||
import { createPartCreator } from "./mock";
|
||||
import SettingsStore from "../../src/settings/SettingsStore";
|
||||
|
||||
describe("editor/serialize", function () {
|
||||
describe("with markdown", function () {
|
||||
|
@ -75,6 +76,7 @@ describe("editor/serialize", function () {
|
|||
expect(html).toBe("*hello* world < hey world!");
|
||||
});
|
||||
});
|
||||
|
||||
describe("with plaintext", function () {
|
||||
it("markdown remains plaintext", function () {
|
||||
const pc = createPartCreator();
|
||||
|
@ -102,4 +104,42 @@ describe("editor/serialize", function () {
|
|||
expect(html).toBe("hello world");
|
||||
});
|
||||
});
|
||||
|
||||
describe("feature_latex_maths", () => {
|
||||
beforeEach(() => {
|
||||
jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths");
|
||||
});
|
||||
|
||||
it("should support inline katex", () => {
|
||||
const pc = createPartCreator();
|
||||
const model = new EditorModel([pc.plain("hello $\\xi$ world")], pc);
|
||||
const html = htmlSerializeIfNeeded(model, {});
|
||||
expect(html).toMatchInlineSnapshot(`"hello <span data-mx-maths="\\xi"><code>\\xi</code></span> world"`);
|
||||
});
|
||||
|
||||
it("should support block katex", () => {
|
||||
const pc = createPartCreator();
|
||||
const model = new EditorModel([pc.plain("hello \n$$\\xi$$\n world")], pc);
|
||||
const html = htmlSerializeIfNeeded(model, {});
|
||||
expect(html).toMatchInlineSnapshot(`
|
||||
"<p>hello</p>
|
||||
<div data-mx-maths="\\xi"><code>\\xi</code></div>
|
||||
<p>world</p>
|
||||
"
|
||||
`);
|
||||
});
|
||||
|
||||
it("should not mangle code blocks", () => {
|
||||
const pc = createPartCreator();
|
||||
const model = new EditorModel([pc.plain("hello\n```\n$\\xi$\n```\nworld")], pc);
|
||||
const html = htmlSerializeIfNeeded(model, {});
|
||||
expect(html).toMatchInlineSnapshot(`
|
||||
"<p>hello</p>
|
||||
<pre><code>$\\xi$
|
||||
</code></pre>
|
||||
<p>world</p>
|
||||
"
|
||||
`);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
63
yarn.lock
63
yarn.lock
|
@ -3028,11 +3028,6 @@ blurhash@^1.1.3:
|
|||
resolved "https://registry.yarnpkg.com/blurhash/-/blurhash-1.1.5.tgz#3034104cd5dce5a3e5caa871ae2f0f1f2d0ab566"
|
||||
integrity sha512-a+LO3A2DfxTaTztsmkbLYmUzUeApi0LZuKalwbNmqAHR6HhJGMt1qSV/R3wc+w4DL28holjqO3Bg74aUGavGjg==
|
||||
|
||||
boolbase@^1.0.0:
|
||||
version "1.0.0"
|
||||
resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e"
|
||||
integrity sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==
|
||||
|
||||
brace-expansion@^1.1.7:
|
||||
version "1.1.11"
|
||||
resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd"
|
||||
|
@ -3189,31 +3184,6 @@ check-more-types@^2.24.0:
|
|||
resolved "https://registry.yarnpkg.com/check-more-types/-/check-more-types-2.24.0.tgz#1420ffb10fd444dcfc79b43891bbfffd32a84600"
|
||||
integrity sha512-Pj779qHxV2tuapviy1bSZNEL1maXr13bPYpsvSDB68HlYcYuhlDrmGd63i0JHMCLKzc7rUSNIrpdJlhVlNwrxA==
|
||||
|
||||
cheerio-select@^2.1.0:
|
||||
version "2.1.0"
|
||||
resolved "https://registry.yarnpkg.com/cheerio-select/-/cheerio-select-2.1.0.tgz#4d8673286b8126ca2a8e42740d5e3c4884ae21b4"
|
||||
integrity sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==
|
||||
dependencies:
|
||||
boolbase "^1.0.0"
|
||||
css-select "^5.1.0"
|
||||
css-what "^6.1.0"
|
||||
domelementtype "^2.3.0"
|
||||
domhandler "^5.0.3"
|
||||
domutils "^3.0.1"
|
||||
|
||||
cheerio@^1.0.0-rc.9:
|
||||
version "1.0.0-rc.12"
|
||||
resolved "https://registry.yarnpkg.com/cheerio/-/cheerio-1.0.0-rc.12.tgz#788bf7466506b1c6bf5fae51d24a2c4d62e47683"
|
||||
integrity sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==
|
||||
dependencies:
|
||||
cheerio-select "^2.1.0"
|
||||
dom-serializer "^2.0.0"
|
||||
domhandler "^5.0.3"
|
||||
domutils "^3.0.1"
|
||||
htmlparser2 "^8.0.1"
|
||||
parse5 "^7.0.0"
|
||||
parse5-htmlparser2-tree-adapter "^7.0.0"
|
||||
|
||||
chokidar@^3.4.0, chokidar@^3.5.1:
|
||||
version "3.5.3"
|
||||
resolved "https://registry.yarnpkg.com/chokidar/-/chokidar-3.5.3.tgz#1cf37c8707b932bd1af1ae22c0432e2acd1903bd"
|
||||
|
@ -3519,17 +3489,6 @@ css-functions-list@^3.1.0:
|
|||
resolved "https://registry.yarnpkg.com/css-functions-list/-/css-functions-list-3.1.0.tgz#cf5b09f835ad91a00e5959bcfc627cd498e1321b"
|
||||
integrity sha512-/9lCvYZaUbBGvYUgYGFJ4dcYiyqdhSjG7IPVluoV8A1ILjkF7ilmhp1OGUz8n+nmBcu0RNrQAzgD8B6FJbrt2w==
|
||||
|
||||
css-select@^5.1.0:
|
||||
version "5.1.0"
|
||||
resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6"
|
||||
integrity sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==
|
||||
dependencies:
|
||||
boolbase "^1.0.0"
|
||||
css-what "^6.1.0"
|
||||
domhandler "^5.0.2"
|
||||
domutils "^3.0.1"
|
||||
nth-check "^2.0.1"
|
||||
|
||||
css-tree@^2.3.1:
|
||||
version "2.3.1"
|
||||
resolved "https://registry.yarnpkg.com/css-tree/-/css-tree-2.3.1.tgz#10264ce1e5442e8572fc82fbe490644ff54b5c20"
|
||||
|
@ -3538,11 +3497,6 @@ css-tree@^2.3.1:
|
|||
mdn-data "2.0.30"
|
||||
source-map-js "^1.0.1"
|
||||
|
||||
css-what@^6.1.0:
|
||||
version "6.1.0"
|
||||
resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4"
|
||||
integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==
|
||||
|
||||
css.escape@^1.5.1:
|
||||
version "1.5.1"
|
||||
resolved "https://registry.yarnpkg.com/css.escape/-/css.escape-1.5.1.tgz#42e27d4fa04ae32f931a4b4d4191fa9cddee97cb"
|
||||
|
@ -5080,7 +5034,7 @@ html-tags@^3.3.1:
|
|||
resolved "https://registry.yarnpkg.com/html-tags/-/html-tags-3.3.1.tgz#a04026a18c882e4bba8a01a3d39cfe465d40b5ce"
|
||||
integrity sha512-ztqyC3kLto0e9WbNp0aeP+M3kTt+nbaIveGmUxAtZa+8iFgKLUOD4YKM5j+f3QD89bra7UeumolZHKuOXnTmeQ==
|
||||
|
||||
htmlparser2@^8.0.0, htmlparser2@^8.0.1:
|
||||
htmlparser2@^8.0.0:
|
||||
version "8.0.2"
|
||||
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.2.tgz#f002151705b383e62433b5cf466f5b716edaec21"
|
||||
integrity sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==
|
||||
|
@ -6767,13 +6721,6 @@ npm-run-path@^4.0.0, npm-run-path@^4.0.1:
|
|||
dependencies:
|
||||
path-key "^3.0.0"
|
||||
|
||||
nth-check@^2.0.1:
|
||||
version "2.1.1"
|
||||
resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.1.1.tgz#c9eab428effce36cd6b92c924bdb000ef1f1ed1d"
|
||||
integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==
|
||||
dependencies:
|
||||
boolbase "^1.0.0"
|
||||
|
||||
nwsapi@^2.2.2:
|
||||
version "2.2.3"
|
||||
resolved "https://registry.yarnpkg.com/nwsapi/-/nwsapi-2.2.3.tgz#00e04dfd5a4a751e5ec2fecdc75dfd2f0db820fa"
|
||||
|
@ -6982,14 +6929,6 @@ parse-srcset@^1.0.2:
|
|||
resolved "https://registry.yarnpkg.com/parse-srcset/-/parse-srcset-1.0.2.tgz#f2bd221f6cc970a938d88556abc589caaaa2bde1"
|
||||
integrity sha512-/2qh0lav6CmI15FzA3i/2Bzk2zCgQhGMkvhOhKNcBVQ1ldgpbfiNTVslmooUmWJcADi1f1kIeynbDRVzNlfR6Q==
|
||||
|
||||
parse5-htmlparser2-tree-adapter@^7.0.0:
|
||||
version "7.0.0"
|
||||
resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.0.0.tgz#23c2cc233bcf09bb7beba8b8a69d46b08c62c2f1"
|
||||
integrity sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==
|
||||
dependencies:
|
||||
domhandler "^5.0.2"
|
||||
parse5 "^7.0.0"
|
||||
|
||||
parse5@^7.0.0, parse5@^7.1.1:
|
||||
version "7.1.2"
|
||||
resolved "https://registry.yarnpkg.com/parse5/-/parse5-7.1.2.tgz#0736bebbfd77793823240a23b7fc5e010b7f8e32"
|
||||
|
|
Loading…
Reference in a new issue