Use LaTeX delimiters by default, add /tex command

Since parsing $ signs as maths delimiters is tricky, switch the default
to \(...\) for inline and \[...\] for display maths, as used in
LaTeX. Add a /tex command to explicitly parse in TeX mode, which uses
$...$ for inline and $$...$$ for display maths.

Signed-off-by: Sven Mäder <maeder@phys.ethz.ch>
This commit is contained in:
Sven Mäder 2020-12-20 23:14:56 +01:00
parent fa02630c4e
commit 78b3f50bfd
4 changed files with 99 additions and 26 deletions

View file

@ -48,6 +48,7 @@ import SettingsStore from "./settings/SettingsStore";
import {UIFeature} from "./settings/UIFeature";
import {CHAT_EFFECTS} from "./effects"
import CallHandler from "./CallHandler";
import {markdownSerializeIfNeeded} from './editor/serialize';
// XXX: workaround for https://github.com/microsoft/TypeScript/issues/31816
interface HTMLInputEvent extends Event {
@ -223,6 +224,23 @@ export const Commands = [
},
category: CommandCategories.messages,
}),
new Command({
    command: 'tex',
    args: '<message>',
    description: _td('Sends a message in TeX mode, using $ and $$ delimiters for maths'),
    // Sends <message> with maths parsed in TeX mode ($...$ inline, $$...$$ display).
    // Rejects when the "feature_latex_maths" setting is off, or with the usage
    // string when no message was given.
    runFn: function(roomId, args) {
        if (!SettingsStore.getValue("feature_latex_maths")) {
            return reject("Render LaTeX maths in messages needs to be enabled in Labs");
        }
        if (!args) {
            return reject(this.getUsage());
        }

        const html = markdownSerializeIfNeeded(args, {forceHTML: false}, {forceTEX: true});
        const client = MatrixClientPeg.get();
        // NOTE(review): with forceHTML off the serializer only builds HTML when the
        // Markdown parser does not consider the input plain text, so it presumably
        // yields no HTML for plain messages — confirm against its fall-through path.
        // Avoid sending an HTML event without a formatted body in that case.
        if (!html) {
            return success(client.sendTextMessage(roomId, args));
        }
        return success(client.sendHtmlMessage(roomId, args, html));
    },
    category: CommandCategories.messages,
}),
new Command({
command: 'ddg',
args: '<query>',

View file

@ -136,11 +136,11 @@ function parseElement(n: HTMLElement, partCreator: PartCreator, lastNode: HTMLEl
// math nodes are translated back into delimited latex strings
if (n.hasAttribute("data-mx-maths")) {
const delimLeft = (n.nodeName == "SPAN") ?
(SdkConfig.get()['latex_maths_delims'] || {})['inline_left'] || "$" :
(SdkConfig.get()['latex_maths_delims'] || {})['display_left'] || "$$";
(SdkConfig.get()['latex_maths_delims'] || {})['inline_left'] || "\\(" :
(SdkConfig.get()['latex_maths_delims'] || {})['display_left'] || "\\[";
const delimRight = (n.nodeName == "SPAN") ?
(SdkConfig.get()['latex_maths_delims'] || {})['inline_right'] || "$" :
(SdkConfig.get()['latex_maths_delims'] || {})['display_right'] || "$$";
(SdkConfig.get()['latex_maths_delims'] || {})['inline_right'] || "\\)" :
(SdkConfig.get()['latex_maths_delims'] || {})['display_right'] || "\\]";
const tex = n.getAttribute("data-mx-maths");
return partCreator.plain(delimLeft + tex + delimRight);
} else if (!checkDescendInto(n)) {

View file

@ -41,24 +41,57 @@ export function mdSerialize(model: EditorModel) {
}, "");
}
export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} = {}) {
let md = mdSerialize(model);
export function markdownSerializeIfNeeded(md: string, {forceHTML = false} = {}, {forceTEX = false} = {}) {
// copy of raw input to remove unwanted math later
const orig = md;
if (SettingsStore.getValue("feature_latex_maths")) {
const displayPattern = (SdkConfig.get()['latex_maths_delims'] || {})['display_pattern'] ||
"\\$\\$(([^$]|\\\\\\$)*)\\$\\$";
const inlinePattern = (SdkConfig.get()['latex_maths_delims'] || {})['inline_pattern'] ||
"\\$(([^$]|\\\\\\$)*)\\$";
if (forceTEX) {
// detect math with tex delimiters, inline: $...$, display $$...$$
// preferably use negative lookbehinds, not supported in all major browsers:
// const displayPattern = "^(?<!\\\\)\\$\\$(?![ \\t])(([^$]|\\\\\\$)+?)\\$\\$$";
// const inlinePattern = "(?:^|\\s)(?<!\\\\)\\$(?!\\s)(([^$]|\\\\\\$)+?)(?<!\\\\|\\s)\\$";
// conditions for display math detection ($$...$$):
// - left delimiter ($$) is not escaped by a backslash
// - pattern starts at the beginning of a line
// - left delimiter is not followed by a space or tab character
// - pattern ends at the end of a line
const displayPattern = "^(?!\\\\)\\$\\$(?![ \\t])(([^$]|\\\\\\$)+?)\\$\\$$";
// conditions for inline math detection ($...$):
// - left and right delimiters ($) are not escaped by backslashes
// - pattern starts at the beginning of a line or follows a whitespace character
// - left delimiter is not followed by a whitespace character
// - right delimiter is not preceded by a whitespace character
const inlinePattern = "(^|\\s)(?!\\\\)\\$(?!\\s)(([^$]|\\\\\\$)*[^\\\\\\s\\$](?:\\\\\\$)?)\\$";
md = md.replace(RegExp(displayPattern, "gm"), function(m, p1) {
const p1e = AllHtmlEntities.encode(p1);
return `<div data-mx-maths="${p1e}">\n\n</div>\n\n`;
});
md = md.replace(RegExp(inlinePattern, "gm"), function(m, p1) {
const p1e = AllHtmlEntities.encode(p1);
return `<span data-mx-maths="${p1e}"></span>`;
md = md.replace(RegExp(inlinePattern, "gm"), function(m, p1, p2) {
const p2e = AllHtmlEntities.encode(p2);
return `${p1}<span data-mx-maths="${p2e}"></span>`;
});
} else {
// detect math with latex delimiters, inline: \(...\), display \[...\]
const displayPattern = (SdkConfig.get()['latex_maths_delims'] || {})['display_pattern'] ||
"^\\\\\\[(.*?)\\\\\\]$";
const inlinePattern = (SdkConfig.get()['latex_maths_delims'] || {})['inline_pattern'] ||
"(^|\\s)\\\\\\((.*?)\\\\\\)";
md = md.replace(RegExp(displayPattern, "gms"), function(m, p1) {
const p1e = AllHtmlEntities.encode(p1);
return `<div data-mx-maths="${p1e}">\n\n</div>\n\n`;
});
md = md.replace(RegExp(inlinePattern, "gms"), function(m, p1, p2) {
const p2e = AllHtmlEntities.encode(p2);
return `${p1}<span data-mx-maths="${p2e}"></span>`;
});
}
// make sure div tags always start on a new line, otherwise it will confuse
// the markdown parser
@ -69,7 +102,21 @@ export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} =
if (!parser.isPlainText() || forceHTML) {
// feed Markdown output to HTML parser
const phtml = cheerio.load(parser.toHTML(),
{ _useHtmlParser2: true, decodeEntities: false })
{ _useHtmlParser2: true, decodeEntities: false });
if (SettingsStore.getValue("feature_latex_maths")) {
// original Markdown without LaTeX replacements
const parserOrig = new Markdown(orig);
const phtmlOrig = cheerio.load(parserOrig.toHTML(),
{ _useHtmlParser2: true, decodeEntities: false });
// since maths delimiters are handled before Markdown,
// code blocks could contain mangled content.
// replace code blocks with original content
phtml('code').contents('div, span').each(function(i) {
const origData = phtmlOrig('code').contents('div, span')[i].data;
phtml('code').contents('div, span')[i].data = origData;
});
// add fallback output for latex math, which should not be interpreted as markdown
phtml('div, span').each(function(i, e) {
@ -78,6 +125,7 @@ export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} =
phtml(e).html(`<code>${tex}</code>`)
}
});
}
return phtml.html();
}
// ensure removal of escape backslashes in non-Markdown messages
@ -86,6 +134,12 @@ export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} =
}
}
/**
 * Serializes the editor model to Markdown via mdSerialize and passes the
 * result to markdownSerializeIfNeeded, forwarding the forceHTML option.
 *
 * @param model the editor model to serialize
 * @returns whatever markdownSerializeIfNeeded returns for the serialized Markdown
 */
export function htmlSerializeIfNeeded(model: EditorModel, {forceHTML = false} = {}) {
    const markdown = mdSerialize(model);
    return markdownSerializeIfNeeded(markdown, {forceHTML});
}
export function textSerialize(model: EditorModel) {
return model.parts.reduce((text, part) => {
switch (part.type) {

View file

@ -416,6 +416,7 @@
"Prepends ( ͡° ͜ʖ ͡°) to a plain-text message": "Prepends ( ͡° ͜ʖ ͡°) to a plain-text message",
"Sends a message as plain text, without interpreting it as markdown": "Sends a message as plain text, without interpreting it as markdown",
"Sends a message as html, without interpreting it as markdown": "Sends a message as html, without interpreting it as markdown",
"Sends a message in TeX mode, using $ and $$ delimiters for maths": "Sends a message in TeX mode, using $ and $$ delimiters for maths",
"Searches DuckDuckGo for results": "Searches DuckDuckGo for results",
"/ddg is not a command": "/ddg is not a command",
"To use it, just wait for autocomplete results to load and tab through them.": "To use it, just wait for autocomplete results to load and tab through them.",