2015-11-27 15:02:32 +00:00
|
|
|
/*
|
2016-01-07 04:06:39 +00:00
|
|
|
Copyright 2015, 2016 OpenMarket Ltd
|
2018-02-09 12:20:05 +00:00
|
|
|
Copyright 2017, 2018 New Vector Ltd
|
2019-06-29 06:28:09 +00:00
|
|
|
Copyright 2019 Michael Telatynski <7t3chguy@gmail.com>
|
2019-10-01 02:17:54 +00:00
|
|
|
Copyright 2019 The Matrix.org Foundation C.I.C.
|
2015-11-27 15:02:32 +00:00
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2024-04-16 10:34:44 +00:00
|
|
|
import React, { LegacyRef, ReactNode } from "react";
|
2019-01-31 21:26:07 +00:00
|
|
|
import sanitizeHtml from "sanitize-html";
|
2016-07-04 22:34:57 +00:00
|
|
|
import classNames from "classnames";
|
2020-09-19 14:30:00 +00:00
|
|
|
import katex from "katex";
|
2022-12-02 11:10:54 +00:00
|
|
|
import { decode } from "html-entities";
|
2023-08-07 08:24:58 +00:00
|
|
|
import { IContent } from "matrix-js-sdk/src/matrix";
|
2022-07-12 06:27:45 +00:00
|
|
|
import { Optional } from "matrix-events-sdk";
|
2023-04-25 08:30:32 +00:00
|
|
|
import escapeHtml from "escape-html";
|
2023-08-21 19:06:40 +00:00
|
|
|
import { getEmojiFromUnicode } from "@matrix-org/emojibase-bindings";
|
2016-07-04 22:34:57 +00:00
|
|
|
|
2021-06-22 16:23:13 +00:00
|
|
|
import { IExtendedSanitizeOptions } from "./@types/sanitize-html";
|
|
|
|
import SettingsStore from "./settings/SettingsStore";
|
2022-02-22 11:14:56 +00:00
|
|
|
import { stripHTMLReply, stripPlainReply } from "./utils/Reply";
|
2023-07-10 15:09:39 +00:00
|
|
|
import { PERMITTED_URL_SCHEMES } from "./utils/UrlUtils";
|
2024-04-16 10:34:44 +00:00
|
|
|
import { sanitizeHtmlParams, transformTags } from "./Linkify";
|
2024-06-26 09:34:07 +00:00
|
|
|
import { graphemeSegmenter } from "./utils/strings";
|
2024-04-16 10:34:44 +00:00
|
|
|
|
|
|
|
export { Linkify, linkifyElement, linkifyAndSanitizeHtml } from "./Linkify";
|
2016-08-09 16:10:05 +00:00
|
|
|
|
2017-09-15 10:43:55 +00:00
|
|
|
// Characters beyond the Basic Multilingual Plane are encoded in UTF-16 as a
// high surrogate immediately followed by a low surrogate.
const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/;
// emojibase also contains a number of symbol characters that live inside the
// BMP. This range runs from 'letterlike symbols' through 'miscellaneous
// symbols and arrows', which should cover all of them (false positives are
// expected and acceptable).
const SYMBOL_PATTERN = /([\u2100-\u2bff])/;

// Characters that are not emoji but may still appear in an "all-emoji"
// message: the zero-width space and any other whitespace.
const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g;
|
|
|
|
|
|
|
|
/**
 * Matches a single emoji: any RGI_Emoji sequence, optionally followed by the
 * emoji presentation selector (U+FE0F), but never one that is followed by the
 * text presentation selector (U+FE0E). Lone regional indicators
 * (U+1F1E6-U+1F1FF) are counted as emoji too.
 *
 * Strictly speaking this yields false negatives for an emoji followed by
 * U+FE0E when that emoji has no text variant; in practice that is harmless.
 *
 * On platforms without support for the regex `v` flag this falls back to a
 * pattern that never matches anything.
 */
export const EMOJI_REGEX = ((): RegExp => {
    const pattern = "\\p{RGI_Emoji}(?!\\uFE0E)(?:(?<!\\uFE0F)\\uFE0F)?|[\\u{1f1e6}-\\u{1f1ff}]";
    let compiled: RegExp;
    try {
        // Our support policy lets us rely on v mode, but older platforms must
        // not crash outright. Going through the RegExp constructor (instead of
        // a literal) turns what would be a syntax error into a catchable one.
        compiled = new RegExp(pattern, "v");
    } catch (_e) {
        // No v mode available: use a pattern that matches nothing.
        compiled = /(?!)/;
    }
    return compiled;
})();
|
2019-03-06 14:53:24 +00:00
|
|
|
|
2024-07-04 17:48:07 +00:00
|
|
|
/**
 * Matches a string that consists of one or more emoji (as defined by
 * EMOJI_REGEX) and nothing else. Falls back to a never-matching pattern when
 * the combined expression cannot be compiled, mirroring EMOJI_REGEX.
 */
const BIGEMOJI_REGEX = ((): RegExp => {
    let compiled: RegExp;
    try {
        compiled = new RegExp(`^(${EMOJI_REGEX.source})+$`, "iv");
    } catch (_e) {
        // Same fallback as EMOJI_REGEX: match nothing.
        compiled = /(?!)/;
    }
    return compiled;
})();
|
2019-05-19 16:06:21 +00:00
|
|
|
|
2017-09-08 22:05:27 +00:00
|
|
|
/*
 * Cheap pre-check for whether a string could contain emoji.
 *
 * The patterns used here are far simpler than emojibase's, so false positives
 * are expected; the point is to quickly rule out strings that definitely do
 * not need emojification. Returns false for an empty or missing string.
 */
function mightContainEmoji(str?: string): boolean {
    if (!str) return false;
    if (SURROGATE_PAIR_PATTERN.test(str)) return true;
    return SYMBOL_PATTERN.test(str);
}
|
|
|
|
|
2019-05-17 10:52:03 +00:00
|
|
|
/**
 * Looks up the shortcode for an emoji character.
 *
 * @param char - The emoji character
 * @returns The first known shortcode wrapped in colons (such as :thumbup:),
 *     or an empty string when the emoji is unknown or has no shortcodes
 */
export function unicodeToShortcode(char: string): string {
    const codes = getEmojiFromUnicode(char)?.shortcodes;
    if (!codes?.length) return "";
    return `:${codes[0]}:`;
}
|
|
|
|
|
2017-06-23 16:02:54 +00:00
|
|
|
/*
 * Sanitises an untrusted HTML string with the shared sanitizer parameters and
 * wraps the result in a <div dir="auto"> React node.
 */
export function sanitizedHtmlNode(insaneHtml: string): ReactNode {
    const innerHtml = { __html: sanitizeHtml(insaneHtml, sanitizeHtmlParams) };

    return <div dangerouslySetInnerHTML={innerHtml} dir="auto" />;
}
|
|
|
|
|
2021-06-22 16:23:13 +00:00
|
|
|
/**
 * Runs an untrusted HTML string through the sanitizer with every tag,
 * attribute and URL scheme disallowed (disallowed tags are discarded).
 *
 * @param insaneHtml - the untrusted HTML
 * @returns the sanitizer's output for that input
 */
export function getHtmlText(insaneHtml: string): string {
    const allowNothing: IExtendedSanitizeOptions = {
        allowedTags: [],
        allowedAttributes: {},
        selfClosing: [],
        allowedSchemes: [],
        disallowedTagsMode: "discard",
    };
    return sanitizeHtml(insaneHtml, allowNothing);
}
|
|
|
|
|
2018-02-09 12:20:05 +00:00
|
|
|
/**
 * Checks whether a URL from an untrusted source is safe to place in the DOM.
 * The main threat is javascript: URIs.
 *
 * The HTML sanitiser library applies its own equivalent logic, to which we
 * pass the same list of schemes; this helper covers the other places where
 * URLs need sanitising.
 *
 * @param inputUrl - the URL to check
 * @returns true if the URL parses and its scheme is permitted, otherwise false
 */
export function isUrlPermitted(inputUrl: string): boolean {
    try {
        const { protocol } = new URL(inputUrl);
        // The parsed protocol carries a trailing colon; drop it before the lookup.
        const scheme = protocol.slice(0, -1);
        return PERMITTED_URL_SCHEMES.includes(scheme);
    } catch (e) {
        // Unparseable URLs are never permitted.
        return false;
    }
}
|
|
|
|
|
2018-07-18 09:10:42 +00:00
|
|
|
// Sanitizer parameters used when quoting into the composer: identical to the
// shared parameters, except that only the "code" and "*" tag transforms are
// kept, so less rewriting takes place.
const composerSanitizeHtmlParams: IExtendedSanitizeOptions = Object.assign({}, sanitizeHtmlParams, {
    transformTags: {
        "code": transformTags["code"],
        "*": transformTags["*"],
    },
});
|
|
|
|
|
2022-06-07 20:20:32 +00:00
|
|
|
// Room topics get a deliberately small tag allow-list, to avoid turning
// topics into Myspace. Everything else is inherited from the shared parameters.
const topicSanitizeHtmlParams: IExtendedSanitizeOptions = {
    ...sanitizeHtmlParams,
    allowedTags: [
        // custom to matrix for IRC-style font coloring
        "font",
        // for markdown
        "del",
        // remaining inline formatting, links and simple containers
        "s",
        "a",
        "sup",
        "sub",
        "b",
        "i",
        "u",
        "strong",
        "em",
        "strike",
        "br",
        "div",
        "span",
    ],
};
|
|
|
|
|
2020-07-08 07:40:58 +00:00
|
|
|
/**
 * Splits a snippet of text around case-insensitive occurrences of a list of
 * highlight terms and hands every piece to `processSnippet`, which subclasses
 * implement to produce the concrete output type.
 */
abstract class BaseHighlighter<T extends React.ReactNode> {
    public constructor(
        public highlightClass: string,
        public highlightLink?: string,
    ) {}

    /**
     * Apply the highlights to a section of text
     *
     * @param {string} safeSnippet The snippet of text to apply the highlights
     *     to. This input must be sanitised as it will be treated as HTML.
     * @param {string[]} safeHighlights A list of substrings to highlight,
     *     sorted by descending length. Empty entries are ignored.
     *
     * returns a list of results (strings for HtmlHighlighter, react nodes for
     *     TextHighlighter).
     */
    public applyHighlights(safeSnippet: string, safeHighlights: string[]): T[] {
        const safeHighlight = safeHighlights[0];

        if (!safeHighlight) {
            // A missing highlight has nothing to match, and an empty one would
            // match at every offset without ever advancing the scan below
            // (an infinite loop). Skip it and carry on with any remaining terms.
            if (!safeSnippet) return [];
            return this.applySubHighlights(safeSnippet, safeHighlights);
        }

        // Matching is case-insensitive; lower-case both sides once up front
        // rather than on every iteration.
        const haystack = safeSnippet.toLowerCase();
        const needle = safeHighlight.toLowerCase();

        let lastOffset = 0;
        let offset: number;
        let nodes: T[] = [];

        while ((offset = haystack.indexOf(needle, lastOffset)) >= 0) {
            // handle preamble
            if (offset > lastOffset) {
                const subSnippet = safeSnippet.substring(lastOffset, offset);
                nodes = nodes.concat(this.applySubHighlights(subSnippet, safeHighlights));
            }

            // do highlight. use the original string rather than safeHighlight
            // to preserve the original casing.
            const endOffset = offset + safeHighlight.length;
            nodes.push(this.processSnippet(safeSnippet.substring(offset, endOffset), true));

            lastOffset = endOffset;
        }

        // handle postamble
        if (lastOffset !== safeSnippet.length) {
            const subSnippet = safeSnippet.substring(lastOffset, undefined);
            nodes = nodes.concat(this.applySubHighlights(subSnippet, safeHighlights));
        }
        return nodes;
    }

    /**
     * Processes a stretch of text in which the first highlight did not match:
     * tries the remaining highlights on it, or emits it unhighlighted when
     * there are none left.
     */
    private applySubHighlights(safeSnippet: string, safeHighlights: string[]): T[] {
        if (safeHighlights.length > 1) {
            // recurse into this range to check for the next set of highlight matches
            return this.applyHighlights(safeSnippet, safeHighlights.slice(1));
        } else {
            // no more highlights to be found, just return the unhighlighted string
            return [this.processSnippet(safeSnippet, false)];
        }
    }

    /**
     * Converts one piece of the snippet into the output type.
     *
     * @param snippet the piece of text
     * @param highlight true when the piece is a highlight match
     */
    protected abstract processSnippet(snippet: string, highlight: boolean): T;
}
|
|
|
|
|
2020-07-08 07:40:58 +00:00
|
|
|
/**
 * Highlighter whose output pieces are HTML strings.
 */
class HtmlHighlighter extends BaseHighlighter<string> {
    /* Turn one piece of the snippet into an HTML string.
     *
     * snippet: content of the span; must have been sanitised
     * highlight: true to highlight as a search match
     *
     * returns the snippet untouched when it is not a match; otherwise the
     * snippet wrapped in a <span> carrying the highlight class, which is in
     * turn wrapped in an <a> when a highlight link is configured.
     */
    protected processSnippet(snippet: string, highlight: boolean): string {
        if (highlight) {
            const highlighted = `<span class="${this.highlightClass}">${snippet}</span>`;

            return this.highlightLink
                ? `<a href="${encodeURI(this.highlightLink)}">${highlighted}</a>`
                : highlighted;
        }

        // not a match: nothing to decorate
        return snippet;
    }
}
|
|
|
|
|
2020-07-08 07:40:58 +00:00
|
|
|
/** Options accepted by `bodyToHtml`. */
interface IOpts {
    /** Optional href to add to highlighted words. */
    highlightLink?: string;
    /** Disables the big emoji class. */
    disableBigEmoji?: boolean;
    /** The event is a reply, so the reply fallback needs removing. */
    stripReplyFallback?: boolean;
    /** Return an HTML string rather than JSX elements. */
    returnString?: boolean;
    /** Lessens the URL rewriting done by sanitization, for quoting into the composer. */
    forComposerQuote?: boolean;
    /** React ref to attach to the returned element (not compatible with `returnString`). */
    ref?: React.Ref<HTMLSpanElement>;
}

/** Options for which `bodyToHtml` returns a React node. */
export interface IOptsReturnNode extends IOpts {
    returnString?: false | undefined;
}

/** Options for which `bodyToHtml` returns an HTML string. */
export interface IOptsReturnString extends IOpts {
    returnString: true;
}
|
|
|
|
|
2023-01-12 13:25:14 +00:00
|
|
|
/** Wraps an emoji in an HTML-string <span> whose title is the emoji's shortcode. */
const emojiToHtmlSpan = (emoji: string): string => {
    const shortcode = unicodeToShortcode(emoji);
    return `<span class='mx_Emoji' title='${shortcode}'>${emoji}</span>`;
};
/** Wraps an emoji in a keyed React <span> whose title is the emoji's shortcode. */
const emojiToJsxSpan = (emoji: string, key: number): JSX.Element => {
    const shortcode = unicodeToShortcode(emoji);
    return (
        <span key={key} className="mx_Emoji" title={shortcode}>
            {emoji}
        </span>
    );
};
|
2022-01-21 10:10:57 +00:00
|
|
|
|
2022-01-20 23:33:41 +00:00
|
|
|
/**
 * Wraps emojis in <span> so they can be styled separately from the rest of the message. The message is walked one
 * grapheme at a time; every grapheme that matches EMOJI_REGEX gets its own <span>, and the text in between is
 * passed through as plain strings.
 * @param {string} message the text to format
 * @param {boolean} isHtmlMessage whether the message contains HTML
 * @returns if isHtmlMessage is true, returns an array of strings, otherwise returns an array of React Elements for
 *     emojis and plain text for everything else. An empty or missing message yields an empty array.
 */
export function formatEmojis(message: string | undefined, isHtmlMessage?: false): JSX.Element[];
export function formatEmojis(message: string | undefined, isHtmlMessage: true): string[];
export function formatEmojis(message: string | undefined, isHtmlMessage?: boolean): (JSX.Element | string)[] {
    const parts: (JSX.Element | string)[] = [];
    if (!message) return parts;

    const wrapEmoji = isHtmlMessage ? emojiToHtmlSpan : emojiToJsxSpan;
    let pendingText = "";
    let emojiCount = 0;

    // Emits the text accumulated since the last emoji, if there is any.
    const flushText = (): void => {
        if (pendingText) {
            parts.push(pendingText);
            pendingText = "";
        }
    };

    for (const { segment } of graphemeSegmenter.segment(message)) {
        if (!EMOJI_REGEX.test(segment)) {
            pendingText += segment;
            continue;
        }
        flushText();
        // The running emoji count doubles as the React key.
        parts.push(wrapEmoji(segment, emojiCount));
        emojiCount++;
    }
    flushText();

    return parts;
}
|
|
|
|
|
2018-05-21 02:48:59 +00:00
|
|
|
/* turn a matrix event body into html
 *
 * content: 'content' of the MatrixEvent. This is untrusted input: `body` and `formatted_body` are only used as
 *     text when they are actually strings.
 *
 * highlights: optional list of words to highlight, ordered by longest word first. Entries containing `<`, and
 *     entries which are empty after sanitisation, are ignored.
 *
 * opts.highlightLink: optional href to add to highlighted words
 * opts.disableBigEmoji: optional argument to disable the big emoji class.
 * opts.stripReplyFallback: optional argument specifying the event is a reply and so fallback needs removing
 * opts.returnString: return an HTML string rather than JSX elements
 * opts.forComposerQuote: optional param to lessen the url rewriting done by sanitization, for quoting into composer
 * opts.ref: React ref to attach to any React components returned (not compatible with opts.returnString)
 *
 * returns an HTML string when opts.returnString is set, otherwise a <span> React node.
 */
export function bodyToHtml(content: IContent, highlights: Optional<string[]>, opts: IOptsReturnString): string;
export function bodyToHtml(content: IContent, highlights: Optional<string[]>, opts: IOptsReturnNode): ReactNode;
export function bodyToHtml(content: IContent, highlights: Optional<string[]>, opts: IOpts = {}): ReactNode | string {
    const isFormattedBody = content.format === "org.matrix.custom.html" && typeof content.formatted_body === "string";
    let bodyHasEmoji = false;
    let isHtmlMessage = false;

    let sanitizeParams = sanitizeHtmlParams;
    if (opts.forComposerQuote) {
        sanitizeParams = composerSanitizeHtmlParams;
    }

    let safeBody: string | undefined; // safe, sanitised HTML, preferred over `strippedBody` which is fully plaintext

    // sanitizeHtml can hang if an unclosed HTML tag is thrown at it
    // A search for `<foo` will make the browser crash an alternative would be to escape HTML special characters
    // but that would bring no additional benefit as the highlighter does not work with those special chars.
    // Highlights that end up empty are dropped too: an empty search term matches everywhere and can never be
    // meaningfully highlighted.
    const safeHighlights = highlights
        ?.filter((highlight: string): boolean => !highlight.includes("<"))
        .map((highlight: string): string => sanitizeHtml(highlight, sanitizeParams))
        .filter((highlight: string): boolean => highlight.length > 0);

    let formattedBody = typeof content.formatted_body === "string" ? content.formatted_body : null;
    const plainBody = typeof content.body === "string" ? content.body : "";

    if (opts.stripReplyFallback && formattedBody) formattedBody = stripHTMLReply(formattedBody);
    const strippedBody = opts.stripReplyFallback ? stripPlainReply(plainBody) : plainBody;
    bodyHasEmoji = mightContainEmoji(isFormattedBody ? formattedBody! : plainBody);

    const highlighter = safeHighlights?.length
        ? new HtmlHighlighter("mx_EventTile_searchHighlight", opts.highlightLink)
        : null;

    if (isFormattedBody) {
        let bodySanitizeParams = sanitizeParams;
        if (highlighter) {
            // We sanitize the HTML whilst also highlighting its text nodes, to avoid accidentally trying
            // to highlight HTML tags themselves. However, this does mean that we don't highlight textnodes which
            // are interrupted by HTML tags (not that we did before) - e.g. foo<span/>bar won't get highlighted
            // by an attempt to search for 'foobar'. Then again, the search query probably wouldn't work either.
            // The textFilter goes onto a shallow copy so that the shared, module-level sanitizer parameters are
            // never mutated.
            bodySanitizeParams = {
                ...sanitizeParams,
                textFilter: (safeText: string): string =>
                    highlighter.applyHighlights(safeText, safeHighlights!).join(""),
            };
        }

        safeBody = sanitizeHtml(formattedBody!, bodySanitizeParams);
        const phtml = new DOMParser().parseFromString(safeBody, "text/html");
        const isPlainText = phtml.body.innerHTML === phtml.body.textContent;
        isHtmlMessage = !isPlainText;

        if (isHtmlMessage && SettingsStore.getValue("feature_latex_maths")) {
            [...phtml.querySelectorAll<HTMLElement>("div[data-mx-maths], span[data-mx-maths]")].forEach((e) => {
                e.outerHTML = katex.renderToString(decode(e.getAttribute("data-mx-maths")), {
                    throwOnError: false,
                    displayMode: e.tagName === "DIV",
                    output: "htmlAndMathml",
                });
            });
            safeBody = phtml.body.innerHTML;
        }
    } else if (highlighter) {
        safeBody = highlighter.applyHighlights(escapeHtml(plainBody), safeHighlights!).join("");
    }

    let emojiBody = false;
    if (!opts.disableBigEmoji && bodyHasEmoji) {
        const contentBody = safeBody ?? strippedBody;

        // Remove zero width spaces and other whitespace from the body text. This ensures that emojis with
        // spaces in between are still counted as purely emoji messages.
        const contentBodyTrimmed = contentBody.trim().replace(EMOJI_SEPARATOR_REGEX, "");

        const match = BIGEMOJI_REGEX.exec(contentBodyTrimmed);
        emojiBody =
            match?.[0]?.length === contentBodyTrimmed.length &&
            // Prevent user pills expanding for users with only emoji in
            // their username. Permalinks (links in pills) can be any URL
            // now, so we just check for an HTTP-looking thing.
            (strippedBody === safeBody || // replies have the html fallbacks, account for that here
                // formatted_body comes from an untrusted event and may be absent or not a string at all; in
                // either case there is no HTML that could contain a link.
                typeof content.formatted_body !== "string" ||
                (!content.formatted_body.includes("http:") && !content.formatted_body.includes("https:")));
    }

    if (isFormattedBody && bodyHasEmoji && safeBody) {
        // This has to be done after the emojiBody check above as to not break big emoji on replies
        safeBody = formatEmojis(safeBody, true).join("");
    }

    if (opts.returnString) {
        return safeBody ?? strippedBody;
    }

    const className = classNames({
        "mx_EventTile_body": true,
        "mx_EventTile_bigEmoji": emojiBody,
        "markdown-body": isHtmlMessage && !emojiBody,
    });

    let emojiBodyElements: JSX.Element[] | undefined;
    if (!safeBody && bodyHasEmoji) {
        emojiBodyElements = formatEmojis(strippedBody, false) as JSX.Element[];
    }

    return safeBody ? (
        <span
            key="body"
            ref={opts.ref}
            className={className}
            dangerouslySetInnerHTML={{ __html: safeBody }}
            dir="auto"
        />
    ) : (
        <span key="body" ref={opts.ref} className={className} dir="auto">
            {emojiBodyElements || strippedBody}
        </span>
    );
}
|
2015-11-27 15:02:32 +00:00
|
|
|
|
2022-06-07 20:20:32 +00:00
|
|
|
/**
 * Render a room topic as a React node.
 *
 * The HTML topic is only used when the `feature_html_topic` setting is enabled; if sanitising or
 * emoji-formatting it throws, rendering falls back to the plain text topic.
 *
 * @param topic plain text topic
 * @param htmlTopic optional html topic
 * @param ref React ref to attach to any React components returned
 * @param allowExtendedHtml whether to allow extended HTML tags such as headings and lists
 * @return The HTML-ified node, or null when there is nothing to render.
 */
export function topicToHtml(
    topic?: string,
    htmlTopic?: string,
    ref?: LegacyRef<HTMLSpanElement>,
    allowExtendedHtml = false,
): ReactNode {
    const htmlTopicsEnabled = SettingsStore.getValue("feature_html_topic");
    if (!htmlTopicsEnabled) {
        htmlTopic = undefined;
    }

    let isFormattedTopic = !!htmlTopic;
    let topicHasEmoji = false;
    let safeTopic = "";

    try {
        topicHasEmoji = mightContainEmoji(isFormattedTopic ? htmlTopic! : topic);

        if (isFormattedTopic) {
            const params = allowExtendedHtml ? sanitizeHtmlParams : topicSanitizeHtmlParams;
            safeTopic = sanitizeHtml(htmlTopic!, params);
            if (topicHasEmoji) safeTopic = formatEmojis(safeTopic, true).join("");
        }
    } catch {
        isFormattedTopic = false; // Fall back to plain-text topic
    }

    if (isFormattedTopic) {
        return safeTopic ? <span ref={ref} dangerouslySetInnerHTML={{ __html: safeTopic }} dir="auto" /> : null;
    }

    // Plain-text path: emojis (if any) become React elements, the rest stays text.
    const emojiBodyElements: JSX.Element[] | undefined = topicHasEmoji ? formatEmojis(topic, false) : undefined;

    if (!emojiBodyElements && !topic) return null;
    return (
        <span ref={ref} dir="auto">
            {emojiBodyElements || topic}
        </span>
    );
}
|
|
|
|
|
2019-07-23 07:12:24 +00:00
|
|
|
// Node names (other than DIV, which needs an attribute check) that count as block elements.
const BLOCK_NODE_NAMES: ReadonlySet<string> = new Set([
    "H1",
    "H2",
    "H3",
    "H4",
    "H5",
    "H6",
    "PRE",
    "BLOCKQUOTE",
    "P",
    "UL",
    "OL",
    "LI",
    "HR",
    "TABLE",
    "THEAD",
    "TBODY",
    "TR",
    "TH",
    "TD",
]);

/**
 * Tells whether a node is a block element.
 * Only html nodes that are allowed in matrix messages are taken into account.
 *
 * @param {Node} node the node to inspect
 * @returns {boolean} true for block-level nodes; a DIV counts as a block unless it carries a
 *     `data-mx-maths` attribute
 */
export function checkBlockNode(node: Node): boolean {
    const { nodeName } = node;
    if (nodeName === "DIV") {
        // don't treat math nodes as block nodes for deserializing
        return !(node as HTMLElement).hasAttribute("data-mx-maths");
    }
    return BLOCK_NODE_NAMES.has(nodeName);
}
|