Use grapheme-splitter instead of lodash to keep emoji from being ripped apart (#10976)
* Use grapheme-splitter instead of lodash for saving emoji from being ripped apart
* Move to a more appropriate place
* Add tests and improve types
This commit is contained in:
parent
277a3c0146
commit
f4a265b2c7
7 changed files with 55 additions and 22 deletions
|
@ -82,6 +82,7 @@
|
||||||
"focus-visible": "^5.2.0",
|
"focus-visible": "^5.2.0",
|
||||||
"gfm.css": "^1.1.2",
|
"gfm.css": "^1.1.2",
|
||||||
"glob-to-regexp": "^0.4.1",
|
"glob-to-regexp": "^0.4.1",
|
||||||
|
"grapheme-splitter": "^1.0.4",
|
||||||
"highlight.js": "^11.3.1",
|
"highlight.js": "^11.3.1",
|
||||||
"html-entities": "^2.0.0",
|
"html-entities": "^2.0.0",
|
||||||
"is-ip": "^3.1.0",
|
"is-ip": "^3.1.0",
|
||||||
|
|
|
@ -18,11 +18,11 @@ import { RoomMember } from "matrix-js-sdk/src/models/room-member";
|
||||||
import { User } from "matrix-js-sdk/src/models/user";
|
import { User } from "matrix-js-sdk/src/models/user";
|
||||||
import { Room } from "matrix-js-sdk/src/models/room";
|
import { Room } from "matrix-js-sdk/src/models/room";
|
||||||
import { ResizeMethod } from "matrix-js-sdk/src/@types/partials";
|
import { ResizeMethod } from "matrix-js-sdk/src/@types/partials";
|
||||||
import { split } from "lodash";
|
|
||||||
|
|
||||||
import DMRoomMap from "./utils/DMRoomMap";
|
import DMRoomMap from "./utils/DMRoomMap";
|
||||||
import { mediaFromMxc } from "./customisations/Media";
|
import { mediaFromMxc } from "./customisations/Media";
|
||||||
import { isLocalRoom } from "./utils/localRoom/isLocalRoom";
|
import { isLocalRoom } from "./utils/localRoom/isLocalRoom";
|
||||||
|
import { getFirstGrapheme } from "./utils/strings";
|
||||||
|
|
||||||
// Not to be used for BaseAvatar urls as that has similar default avatar fallback already
|
// Not to be used for BaseAvatar urls as that has similar default avatar fallback already
|
||||||
export function avatarUrlForMember(
|
export function avatarUrlForMember(
|
||||||
|
@ -133,8 +133,7 @@ export function getInitialLetter(name: string): string | undefined {
|
||||||
name = name.substring(1);
|
name = name.substring(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// rely on the grapheme cluster splitter in lodash so that we don't break apart compound emojis
|
return getFirstGrapheme(name).toUpperCase();
|
||||||
return split(name, "", 1)[0].toUpperCase();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function avatarUrlForRoom(
|
export function avatarUrlForRoom(
|
||||||
|
|
|
@ -21,13 +21,14 @@ import React, { LegacyRef, ReactElement, ReactNode } from "react";
|
||||||
import sanitizeHtml from "sanitize-html";
|
import sanitizeHtml from "sanitize-html";
|
||||||
import classNames from "classnames";
|
import classNames from "classnames";
|
||||||
import EMOJIBASE_REGEX from "emojibase-regex";
|
import EMOJIBASE_REGEX from "emojibase-regex";
|
||||||
import { merge, split } from "lodash";
|
import { merge } from "lodash";
|
||||||
import katex from "katex";
|
import katex from "katex";
|
||||||
import { decode } from "html-entities";
|
import { decode } from "html-entities";
|
||||||
import { IContent } from "matrix-js-sdk/src/models/event";
|
import { IContent } from "matrix-js-sdk/src/models/event";
|
||||||
import { Optional } from "matrix-events-sdk";
|
import { Optional } from "matrix-events-sdk";
|
||||||
import _Linkify from "linkify-react";
|
import _Linkify from "linkify-react";
|
||||||
import escapeHtml from "escape-html";
|
import escapeHtml from "escape-html";
|
||||||
|
import GraphemeSplitter from "grapheme-splitter";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
_linkifyElement,
|
_linkifyElement,
|
||||||
|
@ -463,14 +464,18 @@ const emojiToJsxSpan = (emoji: string, key: number): JSX.Element => (
|
||||||
* @returns if isHtmlMessage is true, returns an array of strings, otherwise return an array of React Elements for emojis
|
* @returns if isHtmlMessage is true, returns an array of strings, otherwise return an array of React Elements for emojis
|
||||||
* and plain text for everything else
|
* and plain text for everything else
|
||||||
*/
|
*/
|
||||||
function formatEmojis(message: string | undefined, isHtmlMessage: boolean): (JSX.Element | string)[] {
|
export function formatEmojis(message: string | undefined, isHtmlMessage?: false): JSX.Element[];
|
||||||
|
export function formatEmojis(message: string | undefined, isHtmlMessage: true): string[];
|
||||||
|
export function formatEmojis(message: string | undefined, isHtmlMessage: boolean): (JSX.Element | string)[] {
|
||||||
const emojiToSpan = isHtmlMessage ? emojiToHtmlSpan : emojiToJsxSpan;
|
const emojiToSpan = isHtmlMessage ? emojiToHtmlSpan : emojiToJsxSpan;
|
||||||
const result: (JSX.Element | string)[] = [];
|
const result: (JSX.Element | string)[] = [];
|
||||||
|
if (!message) return result;
|
||||||
|
|
||||||
let text = "";
|
let text = "";
|
||||||
let key = 0;
|
let key = 0;
|
||||||
|
|
||||||
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
|
const splitter = new GraphemeSplitter();
|
||||||
for (const char of split(message, "")) {
|
for (const char of splitter.iterateGraphemes(message)) {
|
||||||
if (EMOJIBASE_REGEX.test(char)) {
|
if (EMOJIBASE_REGEX.test(char)) {
|
||||||
if (text) {
|
if (text) {
|
||||||
result.push(text);
|
result.push(text);
|
||||||
|
@ -661,7 +666,7 @@ export function topicToHtml(
|
||||||
isFormattedTopic = false; // Fall back to plain-text topic
|
isFormattedTopic = false; // Fall back to plain-text topic
|
||||||
}
|
}
|
||||||
|
|
||||||
let emojiBodyElements: ReturnType<typeof formatEmojis> | undefined;
|
let emojiBodyElements: JSX.Element[] | undefined;
|
||||||
if (!isFormattedTopic && topicHasEmoji) {
|
if (!isFormattedTopic && topicHasEmoji) {
|
||||||
emojiBodyElements = formatEmojis(topic, false);
|
emojiBodyElements = formatEmojis(topic, false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,11 +15,11 @@ See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { split } from "lodash";
|
|
||||||
import EMOJIBASE_REGEX from "emojibase-regex";
|
import EMOJIBASE_REGEX from "emojibase-regex";
|
||||||
import { MatrixClient } from "matrix-js-sdk/src/client";
|
import { MatrixClient } from "matrix-js-sdk/src/client";
|
||||||
import { RoomMember } from "matrix-js-sdk/src/models/room-member";
|
import { RoomMember } from "matrix-js-sdk/src/models/room-member";
|
||||||
import { Room } from "matrix-js-sdk/src/models/room";
|
import { Room } from "matrix-js-sdk/src/models/room";
|
||||||
|
import GraphemeSplitter from "grapheme-splitter";
|
||||||
|
|
||||||
import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
|
import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
|
||||||
import { unicodeToShortcode } from "../HtmlUtils";
|
import { unicodeToShortcode } from "../HtmlUtils";
|
||||||
|
@ -27,6 +27,7 @@ import * as Avatar from "../Avatar";
|
||||||
import defaultDispatcher from "../dispatcher/dispatcher";
|
import defaultDispatcher from "../dispatcher/dispatcher";
|
||||||
import { Action } from "../dispatcher/actions";
|
import { Action } from "../dispatcher/actions";
|
||||||
import SettingsStore from "../settings/SettingsStore";
|
import SettingsStore from "../settings/SettingsStore";
|
||||||
|
import { getFirstGrapheme } from "../utils/strings";
|
||||||
|
|
||||||
const REGIONAL_EMOJI_SEPARATOR = String.fromCodePoint(0x200b);
|
const REGIONAL_EMOJI_SEPARATOR = String.fromCodePoint(0x200b);
|
||||||
|
|
||||||
|
@ -133,8 +134,7 @@ abstract class BasePart {
|
||||||
// To only need to grapheme split the bits of the string we're working on.
|
// To only need to grapheme split the bits of the string we're working on.
|
||||||
let buffer = str;
|
let buffer = str;
|
||||||
while (buffer) {
|
while (buffer) {
|
||||||
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
|
const char = getFirstGrapheme(buffer);
|
||||||
const [char] = split(buffer, "", 2);
|
|
||||||
if (!this.acceptsInsertion(char, offset + str.length - buffer.length, inputType)) {
|
if (!this.acceptsInsertion(char, offset + str.length - buffer.length, inputType)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -562,8 +562,7 @@ export class PartCreator {
|
||||||
case "\n":
|
case "\n":
|
||||||
return new NewlinePart();
|
return new NewlinePart();
|
||||||
default:
|
default:
|
||||||
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
|
if (EMOJIBASE_REGEX.test(getFirstGrapheme(input))) {
|
||||||
if (EMOJIBASE_REGEX.test(split(input, "", 2)[0])) {
|
|
||||||
return new EmojiPart();
|
return new EmojiPart();
|
||||||
}
|
}
|
||||||
return new PlainPart();
|
return new PlainPart();
|
||||||
|
@ -639,8 +638,8 @@ export class PartCreator {
|
||||||
const parts: (PlainPart | EmojiPart)[] = [];
|
const parts: (PlainPart | EmojiPart)[] = [];
|
||||||
let plainText = "";
|
let plainText = "";
|
||||||
|
|
||||||
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
|
const splitter = new GraphemeSplitter();
|
||||||
for (const char of split(text, "")) {
|
for (const char of splitter.iterateGraphemes(text)) {
|
||||||
if (EMOJIBASE_REGEX.test(char)) {
|
if (EMOJIBASE_REGEX.test(char)) {
|
||||||
if (plainText) {
|
if (plainText) {
|
||||||
parts.push(this.plain(plainText));
|
parts.push(this.plain(plainText));
|
||||||
|
|
|
@ -21,6 +21,7 @@ limitations under the License.
|
||||||
* @param text the plaintext to put in the user's clipboard
|
* @param text the plaintext to put in the user's clipboard
|
||||||
*/
|
*/
|
||||||
import { logger } from "matrix-js-sdk/src/logger";
|
import { logger } from "matrix-js-sdk/src/logger";
|
||||||
|
import GraphemeSplitter from "grapheme-splitter";
|
||||||
|
|
||||||
export async function copyPlaintext(text: string): Promise<boolean> {
|
export async function copyPlaintext(text: string): Promise<boolean> {
|
||||||
try {
|
try {
|
||||||
|
@ -83,3 +84,15 @@ export function copyNode(ref?: Element | null): boolean {
|
||||||
export function getSelectedText(): string {
|
export function getSelectedText(): string {
|
||||||
return window.getSelection()!.toString();
|
return window.getSelection()!.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the first grapheme in the given string,
|
||||||
|
* especially useful for strings containing emoji, will not break compound emoji up.
|
||||||
|
* @param str string to parse
|
||||||
|
* @returns the first grapheme or an empty string if given an empty string
|
||||||
|
*/
|
||||||
|
export function getFirstGrapheme(str: string): string {
|
||||||
|
const splitter = new GraphemeSplitter();
|
||||||
|
const result = splitter.iterateGraphemes(str).next();
|
||||||
|
return result.done ? "" : result.value;
|
||||||
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ import { mocked } from "jest-mock";
|
||||||
import { render, screen } from "@testing-library/react";
|
import { render, screen } from "@testing-library/react";
|
||||||
import { IContent } from "matrix-js-sdk/src/models/event";
|
import { IContent } from "matrix-js-sdk/src/models/event";
|
||||||
|
|
||||||
import { bodyToHtml, topicToHtml } from "../src/HtmlUtils";
|
import { bodyToHtml, formatEmojis, topicToHtml } from "../src/HtmlUtils";
|
||||||
import SettingsStore from "../src/settings/SettingsStore";
|
import SettingsStore from "../src/settings/SettingsStore";
|
||||||
|
|
||||||
jest.mock("../src/settings/SettingsStore");
|
jest.mock("../src/settings/SettingsStore");
|
||||||
|
@ -168,3 +168,19 @@ describe("bodyToHtml", () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("formatEmojis", () => {
|
||||||
|
it.each([
|
||||||
|
["🏴", [["🏴", "flag-england"]]],
|
||||||
|
["🏴", [["🏴", "flag-scotland"]]],
|
||||||
|
["🏴", [["🏴", "flag-wales"]]],
|
||||||
|
])("%s emoji", (emoji, expectations) => {
|
||||||
|
const res = formatEmojis(emoji, false);
|
||||||
|
expect(res).toHaveLength(expectations.length);
|
||||||
|
for (let i = 0; i < res.length; i++) {
|
||||||
|
const [emoji, title] = expectations[i];
|
||||||
|
expect(res[i].props.children).toEqual(emoji);
|
||||||
|
expect(res[i].props.title).toEqual(`:${title}:`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
|
@ -1,5 +1,11 @@
|
||||||
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
// Jest Snapshot v1, https://goo.gl/fbAQLP
|
||||||
|
|
||||||
|
exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;
|
||||||
|
|
||||||
|
exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hello</p><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span></span><p>world</p>"`;
|
||||||
|
|
||||||
|
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;
|
||||||
|
|
||||||
exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `
|
exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `
|
||||||
<DocumentFragment>
|
<DocumentFragment>
|
||||||
<span
|
<span
|
||||||
|
@ -15,9 +21,3 @@ exports[`bodyToHtml should generate big emoji for an emoji-only reply to a messa
|
||||||
</span>
|
</span>
|
||||||
</DocumentFragment>
|
</DocumentFragment>
|
||||||
`;
|
`;
|
||||||
|
|
||||||
exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;
|
|
||||||
|
|
||||||
exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hello</p><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span></span><p>world</p>"`;
|
|
||||||
|
|
||||||
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;
|
|
||||||
|
|
Loading…
Reference in a new issue