Merge pull request #3745 from matrix-org/t3chguy/get_rid_of_emoji_stripped_data

Get rid of stripped-emoji.json in favour of an in-memory single source of truth
This commit is contained in:
Michael Telatynski 2019-12-19 19:25:50 +00:00 committed by GitHub
commit a410cc004f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 114 additions and 122 deletions

View file

@ -43,7 +43,6 @@
"diff-i18n": "cp src/i18n/strings/en_EN.json src/i18n/strings/en_EN_orig.json && ./scripts/gen-i18n.js && node scripts/compare-file.js src/i18n/strings/en_EN_orig.json src/i18n/strings/en_EN.json",
"build": "yarn reskindex && yarn start:init",
"build:watch": "babel src -w --skip-initial-build -d lib --source-maps --copy-files",
"emoji-data-strip": "node scripts/emoji-data-strip.js",
"start": "yarn start:init && yarn start:all",
"start:all": "concurrently --kill-others-on-fail --prefix \"{time} [{name}]\" -n build,reskindex \"yarn build:watch\" \"yarn reskindex:watch\"",
"start:init": "babel src -d lib --source-maps --copy-files",

View file

@ -1,30 +0,0 @@
#!/usr/bin/env node
// This generates src/stripped-emoji.json as used by the EmojiProvider autocomplete
// provider.
const EMOJIBASE = require('emojibase-data/en/compact.json');
const fs = require('fs');
const output = EMOJIBASE.map(
(datum) => {
const newDatum = {
name: datum.annotation,
shortname: `:${datum.shortcodes[0]}:`,
category: datum.group,
emoji_order: datum.order,
};
if (datum.shortcodes.length > 1) {
newDatum.aliases = datum.shortcodes.slice(1).map(s => `:${s}:`);
}
if (datum.emoticon) {
newDatum.aliases_ascii = [ datum.emoticon ];
}
return newDatum;
}
);
// Write to a file in src. Changes should be checked into git. This file is copied by
// babel using --copy-files
fs.writeFileSync('./src/stripped-emoji.json', JSON.stringify(output));

View file

@ -32,9 +32,9 @@ import classNames from 'classnames';
import MatrixClientPeg from './MatrixClientPeg';
import url from 'url';
import EMOJIBASE from 'emojibase-data/en/compact.json';
import EMOJIBASE_REGEX from 'emojibase-regex';
import {tryTransformPermalinkToLocalHref} from "./utils/permalinks/Permalinks";
import {SHORTCODE_TO_EMOJI, getEmojiFromUnicode} from "./emoji";
linkifyMatrix(linkify);
@ -58,8 +58,6 @@ const COLOR_REGEX = /^#[0-9a-fA-F]{6}$/;
const PERMITTED_URL_SCHEMES = ['http', 'https', 'ftp', 'mailto', 'magnet'];
const VARIATION_SELECTOR = String.fromCharCode(0xFE0F);
/*
* Return true if the given string contains emoji
* Uses a much, much simpler regex than emojibase's so will give false
@ -71,21 +69,6 @@ function mightContainEmoji(str) {
return SURROGATE_PAIR_PATTERN.test(str) || SYMBOL_PATTERN.test(str);
}
/**
* Find emoji data in emojibase by character.
*
* @param {String} char The emoji character
* @return {Object} The emoji data
*/
export function findEmojiData(char) {
// Check against both the char and the char with an empty variation selector
// appended because that's how emojibase stores its base emojis which have
// variations.
// See also https://github.com/vector-im/riot-web/issues/9785.
const emptyVariation = char + VARIATION_SELECTOR;
return EMOJIBASE.find(e => e.unicode === char || e.unicode === emptyVariation);
}
/**
* Returns the shortcode for an emoji character.
*
@ -93,7 +76,7 @@ export function findEmojiData(char) {
* @return {String} The shortcode (such as :thumbup:)
*/
export function unicodeToShortcode(char) {
const data = findEmojiData(char);
const data = getEmojiFromUnicode(char);
return (data && data.shortcodes ? `:${data.shortcodes[0]}:` : '');
}
@ -105,7 +88,7 @@ export function unicodeToShortcode(char) {
*/
export function shortcodeToUnicode(shortcode) {
shortcode = shortcode.slice(1, shortcode.length - 1);
const data = EMOJIBASE.find(e => e.shortcodes && e.shortcodes.includes(shortcode));
const data = SHORTCODE_TO_EMOJI.get(shortcode);
return data ? data.unicode : null;
}

View file

@ -2,6 +2,7 @@
Copyright 2016 Aviral Dasgupta
Copyright 2017 Vector Creations Ltd
Copyright 2017, 2018 New Vector Ltd
Copyright 2019 The Matrix.org Foundation C.I.C.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@ -28,7 +29,7 @@ import SettingsStore from "../settings/SettingsStore";
import { shortcodeToUnicode } from '../HtmlUtils';
import EMOTICON_REGEX from 'emojibase-regex/emoticon';
import EmojiData from '../stripped-emoji.json';
import EMOJIBASE from 'emojibase-data/en/compact.json';
const LIMIT = 20;
@ -38,19 +39,15 @@ const EMOJI_REGEX = new RegExp('(' + EMOTICON_REGEX.source + '|:[+-\\w]*:?)$', '
// XXX: it's very unclear why we bother with this generated emojidata file.
// all it means is that we end up bloating the bundle with precomputed stuff
// which would be trivial to calculate and cache on demand.
const EMOJI_SHORTNAMES = Object.keys(EmojiData).map((key) => EmojiData[key]).sort(
(a, b) => {
if (a.category === b.category) {
return a.emoji_order - b.emoji_order;
}
return a.category - b.category;
},
).map((a, index) => {
const EMOJI_SHORTNAMES = EMOJIBASE.sort((a, b) => {
if (a.group === b.group) {
return a.order - b.order;
}
return a.group - b.group;
}).map((emoji, index) => {
return {
name: a.name,
shortname: a.shortname,
aliases: a.aliases ? a.aliases.join(' ') : '',
aliases_ascii: a.aliases_ascii ? a.aliases_ascii.join(' ') : '',
emoji,
shortname: `:${emoji.shortcodes[0]}:`,
// Include the index so that we can preserve the original order
_orderBy: index,
};
@ -69,12 +66,15 @@ export default class EmojiProvider extends AutocompleteProvider {
constructor() {
super(EMOJI_REGEX);
this.matcher = new QueryMatcher(EMOJI_SHORTNAMES, {
keys: ['aliases_ascii', 'shortname', 'aliases'],
keys: ['emoji.emoticon', 'shortname'],
funcs: [
(o) => o.emoji.shortcodes.length > 1 ? o.emoji.shortcodes.slice(1).map(s => `:${s}:`).join(" ") : "", // aliases
],
// For matching against ascii equivalents
shouldMatchWordsOnly: false,
});
this.nameMatcher = new QueryMatcher(EMOJI_SHORTNAMES, {
keys: ['name'],
keys: ['emoji.annotation'],
// For removing punctuation
shouldMatchWordsOnly: true,
});
@ -96,7 +96,7 @@ export default class EmojiProvider extends AutocompleteProvider {
const sorters = [];
// make sure that emoticons come first
sorters.push((c) => score(matchedString, c.aliases_ascii));
sorters.push((c) => score(matchedString, c.emoji.emoticon || ""));
// then sort by score (Infinity if matchedString not in shortname)
sorters.push((c) => score(matchedString, c.shortname));
@ -110,8 +110,7 @@ export default class EmojiProvider extends AutocompleteProvider {
sorters.push((c) => c._orderBy);
completions = _sortBy(_uniq(completions), sorters);
completions = completions.map((result) => {
const { shortname } = result;
completions = completions.map(({shortname}) => {
const unicode = shortcodeToUnicode(shortname);
return {
completion: unicode,

View file

@ -71,6 +71,7 @@ export default class QueryMatcher {
}
for (const keyValue of keyValues) {
if (!keyValue) continue; // skip falsy keyValues
const key = stripDiacritics(keyValue).toLowerCase();
if (!this._items.has(key)) {
this._items.set(key, []);

View file

@ -16,54 +16,12 @@ limitations under the License.
import React from 'react';
import PropTypes from 'prop-types';
import EMOJIBASE from 'emojibase-data/en/compact.json';
import sdk from '../../../index';
import { _t } from '../../../languageHandler';
import * as recent from './recent';
const EMOJIBASE_CATEGORY_IDS = [
"people", // smileys
"people", // actually people
"control", // modifiers and such, not displayed in picker
"nature",
"foods",
"places",
"activity",
"objects",
"symbols",
"flags",
];
const DATA_BY_CATEGORY = {
"people": [],
"nature": [],
"foods": [],
"places": [],
"activity": [],
"objects": [],
"symbols": [],
"flags": [],
};
const DATA_BY_EMOJI = {};
const VARIATION_SELECTOR = String.fromCharCode(0xFE0F);
EMOJIBASE.forEach(emoji => {
if (emoji.unicode.includes(VARIATION_SELECTOR)) {
// Clone data into variation-less version
emoji = Object.assign({}, emoji, {
unicode: emoji.unicode.replace(VARIATION_SELECTOR, ""),
});
}
DATA_BY_EMOJI[emoji.unicode] = emoji;
const categoryId = EMOJIBASE_CATEGORY_IDS[emoji.group];
if (DATA_BY_CATEGORY.hasOwnProperty(categoryId)) {
DATA_BY_CATEGORY[categoryId].push(emoji);
}
// This is used as the string to match the query against when filtering emojis.
emoji.filterString = `${emoji.annotation}\n${emoji.shortcodes.join('\n')}}\n${emoji.emoticon || ''}`.toLowerCase();
});
import {DATA_BY_CATEGORY, getEmojiFromUnicode} from "../../../emoji";
export const CATEGORY_HEADER_HEIGHT = 22;
export const EMOJI_HEIGHT = 37;
@ -91,7 +49,7 @@ class EmojiPicker extends React.Component {
// Convert recent emoji characters to emoji data, removing unknowns.
this.recentlyUsed = recent.get()
.map(unicode => DATA_BY_EMOJI[unicode])
.map(unicode => getEmojiFromUnicode(unicode))
.filter(data => !!data);
this.memoizedDataByCategory = {
recent: this.recentlyUsed,

View file

@ -19,15 +19,15 @@ import PropTypes from 'prop-types';
import sdk from '../../../index';
import { _t } from '../../../languageHandler';
import { findEmojiData } from '../../../HtmlUtils';
import {getEmojiFromUnicode} from "../../../emoji";
// We use the variation-selector Heart in Quick Reactions for some reason
const QUICK_REACTIONS = ["👍", "👎", "😄", "🎉", "😕", "❤️", "🚀", "👀"].map(emoji => {
const data = findEmojiData(emoji);
const data = getEmojiFromUnicode(emoji);
if (!data) {
throw new Error(`Emoji ${emoji} doesn't exist in emojibase`);
}
// Prefer our unicode value for quick reactions (which does not have
// variation selectors).
// Prefer our unicode value for quick reactions as we sometimes use variation selectors.
return Object.assign({}, data, { unicode: emoji });
});

View file

@ -34,11 +34,11 @@ import {parsePlainTextMessage} from '../../../editor/deserialize';
import {renderModel} from '../../../editor/render';
import {Room} from 'matrix-js-sdk';
import TypingStore from "../../../stores/TypingStore";
import EMOJIBASE from 'emojibase-data/en/compact.json';
import SettingsStore from "../../../settings/SettingsStore";
import EMOTICON_REGEX from 'emojibase-regex/emoticon';
import sdk from '../../../index';
import {Key} from "../../../Keyboard";
import {EMOTICON_TO_EMOJI} from "../../../emoji";
const REGEX_EMOTICON_WHITESPACE = new RegExp('(?:^|\\s)(' + EMOTICON_REGEX.source + ')\\s$');
@ -108,7 +108,8 @@ export default class BasicMessageEditor extends React.Component {
const emoticonMatch = REGEX_EMOTICON_WHITESPACE.exec(range.text);
if (emoticonMatch) {
const query = emoticonMatch[1].toLowerCase().replace("-", "");
const data = EMOJIBASE.find(e => e.emoticon ? e.emoticon.toLowerCase() === query : false);
const data = EMOTICON_TO_EMOJI.get(query);
if (data) {
const {partCreator} = model;
const hasPrecedingSpace = emoticonMatch[0][0] === " ";

View file

@ -48,7 +48,6 @@ import Markdown from '../../../Markdown';
import MessageComposerStore from '../../../stores/MessageComposerStore';
import ContentMessages from '../../../ContentMessages';
import EMOJIBASE from 'emojibase-data/en/compact.json';
import EMOTICON_REGEX from 'emojibase-regex/emoticon';
import SettingsStore, {SettingLevel} from "../../../settings/SettingsStore";
@ -61,6 +60,7 @@ import AccessibleButton from '../elements/AccessibleButton';
import {findEditableEvent} from '../../../utils/EventUtils';
import SlateComposerHistoryManager from "../../../SlateComposerHistoryManager";
import TypingStore from "../../../stores/TypingStore";
import {EMOTICON_TO_EMOJI} from "../../../emoji";
const REGEX_EMOTICON_WHITESPACE = new RegExp('(?:^|\\s)(' + EMOTICON_REGEX.source + ')\\s$');
@ -464,7 +464,7 @@ export default class MessageComposerInput extends React.Component {
const emoticonMatch = REGEX_EMOTICON_WHITESPACE.exec(text.slice(0, currentStartOffset));
if (emoticonMatch) {
const query = emoticonMatch[1].toLowerCase().replace("-", "");
const data = EMOJIBASE.find(e => e.emoticon ? e.emoticon.toLowerCase() === query : false);
const data = EMOTICON_TO_EMOJI.get(query);
// only perform replacement if we found a match, otherwise we would be not letting user type
if (data) {

82
src/emoji.js Normal file
View file

@ -0,0 +1,82 @@
/*
Copyright 2019 The Matrix.org Foundation C.I.C.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import EMOJIBASE from 'emojibase-data/en/compact.json';
export const VARIATION_SELECTOR = String.fromCharCode(0xFE0F);
// The unicode is stored without the variant selector
const UNICODE_TO_EMOJI = new Map(); // not exported as gets for it are handled by getEmojiFromUnicode
export const EMOTICON_TO_EMOJI = new Map();
export const SHORTCODE_TO_EMOJI = new Map();
export const getEmojiFromUnicode = unicode => UNICODE_TO_EMOJI.get(unicode.replace(VARIATION_SELECTOR, ""));
const EMOJIBASE_GROUP_ID_TO_CATEGORY = [
"people", // smileys
"people", // actually people
"control", // modifiers and such, not displayed in picker
"nature",
"foods",
"places",
"activity",
"objects",
"symbols",
"flags",
];
export const DATA_BY_CATEGORY = {
"people": [],
"nature": [],
"foods": [],
"places": [],
"activity": [],
"objects": [],
"symbols": [],
"flags": [],
};
// Store various mappings from unicode/emoticon/shortcode to the Emoji objects
EMOJIBASE.forEach(emoji => {
if (emoji.unicode.includes(VARIATION_SELECTOR)) {
// Clone data into variation-less version
emoji = Object.assign({}, emoji, {
unicode: emoji.unicode.replace(VARIATION_SELECTOR, ""),
});
}
const categoryId = EMOJIBASE_GROUP_ID_TO_CATEGORY[emoji.group];
if (DATA_BY_CATEGORY.hasOwnProperty(categoryId)) {
DATA_BY_CATEGORY[categoryId].push(emoji);
}
// This is used as the string to match the query against when filtering emojis
emoji.filterString = `${emoji.annotation}\n${emoji.shortcodes.join('\n')}}\n${emoji.emoticon || ''}`.toLowerCase();
// Add mapping from unicode to Emoji object
UNICODE_TO_EMOJI.set(emoji.unicode, emoji);
if (emoji.emoticon) {
// Add mapping from emoticon to Emoji object
EMOTICON_TO_EMOJI.set(emoji.emoticon, emoji);
}
if (emoji.shortcodes) {
// Add mapping from each shortcode to Emoji object
emoji.shortcodes.forEach(shortcode => {
SHORTCODE_TO_EMOJI.set(shortcode, emoji);
});
}
});

File diff suppressed because one or more lines are too long