use sanitize-html's textFilter callback to only apply highlights to textNodes when highlighting HTML. fixes https://github.com/vector-im/vector-web/issues/294

This commit is contained in:
Matthew Hodgson 2016-02-10 20:25:32 +00:00
parent f016a327b1
commit d055dbe522

View file

@ -17,6 +17,7 @@ limitations under the License.
'use strict'; 'use strict';
var React = require('react'); var React = require('react');
var ReactDOMServer = require('react-dom/server')
var sanitizeHtml = require('sanitize-html'); var sanitizeHtml = require('sanitize-html');
var highlight = require('highlight.js'); var highlight = require('highlight.js');
@ -57,24 +58,17 @@ class Highlighter {
this._key = 0; this._key = 0;
} }
applyHighlights(safeSnippet, highlights) { applyHighlights(safeSnippet, safeHighlights) {
var lastOffset = 0; var lastOffset = 0;
var offset; var offset;
var nodes = []; var nodes = [];
// XXX: when highlighting HTML, synapse performs the search on the plaintext body, var safeHighlight = safeHighlights[0];
// but we're attempting to apply the highlights here to the HTML body. This is
// never going to end well - we really should be hooking into the sanitzer HTML
// parser to only attempt to highlight text nodes to avoid corrupting tags.
// If and when this happens, we'll probably have to split this method in two between
// HTML and plain-text highlighting.
var safeHighlight = this.html ? sanitizeHtml(highlights[0], sanitizeHtmlParams) : highlights[0];
while ((offset = safeSnippet.toLowerCase().indexOf(safeHighlight.toLowerCase(), lastOffset)) >= 0) { while ((offset = safeSnippet.toLowerCase().indexOf(safeHighlight.toLowerCase(), lastOffset)) >= 0) {
// handle preamble // handle preamble
if (offset > lastOffset) { if (offset > lastOffset) {
var subSnippet = safeSnippet.substring(lastOffset, offset); var subSnippet = safeSnippet.substring(lastOffset, offset);
nodes = nodes.concat(this._applySubHighlights(subSnippet, highlights)); nodes = nodes.concat(this._applySubHighlights(subSnippet, safeHighlights));
} }
// do highlight // do highlight
@ -86,15 +80,15 @@ class Highlighter {
// handle postamble // handle postamble
if (lastOffset != safeSnippet.length) { if (lastOffset != safeSnippet.length) {
var subSnippet = safeSnippet.substring(lastOffset, undefined); var subSnippet = safeSnippet.substring(lastOffset, undefined);
nodes = nodes.concat(this._applySubHighlights(subSnippet, highlights)); nodes = nodes.concat(this._applySubHighlights(subSnippet, safeHighlights));
} }
return nodes; return nodes;
} }
_applySubHighlights(safeSnippet, highlights) { _applySubHighlights(safeSnippet, safeHighlights) {
if (highlights[1]) { if (safeHighlights[1]) {
// recurse into this range to check for the next set of highlight matches // recurse into this range to check for the next set of highlight matches
return this.applyHighlights(safeSnippet, highlights.slice(1)); return this.applyHighlights(safeSnippet, safeHighlights.slice(1));
} }
else { else {
// no more highlights to be found, just return the unhighlighted string // no more highlights to be found, just return the unhighlighted string
@ -132,7 +126,7 @@ module.exports = {
* *
* content: 'content' of the MatrixEvent * content: 'content' of the MatrixEvent
* *
* highlights: optional list of words to highlight * highlights: optional list of words to highlight, ordered by longest word first
* *
* opts.onHighlightClick: optional callback function to be called when a * opts.onHighlightClick: optional callback function to be called when a
* highlighted word is clicked * highlighted word is clicked
@ -142,28 +136,38 @@ module.exports = {
var isHtml = (content.format === "org.matrix.custom.html"); var isHtml = (content.format === "org.matrix.custom.html");
var safeBody; var safeBody, body;
if (isHtml) { if (isHtml) {
// XXX: We sanitize the HTML whilst also highlighting its text nodes, to avoid accidentally trying
// to highlight HTML tags themselves. However, this does mean that we don't highlight textnodes which
// are interrupted by HTML tags (not that we did before) - e.g. foo<span/>bar won't get highlighted
// by an attempt to search for 'foobar'. Then again, the search query probably wouldn't work either
if (highlights && highlights.length > 0) {
var highlighter = new Highlighter(isHtml, "mx_EventTile_searchHighlight", opts.onHighlightClick);
// XXX: hacky bodge to temporarily apply a textFilter to the sanitizeHtmlParams structure.
sanitizeHtmlParams.textFilter = function(safeText) {
var html = highlighter.applyHighlights(safeText, highlights).map(function(span) {
// XXX: rather clunky conversion from the react nodes returned by applyHighlights
// (which need to be nodes for the non-html highlighting case), to convert them
// back into raw HTML given that's what sanitize-html works in terms of.
return ReactDOMServer.renderToString(span);
}).join('');
return html;
};
}
safeBody = sanitizeHtml(content.formatted_body, sanitizeHtmlParams); safeBody = sanitizeHtml(content.formatted_body, sanitizeHtmlParams);
delete sanitizeHtmlParams.textFilter;
return <span className="markdown-body" dangerouslySetInnerHTML={{ __html: safeBody }} />;
} else { } else {
safeBody = content.body; safeBody = content.body;
} if (highlights && highlights.length > 0) {
var highlighter = new Highlighter(isHtml, "mx_EventTile_searchHighlight", opts.onHighlightClick);
var body; return highlighter.applyHighlights(safeBody, highlights);
if (highlights && highlights.length > 0) {
var highlighter = new Highlighter(isHtml, "mx_EventTile_searchHighlight", opts.onHighlightClick);
body = highlighter.applyHighlights(safeBody, highlights);
}
else {
if (isHtml) {
body = <span className="markdown-body" dangerouslySetInnerHTML={{ __html: safeBody }} />;
} }
else { else {
body = safeBody; return safeBody;
} }
} }
return body;
}, },
highlightDom: function(element) { highlightDom: function(element) {