TextEncoder polyfill
Apparently Safari doesn't sport a TextEncoder, so here's a polyfill for it.
This commit is contained in:
parent
07b3c58c61
commit
1d5d44d63d
4 changed files with 333 additions and 0 deletions
131
src/utils/TextDecoderPolyfill.js
Normal file
131
src/utils/TextDecoderPolyfill.js
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
/*
|
||||||
|
Copyright 2017 Vector Creations Ltd
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
// Polyfill for TextDecoder.
|
||||||
|
|
||||||
|
// U+FFFD REPLACEMENT CHARACTER: emitted once for every byte of a malformed
// sequence.
const REPLACEMENT_CHAR = '\uFFFD';

export default class TextDecoder {
    /**
     * Decode a UTF-8 byte array as a javascript string.
     *
     * Malformed input never throws. Each byte that belongs to an invalid,
     * over-long, out-of-range, surrogate or truncated sequence is replaced
     * with U+FFFD. When a byte that should be a continuation byte is not one,
     * only the bytes before it are replaced; the offending byte is decoded
     * again as the start of a new sequence so that no valid data is lost.
     *
     * A byte-order mark at the very start of the input is dropped.
     *
     * @param {Uint8Array} u8Array UTF-8-encoded input
     * @return {string} the decoded string
     */
    decode(u8Array) {
        let str = '';
        let idx = 0;

        while (idx < u8Array.length) {
            const lead = u8Array[idx++];

            // 0xxxxxxx: plain ASCII
            if (!(lead & 0x80)) {
                str += String.fromCharCode(lead);
                continue;
            }

            if ((lead & 0xC0) !== 0xC0) {
                // continuation byte where we expect a leading byte
                str += REPLACEMENT_CHAR;
                continue;
            }

            if (lead > 0xF4) {
                // this would imply a code point beyond U+10FFFF or a 5-byte
                // or longer encoding, which is invalid and unsupported here.
                str += REPLACEMENT_CHAR;
                continue;
            }

            const b1 = u8Array[idx++];
            if (b1 === undefined) {
                // input ended in the middle of a sequence
                str += REPLACEMENT_CHAR;
                continue;
            }
            if ((b1 & 0xC0) !== 0x80) {
                // not a continuation byte: replace the lead byte only and
                // rewind so that this byte is decoded in its own right.
                idx--;
                str += REPLACEMENT_CHAR;
                continue;
            }
            const c1 = b1 & 0x3F;

            // 110xxxxx 10xxxxxx: two-byte sequence
            if (!(lead & 0x20)) {
                const u = ((lead & 0x1F) << 6) | c1;
                if (u < 0x80) {
                    // over-long
                    str += REPLACEMENT_CHAR.repeat(2);
                } else {
                    str += String.fromCharCode(u);
                }
                continue;
            }

            const b2 = u8Array[idx++];
            if (b2 === undefined) {
                str += REPLACEMENT_CHAR.repeat(2);
                continue;
            }
            if ((b2 & 0xC0) !== 0x80) {
                // not a continuation byte: replace what we consumed so far
                // and rewind so that this byte is decoded in its own right.
                idx--;
                str += REPLACEMENT_CHAR.repeat(2);
                continue;
            }
            const c2 = b2 & 0x3F;

            // 1110xxxx 10xxxxxx 10xxxxxx: three-byte sequence
            if (!(lead & 0x10)) {
                const u = ((lead & 0x0F) << 12) | (c1 << 6) | c2;
                if (u < 0x800) {
                    // over-long
                    str += REPLACEMENT_CHAR.repeat(3);
                } else if (u >= 0xD800 && u <= 0xDFFF) {
                    // UTF-16 surrogates are not valid in UTF-8
                    str += REPLACEMENT_CHAR.repeat(3);
                } else if (u === 0xFEFF && idx === 3) {
                    // byte-order mark: do not add to output
                } else {
                    str += String.fromCharCode(u);
                }
                continue;
            }

            const b3 = u8Array[idx++];
            if (b3 === undefined) {
                str += REPLACEMENT_CHAR.repeat(3);
                continue;
            }
            if ((b3 & 0xC0) !== 0x80) {
                // not a continuation byte: replace what we consumed so far
                // and rewind so that this byte is decoded in its own right.
                idx--;
                str += REPLACEMENT_CHAR.repeat(3);
                continue;
            }
            const c3 = b3 & 0x3F;

            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: four-byte sequence
            const u = ((lead & 7) << 18) | (c1 << 12) | (c2 << 6) | c3;
            if (u < 0x10000) {
                // over-long
                str += REPLACEMENT_CHAR.repeat(4);
                continue;
            }
            if (u > 0x10FFFF) {
                // unicode stops here.
                str += REPLACEMENT_CHAR.repeat(4);
                continue;
            }

            // encode as a utf-16 surrogate pair
            const v = u - 0x10000;
            str += String.fromCharCode(0xD800 | (v >> 10), 0xDC00 | (v & 0x3FF));
        }
        return str;
    }
}
|
78
src/utils/TextEncoderPolyfill.js
Normal file
78
src/utils/TextEncoderPolyfill.js
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
/*
|
||||||
|
Copyright 2017 Vector Creations Ltd
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
// Polyfill for TextEncoder. Based on emscripten's stringToUTF8Array.
|
||||||
|
|
||||||
|
/**
 * Read the Unicode scalar value that starts at UTF-16 code unit `i` of `str`.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * a single supplementary code point (> 0xFFFF); the caller must then skip the
 * second code unit. Any unpaired surrogate is mapped to U+FFFD so that the
 * encoder never emits invalid UTF-8.
 *
 * @param {String} str String being encoded
 * @param {Number} i Index of the code unit to read
 * @return {Number} The scalar value starting at `i`
 */
function scalarValueAt(str, i) {
    const unit = str.charCodeAt(i);
    if (unit < 0xD800 || unit > 0xDFFF) {
        return unit;
    }
    // only a high surrogate (D800..DBFF) can start a pair
    if (unit <= 0xDBFF && i + 1 < str.length) {
        const next = str.charCodeAt(i + 1);
        if (next >= 0xDC00 && next <= 0xDFFF) {
            return 0x10000 + ((unit & 0x3FF) << 10) + (next & 0x3FF);
        }
    }
    // lone surrogate
    return 0xFFFD;
}

/**
 * Count the bytes needed to encode a javascript string as utf-8.
 *
 * @param {String} str String to measure
 * @return {Number} Length in bytes of the UTF-8 encoding of `str`
 */
function utf8len(str) {
    let len = 0;
    for (let i = 0; i < str.length; ++i) {
        const u = scalarValueAt(str, i);
        if (u <= 0x7F) {
            ++len;
        } else if (u <= 0x7FF) {
            len += 2;
        } else if (u <= 0xFFFF) {
            len += 3;
        } else {
            len += 4;
            // a surrogate pair occupies two code units
            ++i;
        }
    }
    return len;
}

export default class TextEncoder {
    /**
     * Encode a javascript string as utf-8.
     *
     * Unpaired surrogates in the input are encoded as U+FFFD (EF BF BD).
     *
     * @param {String} str String to encode
     * @return {Uint8Array} UTF-8-encoded output
     */
    encode(str) {
        const outU8Array = new Uint8Array(utf8len(str));
        let outIdx = 0;
        for (let i = 0; i < str.length; ++i) {
            const u = scalarValueAt(str, i);
            if (u <= 0x7F) {
                outU8Array[outIdx++] = u;
            } else if (u <= 0x7FF) {
                outU8Array[outIdx++] = 0xC0 | (u >> 6);
                outU8Array[outIdx++] = 0x80 | (u & 63);
            } else if (u <= 0xFFFF) {
                outU8Array[outIdx++] = 0xE0 | (u >> 12);
                outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
                outU8Array[outIdx++] = 0x80 | (u & 63);
            } else {
                outU8Array[outIdx++] = 0xF0 | (u >> 18);
                outU8Array[outIdx++] = 0x80 | ((u >> 12) & 63);
                outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
                outU8Array[outIdx++] = 0x80 | (u & 63);
                // a surrogate pair occupies two code units
                ++i;
            }
        }
        return outU8Array;
    }
}
|
85
test/utils/TextDecoderPolyfill-test.js
Normal file
85
test/utils/TextDecoderPolyfill-test.js
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
/*
|
||||||
|
Copyright 2017 Vector Creations Ltd
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
import TextDecoderPolyfill from 'utils/TextDecoderPolyfill';
|
||||||
|
|
||||||
|
import * as testUtils from '../test-utils';
|
||||||
|
import expect from 'expect';
|
||||||
|
|
||||||
|
// Unit tests for the TextDecoder polyfill: well-formed input, byte-order
// marks, and the replacement of malformed sequences with U+FFFD.
describe('textDecoderPolyfill', function() {
    beforeEach(function() {
        testUtils.beforeEach(this);
    });

    it('should correctly decode a range of strings', function() {
        const decoder = new TextDecoderPolyfill();

        expect(decoder.decode(Uint8Array.of(65, 66, 67))).toEqual('ABC');
        expect(decoder.decode(Uint8Array.of(0xC3, 0xA6))).toEqual('æ');
        expect(decoder.decode(Uint8Array.of(0xE2, 0x82, 0xAC))).toEqual('€');

        // PILE OF POO (U+1F4A9), which needs a surrogate pair in the output
        expect(decoder.decode(Uint8Array.of(0xF0, 0x9F, 0x92, 0xA9))).toEqual('\uD83D\uDCA9');
    });

    it('should ignore byte-order marks', function() {
        const decoder = new TextDecoderPolyfill();
        expect(decoder.decode(Uint8Array.of(0xEF, 0xBB, 0xBF, 65)))
            .toEqual('A');
    });

    it('should not ignore byte-order marks in the middle of the array', function() {
        const decoder = new TextDecoderPolyfill();
        expect(decoder.decode(Uint8Array.of(65, 0xEF, 0xBB, 0xBF, 66)))
            .toEqual('A\uFEFFB');
    });

    it('should reject overlong encodings', function() {
        const decoder = new TextDecoderPolyfill();

        // euro, as 4 bytes
        expect(decoder.decode(Uint8Array.of(65, 0xF0, 0x82, 0x82, 0xAC, 67)))
            .toEqual('A\uFFFD\uFFFD\uFFFD\uFFFDC');
    });

    it('should reject 5 and 6-byte encodings', function() {
        const decoder = new TextDecoderPolyfill();

        expect(decoder.decode(Uint8Array.of(65, 0xF8, 0x82, 0x82, 0x82, 0x82, 67)))
            .toEqual('A\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDC');
    });

    it('should reject code points beyond 0x10FFFF', function() {
        const decoder = new TextDecoderPolyfill();

        // U+110000: the first value past the end of the Unicode range
        expect(decoder.decode(Uint8Array.of(0xF4, 0x90, 0x80, 0x80)))
            .toEqual('\uFFFD\uFFFD\uFFFD\uFFFD');

        // U+120000
        expect(decoder.decode(Uint8Array.of(0xF4, 0xA0, 0x80, 0x80)))
            .toEqual('\uFFFD\uFFFD\uFFFD\uFFFD');
    });

    it('should cope with end-of-string', function() {
        const decoder = new TextDecoderPolyfill();

        // each test cuts a multi-byte sequence short by one byte
        expect(decoder.decode(Uint8Array.of(65, 0xC3)))
            .toEqual('A\uFFFD');

        expect(decoder.decode(Uint8Array.of(65, 0xE2, 0x82)))
            .toEqual('A\uFFFD\uFFFD');

        expect(decoder.decode(Uint8Array.of(65, 0xF0, 0x9F, 0x92)))
            .toEqual('A\uFFFD\uFFFD\uFFFD');
    });
});
|
39
test/utils/TextEncoderPolyfill-test.js
Normal file
39
test/utils/TextEncoderPolyfill-test.js
Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
/*
|
||||||
|
Copyright 2017 Vector Creations Ltd
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
import TextEncoderPolyfill from 'utils/TextEncoderPolyfill';
|
||||||
|
|
||||||
|
import * as testUtils from '../test-utils';
|
||||||
|
import expect from 'expect';
|
||||||
|
|
||||||
|
// Unit tests for the TextEncoder polyfill: one sample string for each UTF-8
// sequence length (1 to 4 bytes).
describe('textEncoderPolyfill', function() {
    beforeEach(function() {
        testUtils.beforeEach(this);
    });

    it('should correctly encode a range of strings', function() {
        const encoder = new TextEncoderPolyfill();

        // [input string, expected UTF-8 bytes]
        const samples = [
            ['ABC', [65, 66, 67]],
            ['æ', [0xC3, 0xA6]],
            ['€', [0xE2, 0x82, 0xAC]],
            // PILE OF POO (💩)
            ['\uD83D\uDCA9', [0xF0, 0x9F, 0x92, 0xA9]],
        ];

        samples.forEach(function(sample) {
            const input = sample[0];
            const expectedBytes = Uint8Array.from(sample[1]);
            expect(encoder.encode(input)).toEqual(expectedBytes);
        });
    });
});
|
Loading…
Reference in a new issue