// Copyright (C) 2011 Milo Yip // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #ifndef RAPIDJSON_ENCODEDSTREAM_H_ #define RAPIDJSON_ENCODEDSTREAM_H_ #include "rapidjson.h" #ifdef __GNUC__ RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_OFF(effc++) #endif namespace rapidjson { //! Input byte stream wrapper with a statically bound encoding. /*! \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. \tparam InputByteStream Type of input byte stream. For example, FileReadStream. */ template class EncodedInputStream { RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); public: typedef typename Encoding::Ch Ch; EncodedInputStream(InputByteStream& is) : is_(is) { current_ = Encoding::TakeBOM(is_); } Ch Peek() const { return current_; } Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; } size_t Tell() const { return is_.Tell(); } // Not implemented void Put(Ch) { RAPIDJSON_ASSERT(false); } void Flush() { RAPIDJSON_ASSERT(false); } Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } private: EncodedInputStream(const EncodedInputStream&); EncodedInputStream& operator=(const EncodedInputStream&); InputByteStream& is_; Ch current_; }; //! Output byte stream wrapper with statically bound encoding. /*! \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE. \tparam InputByteStream Type of input byte stream. For example, FileWriteStream. */ template class EncodedOutputStream { RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); public: typedef typename Encoding::Ch Ch; EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { if (putBOM) Encoding::PutBOM(os_); } void Put(Ch c) { Encoding::Put(os_, c); } void Flush() { os_.Flush(); } // Not implemented Ch Peek() const { RAPIDJSON_ASSERT(false); } Ch Take() { RAPIDJSON_ASSERT(false); } size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } private: EncodedOutputStream(const EncodedOutputStream&); EncodedOutputStream& operator=(const EncodedOutputStream&); OutputByteStream& os_; }; #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8::x, UTF16LE::x, UTF16BE::x, UTF32LE::x, UTF32BE::x //! Input stream wrapper with dynamically bound encoding and automatic encoding detection. /*! \tparam CharType Type of character for reading. \tparam InputByteStream type of input byte stream to be wrapped. */ template class AutoUTFInputStream { RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); public: typedef CharType Ch; //! Constructor. /*! \param is input stream to be wrapped. \param type UTF encoding type if it is not detected from the stream. */ AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) { DetectType(); static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) }; takeFunc_ = f[type_]; current_ = takeFunc_(*is_); } UTFType GetType() const { return type_; } bool HasBOM() const { return hasBOM_; } Ch Peek() const { return current_; } Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; } size_t Tell() const { return is_->Tell(); } // Not implemented void Put(Ch) { RAPIDJSON_ASSERT(false); } void Flush() { RAPIDJSON_ASSERT(false); } Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } private: AutoUTFInputStream(const AutoUTFInputStream&); AutoUTFInputStream& operator=(const AutoUTFInputStream&); // Detect encoding type with BOM or RFC 4627 void DetectType() { // BOM (Byte Order Mark): // 00 00 FE FF UTF-32BE // FF FE 00 00 UTF-32LE // FE FF UTF-16BE // FF FE UTF-16LE // EF BB BF UTF-8 const unsigned char* c = (const unsigned char *)is_->Peek4(); if (!c) return; unsigned bom = c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24); hasBOM_ = false; if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); } else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); } else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); } else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); } // RFC 4627: Section 3 // "Since the first two characters of a JSON text will always be ASCII // characters [RFC0020], it is possible to determine whether an octet // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking // at the pattern of nulls in the first four octets." // 00 00 00 xx UTF-32BE // 00 xx 00 xx UTF-16BE // xx 00 00 00 UTF-32LE // xx 00 xx 00 UTF-16LE // xx xx xx xx UTF-8 if (!hasBOM_) { unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0); switch (pattern) { case 0x08: type_ = kUTF32BE; break; case 0x0A: type_ = kUTF16BE; break; case 0x01: type_ = kUTF32LE; break; case 0x05: type_ = kUTF16LE; break; case 0x0F: type_ = kUTF8; break; default: break; // Use type defined by user. } } // Runtime check whether the size of character type is sufficient. It only perform checks with assertion. switch (type_) { case kUTF8: // Do nothing break; case kUTF16LE: case kUTF16BE: RAPIDJSON_ASSERT(sizeof(Ch) >= 2); break; case kUTF32LE: case kUTF32BE: RAPIDJSON_ASSERT(sizeof(Ch) >= 4); break; default: RAPIDJSON_ASSERT(false); // Invalid type } } typedef Ch (*TakeFunc)(InputByteStream& is); InputByteStream* is_; UTFType type_; Ch current_; TakeFunc takeFunc_; bool hasBOM_; }; //! Output stream wrapper with dynamically bound encoding and automatic encoding detection. /*! \tparam CharType Type of character for writing. \tparam InputByteStream type of output byte stream to be wrapped. */ template class AutoUTFOutputStream { RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); public: typedef CharType Ch; //! Constructor. /*! \param os output stream to be wrapped. \param type UTF encoding type. \param putBOM Whether to write BOM at the beginning of the stream. */ AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) { // RUntime check whether the size of character type is sufficient. It only perform checks with assertion. switch (type_) { case kUTF16LE: case kUTF16BE: RAPIDJSON_ASSERT(sizeof(Ch) >= 2); break; case kUTF32LE: case kUTF32BE: RAPIDJSON_ASSERT(sizeof(Ch) >= 4); break; case kUTF8: // Do nothing break; default: RAPIDJSON_ASSERT(false); // Invalid UTFType } static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) }; putFunc_ = f[type_]; if (putBOM) PutBOM(); } UTFType GetType() const { return type_; } void Put(Ch c) { putFunc_(*os_, c); } void Flush() { os_->Flush(); } // Not implemented Ch Peek() const { RAPIDJSON_ASSERT(false); } Ch Take() { RAPIDJSON_ASSERT(false); } size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; } Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } private: AutoUTFOutputStream(const AutoUTFOutputStream&); AutoUTFOutputStream& operator=(const AutoUTFOutputStream&); void PutBOM() { typedef void (*PutBOMFunc)(OutputByteStream&); static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) }; f[type_](*os_); } typedef void (*PutFunc)(OutputByteStream&, Ch); OutputByteStream* os_; UTFType type_; PutFunc putFunc_; }; #undef RAPIDJSON_ENCODINGS_FUNC } // namespace rapidjson #ifdef __GNUC__ RAPIDJSON_DIAG_POP #endif #endif // RAPIDJSON_FILESTREAM_H_