Merge pull request #336

ea58576 Add missing file - i18n.cpp (moneromooo-monero)
2015-07-15 01:08:04 +02:00 · 2015-07-15 01:08:04 +02:00 · b4849310dc
commit b4849310dc
parent ad841cb1b9 ea58576cf6
1 changed files with 297 additions and 0 deletions
--- a/src/common/i18n.cpp
+++ b/src/common/i18n.cpp
@ -0,0 +1,297 @@
+// Copyright (c) 2014-2015, The Monero Project
+// 
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without modification, are
+// permitted provided that the following conditions are met:
+// 
+// 1. Redistributions of source code must retain the above copyright notice, this list of
+//    conditions and the following disclaimer.
+// 
+// 2. Redistributions in binary form must reproduce the above copyright notice, this list
+//    of conditions and the following disclaimer in the documentation and/or other
+//    materials provided with the distribution.
+// 
+// 3. Neither the name of the copyright holder nor the names of its contributors may be
+//    used to endorse or promote products derived from this software without specific
+//    prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <string>
+#include <map>
+#include "include_base_utils.h"
+#include "file_io_utils.h"
+#include "common/util.h"
+#include "common/i18n.h"
+
+static const unsigned char qm_magic[16] = {0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95, 0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd};
+
+static std::map<std::string,std::string> i18n_entries;
+
+/* Logging isn't initialized yet when this is run */
+/* add std::flush, because std::endl doesn't seem to flush, contrary to expected */
+// #define i18n_log(x) do { std::cout << __FILE__ << ":" << __LINE__ << ": " << x << std::endl; std::cout << std::flush; } while(0)
+#define i18n_log(x) ((void)0)
+
+static std::string get_language()
+{
+  const char *e;
+
+  e = getenv("LANG");
+  i18n_log("LANG=" << e);
+  if (!e || !*e) {
+    e = getenv("LC_ALL");
+    i18n_log("LC_ALL=" << e);
+  }
+  if (!e || !*e)
+    e = "en";
+
+  std::string language = e;
+  std::transform(language.begin(), language.end(), language.begin(), tolower);
+  return language;
+}
+
+static uint32_t be32(const unsigned char *data)
+{
+  return (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3];
+}
+
+static std::string utf16(const unsigned char *data, uint32_t len)
+{
+  std::string s;
+  while (len >= 2) {
+    uint32_t code = (data[0] << 8) | data[1];
+    data += 2;
+    len -= 2;
+    if (code >= 0xd800 && code <= 0xdbfff && len >= 2) {
+      uint32_t next = (data[0] << 8) | data[1];
+      if (next >= 0xdc00 && next <= 0xdfff) {
+        code = (code << 10) + next - 0x35dfc00;
+        data += 2;
+        len -= 2;
+      }
+    }
+    if (code <= 0x7f) {
+      s += (char)code;
+    }
+    else if (code <= 0x7ff) {
+      s += 0xc0 | (code >> 6);
+      s += 0x80 | (code & 0x3f);
+    }
+    else if (code <= 0xffff) {
+      s += 0xe0 | (code >> 12);
+      s += 0x80 | ((code >> 6) & 0x3f);
+      s += 0x80 | (code & 0x3f);
+    }
+    else {
+      s += 0xf0 | (code >> 18);
+      s += 0x80 | ((code >> 12) & 0x3f);
+      s += 0x80 | ((code >> 6) & 0x3f);
+      s += 0x80 | (code & 0x3f);
+    }
+  }
+  return s;
+}
+
+static std::string utf8(const unsigned char *data, uint32_t len)
+{
+  /* assume well formedness */
+  return std::string((const char *)data,len);
+}
+
+int i18n_set_language(const char *directory, const char *base)
+{
+  std::string language, filename, contents;
+  const unsigned char *data;
+  size_t datalen;
+  size_t idx;
+  unsigned char chunk_type;
+  uint32_t chunk_size;
+  uint32_t num_messages = (uint32_t)-1;
+  uint32_t messages_idx = (uint32_t)-1;
+  uint32_t offsets_idx = (uint32_t)-1;
+  std::string translation, source, context;
+
+  i18n_log("i18n_set_language(" << directory << "," << base << ")");
+  if (!directory || !base)
+    return -1;
+
+  language = get_language();
+  filename = std::string(directory) + "/" + base + "_" + language + ".qm";
+  i18n_log("Loading translations for language " << language);
+
+  boost::system::error_code ignored_ec;
+  if (!boost::filesystem::exists(filename, ignored_ec)) {
+    i18n_log("Translations file not found: " << filename);
+    const char *underscore = strchr(language.c_str(), '_');
+    if (underscore) {
+      std::string fallback_language = std::string(language, 0, underscore - language.c_str());
+      filename = std::string(directory) + "/" + base + "_" + fallback_language + ".qm";
+      i18n_log("Not found, loading translations for language " << fallback_language);
+      if (!boost::filesystem::exists(filename, ignored_ec)) {
+        i18n_log("Translations file not found: " << filename);
+        return -1;
+      }
+    }
+  }
+
+  if (!epee::file_io_utils::load_file_to_string(filename, contents)) {
+    i18n_log("Failed to load translations file: " << filename);
+    return -1;
+  }
+
+  data = (const unsigned char*)contents.c_str();
+  datalen = contents.size();
+  idx = 0;
+  i18n_log("Translations file size: " << datalen);
+
+  /* Format of the QM file (AFAICT):
+   *   16 bytes magic
+   *   chunk list: N instances of chunks:
+   *     1 byte: chunk type (0x42: offsets, 0x69: messages)
+   *     4 bytes: chunk length, big endian
+   *     D bytes: "chunk length" bytes of data
+   *
+   *   0x42 chunk: N instances of subchunks:
+   *     1 byte: subchunk type
+   *       0x01: end, no data
+   *       0x02: unsupported
+   *       0x03: translation
+   *         4 bytes: string length, big endian
+   *         N bytes: string data, UTF-16 (or UCS2-BE ?)
+   *       0x04: unsupported
+   *       0x05: obsolete, unsupported
+   *       0x06: source text
+   *       0x07: context
+   *       0x08: obsolete, unsupported
+   *       other: unsupported
+   *     4 bytes: subchunk length, big endian - except for 0x01, which has none
+   *     S bytes: "chunk length" bytes of data
+   *   0x69 chunk:
+   *     string data indexed by the 0x42 chunk data
+   */
+  if (datalen < sizeof(qm_magic) || memcmp(data, qm_magic, sizeof(qm_magic))) {
+    i18n_log("Bad translations file format: " << filename);
+    return -1;
+  }
+  idx += sizeof(qm_magic);
+
+  while (idx < datalen) {
+    if (idx + 5 > datalen) {
+      i18n_log("Bad translations file format: " << filename);
+      return -1;
+    }
+    chunk_type = data[idx++];
+    chunk_size = be32(data+idx);
+    idx += 4;
+
+    i18n_log("Found " << chunk_type << " of " << chunk_size << " bytes");
+    if (chunk_size >= datalen || idx > datalen - chunk_size) {
+      i18n_log("Bad translations file format: " << filename);
+      return -1;
+    }
+
+    switch (chunk_type) {
+      case 0x42:
+        i18n_log("Found offsets at " << idx);
+        /* two 32 bit integers, and possible padding */
+        offsets_idx = idx;
+        num_messages = chunk_size / 8;
+        break;
+      case 0x69:
+        i18n_log("Found messages at " << idx);
+        messages_idx = idx;
+        break;
+      default:
+        i18n_log("Found unsupported chunk type: " << chunk_type);
+        break;
+    }
+
+    idx += chunk_size;
+  }
+
+  if (offsets_idx == (uint32_t)-1) {
+    i18n_log("No offsets chunk found");
+    return -1;
+  }
+  if (messages_idx == (uint32_t)-1) {
+    i18n_log("No messages chunk found");
+    return -1;
+  }
+
+  for (uint32_t m = 0; m < num_messages; ++m) {
+    be32(data+offsets_idx+m*8); // unused
+    idx = be32(data+offsets_idx+m*8+4);
+    idx += messages_idx;
+
+    if (idx > datalen || idx + 1 > datalen) {
+      i18n_log("Bad translations file format: " << filename);
+      return -1;
+    }
+
+    while (1) {
+      if (idx + 5 > datalen) {
+        i18n_log("Bad translations file format: " << filename);
+        return -1;
+      }
+      chunk_type = data[idx++];
+      chunk_size = 0;
+      if (chunk_type == 0x01) {
+        i18n_entries[context + std::string("",1) + source] = translation;
+        context = std::string();
+        source = std::string();
+        translation = std::string();
+        break;
+      }
+
+      chunk_size = be32(data+idx);
+      idx += 4;
+      i18n_log("Found " << chunk_type << " of " << chunk_size << " bytes");
+      if (chunk_size >= datalen || idx > datalen - chunk_size) {
+        i18n_log("Bad translations file format: " << filename);
+        return -1;
+      }
+      switch (chunk_type) {
+        case 0x03: // translation, UTF-16
+          translation = utf16(data+idx, chunk_size);
+          i18n_log("Found translation: " << translation);
+          break;
+        case 0x06: // source, UTF-8
+          source = utf8(data+idx, chunk_size);
+          i18n_log("Found source: " << source);
+          break;
+        case 0x07: // context, UTF-8
+          context = utf8(data+idx, chunk_size);
+          i18n_log("Found context: " << context);
+          break;
+      }
+      idx += chunk_size;
+    }
+  }
+
+  return 0;
+}
+
+/* The entries is constant by that time */
+const char *i18n_translate(const char *s, const std::string &context)
+{
+  const std::string key = context + std::string("", 1) + s;
+  std::map<std::string,std::string>::const_iterator i = i18n_entries.find(key);
+  if (i == i18n_entries.end())
+    return s;
+  return (*i).second.c_str();
+}
+
+