Merge pull request #176

b94b8cd Added unit test. Fails for Japanese for some reason. (Oran Juice) 4c8a628 Remove iostream header put in during testing (Oran Juice) 9875f5b Variable unique prefix lengths for seed (Oran Juice)
2014-10-15 12:14:44 +02:00 · 2014-10-15 12:14:44 +02:00 · db5ca73fdd
commit db5ca73fdd
parent 0790aea6c8 b94b8cd798
9 changed files with 215 additions and 50 deletions
--- a/src/mnemonics/electrum-words.cpp
+++ b/src/mnemonics/electrum-words.cpp
@ -61,7 +61,6 @@

 namespace
 {
-  const int seed_length = 24;

  /*!
   * \brief Finds the word list that contains the seed words and puts the indices
@ -69,11 +68,11 @@ namespace
   * \param  seed            List of words to match.
   * \param  has_checksum    If word list passed checksum test, we need to only do a prefix check.
   * \param  matched_indices The indices where the seed words were found are added to this.
+   * \param  language        Language instance pointer to write to after it is found.
   * \return                 true if all the words were present in some language false if not.
   */
  bool find_seed_language(const std::vector<std::string> &seed,
-    bool has_checksum, std::vector<uint32_t> &matched_indices, uint32_t &word_list_length,
-    std::string &language_name)
+    bool has_checksum, std::vector<uint32_t> &matched_indices, Language::Base **language)
  {
    // If there's a new language added, add an instance of it here.
    std::vector<Language::Base*> language_instances({
@ -83,18 +82,6 @@ namespace
      Language::Singleton<Language::Japanese>::instance(),
      Language::Singleton<Language::OldEnglish>::instance()
    });
-    // To hold trimmed seed words in case of a checksum being present.
-    std::vector<std::string> trimmed_seed;
-    if (has_checksum)
-    {
-      // If it had a checksum, we'll just compare the unique prefix
-      // So we create a list of trimmed seed words
-      for (std::vector<std::string>::const_iterator it = seed.begin(); it != seed.end(); it++)
-      {
-        trimmed_seed.push_back(it->length() > Language::unique_prefix_length ?
-          it->substr(0, Language::unique_prefix_length) : *it);
-      }
-    }

    // Iterate through all the languages and find a match
    for (std::vector<Language::Base*>::iterator it1 = language_instances.begin();
@ -104,23 +91,22 @@ namespace
      const std::unordered_map<std::string, uint32_t> &trimmed_word_map = (*it1)->get_trimmed_word_map();
      // To iterate through seed words
      std::vector<std::string>::const_iterator it2;
-      // To iterate through trimmed seed words
-      std::vector<std::string>::iterator it3;
      bool full_match = true;

+      std::string trimmed_word;
      // Iterate through all the words and see if they're all present
-      for (it2 = seed.begin(), it3 = trimmed_seed.begin();
-        it2 != seed.end(); it2++, it3++)
+      for (it2 = seed.begin(); it2 != seed.end(); it2++)
      {
        if (has_checksum)
        {
+          trimmed_word = it2->substr(0, (*it1)->get_unique_prefix_length());
          // Use the trimmed words and map
-          if (trimmed_word_map.count(*it3) == 0)
+          if (trimmed_word_map.count(trimmed_word) == 0)
          {
            full_match = false;
            break;
          }
-          matched_indices.push_back(trimmed_word_map.at(*it3));
+          matched_indices.push_back(trimmed_word_map.at(trimmed_word));
        }
        else
        {
@ -134,8 +120,7 @@ namespace
      }
      if (full_match)
      {
-        word_list_length = (*it1)->get_word_list().size();
-        language_name = (*it1)->get_language_name();
+        *language = *it1;
        return true;
      }
      // Some didn't match. Clear the index array.
@ -146,18 +131,20 @@ namespace

  /*!
   * \brief Creates a checksum index in the word list array on the list of words.
-   * \param  word_list Vector of words
-   * \return           Checksum index
+   * \param  word_list            Vector of words
+   * \param unique_prefix_length  the prefix length of each word to use for checksum
+   * \return                      Checksum index
   */
-  uint32_t create_checksum_index(const std::vector<std::string> &word_list)
+  uint32_t create_checksum_index(const std::vector<std::string> &word_list,
+    uint32_t unique_prefix_length)
  {
    std::string trimmed_words = "";

    for (std::vector<std::string>::const_iterator it = word_list.begin(); it != word_list.end(); it++)
    {
-      if (it->length() > 4)
+      if (it->length() > unique_prefix_length)
      {
-        trimmed_words += it->substr(0, Language::unique_prefix_length);
+        trimmed_words += it->substr(0, unique_prefix_length);
      }
      else
      {
@ -166,25 +153,26 @@ namespace
    }
    boost::crc_32_type result;
    result.process_bytes(trimmed_words.data(), trimmed_words.length());
-    return result.checksum() % seed_length;
+    return result.checksum() % crypto::ElectrumWords::seed_length;
  }

  /*!
   * \brief Does the checksum test on the seed passed.
-   * \param seed    Vector of seed words
-   * \return        True if the test passed false if not.
+   * \param seed                  Vector of seed words
+   * \param unique_prefix_length  the prefix length of each word to use for checksum
+   * \return                      True if the test passed false if not.
+   * \note  The last element of seed is taken as the checksum word and compared, by
+   *        prefix, with the word selected by create_checksum_index() from the rest.
+   * \note  std::string::length() and substr() count bytes, so the prefix is measured
+   *        in bytes rather than characters when a word is multi-byte encoded.
+   *        NOTE(review): confirm this matches how the trimmed word map is built.
   */
-  bool checksum_test(std::vector<std::string> seed)
+  bool checksum_test(std::vector<std::string> seed, uint32_t unique_prefix_length)
  {
    // The last word is the checksum.
    std::string last_word = seed.back();
    seed.pop_back(); // seed is a by-value copy, so the caller's vector is not modified

-    std::string checksum = seed[create_checksum_index(seed)];
+    std::string checksum = seed[create_checksum_index(seed, unique_prefix_length)];

-    std::string trimmed_checksum = checksum.length() > 4 ? checksum.substr(0, Language::unique_prefix_length) :
+    std::string trimmed_checksum = checksum.length() > unique_prefix_length ? checksum.substr(0, unique_prefix_length) :
      checksum;
-    std::string trimmed_last_word = checksum.length() > 4 ? last_word.substr(0, Language::unique_prefix_length) :
+    std::string trimmed_last_word = last_word.length() > unique_prefix_length ? last_word.substr(0, unique_prefix_length) :
      last_word; // each word is trimmed based on its own length (the removed line tested checksum.length() here)
    return trimmed_checksum == trimmed_last_word;
  }
@ -211,11 +199,12 @@ namespace crypto
     * \param  language_name   Language of the seed as found gets written here.
     * \return                 false if not a multiple of 3 words, or if word is not in the words list
     */
-    bool words_to_bytes(const std::string& words, crypto::secret_key& dst,
+    bool words_to_bytes(std::string words, crypto::secret_key& dst,
      std::string &language_name)
    {
      std::vector<std::string> seed;

+      boost::algorithm::trim(words);
      boost::split(seed, words, boost::is_any_of(" "));

      // error on non-compliant word list
@ -227,9 +216,19 @@ namespace crypto

      // If it is seed with a checksum.
      bool has_checksum = seed.size() == (seed_length + 1);
+
+      std::vector<uint32_t> matched_indices;
+      Language::Base *language;
+      if (!find_seed_language(seed, has_checksum, matched_indices, &language))
+      {
+        return false;
+      }
+      language_name = language->get_language_name();
+      uint32_t word_list_length = language->get_word_list().size();
+
      if (has_checksum)
      {
-        if (!checksum_test(seed))
+        if (!checksum_test(seed, language->get_unique_prefix_length()))
        {
          // Checksum fail
          return false;
@ -237,13 +236,6 @@ namespace crypto
        seed.pop_back();
      }

-      std::vector<uint32_t> matched_indices;
-      uint32_t word_list_length = 0;
-      if (!find_seed_language(seed, has_checksum, matched_indices, word_list_length, language_name))
-      {
-        return false;
-      }
-
      for (unsigned int i=0; i < seed.size() / 3; i++)
      {
        uint32_t val;
@ -335,7 +327,7 @@ namespace crypto
      }

      words.pop_back();
-      words += (' ' + words_store[create_checksum_index(words_store)]);
+      words += (' ' + words_store[create_checksum_index(words_store, language->get_unique_prefix_length())]);
      return false;
    }

--- a/src/mnemonics/electrum-words.h
+++ b/src/mnemonics/electrum-words.h
@ -59,6 +59,7 @@ namespace crypto
  namespace ElectrumWords
  {

+    const int seed_length = 24;
    const std::string old_language_name = "OldEnglish";
    /*!
     * \brief Converts seed words to bytes (secret key).
@ -67,7 +68,7 @@ namespace crypto
     * \param  language_name   Language of the seed as found gets written here.
     * \return                 false if not a multiple of 3 words, or if word is not in the words list
     */
-    bool words_to_bytes(const std::string& words, crypto::secret_key& dst,
+    bool words_to_bytes(std::string words, crypto::secret_key& dst,
      std::string &language_name);

    /*!
--- a/src/mnemonics/english.h
+++ b/src/mnemonics/english.h
@ -1681,6 +1681,7 @@ namespace Language
        "zones",
        "zoom"
      });
+      unique_prefix_length = 3;
      word_map = new std::unordered_map<std::string, uint32_t>;
      trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
      language_name = "English";
--- a/src/mnemonics/japanese.h
+++ b/src/mnemonics/japanese.h
@ -1681,6 +1681,7 @@ namespace Language
        "びじゅつかん",
        "ひしょ"
      });
+      unique_prefix_length = 4;
      word_map = new std::unordered_map<std::string, uint32_t>;
      trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
      language_name = "Japanese";
--- a/src/mnemonics/language_base.h
+++ b/src/mnemonics/language_base.h
@ -45,7 +45,6 @@
 */
 namespace Language
 {
-  const int unique_prefix_length = 4; /*!< Length of the prefix of all words guaranteed to be unique */
  /*!
   * \class Base
   * \brief A base language class which all languages have to inherit from for
@ -58,7 +57,7 @@ namespace Language
    std::unordered_map<std::string, uint32_t> *word_map; /*!< hash table to find word's index */
    std::unordered_map<std::string, uint32_t> *trimmed_word_map; /*!< hash table to find word's trimmed index */
    std::string language_name; /*!< Name of language */
-    int trim_length; /*!< Number of unique starting characters to trim the wordlist to when matching */
+    uint32_t unique_prefix_length; /*!< Number of unique starting characters to trim the wordlist to when matching */
    /*!
     * \brief Populates the word maps after the list is ready.
     */
@ -85,6 +84,7 @@ namespace Language
      word_list = new std::vector<std::string>;
      word_map = new std::unordered_map<std::string, uint32_t>;
      trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
+      unique_prefix_length = 4;
    }
    /*!
     * \brief Returns a pointer to the word list.
@ -122,9 +122,9 @@ namespace Language
     * \brief Returns the number of unique starting characters to be used for matching.
     * \return Number of unique starting characters.
     */
-    int get_trim_length() const
+    uint32_t get_unique_prefix_length() const
    {
-      return trim_length;
+      return unique_prefix_length;
    }
  };
 }
--- a/src/mnemonics/old_english.h
+++ b/src/mnemonics/old_english.h
@ -1681,6 +1681,7 @@ namespace Language
        "weapon",
        "weary"
      });
+      unique_prefix_length = 4;
      word_map = new std::unordered_map<std::string, uint32_t>;
      trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
      language_name = "OldEnglish";
--- a/src/mnemonics/portuguese.h
+++ b/src/mnemonics/portuguese.h
@ -1679,6 +1679,7 @@ namespace Language
        "zenite",
        "zumbi"
      });
+      unique_prefix_length = 4;
      word_map = new std::unordered_map<std::string, uint32_t>;
      trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
      language_name = "Portuguese";
--- a/src/mnemonics/spanish.h
+++ b/src/mnemonics/spanish.h
@ -1681,6 +1681,7 @@ namespace Language
        "ritmo",
        "rito"
      });
+      unique_prefix_length = 4;
      word_map = new std::unordered_map<std::string, uint32_t>;
      trimmed_word_map = new std::unordered_map<std::string, uint32_t>;
      language_name = "Spanish";
--- a/tests/unit_tests/mnemonics.cpp
+++ b/tests/unit_tests/mnemonics.cpp
@ -0,0 +1,167 @@
+// Copyright (c) 2014, The Monero Project
+// 
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without modification, are
+// permitted provided that the following conditions are met:
+// 
+// 1. Redistributions of source code must retain the above copyright notice, this list of
+//    conditions and the following disclaimer.
+// 
+// 2. Redistributions in binary form must reproduce the above copyright notice, this list
+//    of conditions and the following disclaimer in the documentation and/or other
+//    materials provided with the distribution.
+// 
+// 3. Neither the name of the copyright holder nor the names of its contributors may be
+//    used to endorse or promote products derived from this software without specific
+//    prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "gtest/gtest.h"
+#include "mnemonics/electrum-words.h"
+#include "crypto/crypto.h"
+#include <stdlib.h>
+#include <vector>
+#include <time.h>
+#include <iostream>
+#include <boost/algorithm/string.hpp>
+#include "mnemonics/english.h"
+#include "mnemonics/spanish.h"
+#include "mnemonics/portuguese.h"
+#include "mnemonics/japanese.h"
+#include "mnemonics/old_english.h"
+#include "mnemonics/language_base.h"
+#include "mnemonics/singleton.h"
+
+namespace
+{
+  /*!
+   * \brief Returns a pseudo-random index from 0 to max-1, drawn with rand().
+   * \param  max Range maximum (exclusive). Expected to be positive: it is the
+   *             divisor of a modulo operation, so zero is not a valid argument.
+   * \return     rand() % max, converted to uint32_t.
+   */
+  uint32_t get_random_index(int max)
+  {
+    return rand() % max;
+  }
+
+  /*!
+   * \brief Prints a seed to std::cout on a single line.
+   *
+   * Every word is followed by one space (including the last one), and the line
+   * is finished with std::endl. Not called by any code visible in this file.
+   *
+   * \param seed List of seed words to print.
+   */
+  void print_seed(const std::vector<std::string> &seed)
+  {
+    for (std::vector<std::string>::const_iterator it = seed.begin(); it != seed.end(); it++)
+    {
+      std::cout << *it << " ";
+    }
+    std::cout << std::endl;
+  }
+
+  /*!
+   * \brief Compares two word vectors element by element with ASSERT_STREQ.
+   *
+   * Iteration stops as soon as either vector is exhausted, so a difference in
+   * length is not detected here; the callers in this file assert equal sizes
+   * before calling.
+   *
+   * \param expected expected vector
+   * \param present  current vector
+   */
+  void compare_vectors(const std::vector<std::string> &expected, const std::vector<std::string> &present)
+  {
+    std::vector<std::string>::const_iterator it1, it2;
+    for (it1 = expected.begin(), it2 = present.begin(); it1 != expected.end() && it2 != present.end();
+      it1++, it2++)
+    {
+      ASSERT_STREQ(it1->c_str(), it2->c_str());
+    }
+  }
+
+  /*!
+   * \brief Tests the given language mnemonics with a randomly generated seed.
+   *
+   * Picks crypto::ElectrumWords::seed_length random words from the language's
+   * word list, converts them to a secret key with words_to_bytes, converts the
+   * key back with bytes_to_words, and asserts that the detected language name
+   * and the returned words match the input. The last word returned by
+   * bytes_to_words is treated as the checksum word; it is appended to the seed
+   * and the same round trip is repeated for the seed with checksum.
+   *
+   * \param language A Language instance to test
+   */
+  void test_language(const Language::Base &language)
+  {
+    const std::vector<std::string> &word_list = language.get_word_list();
+    std::string seed = "", return_seed = "";
+    // Generate a random seed without checksum
+    for (int ii = 0; ii < crypto::ElectrumWords::seed_length; ii++)
+    {
+      seed += (word_list[get_random_index(word_list.size())] + ' ');
+    }
+    seed.pop_back(); // drop the trailing space appended by the last loop iteration
+    std::cout << "Test seed without checksum:\n";
+    std::cout << seed << std::endl;
+
+    crypto::secret_key key;
+    std::string language_name;
+    bool res;
+    std::vector<std::string> seed_vector, return_seed_vector;
+    std::string checksum_word;
+
+    // Convert it to secret key
+    res = crypto::ElectrumWords::words_to_bytes(seed, key, language_name);
+    ASSERT_EQ(true, res);
+    std::cout << "Detected language: " << language_name << std::endl;
+    ASSERT_STREQ(language.get_language_name().c_str(), language_name.c_str());
+
+    // Convert the secret key back to seed
+    crypto::ElectrumWords::bytes_to_words(key, return_seed, language.get_language_name());
+    ASSERT_EQ(true, res); // NOTE(review): res is not reassigned above, so this re-checks the words_to_bytes result; the bytes_to_words return value is discarded
+    std::cout << "Returned seed:\n";
+    std::cout << return_seed << std::endl;
+    boost::split(seed_vector, seed, boost::is_any_of(" "));
+    boost::split(return_seed_vector, return_seed, boost::is_any_of(" "));
+
+    // Extract the checksum word (the last word of the returned seed)
+    checksum_word = return_seed_vector.back();
+    return_seed_vector.pop_back();
+    ASSERT_EQ(seed_vector.size(), return_seed_vector.size());
+    // Ensure that the rest of it is same
+    compare_vectors(seed_vector, return_seed_vector);
+
+    // Append the checksum word to repeat the entire process with a seed with checksum
+    seed += (" " + checksum_word);
+    std::cout << "Test seed with checksum:\n";
+    std::cout << seed << std::endl;
+    res = crypto::ElectrumWords::words_to_bytes(seed, key, language_name);
+    ASSERT_EQ(true, res);
+    std::cout << "Detected language: " << language_name << std::endl;
+    ASSERT_STREQ(language.get_language_name().c_str(), language_name.c_str());
+
+    return_seed = "";
+    crypto::ElectrumWords::bytes_to_words(key, return_seed, language.get_language_name());
+    ASSERT_EQ(true, res); // NOTE(review): as above, res still holds the words_to_bytes result
+    std::cout << "Returned seed:\n";
+    std::cout << return_seed << std::endl;
+
+    seed_vector.clear(); // reset both vectors before splitting into them again
+    return_seed_vector.clear();
+    boost::split(seed_vector, seed, boost::is_any_of(" "));
+    boost::split(return_seed_vector, return_seed, boost::is_any_of(" "));
+    ASSERT_EQ(seed_vector.size(), return_seed_vector.size()); // this time both sides include the checksum word
+    compare_vectors(seed_vector, return_seed_vector);
+  }
+}
+
+TEST(mnemonics, all_languages) // runs test_language once for every language instance listed below
+{
+  srand(time(NULL)); // seeds rand() from the clock, so the random words differ between runs
+  std::vector<Language::Base*> languages({
+    Language::Singleton<Language::English>::instance(),
+    Language::Singleton<Language::Spanish>::instance(),
+    Language::Singleton<Language::Portuguese>::instance(),
+    Language::Singleton<Language::Japanese>::instance(),
+  }); // NOTE(review): Language::OldEnglish is not in this list although old_english.h is included
+
+  for (std::vector<Language::Base*>::iterator it = languages.begin(); it != languages.end(); it++)
+  {
+    test_language(*(*it));
+  }
+}