mnemonics: fix language detection with checksum word

If a checksum word is present, language detection would use just the word prefixes. However, a set of word prefixes may be found in more than one language, and so the wrong language may be found first, which could then fail the checksum, since the check may be done with a different unique prefix length from the one it was created from. We now make a checksum test when we detect a language from prefixes only, to make sure we have the correct one.
2017-01-15 10:48:44 +00:00 · 2017-01-15 10:48:44 +00:00 · f5f4109f9a
commit f5f4109f9a
parent d98db4868d
2 changed files with 45 additions and 1 deletions
--- a/src/mnemonics/electrum-words.cpp
+++ b/src/mnemonics/electrum-words.cpp
@ -64,12 +64,15 @@
 namespace
 {
  uint32_t create_checksum_index(const std::vector<std::string> &word_list,
    uint32_t unique_prefix_length);
  bool checksum_test(std::vector<std::string> seed, uint32_t unique_prefix_length);
  /*!
   * \brief Finds the word list that contains the seed words and puts the indices
   *        where matches occurred in matched_indices.
   * \param  seed            List of words to match.
-   * \param  has_checksum    If word list passed checksum test, we need to only do a prefix check.
+   * \param  has_checksum    The seed has a checksum word (maybe not checked).
   * \param  matched_indices The indices where the seed words were found are added to this.
   * \param  language        Language instance pointer to write to after it is found.
   * \return                 true if all the words were present in some language false if not.
@ -88,6 +91,7 @@ namespace
      Language::Singleton<Language::Russian>::instance(),
      Language::Singleton<Language::OldEnglish>::instance()
    });
    Language::Base *fallback = NULL;
    // Iterate through all the languages and find a match
    for (std::vector<Language::Base*>::iterator it1 = language_instances.begin();
@ -125,6 +129,17 @@ namespace
        }
      }
      if (full_match)
      {
        // if we were using prefix only, and we have a checksum, check it now
        // to avoid false positives due to prefix set being too common
        if (has_checksum)
          if (!checksum_test(seed, (*it1)->get_unique_prefix_length()))
          {
            fallback = *it1;
            full_match = false;
          }
      }
      if (full_match)
      {
        *language = *it1;
        return true;
@ -132,6 +147,16 @@ namespace
      // Some didn't match. Clear the index array.
      matched_indices.clear();
    }
    // if we get here, we've not found a good match, but we might have a fallback,
    // if we detected a match which did not fit the checksum, which might be a badly
    // typed/transcribed seed in the right language
    if (fallback)
    {
      *language = fallback;
      return true;
    }
    return false;
  }
--- a/tests/unit_tests/mnemonics.cpp
+++ b/tests/unit_tests/mnemonics.cpp
@ -148,3 +148,22 @@ TEST(mnemonics, all_languages)
    test_language(*(*it));
  }
 }
 TEST(mnemonics, language_detection_with_bad_checksum)
 {
    // Portuguese uses a 4-character unique prefix, but every word of this seed has a
    // 3-character prefix that is also present in English, so detection based on
    // prefixes alone is ambiguous between the two languages.
    const std::string base_seed = "cinzento luxuriante leonardo gnostico digressao cupula fifa broxar iniquo louvor ovario dorsal ideologo besuntar decurso rosto susto lemure unheiro pagodeiro nitroglicerina eclusa mazurca bigorna";
    const std::string real_checksum = "gnostico";
    const std::string seed_with_checksum = base_seed + " " + real_checksum;
    crypto::secret_key key;
    std::string language_name;
    // Case 1: the bare seed, with no checksum word appended.
    const bool decoded_without_checksum = crypto::ElectrumWords::words_to_bytes(base_seed, key, language_name);
    ASSERT_EQ(true, decoded_without_checksum);
    ASSERT_STREQ(language_name.c_str(), "Portuguese");
    // Case 2: the same seed followed by its checksum word; the detected language
    // must still be Portuguese.
    const bool decoded_with_checksum = crypto::ElectrumWords::words_to_bytes(seed_with_checksum, key, language_name);
    ASSERT_EQ(true, decoded_with_checksum);
    ASSERT_STREQ(language_name.c_str(), "Portuguese");
 }