crypto,cmake: enable ASM mul impl on ARM; add cmake opt

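The ASM multiply implementation was disabled earlier while diagnosing failing tests on ARM. Those failures turned out to be caused by aliasing and were fixed by adding -fno-strict-aliasing, so this commit re-enables the ASM implementation. It also adds a CMake option, NO_OPTIMIZED_MULTIPLY_ON_ARM, for building with the generic C implementation instead.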
2024-12-25 14:47:46 +00:00 · 2016-09-04 04:27:51 +00:00 · 2016-09-04 04:27:51 +00:00 · 24d93370ad
commit 24d93370ad
parent afe3cce7fe
2 changed files with 13 additions and 3 deletions
--- a/src/crypto/CMakeLists.txt
+++ b/src/crypto/CMakeLists.txt
@ -74,3 +74,13 @@ bitmonero_add_library(crypto
  ${crypto_sources}
  ${crypto_headers}
  ${crypto_private_headers})
 # On ARM builds, let the user choose the generic C multiply in slow-hash.c
 # instead of the ARM assembly routine.
 if (ARM)
  # OFF by default: the ARM ASM implementation is used unless the user opts out.
  option(NO_OPTIMIZED_MULTIPLY_ON_ARM
    "Compute multiply using generic C implementation instead of ARM ASM" OFF)
  if(NO_OPTIMIZED_MULTIPLY_ON_ARM)
    message(STATUS "Using generic C implementation for multiply")
    # APPEND keeps any compile definitions already attached to slow-hash.c
    # rather than replacing them with this single macro.
    set_property(SOURCE slow-hash.c APPEND
      PROPERTY COMPILE_DEFINITIONS "NO_OPTIMIZED_MULTIPLY_ON_ARM")
  endif()
 endif()
--- a/src/crypto/slow-hash.c
+++ b/src/crypto/slow-hash.c
@ -679,7 +679,7 @@ void slow_hash_free_state(void)
 #include "aesb.c"
-#ifndef ARM_MUL_IMPL_ASM
+#ifdef NO_OPTIMIZED_MULTIPLY_ON_ARM
 /* The asm corresponds to this C code */
 #define SHORT uint32_t
 #define LONG uint64_t
@ -712,7 +712,7 @@ void mul(const uint8_t *ca, const uint8_t *cb, uint8_t *cres) {
  res[0] = t.tmp[6];
  res[1] = t.tmp[7];
 }
-#else // ARM_MUL_IMPL_ASM (TODO: this fails hash-slow test with GCC 6.1.1)
+#else // !NO_OPTIMIZED_MULTIPLY_ON_ARM
 /* Can work as inline, but actually runs slower. Keep it separate */
 #define mul(a, b, c)	cn_mul128(a, b, c)
@ -747,7 +747,7 @@ __asm__ __volatile__(
  : [A]"r"(aa[1]), [a]"r"(aa[0]), [B]"r"(bb[1]), [b]"r"(bb[0]), [r]"r"(r)
  : "cc", "memory");
 }
-#endif // ARM_MUL_IMPL_ASM
+#endif // NO_OPTIMIZED_MULTIPLY_ON_ARM
 STATIC INLINE void sum_half_blocks(uint8_t* a, const uint8_t* b)
 {