Browse Source

Support VerusHash V2

master
miketout 5 years ago
parent
commit
664230d2b7
  1. 1
      .gitignore
  2. 10
      binding.gyp
  3. 59
      crypto/haraka.c
  4. 254
      crypto/haraka.h
  5. 73
      crypto/haraka_portable.c
  6. 51
      crypto/haraka_portable.h
  7. 1013
      crypto/tinyformat.h
  8. 146
      crypto/uint256.cpp
  9. 164
      crypto/uint256.h
  10. 499
      crypto/utilstrencodings.cpp
  11. 98
      crypto/utilstrencodings.h
  12. 355
      crypto/verus_clhash.cpp
  13. 239
      crypto/verus_clhash.h
  14. 591
      crypto/verus_clhash_portable.cpp
  15. 22
      crypto/verus_hash.cpp
  16. 153
      crypto/verus_hash.h
  17. 152
      verushash.cc

1
.gitignore

@ -1,3 +1,4 @@
.lock-wscript
build/
crypto.node
.vscode/settings.json

10
binding.gyp

@ -8,10 +8,18 @@
"crypto/common.h",
"crypto/haraka.h",
"crypto/haraka_portable.h",
"crypto/verus_clhash.h",
"crypto/verus_hash.h",
"crypto/haraka.c",
"crypto/haraka_portable.c",
"crypto/tinyformat.h",
"crypto/uint256.cpp",
"crypto/uint256.h",
"crypto/utilstrencodings.cpp",
"crypto/utilstrencodings.h",
"crypto/verus_hash.cpp",
"crypto/verus_clhash.cpp",
"crypto/verus_clhash_portable.cpp",
"verushash.cc",
],
"include_dirs": [
@ -31,6 +39,7 @@
"-msse4.2",
"-mssse3",
"-mavx",
"-mpclmul",
"-maes",
],
"cflags": [
@ -44,6 +53,7 @@
"-msse4.2",
"-mssse3",
"-mavx",
"-mpclmul",
"-maes",
],
"link_settings": {

59
crypto/haraka.c

@ -140,6 +140,34 @@ void haraka256(unsigned char *out, const unsigned char *in) {
STORE(out + 16, s[1]);
}
/*
 * Haraka-256 with caller-supplied round constants.
 *
 * out: receives 32 bytes.
 * in:  32 input bytes, read with LOAD.
 * rc:  round-constant table; the AES2 macro picks up this parameter by name
 *      and reads entries 0..19.
 */
void haraka256_keyed(unsigned char *out, const unsigned char *in, const u128 *rc) {
  __m128i s[2], tmp;
  int r;

  s[0] = LOAD(in);
  s[1] = LOAD(in + 16);

  /* Five rounds: two AES layers (four constants) followed by the mix. */
  for (r = 0; r < 5; ++r) {
    AES2(s[0], s[1], 4 * r);
    MIX2(s[0], s[1]);
  }

  /* Feed-forward: XOR the original input back into the state. */
  s[0] = _mm_xor_si128(s[0], LOAD(in));
  s[1] = _mm_xor_si128(s[1], LOAD(in + 16));

  STORE(out, s[0]);
  STORE(out + 16, s[1]);
}
void haraka256_4x(unsigned char *out, const unsigned char *in) {
__m128i s[4][2], tmp;
@ -397,6 +425,37 @@ void haraka512_zero(unsigned char *out, const unsigned char *in) {
TRUNCSTORE(out, s[0], s[1], s[2], s[3]);
}
/*
 * Haraka-512 with caller-supplied round constants.
 *
 * out: receives the 32-byte truncated result (written by TRUNCSTORE).
 * in:  64 input bytes, read with LOAD.
 * rc:  round-constant table; the AES4 macro picks up this parameter by name
 *      and reads entries 0..39.
 */
void haraka512_keyed(unsigned char *out, const unsigned char *in, const u128 *rc) {
  u128 s[4], tmp;
  int r;

  s[0] = LOAD(in);
  s[1] = LOAD(in + 16);
  s[2] = LOAD(in + 32);
  s[3] = LOAD(in + 48);

  /* Five rounds: two AES layers (eight constants) followed by the mix. */
  for (r = 0; r < 5; ++r) {
    AES4(s[0], s[1], s[2], s[3], 8 * r);
    MIX4(s[0], s[1], s[2], s[3]);
  }

  /* Feed-forward: XOR the original input back into the state. */
  s[0] = _mm_xor_si128(s[0], LOAD(in));
  s[1] = _mm_xor_si128(s[1], LOAD(in + 16));
  s[2] = _mm_xor_si128(s[2], LOAD(in + 32));
  s[3] = _mm_xor_si128(s[3], LOAD(in + 48));

  TRUNCSTORE(out, s[0], s[1], s[2], s[3]);
}
void haraka512_4x(unsigned char *out, const unsigned char *in) {
u128 s[4][4], tmp;

254
crypto/haraka.h

@ -1,126 +1,128 @@
/*
The MIT License (MIT)
Copyright (c) 2016 kste
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Optimized Implementations for Haraka256 and Haraka512
*/
#ifndef HARAKA_H_
#define HARAKA_H_
#include "immintrin.h"
#define NUMROUNDS 5
#ifdef _WIN32
typedef unsigned long long u64;
#else
typedef unsigned long u64;
#endif
typedef __m128i u128;
extern u128 rc[40];
#define LOAD(src) _mm_load_si128((u128 *)(src))
#define STORE(dest,src) _mm_storeu_si128((u128 *)(dest),src)
#define AES2(s0, s1, rci) \
s0 = _mm_aesenc_si128(s0, rc[rci]); \
s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \
s0 = _mm_aesenc_si128(s0, rc[rci + 2]); \
s1 = _mm_aesenc_si128(s1, rc[rci + 3]);
#define AES2_4x(s0, s1, s2, s3, rci) \
AES2(s0[0], s0[1], rci); \
AES2(s1[0], s1[1], rci); \
AES2(s2[0], s2[1], rci); \
AES2(s3[0], s3[1], rci);
#define AES2_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \
AES2_4x(s0, s1, s2, s3, rci); \
AES2_4x(s4, s5, s6, s7, rci);
#define AES4(s0, s1, s2, s3, rci) \
s0 = _mm_aesenc_si128(s0, rc[rci]); \
s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \
s2 = _mm_aesenc_si128(s2, rc[rci + 2]); \
s3 = _mm_aesenc_si128(s3, rc[rci + 3]); \
s0 = _mm_aesenc_si128(s0, rc[rci + 4]); \
s1 = _mm_aesenc_si128(s1, rc[rci + 5]); \
s2 = _mm_aesenc_si128(s2, rc[rci + 6]); \
s3 = _mm_aesenc_si128(s3, rc[rci + 7]); \
#define AES4_zero(s0, s1, s2, s3, rci) \
s0 = _mm_aesenc_si128(s0, rc0[rci]); \
s1 = _mm_aesenc_si128(s1, rc0[rci + 1]); \
s2 = _mm_aesenc_si128(s2, rc0[rci + 2]); \
s3 = _mm_aesenc_si128(s3, rc0[rci + 3]); \
s0 = _mm_aesenc_si128(s0, rc0[rci + 4]); \
s1 = _mm_aesenc_si128(s1, rc0[rci + 5]); \
s2 = _mm_aesenc_si128(s2, rc0[rci + 6]); \
s3 = _mm_aesenc_si128(s3, rc0[rci + 7]); \
#define AES4_4x(s0, s1, s2, s3, rci) \
AES4(s0[0], s0[1], s0[2], s0[3], rci); \
AES4(s1[0], s1[1], s1[2], s1[3], rci); \
AES4(s2[0], s2[1], s2[2], s2[3], rci); \
AES4(s3[0], s3[1], s3[2], s3[3], rci);
#define AES4_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \
AES4_4x(s0, s1, s2, s3, rci); \
AES4_4x(s4, s5, s6, s7, rci);
#define MIX2(s0, s1) \
tmp = _mm_unpacklo_epi32(s0, s1); \
s1 = _mm_unpackhi_epi32(s0, s1); \
s0 = tmp;
#define MIX4(s0, s1, s2, s3) \
tmp = _mm_unpacklo_epi32(s0, s1); \
s0 = _mm_unpackhi_epi32(s0, s1); \
s1 = _mm_unpacklo_epi32(s2, s3); \
s2 = _mm_unpackhi_epi32(s2, s3); \
s3 = _mm_unpacklo_epi32(s0, s2); \
s0 = _mm_unpackhi_epi32(s0, s2); \
s2 = _mm_unpackhi_epi32(s1, tmp); \
s1 = _mm_unpacklo_epi32(s1, tmp);
#define TRUNCSTORE(out, s0, s1, s2, s3) \
*(u64*)(out) = (u64*)(s0)[1]; \
*(u64*)(out + 8) = (u64*)(s1)[1]; \
*(u64*)(out + 16) = (u64*)(s2)[0]; \
*(u64*)(out + 24) = (u64*)(s3)[0];
void load_constants();
void test_implementations();
void load_constants();
void haraka256(unsigned char *out, const unsigned char *in);
void haraka256_4x(unsigned char *out, const unsigned char *in);
void haraka256_8x(unsigned char *out, const unsigned char *in);
void haraka512(unsigned char *out, const unsigned char *in);
void haraka512_zero(unsigned char *out, const unsigned char *in);
void haraka512_4x(unsigned char *out, const unsigned char *in);
void haraka512_8x(unsigned char *out, const unsigned char *in);
#endif
/*
The MIT License (MIT)
Copyright (c) 2016 kste
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Optimized Implementations for Haraka256 and Haraka512
*/
#ifndef HARAKA_H_
#define HARAKA_H_
#include "immintrin.h"
#define NUMROUNDS 5
#ifdef _WIN32
typedef unsigned long long u64;
#else
typedef unsigned long u64;
#endif
typedef __m128i u128;
extern u128 rc[40];
/* 128-bit load via _mm_load_si128, the aligned-load intrinsic: src is expected to be 16-byte aligned. */
#define LOAD(src) _mm_load_si128((u128 *)(src))
/* 128-bit store via _mm_storeu_si128, the unaligned-store intrinsic: dest needs no particular alignment. */
#define STORE(dest,src) _mm_storeu_si128((u128 *)(dest),src)
/*
 * Two AES encryption rounds on each of s0 and s1, using rc[rci] .. rc[rci + 3].
 * `rc` is resolved at the expansion site, so a parameter or local named rc
 * takes precedence over the extern table declared above.
 */
#define AES2(s0, s1, rci) \
s0 = _mm_aesenc_si128(s0, rc[rci]); \
s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \
s0 = _mm_aesenc_si128(s0, rc[rci + 2]); \
s1 = _mm_aesenc_si128(s1, rc[rci + 3]);
/* Applies AES2 to four two-element states, all with the same constants. */
#define AES2_4x(s0, s1, s2, s3, rci) \
AES2(s0[0], s0[1], rci); \
AES2(s1[0], s1[1], rci); \
AES2(s2[0], s2[1], rci); \
AES2(s3[0], s3[1], rci);
/* Applies AES2 to eight two-element states (two AES2_4x groups). */
#define AES2_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \
AES2_4x(s0, s1, s2, s3, rci); \
AES2_4x(s4, s5, s6, s7, rci);
/*
 * Two AES encryption rounds on each of s0..s3, using rc[rci] .. rc[rci + 7].
 * AES4_zero below is the same sequence but indexes `rc0` instead of `rc`;
 * rc0 is not declared in this part of the header and must be in scope
 * wherever AES4_zero is expanded.
 */
#define AES4(s0, s1, s2, s3, rci) \
s0 = _mm_aesenc_si128(s0, rc[rci]); \
s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \
s2 = _mm_aesenc_si128(s2, rc[rci + 2]); \
s3 = _mm_aesenc_si128(s3, rc[rci + 3]); \
s0 = _mm_aesenc_si128(s0, rc[rci + 4]); \
s1 = _mm_aesenc_si128(s1, rc[rci + 5]); \
s2 = _mm_aesenc_si128(s2, rc[rci + 6]); \
s3 = _mm_aesenc_si128(s3, rc[rci + 7]); \
#define AES4_zero(s0, s1, s2, s3, rci) \
s0 = _mm_aesenc_si128(s0, rc0[rci]); \
s1 = _mm_aesenc_si128(s1, rc0[rci + 1]); \
s2 = _mm_aesenc_si128(s2, rc0[rci + 2]); \
s3 = _mm_aesenc_si128(s3, rc0[rci + 3]); \
s0 = _mm_aesenc_si128(s0, rc0[rci + 4]); \
s1 = _mm_aesenc_si128(s1, rc0[rci + 5]); \
s2 = _mm_aesenc_si128(s2, rc0[rci + 6]); \
s3 = _mm_aesenc_si128(s3, rc0[rci + 7]); \
#define AES4_4x(s0, s1, s2, s3, rci) \
AES4(s0[0], s0[1], s0[2], s0[3], rci); \
AES4(s1[0], s1[1], s1[2], s1[3], rci); \
AES4(s2[0], s2[1], s2[2], s2[3], rci); \
AES4(s3[0], s3[1], s3[2], s3[3], rci);
/* Applies AES4 to eight four-element states (two AES4_4x groups). */
#define AES4_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \
AES4_4x(s0, s1, s2, s3, rci); \
AES4_4x(s4, s5, s6, s7, rci);
/*
 * Interleaves the 32-bit words of s0 and s1: s0 receives the low-half
 * interleave, s1 the high-half interleave.
 * Requires a u128 variable named `tmp` in scope at the expansion site.
 */
#define MIX2(s0, s1) \
tmp = _mm_unpacklo_epi32(s0, s1); \
s1 = _mm_unpackhi_epi32(s0, s1); \
s0 = tmp;
/*
 * Word shuffle across the four state registers s0..s3, built from two layers
 * of 32-bit unpack operations. Statement order matters because each register
 * is overwritten in place.
 * Requires a u128 variable named `tmp` in scope at the expansion site.
 */
#define MIX4(s0, s1, s2, s3) \
tmp = _mm_unpacklo_epi32(s0, s1); \
s0 = _mm_unpackhi_epi32(s0, s1); \
s1 = _mm_unpacklo_epi32(s2, s3); \
s2 = _mm_unpackhi_epi32(s2, s3); \
s3 = _mm_unpacklo_epi32(s0, s2); \
s0 = _mm_unpackhi_epi32(s0, s2); \
s2 = _mm_unpackhi_epi32(s1, tmp); \
s1 = _mm_unpacklo_epi32(s1, tmp);
/*
 * Writes 32 bytes to out: u64 lane 1 of s0, lane 1 of s1, lane 0 of s2 and
 * lane 0 of s3, in that order.
 * The lanes are reached by taking the address of each argument, so s0..s3
 * must be lvalues; out is written through u64 pointers.
 */
#define TRUNCSTORE(out, s0, s1, s2, s3) \
*(u64*)(out) = *(((u64*)&(s0) + 1)); \
*(u64*)(out + 8) = *(((u64*)&(s1) + 1)); \
*(u64*)(out + 16) = *(((u64*)&(s2) + 0)); \
*(u64*)(out + 24) = *(((u64*)&(s3) + 0));
void load_constants();
void test_implementations();
void load_constants();
void haraka256(unsigned char *out, const unsigned char *in);
void haraka256_keyed(unsigned char *out, const unsigned char *in, const u128 *rc);
void haraka256_4x(unsigned char *out, const unsigned char *in);
void haraka256_8x(unsigned char *out, const unsigned char *in);
void haraka512(unsigned char *out, const unsigned char *in);
void haraka512_zero(unsigned char *out, const unsigned char *in);
void haraka512_keyed(unsigned char *out, const unsigned char *in, const u128 *rc);
void haraka512_4x(unsigned char *out, const unsigned char *in);
void haraka512_8x(unsigned char *out, const unsigned char *in);
#endif

73
crypto/haraka_portable.c

@ -147,32 +147,32 @@ void tweak_constants(const unsigned char *pk_seed, const unsigned char *sk_seed,
memcpy(rc, buf, 40*16);
}
static void haraka_S_absorb(unsigned char *s, unsigned int r,
static void haraka_S_absorb(unsigned char *s,
const unsigned char *m, unsigned long long mlen,
unsigned char p)
{
unsigned long long i;
unsigned char t[r];
unsigned char t[32];
while (mlen >= r) {
while (mlen >= 32) {
// XOR block to state
for (i = 0; i < r; ++i) {
for (i = 0; i < 32; ++i) {
s[i] ^= m[i];
}
haraka512_perm(s, s);
mlen -= r;
m += r;
mlen -= 32;
m += 32;
}
for (i = 0; i < r; ++i) {
for (i = 0; i < 32; ++i) {
t[i] = 0;
}
for (i = 0; i < mlen; ++i) {
t[i] = m[i];
}
t[i] = p;
t[r - 1] |= 128;
for (i = 0; i < r; ++i) {
t[32 - 1] |= 128;
for (i = 0; i < 32; ++i) {
s[i] ^= t[i];
}
}
@ -199,7 +199,7 @@ void haraka_S(unsigned char *out, unsigned long long outlen,
for (i = 0; i < 64; i++) {
s[i] = 0;
}
haraka_S_absorb(s, 32, in, inlen, 0x1F);
haraka_S_absorb(s, in, inlen, 0x1F);
haraka_S_squeezeblocks(out, outlen / 32, s, 32);
out += (outlen / 32) * 32;
@ -246,6 +246,40 @@ void haraka512_perm(unsigned char *out, const unsigned char *in)
memcpy(out, s, 64);
}
/*
 * Applies the 512-bit Haraka permutation to `in` using the caller-supplied
 * round constants `rc` (entries 0..39 are read) and writes 64 bytes to `out`.
 * The work happens on a private copy of the state, and `out` is written only
 * at the end.
 */
void haraka512_perm_keyed(unsigned char *out, const unsigned char *in, const u128 *rc)
{
    unsigned char state[64], scratch[16];
    const u128 *rk = rc;   /* walks the constant table, four entries per AES layer */
    int round, layer;

    memcpy(state, in, 64);

    for (round = 0; round < 5; ++round) {
        /* two AES layers per round, one constant per 16-byte lane */
        for (layer = 0; layer < 2; ++layer, rk += 4) {
            aesenc(state,      (const unsigned char *)&rk[0]);
            aesenc(state + 16, (const unsigned char *)&rk[1]);
            aesenc(state + 32, (const unsigned char *)&rk[2]);
            aesenc(state + 48, (const unsigned char *)&rk[3]);
        }

        /* mixing layer */
        unpacklo32(scratch,    state,      state + 16);
        unpackhi32(state,      state,      state + 16);
        unpacklo32(state + 16, state + 32, state + 48);
        unpackhi32(state + 32, state + 32, state + 48);
        unpacklo32(state + 48, state,      state + 32);
        unpackhi32(state,      state,      state + 32);
        unpackhi32(state + 32, state + 16, scratch);
        unpacklo32(state + 16, state + 16, scratch);
    }

    memcpy(out, state, 64);
}
void haraka512_port(unsigned char *out, const unsigned char *in)
{
int i;
@ -265,6 +299,25 @@ void haraka512_port(unsigned char *out, const unsigned char *in)
memcpy(out + 24, buf + 48, 8);
}
/*
 * Portable keyed Haraka-512: keyed permutation, feed-forward XOR with the
 * input, then truncation to 32 bytes.
 *
 * out: receives 32 bytes.
 * in:  64 input bytes.
 * rc:  round constants, passed through to haraka512_perm_keyed.
 */
void haraka512_port_keyed(unsigned char *out, const unsigned char *in, const u128 *rc)
{
    unsigned char mixed[64];
    int pos;

    haraka512_perm_keyed(mixed, in, rc);

    /* Feed-forward */
    for (pos = 0; pos < 64; ++pos) {
        mixed[pos] ^= in[pos];
    }

    /* Truncation: keep bytes 8..15, 24..31, 32..39 and 48..55 */
    memcpy(out,      mixed + 8,  8);
    memcpy(out + 8,  mixed + 24, 8);
    memcpy(out + 16, mixed + 32, 8);
    memcpy(out + 24, mixed + 48, 8);
}
void haraka512_perm_zero(unsigned char *out, const unsigned char *in)
{
int i, j;

51
crypto/haraka_portable.h

@ -1,6 +1,54 @@
#ifndef SPX_HARAKA_H
#define SPX_HARAKA_H
#include "immintrin.h"
#define NUMROUNDS 5
#ifdef _WIN32
typedef unsigned long long u64;
#else
typedef unsigned long u64;
#endif
typedef __m128i u128;
extern void aesenc(unsigned char *s, const unsigned char *rk);
/*
 * Same round sequence as a two-lane AES step, but routed through the
 * aesenc() function declared above instead of an AES intrinsic.
 * s0 and s1 are updated in place through their addresses, so they must be
 * lvalues; `rc` is resolved at the expansion site and entries
 * rci .. rci + 3 are read.
 */
#define AES2_EMU(s0, s1, rci) \
aesenc((unsigned char *)&s0, (unsigned char *)&(rc[rci])); \
aesenc((unsigned char *)&s1, (unsigned char *)&(rc[rci + 1])); \
aesenc((unsigned char *)&s0, (unsigned char *)&(rc[rci + 2])); \
aesenc((unsigned char *)&s1, (unsigned char *)&(rc[rci + 3]));
typedef unsigned int uint32_t;
/*
 * Emulation of _mm_unpacklo_epi32: interleaves the two low 32-bit words of
 * a and b, returning { a[0], b[0], a[1], b[1] }.
 *
 * The result is assembled directly inside a __m128i object. The earlier form
 * filled a uint32_t[4] and returned *(__m128i *)array; that array is only
 * guaranteed 4-byte alignment, so the 16-byte vector load could fault when
 * the compiler emitted an aligned move.
 */
static inline __m128i _mm_unpacklo_epi32_emu(__m128i a, __m128i b)
{
    __m128i result;
    uint32_t *dst = (uint32_t *)&result;
    const uint32_t *lhs = (const uint32_t *)&a;
    const uint32_t *rhs = (const uint32_t *)&b;

    dst[0] = lhs[0];
    dst[1] = rhs[0];
    dst[2] = lhs[1];
    dst[3] = rhs[1];
    return result;
}
/*
 * Emulation of _mm_unpackhi_epi32: interleaves the two high 32-bit words of
 * a and b, returning { a[2], b[2], a[3], b[3] }.
 *
 * The result is assembled directly inside a __m128i object. The earlier form
 * filled a uint32_t[4] and returned *(__m128i *)array; that array is only
 * guaranteed 4-byte alignment, so the 16-byte vector load could fault when
 * the compiler emitted an aligned move.
 */
static inline __m128i _mm_unpackhi_epi32_emu(__m128i a, __m128i b)
{
    __m128i result;
    uint32_t *dst = (uint32_t *)&result;
    const uint32_t *lhs = (const uint32_t *)&a;
    const uint32_t *rhs = (const uint32_t *)&b;

    dst[0] = lhs[2];
    dst[1] = rhs[2];
    dst[2] = lhs[3];
    dst[3] = rhs[3];
    return result;
}
/*
 * Two-register mix built on the emulated unpack helpers: s0 receives the
 * low-half interleave of (s0, s1), s1 the high-half interleave.
 * Requires a __m128i variable named `tmp` in scope at the expansion site.
 */
#define MIX2_EMU(s0, s1) \
tmp = _mm_unpacklo_epi32_emu(s0, s1); \
s1 = _mm_unpackhi_epi32_emu(s0, s1); \
s0 = tmp;
/* load constants */
void load_constants_port();
@ -18,6 +66,9 @@ void haraka512_perm(unsigned char *out, const unsigned char *in);
/* Implementation of Haraka-512 */
void haraka512_port(unsigned char *out, const unsigned char *in);
/* Implementation of Haraka-512 */
void haraka512_port_keyed(unsigned char *out, const unsigned char *in, const u128 *rc);
/* Applies the 512-bit Haraka permutation to in, using zero key. */
void haraka512_perm_zero(unsigned char *out, const unsigned char *in);

1013
crypto/tinyformat.h

File diff suppressed because it is too large

146
crypto/uint256.cpp

@ -0,0 +1,146 @@
// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2014 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include "uint256.h"
#include "utilstrencodings.h"
#include <stdio.h>
#include <string.h>
// Builds a blob from a byte vector whose length must equal the blob width;
// the length is checked with assert() only.
template <unsigned int BITS>
base_blob<BITS>::base_blob(const std::vector<unsigned char>& vch)
{
    assert(vch.size() == sizeof(data));
    const unsigned char* src = &vch.front();
    memcpy(data, src, sizeof(data));
}
// Returns the blob as lowercase hex, two digits per byte, emitting the last
// byte of data first (i.e. the bytes are printed in reverse storage order).
template <unsigned int BITS>
std::string base_blob<BITS>::GetHex() const
{
    static const char kHexDigits[] = "0123456789abcdef";
    std::string hex;
    hex.reserve(sizeof(data) * 2);
    for (unsigned int pos = sizeof(data); pos-- > 0;) {
        const unsigned char byte = data[pos];
        hex += kHexDigits[byte >> 4];
        hex += kHexDigits[byte & 0x0f];
    }
    return hex;
}
// Parses a hex string into the blob.
//
// Leading whitespace and an optional "0x"/"0X" prefix are skipped; parsing
// stops at the first character that is not a hex digit. The text is read
// from its last digit backwards, so the final digit pair lands in data[0].
// Digits beyond the blob width (the leftmost ones) are ignored, and bytes
// not covered by the input stay zero.
template <unsigned int BITS>
void base_blob<BITS>::SetHex(const char* psz)
{
    memset(data, 0, sizeof(data));

    // skip leading spaces (cast: passing a negative char to isspace is undefined)
    while (isspace((unsigned char)*psz))
        psz++;

    // skip 0x
    if (psz[0] == '0' && tolower((unsigned char)psz[1]) == 'x')
        psz += 2;

    // Count the hex digits instead of walking a pointer to the end and back;
    // this never forms a pointer in front of the buffer when there are none.
    size_t digits = 0;
    while (::HexDigit(psz[digits]) != -1)
        digits++;

    // hex string to bytes, least significant pair first
    unsigned char* p1 = (unsigned char*)data;
    unsigned char* pend = p1 + WIDTH;
    while (digits > 0 && p1 < pend) {
        *p1 = ::HexDigit(psz[--digits]);
        if (digits > 0) {
            *p1 |= ((unsigned char)::HexDigit(psz[--digits]) << 4);
            p1++;
        }
    }
}
// std::string convenience overload: forwards the string's C buffer to SetHex(const char*).
template <unsigned int BITS>
void base_blob<BITS>::SetHex(const std::string& str)
{
SetHex(str.c_str());
}
// String form of the blob; identical to GetHex().
template <unsigned int BITS>
std::string base_blob<BITS>::ToString() const
{
return (GetHex());
}
// The member templates above are defined in this .cpp rather than in the
// header, so each width used by the project is instantiated explicitly here.
// Explicit instantiations for base_blob<160>
template base_blob<160>::base_blob(const std::vector<unsigned char>&);
template std::string base_blob<160>::GetHex() const;
template std::string base_blob<160>::ToString() const;
template void base_blob<160>::SetHex(const char*);
template void base_blob<160>::SetHex(const std::string&);
// Explicit instantiations for base_blob<256>
template base_blob<256>::base_blob(const std::vector<unsigned char>&);
template std::string base_blob<256>::GetHex() const;
template std::string base_blob<256>::ToString() const;
template void base_blob<256>::SetHex(const char*);
template void base_blob<256>::SetHex(const std::string&);
// Mixes three 32-bit lanes in place. Taken from lookup3, by Bob Jenkins.
// Each step subtracts one lane from another, XORs in a rotation of the
// subtracted lane, then adds the third lane to the subtracted one.
// All arithmetic is modulo 2^32.
static void inline HashMix(uint32_t& a, uint32_t& b, uint32_t& c)
{
    a = (a - c) ^ ((c << 4) | (c >> 28));
    c += b;

    b = (b - a) ^ ((a << 6) | (a >> 26));
    a += c;

    c = (c - b) ^ ((b << 8) | (b >> 24));
    b += a;

    a = (a - c) ^ ((c << 16) | (c >> 16));
    c += b;

    b = (b - a) ^ ((a << 19) | (a >> 13));
    a += c;

    c = (c - b) ^ ((b << 4) | (b >> 28));
    b += a;
}
// Final scrambling of the three 32-bit lanes, in place. Taken from lookup3,
// by Bob Jenkins. Each step XORs one lane into another and then subtracts a
// rotation of the lane that was XORed in. All arithmetic is modulo 2^32.
static void inline HashFinal(uint32_t& a, uint32_t& b, uint32_t& c)
{
    c = (c ^ b) - ((b << 14) | (b >> 18));
    a = (a ^ c) - ((c << 11) | (c >> 21));
    b = (b ^ a) - ((a << 25) | (a >> 7));
    c = (c ^ b) - ((b << 16) | (b >> 16));
    a = (a ^ c) - ((c << 4) | (c >> 28));
    b = (b ^ a) - ((a << 14) | (a >> 18));
    c = (c ^ b) - ((b << 24) | (b >> 8));
}
// Salted 64-bit hash of this blob, built from the HashMix/HashFinal helpers.
// The blob and the salt are each read as eight 32-bit words in host byte
// order, and each word is XORed with the matching salt word before mixing.
uint64_t uint256::GetHash(const uint256& salt) const
{
uint32_t a, b, c;
// View both byte arrays as arrays of 32-bit words.
const uint32_t *pn = (const uint32_t*)data;
const uint32_t *salt_pn = (const uint32_t*)salt.data;
// Seed all three lanes with the same constant plus the blob width in bytes.
a = b = c = 0xdeadbeef + WIDTH;
a += pn[0] ^ salt_pn[0];
b += pn[1] ^ salt_pn[1];
c += pn[2] ^ salt_pn[2];
HashMix(a, b, c);
a += pn[3] ^ salt_pn[3];
b += pn[4] ^ salt_pn[4];
c += pn[5] ^ salt_pn[5];
HashMix(a, b, c);
// Only two words remain, so lane c receives no new input before the final step.
a += pn[6] ^ salt_pn[6];
b += pn[7] ^ salt_pn[7];
HashFinal(a, b, c);
// b forms the high 32 bits of the result, c the low 32 bits.
return ((((uint64_t)b) << 32) | c);
}

164
crypto/uint256.h

@ -0,0 +1,164 @@
// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2014 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_UINT256_H
#define BITCOIN_UINT256_H
#include <assert.h>
#include <cstring>
#include <stdexcept>
#include <stdint.h>
#include <string>
#include <vector>
#ifdef _MSC_VER
# define _ALIGN(x) __declspec(align(x))
#else
# define _ALIGN(x) __attribute__ ((aligned(x)))
#endif
/** Template base class for fixed-sized opaque blobs. */
template<unsigned int BITS>
class base_blob
{
protected:
    enum { WIDTH=BITS/8 };
    uint8_t _ALIGN(4) data[WIDTH];

public:
    /** Creates an all-zero blob. */
    base_blob()
    {
        SetNull();
    }

    /** Creates a blob from a byte vector (defined in uint256.cpp). */
    explicit base_blob(const std::vector<unsigned char>& vch);

    /** True when every byte of the blob is zero. */
    bool IsNull() const
    {
        const uint8_t* cur = data;
        const uint8_t* const stop = data + WIDTH;
        while (cur != stop) {
            if (*cur++ != 0)
                return false;
        }
        return true;
    }

    /** Resets every byte to zero. */
    void SetNull()
    {
        memset(data, 0, sizeof(data));
    }

    /** Comparisons are byte-wise (memcmp) over the raw storage. */
    friend inline bool operator==(const base_blob& a, const base_blob& b)
    {
        return memcmp(a.data, b.data, WIDTH) == 0;
    }
    friend inline bool operator!=(const base_blob& a, const base_blob& b)
    {
        return !(a == b);
    }
    friend inline bool operator<(const base_blob& a, const base_blob& b)
    {
        return memcmp(a.data, b.data, WIDTH) < 0;
    }

    std::string GetHex() const;
    void SetHex(const char* psz);
    void SetHex(const std::string& str);
    std::string ToString() const;

    /** Raw byte range [begin(), end()) over the storage. */
    unsigned char* begin()
    {
        return data;
    }
    unsigned char* end()
    {
        return data + WIDTH;
    }
    const unsigned char* begin() const
    {
        return data;
    }
    const unsigned char* end() const
    {
        return data + WIDTH;
    }

    /** Size of the blob in bytes. */
    unsigned int size() const
    {
        return WIDTH;
    }

    /** Serialized size equals the raw byte size; nType and nVersion are unused. */
    unsigned int GetSerializeSize(int nType, int nVersion) const
    {
        return size();
    }

    /** Writes the raw bytes to the stream; nType and nVersion are unused. */
    template<typename Stream>
    void Serialize(Stream& s, int nType, int nVersion) const
    {
        s.write((char*)data, sizeof(data));
    }

    /** Reads the raw bytes from the stream; nType and nVersion are unused. */
    template<typename Stream>
    void Unserialize(Stream& s, int nType, int nVersion)
    {
        s.read((char*)data, sizeof(data));
    }
};
/** 160-bit opaque blob.
* @note This type is called uint160 for historical reasons only. It is an opaque
* blob of 160 bits and has no integer operations.
*/
class uint160 : public base_blob<160> {
public:
// Default construction goes through base_blob<160>(), which zero-fills the bytes.
uint160() {}
// Implicit conversion from the base blob type; copies the bytes.
uint160(const base_blob<160>& b) : base_blob<160>(b) {}
// Builds from a byte vector; the base constructor asserts that its size matches the blob width.
explicit uint160(const std::vector<unsigned char>& vch) : base_blob<160>(vch) {}
};
/** 256-bit opaque blob.
* @note This type is called uint256 for historical reasons only. It is an
* opaque blob of 256 bits and has no integer operations. Use arith_uint256 if
* those are required.
*/
class uint256 : public base_blob<256> {
public:
    uint256() {}
    uint256(const base_blob<256>& b) : base_blob<256>(b) {}
    explicit uint256(const std::vector<unsigned char>& vch) : base_blob<256>(vch) {}

    /** Cheap hash: simply the first 8 bytes of the blob copied into a 64-bit
     * integer. Suitable only when the contents are already uniformly random;
     * not appropriate when an outside party (e.g. a network adversary) can
     * choose values to trigger worst-case behavior.
     * @note The result is not stable between little and big endian.
     */
    uint64_t GetCheapHash() const
    {
        uint64_t firstWord;
        memcpy(&firstWord, data, sizeof(firstWord));
        return firstWord;
    }

    /** A more secure, salted hash function (defined in uint256.cpp).
     * @note This hash is not stable between little and big endian.
     */
    uint64_t GetHash(const uint256& salt) const;
};
/* uint256 from const char *.
* This is a separate function because the constructor uint256(const char*) can result
* in dangerously catching uint256(0).
*/
// Returns a uint256 filled by SetHex(str); a default-constructed (all-zero)
// value is the starting point.
inline uint256 uint256S(const char *str)
{
uint256 rv;
rv.SetHex(str);
return rv;
}
/* uint256 from std::string.
* This is a separate function because the constructor uint256(const std::string &str) can result
* in dangerously catching uint256(0) via std::string(const char*).
*/
// Returns a uint256 filled by SetHex(str); a default-constructed (all-zero)
// value is the starting point.
inline uint256 uint256S(const std::string& str)
{
uint256 rv;
rv.SetHex(str);
return rv;
}
#endif // BITCOIN_UINT256_H

499
crypto/utilstrencodings.cpp

@ -0,0 +1,499 @@
// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2014 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include "utilstrencodings.h"
#include "tinyformat.h"
#include <cstdlib>
#include <cstring>
#include <errno.h>
#include <limits>
using namespace std;
/**
 * Returns a copy of str containing only whitelisted characters; every other
 * character is dropped (not replaced).
 */
std::string SanitizeString(const std::string& str)
{
    /**
     * safeChars chosen to allow simple messages/URLs/email addresses, but avoid anything
     * even possibly remotely dangerous like & or >
     */
    static std::string safeChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890 .,;_/:?@()");

    std::string strResult;
    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        const char ch = *it;
        if (safeChars.find(ch) == std::string::npos)
            continue;
        strResult += ch;
    }
    return strResult;
}
// Lookup table indexed by byte value: entries for '0'-'9', 'A'-'F' and
// 'a'-'f' hold the digit value 0..15, every other entry is -1.
const signed char p_util_hexdigit[256] =
{ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1,
-1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, };
// Returns the value (0..15) of hex digit c, or -1 when c is not a hex digit.
signed char HexDigit(char c)
{
// Cast to unsigned char so characters with the high bit set index the table correctly.
return p_util_hexdigit[(unsigned char)c];
}
// True when str is non-empty, has an even number of characters, and every
// character is a hex digit according to HexDigit().
bool IsHex(const std::string& str)
{
    for (std::string::size_type pos = 0; pos < str.size(); ++pos)
    {
        if (HexDigit(str[pos]) < 0)
            return false;
    }
    return !str.empty() && (str.size() % 2 == 0);
}
// Converts a hex dump to bytes.
//
// Whitespace is skipped before each byte (not between the two digits of a
// byte). Parsing stops at the first position where a hex digit is expected
// but not found, including the terminating NUL; a trailing single digit
// without a partner is discarded.
std::vector<unsigned char> ParseHex(const char* psz)
{
    std::vector<unsigned char> vch;
    while (true)
    {
        // Cast before isspace: passing a negative char value is undefined
        // behaviour, and plain char is signed on common platforms.
        while (isspace((unsigned char)*psz))
            psz++;
        // high nibble
        signed char c = HexDigit(*psz++);
        if (c == (signed char)-1)
            break;
        unsigned char n = (c << 4);
        // low nibble
        c = HexDigit(*psz++);
        if (c == (signed char)-1)
            break;
        n |= c;
        vch.push_back(n);
    }
    return vch;
}
// std::string convenience overload: forwards the string's C buffer to ParseHex(const char*).
vector<unsigned char> ParseHex(const string& str)
{
return ParseHex(str.c_str());
}
// Base64-encodes len bytes starting at pch, using the alphabet A-Z a-z 0-9 + /
// and '=' padding so the output length is a multiple of four.
std::string EncodeBase64(const unsigned char* pch, size_t len)
{
    static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    std::string encoded;
    encoded.reserve((len+2)/3*4);

    // Whole 3-byte groups map to exactly four output characters.
    size_t pos = 0;
    for (; pos + 3 <= len; pos += 3)
    {
        const int b0 = pch[pos];
        const int b1 = pch[pos + 1];
        const int b2 = pch[pos + 2];
        encoded += pbase64[b0 >> 2];
        encoded += pbase64[((b0 & 3) << 4) | (b1 >> 4)];
        encoded += pbase64[((b1 & 15) << 2) | (b2 >> 6)];
        encoded += pbase64[b2 & 63];
    }

    // One or two leftover bytes: emit the partial group and pad with '='.
    const size_t tail = len - pos;
    if (tail == 1)
    {
        const int b0 = pch[pos];
        encoded += pbase64[b0 >> 2];
        encoded += pbase64[(b0 & 3) << 4];
        encoded += '=';
        encoded += '=';
    }
    else if (tail == 2)
    {
        const int b0 = pch[pos];
        const int b1 = pch[pos + 1];
        encoded += pbase64[b0 >> 2];
        encoded += pbase64[((b0 & 3) << 4) | (b1 >> 4)];
        encoded += pbase64[(b1 & 15) << 2];
        encoded += '=';
    }

    return encoded;
}
// std::string convenience overload: encodes the string's bytes (str.size() of them).
string EncodeBase64(const string& str)
{
return EncodeBase64((const unsigned char*)str.c_str(), str.size());
}
// Decodes a NUL-terminated base64 string.
//
// Characters are consumed until the first one whose table entry is -1 (which
// includes '=' and the terminating NUL); the bytes decoded up to that point
// are returned. When pfInvalid is non-null it is set to false up front and to
// true if the leftover bits are non-zero or the '=' padding does not match
// the number of characters consumed.
vector<unsigned char> DecodeBase64(const char* p, bool* pfInvalid)
{
// Maps a byte value to its 6-bit base64 value, or -1 for non-alphabet bytes.
static const int decode64_table[256] =
{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
-1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
if (pfInvalid)
*pfInvalid = false;
vector<unsigned char> vchRet;
vchRet.reserve(strlen(p)*3/4);
// mode = number of characters consumed in the current group of four;
// left = bits carried over from earlier characters of the group.
int mode = 0;
int left = 0;
while (1)
{
int dec = decode64_table[(unsigned char)*p];
if (dec == -1) break;
p++;
switch (mode)
{
case 0: // we have no bits and get 6
left = dec;
mode = 1;
break;
case 1: // we have 6 bits and keep 4
vchRet.push_back((left<<2) | (dec>>4));
left = dec & 15;
mode = 2;
break;
case 2: // we have 4 bits and get 6, we keep 2
vchRet.push_back((left<<4) | (dec>>2));
left = dec & 3;
mode = 3;
break;
case 3: // we have 2 bits and get 6
vchRet.push_back((left<<6) | dec);
mode = 0;
break;
}
}
// Validation only runs when the caller asked for it; p now points at the
// first character that was not part of the base64 alphabet.
if (pfInvalid)
switch (mode)
{
case 0: // 4n base64 characters processed: ok
break;
case 1: // 4n+1 base64 character processed: impossible
*pfInvalid = true;
break;
case 2: // 4n+2 base64 characters processed: require '=='
if (left || p[0] != '=' || p[1] != '=' || decode64_table[(unsigned char)p[2]] != -1)
*pfInvalid = true;
break;
case 3: // 4n+3 base64 characters processed: require '='
if (left || p[0] != '=' || decode64_table[(unsigned char)p[1]] != -1)
*pfInvalid = true;
break;
}
return vchRet;
}
string DecodeBase64(const string& str)
{
vector<unsigned char> vchRet = DecodeBase64(str.c_str());
return (vchRet.size() == 0) ? string() : string((const char*)&vchRet[0], vchRet.size());
}
// Base32-encodes len bytes starting at pch, using the lowercase alphabet
// a-z 2-7 and '=' padding so the output length is a multiple of eight.
std::string EncodeBase32(const unsigned char* pch, size_t len)
{
    static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";

    std::string encoded;
    encoded.reserve((len+4)/5*8);

    // Bit accumulator: bytes are shifted in from the right and 5-bit symbols
    // are taken from the top of the pending bits.
    unsigned int pending = 0;
    int pendingBits = 0;
    for (size_t pos = 0; pos < len; ++pos)
    {
        pending = (pending << 8) | pch[pos];
        pendingBits += 8;
        while (pendingBits >= 5)
        {
            pendingBits -= 5;
            encoded += pbase32[(pending >> pendingBits) & 31];
        }
        // Drop the bits already emitted so the accumulator stays small.
        pending &= (1u << pendingBits) - 1;
    }

    if (pendingBits > 0)
    {
        // Left-align the remaining bits in a final symbol, then pad the group.
        encoded += pbase32[(pending << (5 - pendingBits)) & 31];
        while (encoded.size() % 8 != 0)
            encoded += '=';
    }

    return encoded;
}
// std::string convenience overload: encodes the string's bytes (str.size() of them).
string EncodeBase32(const string& str)
{
return EncodeBase32((const unsigned char*)str.c_str(), str.size());
}
// Decodes a NUL-terminated base32 string; both upper- and lowercase letters
// are accepted by the table.
//
// Characters are consumed until the first one whose table entry is -1 (which
// includes '=' and the terminating NUL); the bytes decoded up to that point
// are returned. When pfInvalid is non-null it is set to false up front and to
// true if the leftover bits are non-zero or the '=' padding does not match
// the number of characters consumed.
vector<unsigned char> DecodeBase32(const char* p, bool* pfInvalid)
{
// Maps a byte value to its 5-bit base32 value, or -1 for non-alphabet bytes.
static const int decode32_table[256] =
{
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
-1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2,
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
if (pfInvalid)
*pfInvalid = false;
vector<unsigned char> vchRet;
vchRet.reserve((strlen(p))*5/8);
// mode = number of characters consumed in the current group of eight;
// left = bits carried over from earlier characters of the group.
int mode = 0;
int left = 0;
while (1)
{
int dec = decode32_table[(unsigned char)*p];
if (dec == -1) break;
p++;
switch (mode)
{
case 0: // we have no bits and get 5
left = dec;
mode = 1;
break;
case 1: // we have 5 bits and keep 2
vchRet.push_back((left<<3) | (dec>>2));
left = dec & 3;
mode = 2;
break;
case 2: // we have 2 bits and keep 7
left = left << 5 | dec;
mode = 3;
break;
case 3: // we have 7 bits and keep 4
vchRet.push_back((left<<1) | (dec>>4));
left = dec & 15;
mode = 4;
break;
case 4: // we have 4 bits, and keep 1
vchRet.push_back((left<<4) | (dec>>1));
left = dec & 1;
mode = 5;
break;
case 5: // we have 1 bit, and keep 6
left = left << 5 | dec;
mode = 6;
break;
case 6: // we have 6 bits, and keep 3
vchRet.push_back((left<<2) | (dec>>3));
left = dec & 7;
mode = 7;
break;
case 7: // we have 3 bits, and keep 0
vchRet.push_back((left<<5) | dec);
mode = 0;
break;
}
}
// Validation only runs when the caller asked for it; p now points at the
// first character that was not part of the base32 alphabet.
if (pfInvalid)
switch (mode)
{
case 0: // 8n base32 characters processed: ok
break;
case 1: // 8n+1 base32 characters processed: impossible
case 3: // +3
case 6: // +6
*pfInvalid = true;
break;
case 2: // 8n+2 base32 characters processed: require '======'
if (left || p[0] != '=' || p[1] != '=' || p[2] != '=' || p[3] != '=' || p[4] != '=' || p[5] != '=' || decode32_table[(unsigned char)p[6]] != -1)
*pfInvalid = true;
break;
case 4: // 8n+4 base32 characters processed: require '===='
if (left || p[0] != '=' || p[1] != '=' || p[2] != '=' || p[3] != '=' || decode32_table[(unsigned char)p[4]] != -1)
*pfInvalid = true;
break;
case 5: // 8n+5 base32 characters processed: require '==='
if (left || p[0] != '=' || p[1] != '=' || p[2] != '=' || decode32_table[(unsigned char)p[3]] != -1)
*pfInvalid = true;
break;
case 7: // 8n+7 base32 characters processed: require '='
if (left || p[0] != '=' || decode32_table[(unsigned char)p[1]] != -1)
*pfInvalid = true;
break;
}
return vchRet;
}
string DecodeBase32(const string& str)
{
vector<unsigned char> vchRet = DecodeBase32(str.c_str());
return (vchRet.size() == 0) ? string() : string((const char*)&vchRet[0], vchRet.size());
}
// Strictly parses `str` as a base-10 signed 32-bit integer.
//
// Returns true only if at least one character was converted, the conversion
// consumed the whole string (including any bytes after an embedded NUL), no
// range error was reported by strtol, and the value fits in int32_t.
// If `out` is non-NULL it receives the (possibly truncated) strtol result
// even when false is returned.
bool ParseInt32(const std::string& str, int32_t *out)
{
    const char* const begin = str.c_str();
    char *endp = NULL;
    errno = 0; // strtol will not set errno if valid
    const long int n = strtol(begin, &endp, 10);
    if (out) *out = (int32_t)n;
    // endp == begin means nothing was converted (e.g. the empty string, which
    // would otherwise look like a fully consumed input).
    // endp must also land exactly on the end of the std::string, so a string
    // containing an embedded NUL is not accepted as "fully parsed".
    // Note that strtol returns a *long int*, so even if strtol doesn't report a over/underflow
    // we still have to check that the returned value is within the range of an *int32_t*. On 64-bit
    // platforms the size of these types may be different.
    return endp != begin &&
        endp == begin + str.size() &&
        !errno &&
        n >= std::numeric_limits<int32_t>::min() &&
        n <= std::numeric_limits<int32_t>::max();
}
// Re-flows `in` word by word (words are separated by runs of ' ').
// Words on a line are joined by a single space; when appending the next word
// would push the running column count past `width`, a newline followed by
// `indent` spaces is emitted instead. A word that starts a line is never
// split, however long it is.
std::string FormatParagraph(const std::string in, size_t width, size_t indent)
{
    std::stringstream wrapped;
    size_t lineLen = 0; // running column count; includes one separator per word
    for (size_t pos = 0; pos < in.size(); )
    {
        // Skip the separator run in front of the next word
        const size_t wordStart = in.find_first_not_of(' ', pos);
        if (wordStart == std::string::npos)
            break;

        // The word extends to the next space or to the end of the input
        size_t wordEnd = in.find_first_of(' ', wordStart);
        if (wordEnd == std::string::npos)
            wordEnd = in.size();
        const size_t wordLen = wordEnd - wordStart;

        if (lineLen > 0)
        {
            if (lineLen + wordLen > width)
            {
                // Does not fit: start a new, indented line
                wrapped << '\n' << std::string(indent, ' ');
                lineLen = 0;
            }
            else
            {
                wrapped << ' ';
            }
        }

        wrapped << in.substr(wordStart, wordLen);
        lineLen += wordLen + 1;
        pos = wordEnd;
    }
    return wrapped.str();
}
// Formats a 64-bit signed integer through strprintf with the "%d" conversion.
std::string i64tostr(int64_t n)
{
    const std::string formatted = strprintf("%d", n);
    return formatted;
}
// Formats an int through strprintf with the "%d" conversion.
std::string itostr(int n)
{
    const std::string formatted = strprintf("%d", n);
    return formatted;
}
// Converts the leading base-10 integer in the C string `psz` to a 64-bit
// value, using _atoi64 on MSVC and strtoll elsewhere. No error is reported
// for malformed input.
int64_t atoi64(const char* psz)
{
#ifdef _MSC_VER
    const int64_t parsed = _atoi64(psz);
#else
    const int64_t parsed = strtoll(psz, NULL, 10);
#endif
    return parsed;
}
// std::string flavour of atoi64: converts the leading base-10 integer of
// `str` to a 64-bit value (_atoi64 on MSVC, strtoll elsewhere).
int64_t atoi64(const std::string& str)
{
    const char* const text = str.c_str();
#ifdef _MSC_VER
    return _atoi64(text);
#else
    return strtoll(text, NULL, 10);
#endif
}
// std::string overload that forwards to the C library atoi(const char*).
int atoi(const std::string& str)
{
    const char* const text = str.c_str();
    return atoi(text);
}

98
crypto/utilstrencodings.h

@ -0,0 +1,98 @@
// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2014 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
/**
* Utilities for converting data from/to strings.
*/
#ifndef BITCOIN_UTILSTRENCODINGS_H
#define BITCOIN_UTILSTRENCODINGS_H
#include <stdint.h>
#include <string>
#include <vector>
// Address of object `a` viewed as a char*.
#define BEGIN(a) ((char*)&(a))
// Address one object past `a` (i.e. of (&a)[1]) viewed as a char*; pairs with BEGIN(a).
#define END(a) ((char*)&((&(a))[1]))
// Same as BEGIN(a) but as unsigned char*.
#define UBEGIN(a) ((unsigned char*)&(a))
// Same as END(a) but as unsigned char*.
#define UEND(a) ((unsigned char*)&((&(a))[1]))
// Element count of a true array (sizeof-based, so it must not be applied to a pointer).
#define ARRAYLEN(array) (sizeof(array)/sizeof((array)[0]))
/** This is needed because the foreach macro can't get over the comma in pair<t1, t2> */
#define PAIRTYPE(t1, t2) std::pair<t1, t2>
// String sanitising and hex helpers.
std::string SanitizeString(const std::string& str);
std::vector<unsigned char> ParseHex(const char* psz);
std::vector<unsigned char> ParseHex(const std::string& str);
signed char HexDigit(char c);
bool IsHex(const std::string& str);
// Base64 codec. NOTE(review): pfInvalid presumably mirrors the Base32 decoder
// (set to true on malformed input) - confirm against the definition.
std::vector<unsigned char> DecodeBase64(const char* p, bool* pfInvalid = NULL);
std::string DecodeBase64(const std::string& str);
std::string EncodeBase64(const unsigned char* pch, size_t len);
std::string EncodeBase64(const std::string& str);
// Base32 codec. The pointer-based decoder stops at the first character outside
// the Base32 alphabet and, when pfInvalid is non-NULL, sets it to true if the
// input was not a well-formed, correctly padded encoding.
std::vector<unsigned char> DecodeBase32(const char* p, bool* pfInvalid = NULL);
std::string DecodeBase32(const std::string& str);
std::string EncodeBase32(const unsigned char* pch, size_t len);
std::string EncodeBase32(const std::string& str);
// Integer <-> decimal string conversions. The ato* functions perform no error
// reporting; use ParseInt32 when malformed input must be detected.
std::string i64tostr(int64_t n);
std::string itostr(int n);
int64_t atoi64(const char* psz);
int64_t atoi64(const std::string& str);
int atoi(const std::string& str);
/**
* Convert string to signed 32-bit integer with strict parse error feedback.
* @returns true if the entire string could be parsed as valid integer,
* false if not the entire string could be parsed or when overflow or underflow occurred.
*/
bool ParseInt32(const std::string& str, int32_t *out);
/**
 * Render the bytes in [itbegin, itend) as lowercase hexadecimal, two digits
 * per element. With fSpaces set, consecutive bytes are separated by a single
 * space.
 */
template<typename T>
std::string HexStr(const T itbegin, const T itend, bool fSpaces=false)
{
    static const char digits[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
                                     '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
    std::string hex;
    // Worst case is two digits plus a separator per byte
    hex.reserve((itend-itbegin)*3);
    T cursor = itbegin;
    while (cursor < itend)
    {
        const unsigned char byte = (unsigned char)(*cursor);
        if (fSpaces && cursor != itbegin)
            hex.push_back(' ');
        hex.push_back(digits[byte>>4]);
        hex.push_back(digits[byte&15]);
        ++cursor;
    }
    return hex;
}
/**
 * Container overload: hex-encodes everything between vch.begin() and
 * vch.end() using the iterator-pair overload.
 */
template<typename T>
inline std::string HexStr(const T& vch, bool fSpaces=false)
{
    const std::string hex = HexStr(vch.begin(), vch.end(), fSpaces);
    return hex;
}
/**
* Format a paragraph of text to a fixed width, adding spaces for
* indentation to any added line.
*/
std::string FormatParagraph(const std::string in, size_t width=79, size_t indent=0);
/**
 * Equality test intended to resist timing attacks: every element of `a` is
 * visited regardless of where the first mismatch occurs, so the running time
 * is proportional to the length of the first argument.
 * Differences (including a length mismatch) are OR-ed into an accumulator
 * that is only inspected at the end.
 */
template <typename T>
bool TimingResistantEqual(const T& a, const T& b)
{
    const size_t lenA = a.size();
    const size_t lenB = b.size();
    // An empty `b` cannot be indexed below; it only equals an empty `a`.
    if (lenB == 0)
        return lenA == 0;
    size_t diff = lenA ^ lenB;
    for (size_t idx = 0; idx != lenA; ++idx)
    {
        // idx % lenB keeps the read inside `b` when `a` is the longer one
        diff |= a[idx] ^ b[idx % lenB];
    }
    return diff == 0;
}
#endif // BITCOIN_UTILSTRENCODINGS_H

355
crypto/verus_clhash.cpp

@ -0,0 +1,355 @@
/*
* This uses variations of the clhash algorithm for Verus Coin, licensed
* with the Apache-2.0 open source license.
*
* Copyright (c) 2018 Michael Toutonghi
* Distributed under the Apache 2.0 software license, available in the original form for clhash
* here: https://github.com/lemire/clhash/commit/934da700a2a54d8202929a826e2763831bd43cf7#diff-9879d6db96fd29134fc802214163b95a
*
* Original CLHash code and any portions herein, (C) 2017, 2018 Daniel Lemire and Owen Kaser
* Faster 64-bit universal hashing
* using carry-less multiplications, Journal of Cryptographic Engineering (to appear)
*
* Best used on recent x64 processors (Haswell or better).
*
* This implements an intermediate step in the last part of a Verus block hash. The intent of this step
* is to more effectively equalize FPGAs over GPUs and CPUs.
*
**/
#include "verus_hash.h"
#include <boost/thread.hpp>
#include <assert.h>
#include <string.h>
#include <x86intrin.h>
#ifdef __WIN32
#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno)
#endif
// Per-thread holders used by the hasher: one for the key buffer and one for
// the descriptor that accompanies it. Each holder releases the pointer it
// owns when it is reset or destroyed.
thread_local thread_specific_ptr verusclhasher_key;
thread_local thread_specific_ptr verusclhasher_descr;
#ifdef _WIN32
// Workaround for a mingw/gcc bug on Windows where the destructor of a
// thread-local object is entered with a garbage `this` pointer. Because `this`
// cannot be trusted, the destructor ignores it and instead releases every
// thread-local holder this file knows about. The list is hard-coded rather
// than tracked dynamically.
thread_specific_ptr::~thread_specific_ptr() {
    thread_specific_ptr *const holders[] = { &verusclhasher_key, &verusclhasher_descr };
    for (size_t i = 0; i < sizeof(holders) / sizeof(holders[0]); i++)
    {
        if (holders[i]->ptr)
        {
            holders[i]->reset();
        }
    }
}
#endif
// Cached result of the CPU capability probe. The 0x80 bit marks "not probed
// yet"; IsCPUVerusOptimized() replaces the value with 0 or 1 on first use.
int __cpuverusoptimized = 0x80;
// Carry-less multiplication of `length` by `keylength`, with no modular
// reduction. Both values are packed into one vector (length in the low lane,
// keylength in the high lane) and selector 0x10 multiplies the low lane of the
// first operand by the high lane of the second.
static inline __m128i lazyLengthHash(uint64_t keylength, uint64_t length) {
    const __m128i packed = _mm_set_epi64x(keylength, length);
    return _mm_clmulepi64_si128(packed, packed, 0x10);
}
// Reduces the 128-bit carry-less product A modulo the degree-64 polynomial
// (64,4,3,1,0). Only the low 64 bits of the returned vector are meaningful;
// the high 64 bits contain garbage (see precompReduction64).
static inline __m128i precompReduction64_si128( __m128i A) {
    // Low terms of the irreducible polynomial: x^4 + x^3 + x + 1
    const __m128i poly = _mm_cvtsi64_si128((1U<<4)+(1U<<3)+(1U<<1)+(1U<<0));
    // High half of A multiplied by the polynomial's low terms
    const __m128i folded = _mm_clmulepi64_si128( A, poly, 0x01);
    // Precomputed 16-entry table, indexed byte-wise by the high half of `folded`
    const __m128i table = _mm_setr_epi8(0, 27, 54, 45, 108, 119, 90, 65, (char)216, (char)195, (char)238, (char)245, (char)180, (char)175, (char)130, (char)153);
    const __m128i overflow = _mm_shuffle_epi8(table, _mm_srli_si128(folded,8));
    const __m128i partial = _mm_xor_si128(folded,A);
    return _mm_xor_si128(overflow,partial); /// WARNING: HIGH 64 BITS CONTAIN GARBAGE
}
// Reduces A to 64 bits and returns the (only meaningful) low lane as a scalar.
static inline uint64_t precompReduction64( __m128i A) {
    const __m128i reduced = precompReduction64_si128(A);
    return _mm_cvtsi128_si64(reduced);
}
// verus intermediate hash extra
//
// Runs 32 rounds that mix the four 16-byte words of `buf` with the key held in
// `randomsource` and returns the resulting 128-bit accumulator (no modular
// reduction is applied here). The key is MUTATED in place: every round stores
// new values into the two key slots it selected.
//
//   randomsource - key buffer; accessed with aligned 128-bit loads and stores
//   buf          - four 16-byte input words; accessed with aligned loads
//   keyMask      - key mask expressed in bytes (converted to __m128i units below)
//
// Each round takes the low 64 bits of the accumulator as `selector`, which
// chooses two key slots (prand, prandex), a starting input word (pbuf) and,
// through bits 2..4, one of the eight mixing variants in the switch.
// The expression pbuf - (((selector & 1) << 1) - 1) used throughout addresses
// the neighbouring input word: in effect pbuf - 1 when bit 0 of selector is
// set and pbuf + 1 otherwise, which stays inside buf[0..3] because pbuf was
// offset by (selector & 3).
// The exact statement order determines the resulting hash value, so the code
// must not be reordered.
static __m128i __verusclmulwithoutreduction64alignedrepeat(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask)
{
__m128i const *pbuf;
// divide key mask by 16 from bytes to __m128i
keyMask >>= 4;
// the random buffer must have at least 32 16-byte words after the keymask to work with this
// algorithm. we take the value from the last element inside the keyMask + 2, as that will never
// be used to xor into the accumulator before it is hashed with other values first
__m128i acc = _mm_load_si128(randomsource + (keyMask + 2));
for (int64_t i = 0; i < 32; i++)
{
const uint64_t selector = _mm_cvtsi128_si64(acc);
// get two random locations in the key, which will be mutated and swapped
__m128i *prand = randomsource + ((selector >> 5) & keyMask);
__m128i *prandex = randomsource + ((selector >> 32) & keyMask);
// select random start and order of pbuf processing
pbuf = buf + (selector & 3);
switch (selector & 0x1c)
{
// variant 0: two carry-less multiply steps (prandex with the neighbour word, then prand with pbuf);
// both key slots are rewritten
case 0:
{
const __m128i temp1 = _mm_load_si128(prandex);
const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i add1 = _mm_xor_si128(temp1, temp2);
const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
acc = _mm_xor_si128(clprod1, acc);
const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);
const __m128i temp12 = _mm_load_si128(prand);
_mm_store_si128(prand, tempa2);
const __m128i temp22 = _mm_load_si128(pbuf);
const __m128i add12 = _mm_xor_si128(temp12, temp22);
const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10);
acc = _mm_xor_si128(clprod12, acc);
const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
_mm_store_si128(prandex, tempb2);
break;
}
// variant 4: two carry-less multiplies on the first half (keyed and unkeyed input), plain xor on the second half
case 4:
{
const __m128i temp1 = _mm_load_si128(prand);
const __m128i temp2 = _mm_load_si128(pbuf);
const __m128i add1 = _mm_xor_si128(temp1, temp2);
const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
acc = _mm_xor_si128(clprod1, acc);
const __m128i clprod2 = _mm_clmulepi64_si128(temp2, temp2, 0x10);
acc = _mm_xor_si128(clprod2, acc);
const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);
const __m128i temp12 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa2);
const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i add12 = _mm_xor_si128(temp12, temp22);
acc = _mm_xor_si128(add12, acc);
const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
_mm_store_si128(prand, tempb2);
break;
}
// variant 8: plain xor on the first half, two carry-less multiplies on the second half
case 8:
{
const __m128i temp1 = _mm_load_si128(prandex);
const __m128i temp2 = _mm_load_si128(pbuf);
const __m128i add1 = _mm_xor_si128(temp1, temp2);
acc = _mm_xor_si128(add1, acc);
const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);
const __m128i temp12 = _mm_load_si128(prand);
_mm_store_si128(prand, tempa2);
const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i add12 = _mm_xor_si128(temp12, temp22);
const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10);
acc = _mm_xor_si128(clprod12, acc);
const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10);
acc = _mm_xor_si128(clprod22, acc);
const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
_mm_store_si128(prandex, tempb2);
break;
}
// variant 0xc: folds a signed 64-bit modulo into the accumulator, then either mixes again or just swaps the key slots
case 0xc:
{
const __m128i temp1 = _mm_load_si128(prand);
const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i add1 = _mm_xor_si128(temp1, temp2);
// cannot be zero here
// (bits 2 and 3 of selector are set in this case, so its low 32 bits are non-zero)
const int32_t divisor = (uint32_t)selector;
acc = _mm_xor_si128(add1, acc);
const int64_t dividend = _mm_cvtsi128_si64(acc);
const __m128i modulo = _mm_cvtsi32_si128(dividend % divisor);
acc = _mm_xor_si128(modulo, acc);
const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);
if (dividend & 1)
{
const __m128i temp12 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa2);
const __m128i temp22 = _mm_load_si128(pbuf);
const __m128i add12 = _mm_xor_si128(temp12, temp22);
const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10);
acc = _mm_xor_si128(clprod12, acc);
const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10);
acc = _mm_xor_si128(clprod22, acc);
const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
_mm_store_si128(prand, tempb2);
}
else
{
const __m128i tempb3 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa2);
_mm_store_si128(prand, tempb3);
}
break;
}
// variant 0x10: three AES2/MIX2 rounds over two input words, keyed from the key material starting at prand
case 0x10:
{
// a few AES operations
// `rc` and `tmp` are not referenced directly below; they are presumably consumed by the
// AES2 / MIX2 macros (MIX2 use of tmp is stated in the 0x14 case) - confirm in haraka.h
const __m128i *rc = prand;
__m128i tmp;
__m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
__m128i temp2 = _mm_load_si128(pbuf);
AES2(temp1, temp2, 0);
MIX2(temp1, temp2);
AES2(temp1, temp2, 4);
MIX2(temp1, temp2);
AES2(temp1, temp2, 8);
MIX2(temp1, temp2);
acc = _mm_xor_si128(temp2, _mm_xor_si128(temp1, acc));
const __m128i tempa1 = _mm_load_si128(prand);
const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1);
const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2);
const __m128i tempa4 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa3);
_mm_store_si128(prand, tempa4);
break;
}
// variant 0x14: data-dependent loop of 1..8 iterations, each either a carry-less multiply or an AES2/MIX2 step
case 0x14:
{
// we'll just call this one the monkins loop, inspired by Chris
const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1);
__m128i tmp; // used by MIX2
uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times
__m128i *rc = prand;
uint64_t aesroundoffset = 0;
__m128i onekey;
do
{
// NOTE(review): 0x10000000 is an int literal, so for rounds >= 3 this shift reaches or passes
// the sign bit of a 32-bit int before the result is widened for the & with selector.
// Left as-is because altering the expression would alter the hash; confirm the intended
// semantics before touching it.
if (selector & (0x10000000 << rounds))
{
onekey = _mm_load_si128(rc++);
const __m128i temp2 = _mm_load_si128(rounds & 1 ? pbuf : buftmp);
const __m128i add1 = _mm_xor_si128(onekey, temp2);
const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
acc = _mm_xor_si128(clprod1, acc);
}
else
{
onekey = _mm_load_si128(rc++);
__m128i temp2 = _mm_load_si128(rounds & 1 ? buftmp : pbuf);
AES2(onekey, temp2, aesroundoffset);
aesroundoffset += 4;
MIX2(onekey, temp2);
acc = _mm_xor_si128(onekey, acc);
acc = _mm_xor_si128(temp2, acc);
}
} while (rounds--);
const __m128i tempa1 = _mm_load_si128(prand);
const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1);
const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2);
const __m128i tempa4 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa3);
_mm_store_si128(prand, tempa4);
break;
}
// variant 0x18: single carry-less multiply of (neighbour word xor prand), then the key slots are swapped
case 0x18:
{
const __m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
const __m128i temp2 = _mm_load_si128(prand);
const __m128i add1 = _mm_xor_si128(temp1, temp2);
const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
acc = _mm_xor_si128(clprod1, acc);
const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp2);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp2);
const __m128i tempb3 = _mm_load_si128(prandex);
_mm_store_si128(prandex, tempa2);
_mm_store_si128(prand, tempb3);
break;
}
// variant 0x1c: carry-less multiply of (pbuf xor prandex), then the old prand value is xored straight into acc
case 0x1c:
{
const __m128i temp1 = _mm_load_si128(pbuf);
const __m128i temp2 = _mm_load_si128(prandex);
const __m128i add1 = _mm_xor_si128(temp1, temp2);
const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
acc = _mm_xor_si128(clprod1, acc);
const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp2);
const __m128i tempa2 = _mm_xor_si128(tempa1, temp2);
const __m128i tempa3 = _mm_load_si128(prand);
_mm_store_si128(prand, tempa2);
acc = _mm_xor_si128(tempa3, acc);
const __m128i tempb1 = _mm_mulhrs_epi16(acc, tempa3);
const __m128i tempb2 = _mm_xor_si128(tempb1, tempa3);
_mm_store_si128(prandex, tempb2);
break;
}
}
}
return acc;
}
// Hashes exactly 64 bytes: runs the key-mutating mixing rounds over `buf`,
// xors in the carry-less length term for (key length 1024, data length 64) and
// reduces the result to a 64-bit hash value.
// `random` is the key buffer (modified in place) and `keyMask` the key mask in
// bytes. Both `random` and `buf` are read with aligned 128-bit loads.
uint64_t verusclhash(void * random, const unsigned char buf[64], uint64_t keyMask) {
    __m128i *const key = (__m128i *)random;
    const __m128i *const input = (const __m128i *)buf;
    const __m128i mixed = __verusclmulwithoutreduction64alignedrepeat(key, input, keyMask);
    const __m128i lengthTerm = lazyLengthHash(1024, 64);
    return precompReduction64(_mm_xor_si128(mixed, lengthTerm));
}
#ifdef __WIN32
#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno)
#endif
// Allocates `bufSize` bytes aligned to sizeof(__m256i) through posix_memalign
// (a macro wrapper on Windows). Returns the buffer, or NULL if the allocation
// failed.
void *alloc_aligned_buffer(uint64_t bufSize)
{
    void *buffer = NULL;
    const int failed = posix_memalign(&buffer, sizeof(__m256i), bufSize);
    return failed ? NULL : buffer;
}

239
crypto/verus_clhash.h

@ -0,0 +1,239 @@
/*
* This uses variations of the clhash algorithm for Verus Coin, licensed
* with the Apache-2.0 open source license.
*
* Copyright (c) 2018 Michael Toutonghi
* Distributed under the Apache 2.0 software license, available in the original form for clhash
* here: https://github.com/lemire/clhash/commit/934da700a2a54d8202929a826e2763831bd43cf7#diff-9879d6db96fd29134fc802214163b95a
*
* CLHash is a very fast hashing function that uses the
* carry-less multiplication and SSE instructions.
*
* Original CLHash code (C) 2017, 2018 Daniel Lemire and Owen Kaser
* Faster 64-bit universal hashing
* using carry-less multiplications, Journal of Cryptographic Engineering (to appear)
*
* Best used on recent x64 processors (Haswell or better).
*
**/
#ifndef INCLUDE_VERUS_CLHASH_H
#define INCLUDE_VERUS_CLHASH_H
#ifndef _WIN32
#include <cpuid.h>
#else
#include <intrin.h>
#endif // !WIN32
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include <assert.h>
#include <boost/thread.hpp>
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _WIN32
#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno)
typedef unsigned char u_char;
#endif
// Compile-time constants for the VerusHash 2.0 key and solution format.
enum {
// Verus Key size must include the equivalent size of a Haraka key
// after the first part.
// Any excess over a power of 2 will not get mutated, and any excess over
// power of 2 + Haraka sized key will not be used
// (1024 * 8 + 40 * 16 = 8832 bytes: an 8 KiB power-of-two part plus 40 16-byte words)
VERUSKEYSIZE=1024 * 8 + (40 * 16),
VERUSHHASH_SOLUTION_VERSION = 1
};
// Descriptor stored alongside the per-thread key buffer.
struct verusclhash_descr
{
// NOTE(review): presumably the seed the current key was generated from; it is not
// written anywhere in this header - confirm against the callers.
uint256 seed;
// Size in bytes the key buffer was set up for; verusclhasher's constructor compares it
// with the requested size to decide whether the buffer must be reallocated.
uint32_t keySizeInBytes;
};
// Minimal owning holder for one heap pointer, used for the thread-local hasher
// state. The held pointer is released with std::free() when it is replaced or
// when the holder is destroyed.
// NOTE(review): std::free() is used regardless of how the pointer was
// allocated, so callers should only hand over memory that may be released
// that way - confirm for each allocation site.
struct thread_specific_ptr {
    void *ptr;

    thread_specific_ptr() : ptr(NULL) {}

    // Takes ownership of `newptr`. The previously held pointer is freed unless
    // it is NULL or identical to `newptr`.
    void reset(void *newptr = NULL)
    {
        const bool mustRelease = (ptr != NULL) && (ptr != newptr);
        if (mustRelease)
        {
            std::free(ptr);
        }
        ptr = newptr;
    }

    // Returns the held pointer without giving up ownership.
    void *get() { return ptr; }

#ifdef _WIN32 // horrible MingW and gcc thread local storage bug workaround
    // Defined out of line for Windows builds.
    ~thread_specific_ptr();
#else
    ~thread_specific_ptr() {
        reset();
    }
#endif
};
extern thread_local thread_specific_ptr verusclhasher_key;
extern thread_local thread_specific_ptr verusclhasher_descr;
extern int __cpuverusoptimized;
// Reports whether the CPU advertises AVX, AES and PCLMUL (CPUID leaf 1, ECX).
// The answer is cached in __cpuverusoptimized: while its 0x80 bit is set the
// CPU has not been probed yet; afterwards the variable holds 0 or 1.
inline bool IsCPUVerusOptimized()
{
    if ((__cpuverusoptimized & 0x80) == 0)
    {
        // Already probed (or forced): return the cached answer
        return __cpuverusoptimized;
    }
#ifdef _WIN32
    #define bit_AVX (1 << 28)
    #define bit_AES (1 << 25)
    #define bit_PCLMUL (1 << 1)
    // https://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/
    // bool cpuAVXSuport = cpuInfo[2] & (1 << 28) || false;
    int cpuInfo[4];
    __cpuid(cpuInfo, 1);
    const int required = bit_AVX | bit_AES | bit_PCLMUL;
    __cpuverusoptimized = ((cpuInfo[2] & required) == required);
#else
    unsigned int eax,ebx,ecx,edx;
    const unsigned int required = bit_AVX | bit_AES | bit_PCLMUL;
    // A failed CPUID query counts as "not optimized"
    __cpuverusoptimized = __get_cpuid(1,&eax,&ebx,&ecx,&edx) && ((ecx & required) == required);
#endif //WIN32
    return __cpuverusoptimized;
}
// Overrides the cached CPU probe result: subsequent IsCPUVerusOptimized()
// calls return `trueorfalse` without querying the CPU.
inline void ForceCPUVerusOptimized(bool trueorfalse)
{
    __cpuverusoptimized = trueorfalse ? 1 : 0;
}
// Intrinsics-based and portable implementations of the 64-byte hash; verusclhasher selects
// one of them based on IsCPUVerusOptimized(). `random` is the key buffer, `buf` the 64-byte
// input and `keyMask` the key mask in bytes.
uint64_t verusclhash(void * random, const unsigned char buf[64], uint64_t keyMask);
uint64_t verusclhash_port(void * random, const unsigned char buf[64], uint64_t keyMask);
// Returns a buffer of bufSize bytes aligned to sizeof(__m256i), or NULL if allocation fails.
void *alloc_aligned_buffer(uint64_t bufSize);
#ifdef __cplusplus
} // extern "C"
#endif
#ifdef __cplusplus
#include <new>
#include <string>
#include <vector>
// special high speed hasher for VerusHash 2.0
struct verusclhasher {
uint64_t keySizeInBytes;
uint64_t keyMask;
uint64_t (*verusclhashfunction)(void * random, const unsigned char buf[64], uint64_t keyMask);
inline uint64_t keymask(uint64_t keysize)
{
int i = 0;
while (keysize >>= 1)
{
i++;
}
return i ? (((uint64_t)1) << i) - 1 : 0;
}
// align on 256 bit boundary at end
verusclhasher(uint64_t keysize=VERUSKEYSIZE) : keySizeInBytes((keysize >> 5) << 5)
{
if (IsCPUVerusOptimized())
{
verusclhashfunction = &verusclhash;
}
else
{
verusclhashfunction = &verusclhash_port;
}
// if we changed, change it
if (verusclhasher_key.get() && keySizeInBytes != ((verusclhash_descr *)verusclhasher_descr.get())->keySizeInBytes)
{
verusclhasher_key.reset();
verusclhasher_descr.reset();
}
// get buffer space for mutating and refresh keys
void *key = NULL;
if (!(key = verusclhasher_key.get()) &&
(verusclhasher_key.reset((unsigned char *)alloc_aligned_buffer(keySizeInBytes << 1)), key = verusclhasher_key.get()))
{
verusclhash_descr *pdesc;
if (verusclhasher_descr.reset(new verusclhash_descr()), pdesc = (verusclhash_descr *)verusclhasher_descr.get())
{
pdesc->keySizeInBytes = keySizeInBytes;
}
else
{
verusclhasher_key.reset();
key = NULL;
}
}
if (key)
{
keyMask = keymask(keySizeInBytes);
}
else
{
keyMask = 0;
keySizeInBytes = 0;
}
#ifdef VERUSHASHDEBUG
printf("New hasher, keyMask: %lx, newKeySize: %lx\n", keyMask, keySizeInBytes);
#endif
}
// this prepares a key for hashing and mutation by copying it from the original key for this block
// WARNING!! this does not check for NULL ptr, so make sure the buffer is allocated
inline void *gethashkey()
{
unsigned char *ret = (unsigned char *)verusclhasher_key.get();
verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get();
memcpy(ret, ret + pdesc->keySizeInBytes, keyMask + 1);
#ifdef VERUSHASHDEBUG
// in debug mode, ensure that what should be the same, is
assert(memcmp(ret + (keyMask + 1), ret + (pdesc->keySizeInBytes + keyMask + 1), verusclhasher_keySizeInBytes - (keyMask + 1)) == 0);
#endif
return ret;
}
inline void *gethasherrefresh()
{
verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get();
return (unsigned char *)verusclhasher_key.get() + pdesc->keySizeInBytes;
}
inline verusclhash_descr *gethasherdescription()
{
return (verusclhash_descr *)verusclhasher_descr.get();
}
inline uint64_t keyrefreshsize()
{
return keyMask + 1;
}
inline uint64_t operator()(const unsigned char buf[64]) const {
return (*verusclhashfunction)(verusclhasher_key.get(), buf, keyMask);
}
inline uint64_t operator()(const unsigned char buf[64], void *key) const {
return (*verusclhashfunction)(key, buf, keyMask);
}
};
#endif // #ifdef __cplusplus
#endif // INCLUDE_VERUS_CLHASH_H

591
crypto/verus_clhash_portable.cpp

@ -0,0 +1,591 @@
/*
* This uses variations of the clhash algorithm for Verus Coin, licensed
* with the Apache-2.0 open source license.
*
* Copyright (c) 2018 Michael Toutonghi
* Distributed under the Apache 2.0 software license, available in the original form for clhash
* here: https://github.com/lemire/clhash/commit/934da700a2a54d8202929a826e2763831bd43cf7#diff-9879d6db96fd29134fc802214163b95a
*
* Original CLHash code and any portions herein, (C) 2017, 2018 Daniel Lemire and Owen Kaser
* Faster 64-bit universal hashing
* using carry-less multiplications, Journal of Cryptographic Engineering (to appear)
*
* Best used on recent x64 processors (Haswell or better).
*
* This implements an intermediate step in the last part of a Verus block hash. The intent of this step
* is to more effectively equalize FPGAs over GPUs and CPUs.
*
**/
#include "verus_hash.h"
#include <assert.h>
#include <string.h>
#ifdef __APPLE__
#include <sys/types.h>
#endif// APPLE
#ifdef _WIN32
#pragma warning (disable : 4146)
#include <intrin.h>
#else
#include <x86intrin.h>
#endif //WIN32
// Portable 64x64 -> 128 bit carry-less multiplication of a and b.
// On return r[0] holds the low and r[1] the high 64 bits of the product.
// Uses a 4-bit window: a table of b times every 4-bit polynomial is built
// (truncated to 64 bits), one table entry is accumulated per nibble of a, and
// a final repair pass restores the bits the truncation dropped.
void clmul64(uint64_t a, uint64_t b, uint64_t* r)
{
    const unsigned kWindowBits = 4;
    const unsigned kTableSize = 1u << kWindowBits;
    const uint64_t kWindowMask = kTableSize - 1;

    // table[k] = (b times polynomial k), keeping only the low 64 bits
    uint64_t table[16];
    table[0] = 0;
    table[1] = b;
    for (unsigned k = 2; k < kTableSize; k += 2)
    {
        table[k] = table[k >> 1] << 1;  // even entries: double the half entry
        table[k + 1] = table[k] ^ b;    // odd entries: add b
    }

    // The lowest nibble only contributes to the low word
    uint64_t lo = table[a & kWindowMask];
    uint64_t hi = 0;
    for (unsigned shift = kWindowBits; shift < 64; shift += kWindowBits)
    {
        const uint64_t term = table[(a >> shift) & kWindowMask];
        lo ^= term << shift;
        hi ^= term >> (64 - shift);
    }

    // Repair: the table dropped contributions from the top three bits of b.
    // For each of them, add back the bits of a whose position inside its
    // nibble is high enough to have overflowed.
    uint64_t eligible = 0xEEEEEEEEEEEEEEEE; // 1110 in every nibble
    for (unsigned lost = 1; lost < kWindowBits; ++lost)
    {
        const uint64_t correction = (a & eligible) >> lost;
        eligible &= eligible << 1; // drop the lowest remaining position of each nibble
        if ((b >> (64 - lost)) & 1)
        {
            hi ^= correction;
        }
    }

    r[0] = lo;
    r[1] = hi;
}
// Portable stand-in for _mm_clmulepi64_si128: carry-less multiplies one 64-bit
// lane of `a` by one 64-bit lane of `b`. Bit 0 of `imm` selects the lane of
// `a`, bit 4 selects the lane of `b` (0 = low, 1 = high).
u128 _mm_clmulepi64_si128_emu(const __m128i &a, const __m128i &b, int imm)
{
    const uint64_t *const lanesA = (const uint64_t *)&a;
    const uint64_t *const lanesB = (const uint64_t *)&b;

    uint64_t product[2];
    clmul64(lanesA[imm & 1], lanesB[(imm & 0x10) >> 4], product);

    // `product` is only guaranteed 8-byte alignment, so it is copied into a
    // properly aligned vector instead of being dereferenced as a __m128i.
    __m128i result;
    memcpy(&result, product, sizeof(result));
    return result;
}
// Portable stand-in for _mm_mulhrs_epi16: for each of the eight signed 16-bit
// lanes, multiplies the operands as 32-bit values, adds the rounding constant
// 0x4000 and keeps bits 15..30 of the result.
u128 _mm_mulhrs_epi16_emu(__m128i _a, __m128i _b)
{
    const int16_t *const a = (const int16_t *)&_a;
    const int16_t *const b = (const int16_t *)&_b;

    int16_t lanes[8];
    for (int i = 0; i < 8; i ++)
    {
        lanes[i] = (int16_t)((((int32_t)(a[i]) * (int32_t)(b[i])) + 0x4000) >> 15);
    }

    // `lanes` is only guaranteed 2-byte alignment, so it is copied into a
    // properly aligned vector instead of being dereferenced as a __m128i.
    __m128i result;
    memcpy(&result, lanes, sizeof(result));
    return result;
}
// Portable stand-in for _mm_set_epi64x: builds a vector whose 64-bit lane 1 is
// `hi` and whose lane 0 is `lo`.
inline u128 _mm_set_epi64x_emu(uint64_t hi, uint64_t lo)
{
    __m128i packed;
    uint64_t *const lanes = (uint64_t *)&packed;
    lanes[0] = lo;
    lanes[1] = hi;
    return packed;
}
// Portable stand-in for _mm_cvtsi64_si128: places `lo` in 64-bit lane 0 and
// zeroes lane 1.
inline u128 _mm_cvtsi64_si128_emu(uint64_t lo)
{
    __m128i widened;
    uint64_t *const lanes = (uint64_t *)&widened;
    lanes[0] = lo;
    lanes[1] = 0;
    return widened;
}
// Portable stand-in for _mm_cvtsi128_si64: returns the first 64 bits of `a` as
// a signed integer.
inline int64_t _mm_cvtsi128_si64_emu(__m128i &a)
{
    const int64_t *const lanes = (const int64_t *)&a;
    return lanes[0];
}
// Portable stand-in for _mm_cvtsi128_si32: returns the first 32 bits of `a` as
// a signed integer.
inline int32_t _mm_cvtsi128_si32_emu(__m128i &a)
{
    const int32_t *const lanes = (const int32_t *)&a;
    return lanes[0];
}
// Portable stand-in for _mm_cvtsi32_si128: stores `lo` in the first 32-bit
// lane and clears the remaining 96 bits.
inline u128 _mm_cvtsi32_si128_emu(uint32_t lo)
{
    __m128i widened;
    uint32_t *const words = (uint32_t *)&widened;
    uint64_t *const quads = (uint64_t *)&widened;
    words[0] = lo;
    words[1] = 0;
    quads[1] = 0;
    return widened;
}
// Portable stand-in for _mm_setr_epi8: builds a vector from sixteen bytes given
// in memory order (c0 ends up in byte 0, c15 in byte 15).
u128 _mm_setr_epi8_emu(u_char c0, u_char c1, u_char c2, u_char c3, u_char c4, u_char c5, u_char c6, u_char c7, u_char c8, u_char c9, u_char c10, u_char c11, u_char c12, u_char c13, u_char c14, u_char c15)
{
    const u_char ordered[16] = { c0, c1, c2, c3, c4, c5, c6, c7,
                                 c8, c9, c10, c11, c12, c13, c14, c15 };
    __m128i result;
    uint8_t *const dst = (uint8_t *)&result;
    for (int i = 0; i < 16; i++)
    {
        dst[i] = ordered[i];
    }
    return result;
}
// Portable stand-in for _mm_srli_si128: shifts the 16 bytes of `a` towards
// byte 0 by `imm8` bytes, filling the vacated upper bytes with zero. Only the
// low 8 bits of `imm8` are used and any count above 15 yields an all-zero
// vector.
inline __m128i _mm_srli_si128_emu(__m128i a, int imm8)
{
    uint8_t shift = imm8 & 0xff;
    if (shift > 15) shift = 16;

    const unsigned char *const src = (const unsigned char *)&a;
    unsigned char bytes[16];
    int i;
    for (i = 0; i < (16 - shift); i++)
    {
        bytes[i] = src[shift + i];
    }
    for ( ; i < 16; i++)
    {
        bytes[i] = 0;
    }

    // `bytes` has no particular alignment, so it is copied into a properly
    // aligned vector instead of being dereferenced as a __m128i.
    __m128i result;
    memcpy(&result, bytes, sizeof(result));
    return result;
}
// Portable stand-in for _mm_xor_si128: bitwise XOR of two 128-bit vectors.
// On Windows the XOR is done on the two 64-bit halves; elsewhere the
// compiler's built-in vector operator is used.
inline __m128i _mm_xor_si128_emu(__m128i a, __m128i b)
{
#ifdef _WIN32
    // The halves are written straight into an aligned vector; the result is
    // never read back through a cast of a merely 8-byte-aligned array.
    __m128i result;
    uint64_t *const out = (uint64_t *)&result;
    const uint64_t *const lhs = (const uint64_t *)&a;
    const uint64_t *const rhs = (const uint64_t *)&b;
    out[0] = lhs[0] ^ rhs[0];
    out[1] = lhs[1] ^ rhs[1];
    return result;
#else
    return a ^ b;
#endif
}
// Portable stand-in for _mm_load_si128: reads one 128-bit vector from `p`,
// which is dereferenced as a __m128i.
inline __m128i _mm_load_si128_emu(const void *p)
{
    const __m128i *const src = (__m128i *)p;
    return *src;
}
// Portable stand-in for _mm_store_si128: writes the 128-bit vector `val` to
// `p`, which is dereferenced as a __m128i.
inline void _mm_store_si128_emu(void *p, __m128i val)
{
    __m128i *const dst = (__m128i *)p;
    *dst = val;
}
// Portable stand-in for _mm_shuffle_epi8: each output byte i is taken from
// byte (b[i] & 0xf) of `a`, or is zero when the top bit of b[i] is set.
__m128i _mm_shuffle_epi8_emu(__m128i a, __m128i b)
{
    const uint8_t *const source = (const uint8_t *)&a;
    const uint8_t *const control = (const uint8_t *)&b;
    __m128i result;
    uint8_t *const out = (uint8_t *)&result;
    for (int i = 0; i < 16; i++)
    {
        const uint8_t selector = control[i];
        out[i] = (selector & 0x80) ? 0 : source[selector & 0xf];
    }
    return result;
}
// portable
// Carry-less multiplication of `length` by `keylength` without modular
// reduction, built on the emulated intrinsics: both values are packed into one
// vector and selector 0x10 multiplies its low lane by its high lane.
static inline __m128i lazyLengthHash_port(uint64_t keylength, uint64_t length) {
    const __m128i packed = _mm_set_epi64x_emu(keylength,length);
    return _mm_clmulepi64_si128_emu( packed, packed, 0x10);
}
// Portable reduction of the 128-bit carry-less product A modulo the degree-64
// polynomial (64,4,3,1,0). Only the low 64 bits of the result are meaningful;
// the high 64 bits should be assumed to contain garbage (see
// precompReduction64_port).
static inline __m128i precompReduction64_si128_port( __m128i A) {
    // Low terms of the irreducible polynomial: x^4 + x^3 + x + 1
    const __m128i poly = _mm_cvtsi64_si128_emu((1U<<4)+(1U<<3)+(1U<<1)+(1U<<0));
    // High half of A multiplied by the polynomial's low terms
    const __m128i folded = _mm_clmulepi64_si128_emu( A, poly, 0x01);
    // Precomputed 16-entry table, indexed byte-wise by the high half of `folded`
    const __m128i table = _mm_setr_epi8_emu(0, 27, 54, 45, 108, 119, 90, 65, (char)216, (char)195, (char)238, (char)245, (char)180, (char)175, (char)130, (char)153);
    const __m128i overflow = _mm_shuffle_epi8_emu(table, _mm_srli_si128_emu(folded,8));
    const __m128i partial = _mm_xor_si128_emu(folded,A);
    return _mm_xor_si128_emu(overflow,partial); /// WARNING: HIGH 64 BITS SHOULD BE ASSUMED TO CONTAIN GARBAGE
}
// Reduces the 128-bit value A with precompReduction64_si128_port and returns
// the 64-bit integer extracted from the result by _mm_cvtsi128_si64_emu.
static inline uint64_t precompReduction64_port( __m128i A) {
    return _mm_cvtsi128_si64_emu(precompReduction64_si128_port(A));
}
// verus intermediate hash extra
//
// Portable (emulated-intrinsic) core of the Verus CL hash. Runs 32 rounds; in
// each round a 64-bit selector taken from the accumulator chooses
//   - two key slots (prand, prandex) inside randomsource, which are read,
//     rewritten and/or swapped, so the key buffer IS MUTATED by this call,
//   - one of the four 16-byte input elements in buf plus its pair neighbour,
//   - one of eight mixing recipes (bits 2..4 of the selector).
//
// randomsource: key material, indexed in 16-byte elements
// buf:          the 64 bytes being hashed, as four 16-byte elements
// keyMask:      key mask in bytes (converted to 16-byte elements below)
// returns:      the 128-bit accumulator, before length mixing and reduction
static __m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask)
{
    __m128i const *pbuf;

    // divide key mask by 16 from bytes to __m128i
    keyMask >>= 4;

    // the random buffer must have at least 32 16 byte dwords after the keymask to work with this
    // algorithm. we take the value from the last element inside the keyMask + 2, as that will never
    // be used to xor into the accumulator before it is hashed with other values first
    __m128i acc = _mm_load_si128_emu(randomsource + (keyMask + 2));

    for (int64_t i = 0; i < 32; i++)
    {
        const uint64_t selector = _mm_cvtsi128_si64_emu(acc);

        // get two random locations in the key, which will be mutated and swapped
        __m128i *prand = randomsource + ((selector >> 5) & keyMask);
        __m128i *prandex = randomsource + ((selector >> 32) & keyMask);

        // select random start and order of pbuf processing
        // (the expression pbuf - (((selector & 1) << 1) - 1) used below addresses the
        // pair neighbour of pbuf: pbuf + 1 when the index is even, pbuf - 1 when odd)
        pbuf = buf + (selector & 3);

        switch (selector & 0x1c)
        {
            case 0:
            {
                const __m128i temp1 = _mm_load_si128_emu(prandex);
                const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1));
                const __m128i add1 = _mm_xor_si128_emu(temp1, temp2);
                const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10);
                acc = _mm_xor_si128_emu(clprod1, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1);
                const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1);

                const __m128i temp12 = _mm_load_si128_emu(prand);
                _mm_store_si128_emu(prand, tempa2);

                const __m128i temp22 = _mm_load_si128_emu(pbuf);
                const __m128i add12 = _mm_xor_si128_emu(temp12, temp22);
                const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10);
                acc = _mm_xor_si128_emu(clprod12, acc);

                const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12);
                const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12);
                _mm_store_si128_emu(prandex, tempb2);
                break;
            }
            case 4:
            {
                const __m128i temp1 = _mm_load_si128_emu(prand);
                const __m128i temp2 = _mm_load_si128_emu(pbuf);
                const __m128i add1 = _mm_xor_si128_emu(temp1, temp2);
                const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10);
                acc = _mm_xor_si128_emu(clprod1, acc);
                const __m128i clprod2 = _mm_clmulepi64_si128_emu(temp2, temp2, 0x10);
                acc = _mm_xor_si128_emu(clprod2, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1);
                const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1);

                const __m128i temp12 = _mm_load_si128_emu(prandex);
                _mm_store_si128_emu(prandex, tempa2);

                const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1));
                const __m128i add12 = _mm_xor_si128_emu(temp12, temp22);
                acc = _mm_xor_si128_emu(add12, acc);

                const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12);
                const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12);
                _mm_store_si128_emu(prand, tempb2);
                break;
            }
            case 8:
            {
                const __m128i temp1 = _mm_load_si128_emu(prandex);
                const __m128i temp2 = _mm_load_si128_emu(pbuf);
                const __m128i add1 = _mm_xor_si128_emu(temp1, temp2);
                acc = _mm_xor_si128_emu(add1, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1);
                const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1);

                const __m128i temp12 = _mm_load_si128_emu(prand);
                _mm_store_si128_emu(prand, tempa2);

                const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1));
                const __m128i add12 = _mm_xor_si128_emu(temp12, temp22);
                const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10);
                acc = _mm_xor_si128_emu(clprod12, acc);
                const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10);
                acc = _mm_xor_si128_emu(clprod22, acc);

                const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12);
                const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12);
                _mm_store_si128_emu(prandex, tempb2);
                break;
            }
            case 0xc:
            {
                const __m128i temp1 = _mm_load_si128_emu(prand);
                const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1));
                const __m128i add1 = _mm_xor_si128_emu(temp1, temp2);

                // cannot be zero here: this case requires bits 2 and 3 of the selector to be set.
                // it cannot be -1 either, because bit 4 of the selector is clear in this case.
                const int32_t divisor = (uint32_t)selector;

                // use the emulated XOR, like every other operation in this portable routine
                acc = _mm_xor_si128_emu(add1, acc);

                const int64_t dividend = _mm_cvtsi128_si64_emu(acc);
                const __m128i modulo = _mm_cvtsi32_si128_emu(dividend % divisor);
                acc = _mm_xor_si128_emu(modulo, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1);
                const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1);

                if (dividend & 1)
                {
                    const __m128i temp12 = _mm_load_si128_emu(prandex);
                    _mm_store_si128_emu(prandex, tempa2);

                    const __m128i temp22 = _mm_load_si128_emu(pbuf);
                    const __m128i add12 = _mm_xor_si128_emu(temp12, temp22);
                    const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10);
                    acc = _mm_xor_si128_emu(clprod12, acc);
                    const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10);
                    acc = _mm_xor_si128_emu(clprod22, acc);

                    const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12);
                    const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12);
                    _mm_store_si128_emu(prand, tempb2);
                }
                else
                {
                    // no extra mixing: just swap, storing the mutated value in prandex
                    const __m128i tempb3 = _mm_load_si128_emu(prandex);
                    _mm_store_si128_emu(prandex, tempa2);
                    _mm_store_si128_emu(prand, tempb3);
                }
                break;
            }
            case 0x10:
            {
                // a few AES operations
                // NOTE(review): rc and tmp are not referenced directly here; presumably they are
                // consumed by the AES2_EMU / MIX2_EMU macros - confirm against the macro definitions
                const __m128i *rc = prand;
                __m128i tmp;

                __m128i temp1 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1));
                __m128i temp2 = _mm_load_si128_emu(pbuf);

                AES2_EMU(temp1, temp2, 0);
                MIX2_EMU(temp1, temp2);

                AES2_EMU(temp1, temp2, 4);
                MIX2_EMU(temp1, temp2);

                AES2_EMU(temp1, temp2, 8);
                MIX2_EMU(temp1, temp2);

                acc = _mm_xor_si128_emu(temp1, acc);
                acc = _mm_xor_si128_emu(temp2, acc);

                const __m128i tempa1 = _mm_load_si128_emu(prand);
                const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1);
                const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2);

                const __m128i tempa4 = _mm_load_si128_emu(prandex);
                _mm_store_si128_emu(prandex, tempa3);
                _mm_store_si128_emu(prand, tempa4);
                break;
            }
            case 0x14:
            {
                // we'll just call this one the monkins loop, inspired by Chris
                const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1);
                __m128i tmp; // used by MIX2

                uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times
                __m128i *rc = prand;
                uint64_t aesround = 0;
                __m128i onekey;

                do
                {
                    // NOTE(review): 0x10000000 is an int literal, so this shift is evaluated in int
                    // and overflows for the larger values of rounds. It is left untouched because
                    // any change here would alter the hash output - confirm against the reference.
                    if (selector & (0x10000000 << rounds))
                    {
                        onekey = _mm_load_si128_emu(rc++);
                        const __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? pbuf : buftmp);
                        const __m128i add1 = _mm_xor_si128_emu(onekey, temp2);
                        const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10);
                        acc = _mm_xor_si128_emu(clprod1, acc);
                    }
                    else
                    {
                        onekey = _mm_load_si128_emu(rc++);
                        __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? buftmp : pbuf);
                        const uint64_t roundidx = aesround++ << 2;
                        AES2_EMU(onekey, temp2, roundidx);
                        MIX2_EMU(onekey, temp2);
                        acc = _mm_xor_si128_emu(onekey, acc);
                        acc = _mm_xor_si128_emu(temp2, acc);
                    }
                } while (rounds--);

                const __m128i tempa1 = _mm_load_si128_emu(prand);
                const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1);
                const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2);

                const __m128i tempa4 = _mm_load_si128_emu(prandex);
                _mm_store_si128_emu(prandex, tempa3);
                _mm_store_si128_emu(prand, tempa4);
                break;
            }
            case 0x18:
            {
                const __m128i temp1 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1));
                const __m128i temp2 = _mm_load_si128_emu(prand);
                const __m128i add1 = _mm_xor_si128_emu(temp1, temp2);
                const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10);
                acc = _mm_xor_si128_emu(clprod1, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp2);
                const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp2);

                const __m128i tempb3 = _mm_load_si128_emu(prandex);
                _mm_store_si128_emu(prandex, tempa2);
                _mm_store_si128_emu(prand, tempb3);
                break;
            }
            case 0x1c:
            {
                const __m128i temp1 = _mm_load_si128_emu(pbuf);
                const __m128i temp2 = _mm_load_si128_emu(prandex);
                const __m128i add1 = _mm_xor_si128_emu(temp1, temp2);
                const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10);
                acc = _mm_xor_si128_emu(clprod1, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp2);
                const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp2);

                const __m128i tempa3 = _mm_load_si128_emu(prand);
                _mm_store_si128_emu(prand, tempa2);

                acc = _mm_xor_si128_emu(tempa3, acc);

                const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, tempa3);
                const __m128i tempb2 = _mm_xor_si128_emu(tempb1, tempa3);
                _mm_store_si128_emu(prandex, tempb2);
                break;
            }
        }
    }
    return acc;
}
// Portable Verus CL hash entry point. Hashes exactly 64 bytes of input against
// the key material in `random` and returns a 64-bit hash value: the mixing
// core produces a 128-bit accumulator, the fixed length hash for (1024, 64)
// is XORed in, and the result is reduced to 64 bits.
// The key buffer behind `random` is passed to the core as a non-const pointer
// and is modified by it.
uint64_t verusclhash_port(void * random, const unsigned char buf[64], uint64_t keyMask) {
    __m128i *keyWords = (__m128i *)random;
    const __m128i *input = (const __m128i *) buf;

    const __m128i mixed = __verusclmulwithoutreduction64alignedrepeat_port(keyWords, input, keyMask);
    const __m128i withLength = _mm_xor_si128_emu(mixed, lazyLengthHash_port(1024, 64));
    return precompReduction64_port(withLength);
}

22
crypto/verus_hash.cpp

@ -14,11 +14,12 @@ bit output.
void (*CVerusHash::haraka512Function)(unsigned char *out, const unsigned char *in);
void CVerusHash::Hash(void *result, const void *data, size_t len)
void CVerusHash::Hash(void *result, const void *data, size_t _len)
{
unsigned char buf[128];
unsigned char *bufPtr = buf;
int pos = 0, nextOffset = 64;
int nextOffset = 64;
uint32_t pos = 0, len = _len;
unsigned char *bufPtr2 = bufPtr + nextOffset;
unsigned char *ptr = (unsigned char *)data;
@ -58,14 +59,15 @@ void CVerusHash::init()
}
}
CVerusHash &CVerusHash::Write(const unsigned char *data, size_t len)
CVerusHash &CVerusHash::Write(const unsigned char *data, size_t _len)
{
unsigned char *tmp;
uint32_t pos, len = _len;
// digest up to 32 bytes at a time
for ( int pos = 0; pos < len; )
for ( pos = 0; pos < len; )
{
int room = 32 - curPos;
uint32_t room = 32 - curPos;
if (len - pos >= room)
{
@ -94,6 +96,8 @@ void verus_hash(void *result, const void *data, size_t len)
}
void (*CVerusHashV2::haraka512Function)(unsigned char *out, const unsigned char *in);
void (*CVerusHashV2::haraka512KeyedFunction)(unsigned char *out, const unsigned char *in, const u128 *rc);
void (*CVerusHashV2::haraka256Function)(unsigned char *out, const unsigned char *in);
void CVerusHashV2::init()
{
@ -101,12 +105,16 @@ void CVerusHashV2::init()
{
load_constants();
haraka512Function = &haraka512;
haraka512KeyedFunction = &haraka512_keyed;
haraka256Function = &haraka256;
}
else
{
// load and tweak the haraka constants
// load the haraka constants
load_constants_port();
haraka512Function = &haraka512_port;
haraka512KeyedFunction = &haraka512_port_keyed;
haraka256Function = &haraka256_port;
}
}
@ -147,7 +155,7 @@ CVerusHashV2 &CVerusHashV2::Write(const unsigned char *data, size_t len)
unsigned char *tmp;
// digest up to 32 bytes at a time
for ( int pos = 0; pos < len; )
for (int pos = 0; pos < len; )
{
int room = 32 - curPos;

153
crypto/verus_hash.h

@ -1,4 +1,4 @@
// (C) 2018 The Verus Developers
// (C) 2018 Michael Toutonghi
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
@ -8,10 +8,14 @@ This provides the PoW hash function for Verus, enabling CPU mining.
#ifndef VERUS_HASH_H_
#define VERUS_HASH_H_
// verbose output when defined
//#define VERUSHASHDEBUG 1
#include <cstring>
#include <vector>
#include <cpuid.h>
#include "uint256.h"
#include "verus_clhash.h"
extern "C"
{
@ -40,7 +44,7 @@ class CVerusHash
return *this;
}
inline int64_t *ExtraI64Ptr() { return (int64_t *)(curBuf + 32); }
int64_t *ExtraI64Ptr() { return (int64_t *)(curBuf + 32); }
void ClearExtra()
{
if (curPos)
@ -73,30 +77,58 @@ class CVerusHashV2
public:
static void Hash(void *result, const void *data, size_t len);
static void (*haraka512Function)(unsigned char *out, const unsigned char *in);
static void (*haraka512KeyedFunction)(unsigned char *out, const unsigned char *in, const u128 *rc);
static void (*haraka256Function)(unsigned char *out, const unsigned char *in);
static void init();
CVerusHashV2() {}
verusclhasher vclh;
// Constructs a hasher and verifies that the shared CL hash key buffer
// (verusclhasher_key) has been allocated.
// NOTE(review): assert(false) is compiled out when NDEBUG is defined, so in
// such a build only the message is printed and construction continues without
// a key buffer - confirm whether a hard abort is intended here.
CVerusHashV2() : vclh() {
    // we must have allocated key space, or can't run
    if (!verusclhasher_key.get())
    {
        printf("ERROR: failed to allocate hash buffer - terminating\n");
        assert(false);
    }
}
CVerusHashV2 &Write(const unsigned char *data, size_t len);
CVerusHashV2 &Reset()
inline CVerusHashV2 &Reset()
{
curBuf = buf1;
result = buf2;
curPos = 0;
std::fill(buf1, buf1 + sizeof(buf1), 0);
return *this;
}
int64_t *ExtraI64Ptr() { return (int64_t *)(curBuf + 32); }
void ClearExtra()
inline int64_t *ExtraI64Ptr() { return (int64_t *)(curBuf + 32); }
inline void ClearExtra()
{
if (curPos)
{
std::fill(curBuf + 32 + curPos, curBuf + 64, 0);
}
}
void ExtraHash(unsigned char hash[32]) { (*haraka512Function)(hash, curBuf); }
// Fills the unused tail of the upper 32 bytes of curBuf (from offset
// 32 + curPos up to 64) by repeatedly copying the leading bytes of *_data.
// The source pointer is never advanced: each pass copies from the start of
// *_data again, up to sizeof(T) bytes, until the tail is full.
template <typename T>
inline void FillExtra(const T *_data)
{
    const unsigned char *src = (const unsigned char *)_data;
    unsigned int offset = curPos;
    unsigned int remaining = 32 - offset;
    do
    {
        // copy a whole T unless fewer bytes than that are left to fill
        unsigned int chunk = sizeof(T);
        if (remaining <= sizeof(T))
        {
            chunk = remaining;
        }
        std::memcpy(curBuf + 32 + offset, src, chunk);
        offset += chunk;
        remaining -= chunk;
    } while (remaining > 0);
}
inline void ExtraHash(unsigned char hash[32]) { (*haraka512Function)(hash, curBuf); }
inline void ExtraHashKeyed(unsigned char hash[32], u128 *key) { (*haraka512KeyedFunction)(hash, curBuf, key); }
void Finalize(unsigned char hash[32])
{
@ -109,9 +141,101 @@ class CVerusHashV2
std::memcpy(hash, curBuf, 32);
}
// chains Haraka256 from 32 bytes to fill the key
//
// The shared key buffer is used as two regions of keySizeInBytes each: the
// generated key is written to the second region (key + keySizeInBytes) and
// then copied to the first region, whose address is returned. The generated
// copy is only rebuilt when the seed differs from the one recorded in the
// descriptor; the copy to the first region happens on every call.
// NOTE(review): presumably the per-call copy exists because hashing mutates
// the working key in place - confirm against verusclhasher.
//
// seedBytes32: 32 bytes used as the seed (also read as a uint256 for the
//              comparison with the cached seed)
// returns:     pointer to the first region of the key buffer
static u128 *GenNewCLKey(unsigned char *seedBytes32)
{
    unsigned char *key = (unsigned char *)verusclhasher_key.get();
    verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get();
    // skip keygen if it is the current key
    if (pdesc->seed != *((uint256 *)seedBytes32))
    {
        // generate a new key by chain hashing with Haraka256 from the last curbuf
        // number of whole 32-byte blocks, and the leftover byte count
        int n256blks = pdesc->keySizeInBytes >> 5;
        int nbytesExtra = pdesc->keySizeInBytes & 0x1f;
        unsigned char *pkey = key + pdesc->keySizeInBytes;
        unsigned char *psrc = seedBytes32;
        for (int i = 0; i < n256blks; i++)
        {
            // each block is the hash of the previous block (the seed for the first one)
            (*haraka256Function)(pkey, psrc);
            psrc = pkey;
            pkey += 32;
        }
        if (nbytesExtra)
        {
            // partial trailing block: hash into a scratch buffer and keep only the needed bytes
            unsigned char buf[32];
            (*haraka256Function)(buf, psrc);
            memcpy(pkey, buf, nbytesExtra);
        }
        // remember which seed the stored key was generated from
        pdesc->seed = *((uint256 *)seedBytes32);
    }
    // refresh the working region from the generated key
    memcpy(key, key + pdesc->keySizeInBytes, pdesc->keySizeInBytes);
    return (u128 *)key;
}
// Maps an intermediate hash value to an offset, in 128-bit elements, into the
// key: the value is masked with vclh.keyMask shifted right by 4 (divided by 16).
inline uint64_t IntermediateTo128Offset(uint64_t intermediate)
{
    // the mask is where we wrap
    const uint64_t wrapMask = vclh.keyMask >> 4;
    return wrapMask & intermediate;
}
// Produces the final 32-byte hash for the "2b" variant into `hash`.
// Steps, as performed below:
//   1. fill the unused tail of curBuf from the start of curBuf,
//   2. obtain the CL hash key for the current buffer contents (GenNewCLKey),
//   3. run the CL hasher over curBuf to get a 64-bit intermediate value,
//   4. overwrite the same tail of curBuf with that intermediate value,
//   5. run the keyed Haraka512 function over curBuf, with the key pointer
//      offset by a value derived from the intermediate.
void Finalize2b(unsigned char hash[32])
{
    // fill buffer to the end with the beginning of it to prevent any foreknowledge of
    // bits that may contain zero
    FillExtra((u128 *)curBuf);
#ifdef VERUSHASHDEBUG
    uint256 *bhalf1 = (uint256 *)curBuf;
    uint256 *bhalf2 = bhalf1 + 1;
    printf("Curbuf: %s%s\n", bhalf1->GetHex().c_str(), bhalf2->GetHex().c_str());
#endif
    // gen new key with what is last in buffer
    u128 *key = GenNewCLKey(curBuf);
    // run verusclhash on the buffer
    uint64_t intermediate = vclh(curBuf, key);
    // fill buffer to the end with the result
    FillExtra(&intermediate);
#ifdef VERUSHASHDEBUG
    // NOTE(review): %lx with a uint64_t argument assumes long is 64 bits wide - confirm for all targets
    printf("intermediate %lx\n", intermediate);
    printf("Curbuf: %s%s\n", bhalf1->GetHex().c_str(), bhalf2->GetHex().c_str());
    bhalf1 = (uint256 *)key;
    bhalf2 = bhalf1 + ((vclh.keyMask + 1) >> 5);
    printf(" Key: %s%s\n", bhalf1->GetHex().c_str(), bhalf2->GetHex().c_str());
#endif
    // get the final hash with a mutated dynamic key for each hash result
    (*haraka512KeyedFunction)(hash, curBuf, key + IntermediateTo128Offset(intermediate));
    /*
    // TEST BEGIN
    // test against the portable version
    uint256 testHash1 = *(uint256 *)hash, testHash2;
    FillExtra((u128 *)curBuf);
    u128 *hashKey = ((u128 *)vclh.gethashkey());
    uint64_t temp = verusclhash_port(key, curBuf, vclh.keyMask);
    FillExtra(&temp);
    haraka512_keyed((unsigned char *)&testHash2, curBuf, hashKey + IntermediateTo128Offset(intermediate));
    if (testHash1 != testHash2)
    {
    printf("Portable version failed! intermediate1: %lx, intermediate2: %lx\n", intermediate, temp);
    }
    // END TEST
    */
}
// Returns the current working buffer pointer (curBuf). The pointer gives
// direct, mutable access to the hasher's internal state.
inline unsigned char *CurBuffer()
{
    return curBuf;
}
private:
// only buf1, the first source, needs to be zero initialized
unsigned char buf1[64] = {0}, buf2[64];
alignas(32) unsigned char buf1[64] = {0}, buf2[64];
unsigned char *curBuf = buf1, *result = buf2;
size_t curPos = 0;
};
@ -119,15 +243,4 @@ class CVerusHashV2
extern void verus_hash(void *result, const void *data, size_t len);
extern void verus_hash_v2(void *result, const void *data, size_t len);
inline bool IsCPUVerusOptimized()
{
unsigned int eax,ebx,ecx,edx;
if (!__get_cpuid(1,&eax,&ebx,&ecx,&edx))
{
return false;
}
return ((ecx & (bit_AVX | bit_AES)) == (bit_AVX | bit_AES));
};
#endif

152
verushash.cc

@ -10,13 +10,23 @@
using namespace v8;
CVerusHash* vh;
CVerusHashV2* vh2;
bool initialized = false;
void verusInit(const v8::FunctionCallbackInfo<Value>& args) {
void initialize()
{
if (!initialized)
{
CVerusHash::init();
CVerusHashV2::init();
}
vh = new CVerusHash();
vh->init();
vh2 = new CVerusHashV2();
initialized = true;
}
// JS binding "init": runs initialize() and returns the receiver (args.This())
// to the caller.
void verusInit(const v8::FunctionCallbackInfo<Value>& args) {
    initialize();
    args.GetReturnValue().Set(args.This());
}
@ -95,14 +105,140 @@ void verusHash(const v8::FunctionCallbackInfo<Value>& args) {
char *result = new char[32];
if (initialized == false) {
CVerusHash::init();
initialized = true;
initialize();
}
verus_hash(result, buff, node::Buffer::Length(buffer));
args.GetReturnValue().Set(Nan::NewBuffer(result, 32).ToLocalChecked());
}
// JS binding "update2": feeds the bytes of the Buffer in args[0] into the
// global V2 hasher and returns the receiver (args.This()).
// Throws a TypeError if init() has not been called, if no argument is given,
// or if the argument is not a Buffer.
void verusUpdateV2(const v8::FunctionCallbackInfo<Value>& args) {
    Isolate* isolate = Isolate::GetCurrent();
    HandleScope scope(isolate);
    if (initialized == false){
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "call init() first!"))
        );
        // vh2 is only allocated by initialize(); stop here instead of dereferencing it
        return;
    }
    if (args.Length() < 1) {
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "Wrong number of arguments"))
        );
        return;
    }
    Local<Object> buffer = args[0]->ToObject();
    if(!node::Buffer::HasInstance(buffer)) {
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "Invalid buffer objects."))
        );
        return;
    }
    const char *buff = node::Buffer::Data(buffer);
    vh2->Write((const unsigned char *)buff, node::Buffer::Length(buffer));
    args.GetReturnValue().Set(args.This());
}
// JS binding "digest2": finalizes the global V2 hasher with Finalize() and
// returns the 32-byte result as a new Buffer.
// Throws a TypeError if init() has not been called.
void verusDigestV2(const v8::FunctionCallbackInfo<Value>& args) {
    Isolate* isolate = Isolate::GetCurrent();
    HandleScope scope(isolate);
    if (initialized == false){
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "call init() first!"))
        );
        // vh2 is only allocated by initialize(); stop here instead of dereferencing it
        return;
    }
    // hash into a stack buffer and let Nan copy it: a Buffer that takes
    // ownership of a pointer releases it with free(), which must not be
    // paired with memory obtained from new[]
    unsigned char result[32];
    vh2->Finalize(result);
    args.GetReturnValue().Set(Nan::CopyBuffer((const char *)result, 32).ToLocalChecked());
}
// JS binding "digest2b": finalizes the global V2 hasher with Finalize2b() and
// returns the 32-byte result as a new Buffer.
// Throws a TypeError if init() has not been called.
void verusDigestV2b(const v8::FunctionCallbackInfo<Value>& args) {
    Isolate* isolate = Isolate::GetCurrent();
    HandleScope scope(isolate);
    if (initialized == false){
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "call init() first!"))
        );
        // vh2 is only allocated by initialize(); stop here instead of dereferencing it
        return;
    }
    // hash into a stack buffer and let Nan copy it: a Buffer that takes
    // ownership of a pointer releases it with free(), which must not be
    // paired with memory obtained from new[]
    unsigned char result[32];
    vh2->Finalize2b(result);
    args.GetReturnValue().Set(Nan::CopyBuffer((const char *)result, 32).ToLocalChecked());
}
// JS binding "reset2": resets the global V2 hasher and returns the receiver
// (args.This()). Throws a TypeError if init() has not been called.
void verusResetV2(const v8::FunctionCallbackInfo<Value>& args) {
    Isolate* isolate = Isolate::GetCurrent();
    HandleScope scope(isolate);
    if (initialized == false){
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "call init() first!"))
        );
        // vh2 is only allocated by initialize(); stop here instead of dereferencing it
        return;
    }
    vh2->Reset();
    args.GetReturnValue().Set(args.This());
}
// JS binding "hash2": one-shot V2 hash of the Buffer in args[0], finished
// with Finalize(). Returns the 32-byte result as a new Buffer.
// Initializes the module on first use. Throws a TypeError if no argument is
// given or the argument is not a Buffer.
// Note: this resets and reuses the global vh2 hasher, so it discards any data
// previously fed to it through update2.
void verusHashV2(const v8::FunctionCallbackInfo<Value>& args) {
    Isolate* isolate = Isolate::GetCurrent();
    HandleScope scope(isolate);
    if (args.Length() < 1) {
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "Wrong number of arguments"))
        );
        return;
    }
    Local<Object> buffer = args[0]->ToObject();
    if(!node::Buffer::HasInstance(buffer)) {
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "Invalid buffer objects."))
        );
        return;
    }
    const char *buff = node::Buffer::Data(buffer);
    if (initialized == false) {
        initialize();
    }
    // hash into a stack buffer and let Nan copy it: a Buffer that takes
    // ownership of a pointer releases it with free(), which must not be
    // paired with memory obtained from new[]
    unsigned char result[32];
    vh2->Reset();
    vh2->Write((const unsigned char *)buff, node::Buffer::Length(buffer));
    vh2->Finalize(result);
    args.GetReturnValue().Set(Nan::CopyBuffer((const char *)result, 32).ToLocalChecked());
}
// JS binding "hash2b": one-shot V2 hash of the Buffer in args[0], finished
// with Finalize2b(). Returns the 32-byte result as a new Buffer.
// Initializes the module on first use. Throws a TypeError if no argument is
// given or the argument is not a Buffer.
// Note: this resets and reuses the global vh2 hasher, so it discards any data
// previously fed to it through update2.
void verusHashV2b(const v8::FunctionCallbackInfo<Value>& args) {
    Isolate* isolate = Isolate::GetCurrent();
    HandleScope scope(isolate);
    if (args.Length() < 1) {
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "Wrong number of arguments"))
        );
        return;
    }
    Local<Object> buffer = args[0]->ToObject();
    if(!node::Buffer::HasInstance(buffer)) {
        isolate->ThrowException(
            Exception::TypeError(String::NewFromUtf8(isolate, "Invalid buffer objects."))
        );
        return;
    }
    const char *buff = node::Buffer::Data(buffer);
    if (initialized == false) {
        initialize();
    }
    // hash into a stack buffer and let Nan copy it: a Buffer that takes
    // ownership of a pointer releases it with free(), which must not be
    // paired with memory obtained from new[]
    unsigned char result[32];
    vh2->Reset();
    vh2->Write((const unsigned char *)buff, node::Buffer::Length(buffer));
    vh2->Finalize2b(result);
    args.GetReturnValue().Set(Nan::CopyBuffer((const char *)result, 32).ToLocalChecked());
}
void Init(Handle<Object> exports) {
NODE_SET_METHOD(exports, "init", verusInit);
@ -110,6 +246,12 @@ void Init(Handle<Object> exports) {
NODE_SET_METHOD(exports, "digest", verusDigest);
NODE_SET_METHOD(exports, "reset", verusReset);
NODE_SET_METHOD(exports, "hash", verusHash);
NODE_SET_METHOD(exports, "update2", verusUpdateV2);
NODE_SET_METHOD(exports, "digest2", verusDigestV2);
NODE_SET_METHOD(exports, "digest2b", verusDigestV2b);
NODE_SET_METHOD(exports, "reset2", verusResetV2);
NODE_SET_METHOD(exports, "hash2", verusHashV2);
NODE_SET_METHOD(exports, "hash2b", verusHashV2b);
}
NODE_MODULE(verushash, Init)

Loading…
Cancel
Save