miketout
5 years ago
17 changed files with 3712 additions and 168 deletions
@ -1,3 +1,4 @@ |
|||
.lock-wscript |
|||
build/ |
|||
crypto.node |
|||
.vscode/settings.json |
|||
|
@ -1,126 +1,128 @@ |
|||
/*
|
|||
The MIT License (MIT) |
|||
|
|||
Copyright (c) 2016 kste |
|||
|
|||
Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
of this software and associated documentation files (the "Software"), to deal |
|||
in the Software without restriction, including without limitation the rights |
|||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
copies of the Software, and to permit persons to whom the Software is |
|||
furnished to do so, subject to the following conditions: |
|||
|
|||
The above copyright notice and this permission notice shall be included in all |
|||
copies or substantial portions of the Software. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
SOFTWARE. |
|||
|
|||
Optimized Implementations for Haraka256 and Haraka512 |
|||
*/ |
|||
#ifndef HARAKA_H_ |
|||
#define HARAKA_H_ |
|||
|
|||
#include "immintrin.h" |
|||
|
|||
#define NUMROUNDS 5 |
|||
|
|||
#ifdef _WIN32 |
|||
typedef unsigned long long u64; |
|||
#else |
|||
typedef unsigned long u64; |
|||
#endif |
|||
typedef __m128i u128; |
|||
|
|||
extern u128 rc[40]; |
|||
|
|||
#define LOAD(src) _mm_load_si128((u128 *)(src)) |
|||
#define STORE(dest,src) _mm_storeu_si128((u128 *)(dest),src) |
|||
|
|||
#define AES2(s0, s1, rci) \ |
|||
s0 = _mm_aesenc_si128(s0, rc[rci]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \ |
|||
s0 = _mm_aesenc_si128(s0, rc[rci + 2]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc[rci + 3]); |
|||
|
|||
#define AES2_4x(s0, s1, s2, s3, rci) \ |
|||
AES2(s0[0], s0[1], rci); \ |
|||
AES2(s1[0], s1[1], rci); \ |
|||
AES2(s2[0], s2[1], rci); \ |
|||
AES2(s3[0], s3[1], rci); |
|||
|
|||
#define AES2_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \ |
|||
AES2_4x(s0, s1, s2, s3, rci); \ |
|||
AES2_4x(s4, s5, s6, s7, rci); |
|||
|
|||
#define AES4(s0, s1, s2, s3, rci) \ |
|||
s0 = _mm_aesenc_si128(s0, rc[rci]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \ |
|||
s2 = _mm_aesenc_si128(s2, rc[rci + 2]); \ |
|||
s3 = _mm_aesenc_si128(s3, rc[rci + 3]); \ |
|||
s0 = _mm_aesenc_si128(s0, rc[rci + 4]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc[rci + 5]); \ |
|||
s2 = _mm_aesenc_si128(s2, rc[rci + 6]); \ |
|||
s3 = _mm_aesenc_si128(s3, rc[rci + 7]); \ |
|||
|
|||
#define AES4_zero(s0, s1, s2, s3, rci) \ |
|||
s0 = _mm_aesenc_si128(s0, rc0[rci]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc0[rci + 1]); \ |
|||
s2 = _mm_aesenc_si128(s2, rc0[rci + 2]); \ |
|||
s3 = _mm_aesenc_si128(s3, rc0[rci + 3]); \ |
|||
s0 = _mm_aesenc_si128(s0, rc0[rci + 4]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc0[rci + 5]); \ |
|||
s2 = _mm_aesenc_si128(s2, rc0[rci + 6]); \ |
|||
s3 = _mm_aesenc_si128(s3, rc0[rci + 7]); \ |
|||
|
|||
#define AES4_4x(s0, s1, s2, s3, rci) \ |
|||
AES4(s0[0], s0[1], s0[2], s0[3], rci); \ |
|||
AES4(s1[0], s1[1], s1[2], s1[3], rci); \ |
|||
AES4(s2[0], s2[1], s2[2], s2[3], rci); \ |
|||
AES4(s3[0], s3[1], s3[2], s3[3], rci); |
|||
|
|||
#define AES4_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \ |
|||
AES4_4x(s0, s1, s2, s3, rci); \ |
|||
AES4_4x(s4, s5, s6, s7, rci); |
|||
|
|||
#define MIX2(s0, s1) \ |
|||
tmp = _mm_unpacklo_epi32(s0, s1); \ |
|||
s1 = _mm_unpackhi_epi32(s0, s1); \ |
|||
s0 = tmp; |
|||
|
|||
#define MIX4(s0, s1, s2, s3) \ |
|||
tmp = _mm_unpacklo_epi32(s0, s1); \ |
|||
s0 = _mm_unpackhi_epi32(s0, s1); \ |
|||
s1 = _mm_unpacklo_epi32(s2, s3); \ |
|||
s2 = _mm_unpackhi_epi32(s2, s3); \ |
|||
s3 = _mm_unpacklo_epi32(s0, s2); \ |
|||
s0 = _mm_unpackhi_epi32(s0, s2); \ |
|||
s2 = _mm_unpackhi_epi32(s1, tmp); \ |
|||
s1 = _mm_unpacklo_epi32(s1, tmp); |
|||
|
|||
/* Store the truncated 256-bit digest from four 128-bit states:
 * upper 8 bytes of s0 and s1, lower 8 bytes of s2 and s3.
 * Fix: the original `(u64*)(s0)[1]` parsed as `(u64*)((s0)[1])` -- it
 * indexed the __m128i value itself and assigned a pointer to a u64.
 * Take the address of the state first, then offset in u64 units. */
#define TRUNCSTORE(out, s0, s1, s2, s3) \
  *(u64*)(out)      = *((u64*)&(s0) + 1); \
  *(u64*)(out + 8)  = *((u64*)&(s1) + 1); \
  *(u64*)(out + 16) = *((u64*)&(s2) + 0); \
  *(u64*)(out + 24) = *((u64*)&(s3) + 0);
|||
|
|||
/* NOTE(review): presumably initializes the round-constant table `rc`
 * declared above -- confirm against the implementation file.
 * The duplicate `load_constants` declaration has been removed. */
void load_constants();
/* Self-test entry point for the Haraka implementations. */
void test_implementations();
|||
|
|||
void haraka256(unsigned char *out, const unsigned char *in); |
|||
void haraka256_4x(unsigned char *out, const unsigned char *in); |
|||
void haraka256_8x(unsigned char *out, const unsigned char *in); |
|||
|
|||
void haraka512(unsigned char *out, const unsigned char *in); |
|||
void haraka512_zero(unsigned char *out, const unsigned char *in); |
|||
void haraka512_4x(unsigned char *out, const unsigned char *in); |
|||
void haraka512_8x(unsigned char *out, const unsigned char *in); |
|||
|
|||
#endif |
|||
/*
|
|||
The MIT License (MIT) |
|||
|
|||
Copyright (c) 2016 kste |
|||
|
|||
Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
of this software and associated documentation files (the "Software"), to deal |
|||
in the Software without restriction, including without limitation the rights |
|||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
copies of the Software, and to permit persons to whom the Software is |
|||
furnished to do so, subject to the following conditions: |
|||
|
|||
The above copyright notice and this permission notice shall be included in all |
|||
copies or substantial portions of the Software. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
SOFTWARE. |
|||
|
|||
Optimized Implementations for Haraka256 and Haraka512 |
|||
*/ |
|||
#ifndef HARAKA_H_ |
|||
#define HARAKA_H_ |
|||
|
|||
#include "immintrin.h" |
|||
|
|||
#define NUMROUNDS 5 |
|||
|
|||
#ifdef _WIN32 |
|||
typedef unsigned long long u64; |
|||
#else |
|||
typedef unsigned long u64; |
|||
#endif |
|||
typedef __m128i u128; |
|||
|
|||
extern u128 rc[40]; |
|||
|
|||
#define LOAD(src) _mm_load_si128((u128 *)(src)) |
|||
#define STORE(dest,src) _mm_storeu_si128((u128 *)(dest),src) |
|||
|
|||
#define AES2(s0, s1, rci) \ |
|||
s0 = _mm_aesenc_si128(s0, rc[rci]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \ |
|||
s0 = _mm_aesenc_si128(s0, rc[rci + 2]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc[rci + 3]); |
|||
|
|||
#define AES2_4x(s0, s1, s2, s3, rci) \ |
|||
AES2(s0[0], s0[1], rci); \ |
|||
AES2(s1[0], s1[1], rci); \ |
|||
AES2(s2[0], s2[1], rci); \ |
|||
AES2(s3[0], s3[1], rci); |
|||
|
|||
#define AES2_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \ |
|||
AES2_4x(s0, s1, s2, s3, rci); \ |
|||
AES2_4x(s4, s5, s6, s7, rci); |
|||
|
|||
#define AES4(s0, s1, s2, s3, rci) \ |
|||
s0 = _mm_aesenc_si128(s0, rc[rci]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc[rci + 1]); \ |
|||
s2 = _mm_aesenc_si128(s2, rc[rci + 2]); \ |
|||
s3 = _mm_aesenc_si128(s3, rc[rci + 3]); \ |
|||
s0 = _mm_aesenc_si128(s0, rc[rci + 4]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc[rci + 5]); \ |
|||
s2 = _mm_aesenc_si128(s2, rc[rci + 6]); \ |
|||
s3 = _mm_aesenc_si128(s3, rc[rci + 7]); \ |
|||
|
|||
#define AES4_zero(s0, s1, s2, s3, rci) \ |
|||
s0 = _mm_aesenc_si128(s0, rc0[rci]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc0[rci + 1]); \ |
|||
s2 = _mm_aesenc_si128(s2, rc0[rci + 2]); \ |
|||
s3 = _mm_aesenc_si128(s3, rc0[rci + 3]); \ |
|||
s0 = _mm_aesenc_si128(s0, rc0[rci + 4]); \ |
|||
s1 = _mm_aesenc_si128(s1, rc0[rci + 5]); \ |
|||
s2 = _mm_aesenc_si128(s2, rc0[rci + 6]); \ |
|||
s3 = _mm_aesenc_si128(s3, rc0[rci + 7]); \ |
|||
|
|||
#define AES4_4x(s0, s1, s2, s3, rci) \ |
|||
AES4(s0[0], s0[1], s0[2], s0[3], rci); \ |
|||
AES4(s1[0], s1[1], s1[2], s1[3], rci); \ |
|||
AES4(s2[0], s2[1], s2[2], s2[3], rci); \ |
|||
AES4(s3[0], s3[1], s3[2], s3[3], rci); |
|||
|
|||
#define AES4_8x(s0, s1, s2, s3, s4, s5, s6, s7, rci) \ |
|||
AES4_4x(s0, s1, s2, s3, rci); \ |
|||
AES4_4x(s4, s5, s6, s7, rci); |
|||
|
|||
#define MIX2(s0, s1) \ |
|||
tmp = _mm_unpacklo_epi32(s0, s1); \ |
|||
s1 = _mm_unpackhi_epi32(s0, s1); \ |
|||
s0 = tmp; |
|||
|
|||
#define MIX4(s0, s1, s2, s3) \ |
|||
tmp = _mm_unpacklo_epi32(s0, s1); \ |
|||
s0 = _mm_unpackhi_epi32(s0, s1); \ |
|||
s1 = _mm_unpacklo_epi32(s2, s3); \ |
|||
s2 = _mm_unpackhi_epi32(s2, s3); \ |
|||
s3 = _mm_unpacklo_epi32(s0, s2); \ |
|||
s0 = _mm_unpackhi_epi32(s0, s2); \ |
|||
s2 = _mm_unpackhi_epi32(s1, tmp); \ |
|||
s1 = _mm_unpacklo_epi32(s1, tmp); |
|||
|
|||
/* Truncated feed-forward store: writes the 32-byte digest by taking the
 * upper 64 bits of s0 and s1 and the lower 64 bits of s2 and s3. */
#define TRUNCSTORE(out, s0, s1, s2, s3) \
  *(u64*)(out)      = *((u64*)&(s0) + 1); \
  *(u64*)(out + 8)  = *((u64*)&(s1) + 1); \
  *(u64*)(out + 16) = *((u64*)&(s2) + 0); \
  *(u64*)(out + 24) = *((u64*)&(s3) + 0);
|||
|
|||
/* NOTE(review): presumably initializes the round-constant table `rc`
 * declared above -- confirm against the implementation file.
 * The duplicate `load_constants` declaration has been removed. */
void load_constants();
/* Self-test entry point for the Haraka implementations. */
void test_implementations();
|||
|
|||
void haraka256(unsigned char *out, const unsigned char *in); |
|||
void haraka256_keyed(unsigned char *out, const unsigned char *in, const u128 *rc); |
|||
void haraka256_4x(unsigned char *out, const unsigned char *in); |
|||
void haraka256_8x(unsigned char *out, const unsigned char *in); |
|||
|
|||
void haraka512(unsigned char *out, const unsigned char *in); |
|||
void haraka512_zero(unsigned char *out, const unsigned char *in); |
|||
void haraka512_keyed(unsigned char *out, const unsigned char *in, const u128 *rc); |
|||
void haraka512_4x(unsigned char *out, const unsigned char *in); |
|||
void haraka512_8x(unsigned char *out, const unsigned char *in); |
|||
|
|||
#endif |
|||
|
File diff suppressed because it is too large
@ -0,0 +1,146 @@ |
|||
// Copyright (c) 2009-2010 Satoshi Nakamoto
|
|||
// Copyright (c) 2009-2014 The Bitcoin Core developers
|
|||
// Distributed under the MIT software license, see the accompanying
|
|||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|||
|
|||
#include "uint256.h" |
|||
|
|||
#include "utilstrencodings.h" |
|||
|
|||
#include <stdio.h> |
|||
#include <string.h> |
|||
|
|||
template <unsigned int BITS> |
|||
base_blob<BITS>::base_blob(const std::vector<unsigned char>& vch) |
|||
{ |
|||
assert(vch.size() == sizeof(data)); |
|||
memcpy(data, &vch[0], sizeof(data)); |
|||
} |
|||
|
|||
template <unsigned int BITS> |
|||
std::string base_blob<BITS>::GetHex() const |
|||
{ |
|||
char psz[sizeof(data) * 2 + 1]; |
|||
for (unsigned int i = 0; i < sizeof(data); i++) |
|||
sprintf(psz + i * 2, "%02x", data[sizeof(data) - i - 1]); |
|||
return std::string(psz, psz + sizeof(data) * 2); |
|||
} |
|||
|
|||
template <unsigned int BITS> |
|||
void base_blob<BITS>::SetHex(const char* psz) |
|||
{ |
|||
memset(data, 0, sizeof(data)); |
|||
|
|||
// skip leading spaces
|
|||
while (isspace(*psz)) |
|||
psz++; |
|||
|
|||
// skip 0x
|
|||
if (psz[0] == '0' && tolower(psz[1]) == 'x') |
|||
psz += 2; |
|||
|
|||
// hex string to uint
|
|||
const char* pbegin = psz; |
|||
while (::HexDigit(*psz) != -1) |
|||
psz++; |
|||
psz--; |
|||
unsigned char* p1 = (unsigned char*)data; |
|||
unsigned char* pend = p1 + WIDTH; |
|||
while (psz >= pbegin && p1 < pend) { |
|||
*p1 = ::HexDigit(*psz--); |
|||
if (psz >= pbegin) { |
|||
*p1 |= ((unsigned char)::HexDigit(*psz--) << 4); |
|||
p1++; |
|||
} |
|||
} |
|||
} |
|||
|
|||
template <unsigned int BITS> |
|||
void base_blob<BITS>::SetHex(const std::string& str) |
|||
{ |
|||
SetHex(str.c_str()); |
|||
} |
|||
|
|||
template <unsigned int BITS> |
|||
std::string base_blob<BITS>::ToString() const |
|||
{ |
|||
return (GetHex()); |
|||
} |
|||
|
|||
// Explicit instantiations for base_blob<160>
|
|||
template base_blob<160>::base_blob(const std::vector<unsigned char>&); |
|||
template std::string base_blob<160>::GetHex() const; |
|||
template std::string base_blob<160>::ToString() const; |
|||
template void base_blob<160>::SetHex(const char*); |
|||
template void base_blob<160>::SetHex(const std::string&); |
|||
|
|||
// Explicit instantiations for base_blob<256>
|
|||
template base_blob<256>::base_blob(const std::vector<unsigned char>&); |
|||
template std::string base_blob<256>::GetHex() const; |
|||
template std::string base_blob<256>::ToString() const; |
|||
template void base_blob<256>::SetHex(const char*); |
|||
template void base_blob<256>::SetHex(const std::string&); |
|||
|
|||
// One reversible mixing round of a, b, c -- taken from lookup3 by Bob
// Jenkins. Each line is a subtract / rotate-xor / add triple; the exact
// statement order matters and is preserved.
static void inline HashMix(uint32_t& a, uint32_t& b, uint32_t& c)
{
    a -= c;  a ^= ((c << 4)  | (c >> 28));  c += b;
    b -= a;  b ^= ((a << 6)  | (a >> 26));  a += c;
    c -= b;  c ^= ((b << 8)  | (b >> 24));  b += a;
    a -= c;  a ^= ((c << 16) | (c >> 16));  c += b;
    b -= a;  b ^= ((a << 19) | (a >> 13));  a += c;
    c -= b;  c ^= ((b << 4)  | (b >> 28));  b += a;
}
|||
|
|||
// Final avalanche of a, b, c -- taken from lookup3 by Bob Jenkins.
// Each line is an xor / rotate-subtract pair; order is preserved exactly.
static void inline HashFinal(uint32_t& a, uint32_t& b, uint32_t& c)
{
    c ^= b;  c -= ((b << 14) | (b >> 18));
    a ^= c;  a -= ((c << 11) | (c >> 21));
    b ^= a;  b -= ((a << 25) | (a >> 7));
    c ^= b;  c -= ((b << 16) | (b >> 16));
    a ^= c;  a -= ((c << 4)  | (c >> 28));
    b ^= a;  b -= ((a << 14) | (a >> 18));
    c ^= b;  c -= ((b << 24) | (b >> 8));
}
|||
|
|||
uint64_t uint256::GetHash(const uint256& salt) const |
|||
{ |
|||
uint32_t a, b, c; |
|||
const uint32_t *pn = (const uint32_t*)data; |
|||
const uint32_t *salt_pn = (const uint32_t*)salt.data; |
|||
a = b = c = 0xdeadbeef + WIDTH; |
|||
|
|||
a += pn[0] ^ salt_pn[0]; |
|||
b += pn[1] ^ salt_pn[1]; |
|||
c += pn[2] ^ salt_pn[2]; |
|||
HashMix(a, b, c); |
|||
a += pn[3] ^ salt_pn[3]; |
|||
b += pn[4] ^ salt_pn[4]; |
|||
c += pn[5] ^ salt_pn[5]; |
|||
HashMix(a, b, c); |
|||
a += pn[6] ^ salt_pn[6]; |
|||
b += pn[7] ^ salt_pn[7]; |
|||
HashFinal(a, b, c); |
|||
|
|||
return ((((uint64_t)b) << 32) | c); |
|||
} |
@ -0,0 +1,164 @@ |
|||
// Copyright (c) 2009-2010 Satoshi Nakamoto
|
|||
// Copyright (c) 2009-2014 The Bitcoin Core developers
|
|||
// Distributed under the MIT software license, see the accompanying
|
|||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|||
|
|||
#ifndef BITCOIN_UINT256_H |
|||
#define BITCOIN_UINT256_H |
|||
|
|||
#include <assert.h> |
|||
#include <cstring> |
|||
#include <stdexcept> |
|||
#include <stdint.h> |
|||
#include <string> |
|||
#include <vector> |
|||
|
|||
#ifdef _MSC_VER |
|||
# define _ALIGN(x) __declspec(align(x)) |
|||
#else |
|||
# define _ALIGN(x) __attribute__ ((aligned(x))) |
|||
#endif |
|||
|
|||
/** Template base class for fixed-sized opaque blobs. */ |
|||
template<unsigned int BITS> |
|||
class base_blob |
|||
{ |
|||
protected: |
|||
enum { WIDTH=BITS/8 }; |
|||
uint8_t _ALIGN(4) data[WIDTH]; |
|||
public: |
|||
base_blob() |
|||
{ |
|||
memset(data, 0, sizeof(data)); |
|||
} |
|||
|
|||
explicit base_blob(const std::vector<unsigned char>& vch); |
|||
|
|||
bool IsNull() const |
|||
{ |
|||
for (int i = 0; i < WIDTH; i++) |
|||
if (data[i] != 0) |
|||
return false; |
|||
return true; |
|||
} |
|||
|
|||
void SetNull() |
|||
{ |
|||
memset(data, 0, sizeof(data)); |
|||
} |
|||
|
|||
friend inline bool operator==(const base_blob& a, const base_blob& b) { return memcmp(a.data, b.data, sizeof(a.data)) == 0; } |
|||
friend inline bool operator!=(const base_blob& a, const base_blob& b) { return memcmp(a.data, b.data, sizeof(a.data)) != 0; } |
|||
friend inline bool operator<(const base_blob& a, const base_blob& b) { return memcmp(a.data, b.data, sizeof(a.data)) < 0; } |
|||
|
|||
std::string GetHex() const; |
|||
void SetHex(const char* psz); |
|||
void SetHex(const std::string& str); |
|||
std::string ToString() const; |
|||
|
|||
unsigned char* begin() |
|||
{ |
|||
return &data[0]; |
|||
} |
|||
|
|||
unsigned char* end() |
|||
{ |
|||
return &data[WIDTH]; |
|||
} |
|||
|
|||
const unsigned char* begin() const |
|||
{ |
|||
return &data[0]; |
|||
} |
|||
|
|||
const unsigned char* end() const |
|||
{ |
|||
return &data[WIDTH]; |
|||
} |
|||
|
|||
unsigned int size() const |
|||
{ |
|||
return sizeof(data); |
|||
} |
|||
|
|||
unsigned int GetSerializeSize(int nType, int nVersion) const |
|||
{ |
|||
return sizeof(data); |
|||
} |
|||
|
|||
template<typename Stream> |
|||
void Serialize(Stream& s, int nType, int nVersion) const |
|||
{ |
|||
s.write((char*)data, sizeof(data)); |
|||
} |
|||
|
|||
template<typename Stream> |
|||
void Unserialize(Stream& s, int nType, int nVersion) |
|||
{ |
|||
s.read((char*)data, sizeof(data)); |
|||
} |
|||
}; |
|||
|
|||
/** 160-bit opaque blob.
|
|||
* @note This type is called uint160 for historical reasons only. It is an opaque |
|||
* blob of 160 bits and has no integer operations. |
|||
*/ |
|||
class uint160 : public base_blob<160> { |
|||
public: |
|||
uint160() {} |
|||
uint160(const base_blob<160>& b) : base_blob<160>(b) {} |
|||
explicit uint160(const std::vector<unsigned char>& vch) : base_blob<160>(vch) {} |
|||
}; |
|||
|
|||
/** 256-bit opaque blob.
|
|||
* @note This type is called uint256 for historical reasons only. It is an |
|||
* opaque blob of 256 bits and has no integer operations. Use arith_uint256 if |
|||
* those are required. |
|||
*/ |
|||
class uint256 : public base_blob<256> { |
|||
public: |
|||
uint256() {} |
|||
uint256(const base_blob<256>& b) : base_blob<256>(b) {} |
|||
explicit uint256(const std::vector<unsigned char>& vch) : base_blob<256>(vch) {} |
|||
|
|||
/** A cheap hash function that just returns 64 bits from the result, it can be
|
|||
* used when the contents are considered uniformly random. It is not appropriate |
|||
* when the value can easily be influenced from outside as e.g. a network adversary could |
|||
* provide values to trigger worst-case behavior. |
|||
* @note The result of this function is not stable between little and big endian. |
|||
*/ |
|||
uint64_t GetCheapHash() const |
|||
{ |
|||
uint64_t result; |
|||
memcpy((void*)&result, (void*)data, 8); |
|||
return result; |
|||
} |
|||
|
|||
/** A more secure, salted hash function.
|
|||
* @note This hash is not stable between little and big endian. |
|||
*/ |
|||
uint64_t GetHash(const uint256& salt) const; |
|||
}; |
|||
|
|||
/* uint256 from const char *.
|
|||
* This is a separate function because the constructor uint256(const char*) can result |
|||
* in dangerously catching uint256(0). |
|||
*/ |
|||
inline uint256 uint256S(const char *str) |
|||
{ |
|||
uint256 rv; |
|||
rv.SetHex(str); |
|||
return rv; |
|||
} |
|||
/* uint256 from std::string.
|
|||
* This is a separate function because the constructor uint256(const std::string &str) can result |
|||
* in dangerously catching uint256(0) via std::string(const char*). |
|||
*/ |
|||
inline uint256 uint256S(const std::string& str) |
|||
{ |
|||
uint256 rv; |
|||
rv.SetHex(str); |
|||
return rv; |
|||
} |
|||
|
|||
#endif // BITCOIN_UINT256_H
|
@ -0,0 +1,499 @@ |
|||
// Copyright (c) 2009-2010 Satoshi Nakamoto
|
|||
// Copyright (c) 2009-2014 The Bitcoin Core developers
|
|||
// Distributed under the MIT software license, see the accompanying
|
|||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|||
|
|||
#include "utilstrencodings.h" |
|||
|
|||
#include "tinyformat.h" |
|||
|
|||
#include <cstdlib> |
|||
#include <cstring> |
|||
#include <errno.h> |
|||
#include <limits> |
|||
|
|||
using namespace std; |
|||
|
|||
std::string SanitizeString(const std::string& str)
{
    /**
     * Keep only characters safe for simple messages/URLs/email addresses;
     * anything even possibly remotely dangerous like & or > is dropped.
     */
    static std::string safeChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890 .,;_/:?@()");
    std::string strResult;
    strResult.reserve(str.size());
    for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    {
        if (safeChars.find(*it) != std::string::npos)
            strResult.push_back(*it);
    }
    return strResult;
}
|||
|
|||
/** Per-character hex lookup: '0'-'9', 'a'-'f' and 'A'-'F' map to 0..15;
 *  every other character code maps to -1. Indexed by unsigned char value. */
const signed char p_util_hexdigit[256] =
{
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  // 0x00-0x0f
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  // 0x10-0x1f
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  // 0x20-0x2f
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,  // '0'-'9'
    -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,  // 'A'-'F'
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  // 0x50-0x5f
    -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,  // 'a'-'f'
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,  // 0x70-0x7f
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};

/** Return the value (0-15) of hex digit `c`, or -1 if it is not one. */
signed char HexDigit(char c)
{
    const unsigned char idx = (unsigned char)c;
    return p_util_hexdigit[idx];
}
|||
|
|||
bool IsHex(const string& str) |
|||
{ |
|||
for(std::string::const_iterator it(str.begin()); it != str.end(); ++it) |
|||
{ |
|||
if (HexDigit(*it) < 0) |
|||
return false; |
|||
} |
|||
return (str.size() > 0) && (str.size()%2 == 0); |
|||
} |
|||
|
|||
vector<unsigned char> ParseHex(const char* psz) |
|||
{ |
|||
// convert hex dump to vector
|
|||
vector<unsigned char> vch; |
|||
while (true) |
|||
{ |
|||
while (isspace(*psz)) |
|||
psz++; |
|||
signed char c = HexDigit(*psz++); |
|||
if (c == (signed char)-1) |
|||
break; |
|||
unsigned char n = (c << 4); |
|||
c = HexDigit(*psz++); |
|||
if (c == (signed char)-1) |
|||
break; |
|||
n |= c; |
|||
vch.push_back(n); |
|||
} |
|||
return vch; |
|||
} |
|||
|
|||
/** Convenience overload: decode a hex std::string. */
std::vector<unsigned char> ParseHex(const std::string& str)
{
    return ParseHex(str.c_str());
}
|||
|
|||
/** Base64-encode `len` bytes starting at `pch`, with '=' padding. */
std::string EncodeBase64(const unsigned char* pch, size_t len)
{
    static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    std::string strRet;
    strRet.reserve((len + 2) / 3 * 4);

    int mode = 0;  // number of 2-bit groups buffered (0, 1, or 2)
    int left = 0;  // buffered bits, pre-shifted into output position
    const unsigned char *pchEnd = pch + len;

    while (pch < pchEnd)
    {
        int enc = *(pch++);
        if (mode == 0) {
            // no buffered bits: emit the top 6, buffer the low 2
            strRet += pbase64[enc >> 2];
            left = (enc & 3) << 4;
            mode = 1;
        } else if (mode == 1) {
            // 2 buffered bits: emit them with the top 4, buffer the low 4
            strRet += pbase64[left | (enc >> 4)];
            left = (enc & 15) << 2;
            mode = 2;
        } else {
            // 4 buffered bits: emit them with the top 2, then the low 6
            strRet += pbase64[left | (enc >> 6)];
            strRet += pbase64[enc & 63];
            mode = 0;
        }
    }

    // Flush the remainder and pad to a multiple of 4 output characters.
    if (mode)
    {
        strRet += pbase64[left];
        strRet += '=';
        if (mode == 1)
            strRet += '=';
    }

    return strRet;
}
|||
|
|||
string EncodeBase64(const string& str) |
|||
{ |
|||
return EncodeBase64((const unsigned char*)str.c_str(), str.size()); |
|||
} |
|||
|
|||
vector<unsigned char> DecodeBase64(const char* p, bool* pfInvalid) |
|||
{ |
|||
static const int decode64_table[256] = |
|||
{ |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, |
|||
-1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, |
|||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, |
|||
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, |
|||
49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 |
|||
}; |
|||
|
|||
if (pfInvalid) |
|||
*pfInvalid = false; |
|||
|
|||
vector<unsigned char> vchRet; |
|||
vchRet.reserve(strlen(p)*3/4); |
|||
|
|||
int mode = 0; |
|||
int left = 0; |
|||
|
|||
while (1) |
|||
{ |
|||
int dec = decode64_table[(unsigned char)*p]; |
|||
if (dec == -1) break; |
|||
p++; |
|||
switch (mode) |
|||
{ |
|||
case 0: // we have no bits and get 6
|
|||
left = dec; |
|||
mode = 1; |
|||
break; |
|||
|
|||
case 1: // we have 6 bits and keep 4
|
|||
vchRet.push_back((left<<2) | (dec>>4)); |
|||
left = dec & 15; |
|||
mode = 2; |
|||
break; |
|||
|
|||
case 2: // we have 4 bits and get 6, we keep 2
|
|||
vchRet.push_back((left<<4) | (dec>>2)); |
|||
left = dec & 3; |
|||
mode = 3; |
|||
break; |
|||
|
|||
case 3: // we have 2 bits and get 6
|
|||
vchRet.push_back((left<<6) | dec); |
|||
mode = 0; |
|||
break; |
|||
} |
|||
} |
|||
|
|||
if (pfInvalid) |
|||
switch (mode) |
|||
{ |
|||
case 0: // 4n base64 characters processed: ok
|
|||
break; |
|||
|
|||
case 1: // 4n+1 base64 character processed: impossible
|
|||
*pfInvalid = true; |
|||
break; |
|||
|
|||
case 2: // 4n+2 base64 characters processed: require '=='
|
|||
if (left || p[0] != '=' || p[1] != '=' || decode64_table[(unsigned char)p[2]] != -1) |
|||
*pfInvalid = true; |
|||
break; |
|||
|
|||
case 3: // 4n+3 base64 characters processed: require '='
|
|||
if (left || p[0] != '=' || decode64_table[(unsigned char)p[1]] != -1) |
|||
*pfInvalid = true; |
|||
break; |
|||
} |
|||
|
|||
return vchRet; |
|||
} |
|||
|
|||
string DecodeBase64(const string& str) |
|||
{ |
|||
vector<unsigned char> vchRet = DecodeBase64(str.c_str()); |
|||
return (vchRet.size() == 0) ? string() : string((const char*)&vchRet[0], vchRet.size()); |
|||
} |
|||
|
|||
string EncodeBase32(const unsigned char* pch, size_t len) |
|||
{ |
|||
static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567"; |
|||
|
|||
string strRet=""; |
|||
strRet.reserve((len+4)/5*8); |
|||
|
|||
int mode=0, left=0; |
|||
const unsigned char *pchEnd = pch+len; |
|||
|
|||
while (pch<pchEnd) |
|||
{ |
|||
int enc = *(pch++); |
|||
switch (mode) |
|||
{ |
|||
case 0: // we have no bits
|
|||
strRet += pbase32[enc >> 3]; |
|||
left = (enc & 7) << 2; |
|||
mode = 1; |
|||
break; |
|||
|
|||
case 1: // we have three bits
|
|||
strRet += pbase32[left | (enc >> 6)]; |
|||
strRet += pbase32[(enc >> 1) & 31]; |
|||
left = (enc & 1) << 4; |
|||
mode = 2; |
|||
break; |
|||
|
|||
case 2: // we have one bit
|
|||
strRet += pbase32[left | (enc >> 4)]; |
|||
left = (enc & 15) << 1; |
|||
mode = 3; |
|||
break; |
|||
|
|||
case 3: // we have four bits
|
|||
strRet += pbase32[left | (enc >> 7)]; |
|||
strRet += pbase32[(enc >> 2) & 31]; |
|||
left = (enc & 3) << 3; |
|||
mode = 4; |
|||
break; |
|||
|
|||
case 4: // we have two bits
|
|||
strRet += pbase32[left | (enc >> 5)]; |
|||
strRet += pbase32[enc & 31]; |
|||
mode = 0; |
|||
} |
|||
} |
|||
|
|||
static const int nPadding[5] = {0, 6, 4, 3, 1}; |
|||
if (mode) |
|||
{ |
|||
strRet += pbase32[left]; |
|||
for (int n=0; n<nPadding[mode]; n++) |
|||
strRet += '='; |
|||
} |
|||
|
|||
return strRet; |
|||
} |
|||
|
|||
string EncodeBase32(const string& str) |
|||
{ |
|||
return EncodeBase32((const unsigned char*)str.c_str(), str.size()); |
|||
} |
|||
|
|||
vector<unsigned char> DecodeBase32(const char* p, bool* pfInvalid) |
|||
{ |
|||
static const int decode32_table[256] = |
|||
{ |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, |
|||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2, |
|||
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, |
|||
23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
|||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 |
|||
}; |
|||
|
|||
if (pfInvalid) |
|||
*pfInvalid = false; |
|||
|
|||
vector<unsigned char> vchRet; |
|||
vchRet.reserve((strlen(p))*5/8); |
|||
|
|||
int mode = 0; |
|||
int left = 0; |
|||
|
|||
while (1) |
|||
{ |
|||
int dec = decode32_table[(unsigned char)*p]; |
|||
if (dec == -1) break; |
|||
p++; |
|||
switch (mode) |
|||
{ |
|||
case 0: // we have no bits and get 5
|
|||
left = dec; |
|||
mode = 1; |
|||
break; |
|||
|
|||
case 1: // we have 5 bits and keep 2
|
|||
vchRet.push_back((left<<3) | (dec>>2)); |
|||
left = dec & 3; |
|||
mode = 2; |
|||
break; |
|||
|
|||
case 2: // we have 2 bits and keep 7
|
|||
left = left << 5 | dec; |
|||
mode = 3; |
|||
break; |
|||
|
|||
case 3: // we have 7 bits and keep 4
|
|||
vchRet.push_back((left<<1) | (dec>>4)); |
|||
left = dec & 15; |
|||
mode = 4; |
|||
break; |
|||
|
|||
case 4: // we have 4 bits, and keep 1
|
|||
vchRet.push_back((left<<4) | (dec>>1)); |
|||
left = dec & 1; |
|||
mode = 5; |
|||
break; |
|||
|
|||
case 5: // we have 1 bit, and keep 6
|
|||
left = left << 5 | dec; |
|||
mode = 6; |
|||
break; |
|||
|
|||
case 6: // we have 6 bits, and keep 3
|
|||
vchRet.push_back((left<<2) | (dec>>3)); |
|||
left = dec & 7; |
|||
mode = 7; |
|||
break; |
|||
|
|||
case 7: // we have 3 bits, and keep 0
|
|||
vchRet.push_back((left<<5) | dec); |
|||
mode = 0; |
|||
break; |
|||
} |
|||
} |
|||
|
|||
if (pfInvalid) |
|||
switch (mode) |
|||
{ |
|||
case 0: // 8n base32 characters processed: ok
|
|||
break; |
|||
|
|||
case 1: // 8n+1 base32 characters processed: impossible
|
|||
case 3: // +3
|
|||
case 6: // +6
|
|||
*pfInvalid = true; |
|||
break; |
|||
|
|||
case 2: // 8n+2 base32 characters processed: require '======'
|
|||
if (left || p[0] != '=' || p[1] != '=' || p[2] != '=' || p[3] != '=' || p[4] != '=' || p[5] != '=' || decode32_table[(unsigned char)p[6]] != -1) |
|||
*pfInvalid = true; |
|||
break; |
|||
|
|||
case 4: // 8n+4 base32 characters processed: require '===='
|
|||
if (left || p[0] != '=' || p[1] != '=' || p[2] != '=' || p[3] != '=' || decode32_table[(unsigned char)p[4]] != -1) |
|||
*pfInvalid = true; |
|||
break; |
|||
|
|||
case 5: // 8n+5 base32 characters processed: require '==='
|
|||
if (left || p[0] != '=' || p[1] != '=' || p[2] != '=' || decode32_table[(unsigned char)p[3]] != -1) |
|||
*pfInvalid = true; |
|||
break; |
|||
|
|||
case 7: // 8n+7 base32 characters processed: require '='
|
|||
if (left || p[0] != '=' || decode32_table[(unsigned char)p[1]] != -1) |
|||
*pfInvalid = true; |
|||
break; |
|||
} |
|||
|
|||
return vchRet; |
|||
} |
|||
|
|||
string DecodeBase32(const string& str) |
|||
{ |
|||
vector<unsigned char> vchRet = DecodeBase32(str.c_str()); |
|||
return (vchRet.size() == 0) ? string() : string((const char*)&vchRet[0], vchRet.size()); |
|||
} |
|||
|
|||
/**
 * Convert string to signed 32-bit integer with strict parse error feedback.
 * @param str  base-10 text to parse (strtol rules: optional sign, leading whitespace allowed)
 * @param out  if non-NULL, receives the parsed value (written even on failure)
 * @return true only if the entire string parsed as a valid int32_t without over/underflow.
 */
bool ParseInt32(const std::string& str, int32_t *out)
{
    char *endp = NULL;
    errno = 0; // strtol will not set errno if valid
    long int n = strtol(str.c_str(), &endp, 10);
    if(out) *out = (int)n;
    // Bug fix: when no digits are consumed (e.g. the empty string), strtol
    // leaves endp == str.c_str() with no error reported, which the previous
    // check accepted as a valid 0. Require at least one converted character.
    // Note that strtol returns a *long int*, so even if strtol doesn't report a over/underflow
    // we still have to check that the returned value is within the range of an *int32_t*. On 64-bit
    // platforms the size of these types may be different.
    return endp && endp != str.c_str() && *endp == 0 && !errno &&
        n >= std::numeric_limits<int32_t>::min() &&
        n <= std::numeric_limits<int32_t>::max();
}
|||
|
|||
/**
 * Format a paragraph of text to a fixed width, adding `indent` spaces of
 * indentation to every line after a wrap. Runs of spaces collapse to single
 * separators; a word longer than `width` is emitted unbroken.
 */
std::string FormatParagraph(const std::string in, size_t width, size_t indent)
{
    std::stringstream result;
    size_t column = 0;
    size_t pos = 0;
    while (pos < in.size())
    {
        // Skip separator spaces to the start of the next word.
        pos = in.find_first_not_of(' ', pos);
        if (pos == std::string::npos)
            break;
        // Locate the end of that word.
        size_t wordEnd = in.find_first_of(' ', pos);
        if (wordEnd == std::string::npos)
            wordEnd = in.size();
        const size_t wordLen = wordEnd - pos;
        if (column > 0)
        {
            // Wrap before this word if it would overflow the line width.
            if (column + wordLen > width)
            {
                result << '\n' << std::string(indent, ' ');
                column = 0;
            }
            else
            {
                result << ' ';
            }
        }
        result << in.substr(pos, wordLen);
        // column counts the word plus the separator that may follow it.
        column += wordLen + 1;
        pos = wordEnd;
    }
    return result.str();
}
|||
|
|||
// Format a signed 64-bit integer as decimal text.
// NOTE(review): relies on the project's strprintf (tinyformat-style) being
// type-safe so that "%d" is valid for an int64_t argument — confirm.
std::string i64tostr(int64_t n)
{
    return strprintf("%d", n);
}
|||
|
|||
// Format a signed int as decimal text via the project's strprintf.
std::string itostr(int n)
{
    return strprintf("%d", n);
}
|||
|
|||
/**
 * Parse a base-10 signed 64-bit integer from a C string.
 * Returns 0 when no digits are present; no error reporting (atoi semantics).
 */
int64_t atoi64(const char* psz)
{
#ifdef _MSC_VER
    return _atoi64(psz);
#else
    char* endp = NULL;
    return strtoll(psz, &endp, 10);
#endif
}
|||
|
|||
/**
 * Parse a base-10 signed 64-bit integer from a std::string.
 * Returns 0 when no digits are present; no error reporting (atoi semantics).
 */
int64_t atoi64(const std::string& str)
{
#ifdef _MSC_VER
    return _atoi64(str.c_str());
#else
    const char* text = str.c_str();
    return strtoll(text, NULL, 10);
#endif
}
|||
|
|||
/** std::string convenience overload of the C library atoi(). */
int atoi(const std::string& str)
{
    // Explicitly qualify the global C function so the call cannot recurse.
    return ::atoi(str.c_str());
}
@ -0,0 +1,98 @@ |
|||
// Copyright (c) 2009-2010 Satoshi Nakamoto
|
|||
// Copyright (c) 2009-2014 The Bitcoin Core developers
|
|||
// Distributed under the MIT software license, see the accompanying
|
|||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|||
|
|||
/**
|
|||
* Utilities for converting data from/to strings. |
|||
*/ |
|||
#ifndef BITCOIN_UTILSTRENCODINGS_H |
|||
#define BITCOIN_UTILSTRENCODINGS_H |
|||
|
|||
#include <stdint.h> |
|||
#include <string> |
|||
#include <vector> |
|||
|
|||
#define BEGIN(a) ((char*)&(a)) |
|||
#define END(a) ((char*)&((&(a))[1])) |
|||
#define UBEGIN(a) ((unsigned char*)&(a)) |
|||
#define UEND(a) ((unsigned char*)&((&(a))[1])) |
|||
#define ARRAYLEN(array) (sizeof(array)/sizeof((array)[0])) |
|||
|
|||
/** This is needed because the foreach macro can't get over the comma in pair<t1, t2> */ |
|||
#define PAIRTYPE(t1, t2) std::pair<t1, t2> |
|||
|
|||
std::string SanitizeString(const std::string& str); |
|||
std::vector<unsigned char> ParseHex(const char* psz); |
|||
std::vector<unsigned char> ParseHex(const std::string& str); |
|||
signed char HexDigit(char c); |
|||
bool IsHex(const std::string& str); |
|||
std::vector<unsigned char> DecodeBase64(const char* p, bool* pfInvalid = NULL); |
|||
std::string DecodeBase64(const std::string& str); |
|||
std::string EncodeBase64(const unsigned char* pch, size_t len); |
|||
std::string EncodeBase64(const std::string& str); |
|||
std::vector<unsigned char> DecodeBase32(const char* p, bool* pfInvalid = NULL); |
|||
std::string DecodeBase32(const std::string& str); |
|||
std::string EncodeBase32(const unsigned char* pch, size_t len); |
|||
std::string EncodeBase32(const std::string& str); |
|||
|
|||
std::string i64tostr(int64_t n); |
|||
std::string itostr(int n); |
|||
int64_t atoi64(const char* psz); |
|||
int64_t atoi64(const std::string& str); |
|||
int atoi(const std::string& str); |
|||
|
|||
/**
|
|||
* Convert string to signed 32-bit integer with strict parse error feedback. |
|||
* @returns true if the entire string could be parsed as valid integer, |
|||
* false if not the entire string could be parsed or when overflow or underflow occurred. |
|||
*/ |
|||
bool ParseInt32(const std::string& str, int32_t *out); |
|||
|
|||
/**
 * Hex-encode the byte range [itbegin, itend). Each element is reinterpreted
 * as an unsigned char and emitted as two lowercase hex digits; when fSpaces
 * is set, bytes are separated by single spaces.
 */
template<typename T>
std::string HexStr(const T itbegin, const T itend, bool fSpaces=false)
{
    static const char hexdigits[] = "0123456789abcdef";
    std::string out;
    // Worst case (with spaces): three characters per byte.
    out.reserve((itend-itbegin)*3);
    for (T cur = itbegin; cur < itend; ++cur)
    {
        if (fSpaces && cur != itbegin)
            out.push_back(' ');
        const unsigned char byte = (unsigned char)(*cur);
        out.push_back(hexdigits[byte >> 4]);
        out.push_back(hexdigits[byte & 15]);
    }
    return out;
}
|||
|
|||
/** Convenience overload: hex-encode an entire container (anything exposing begin()/end()). */
template<typename T>
inline std::string HexStr(const T& vch, bool fSpaces=false)
{
    return HexStr(vch.begin(), vch.end(), fSpaces);
}
|||
|
|||
/**
|
|||
* Format a paragraph of text to a fixed width, adding spaces for |
|||
* indentation to any added line. |
|||
*/ |
|||
std::string FormatParagraph(const std::string in, size_t width=79, size_t indent=0); |
|||
|
|||
/**
|
|||
* Timing-attack-resistant comparison. |
|||
* Takes time proportional to length |
|||
* of first argument. |
|||
*/ |
|||
template <typename T>
bool TimingResistantEqual(const T& a, const T& b)
{
    // Early return only for an empty b (a fixed, length-independent case);
    // otherwise every element of a is always scanned so the running time
    // depends only on a.size(), not on where the first mismatch occurs.
    if (b.size() == 0) return a.size() == 0;
    // Fold any length difference and every per-element difference into one
    // accumulator; b is indexed modulo its size so a may be longer than b.
    size_t accumulator = a.size() ^ b.size();
    for (size_t i = 0; i < a.size(); i++)
        accumulator |= a[i] ^ b[i%b.size()];
    return accumulator == 0;
}
|||
|
|||
#endif // BITCOIN_UTILSTRENCODINGS_H
|
@ -0,0 +1,355 @@ |
|||
/*
|
|||
* This uses veriations of the clhash algorithm for Verus Coin, licensed |
|||
* with the Apache-2.0 open source license. |
|||
* |
|||
* Copyright (c) 2018 Michael Toutonghi |
|||
* Distributed under the Apache 2.0 software license, available in the original form for clhash |
|||
* here: https://github.com/lemire/clhash/commit/934da700a2a54d8202929a826e2763831bd43cf7#diff-9879d6db96fd29134fc802214163b95a
|
|||
* |
|||
* Original CLHash code and any portions herein, (C) 2017, 2018 Daniel Lemire and Owen Kaser |
|||
* Faster 64-bit universal hashing |
|||
* using carry-less multiplications, Journal of Cryptographic Engineering (to appear) |
|||
* |
|||
* Best used on recent x64 processors (Haswell or better). |
|||
* |
|||
* This implements an intermediate step in the last part of a Verus block hash. The intent of this step |
|||
* is to more effectively equalize FPGAs over GPUs and CPUs. |
|||
* |
|||
**/ |
|||
|
|||
|
|||
#include "verus_hash.h" |
|||
|
|||
#include <boost/thread.hpp> |
|||
|
|||
#include <assert.h> |
|||
#include <string.h> |
|||
#include <x86intrin.h> |
|||
|
|||
#ifdef __WIN32 |
|||
#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno) |
|||
#endif |
|||
|
|||
thread_local thread_specific_ptr verusclhasher_key; |
|||
thread_local thread_specific_ptr verusclhasher_descr; |
|||
|
|||
#ifdef _WIN32 |
|||
// attempt to workaround horrible mingw/gcc destructor bug on Windows, which passes garbage in the this pointer
|
|||
// we use the opportunity of control here to clean up all of our tls variables. we could keep a list, but this is a quick hack
|
|||
// Thread-exit cleanup. Deliberately frees the two named thread_local buffers
// rather than using `this`, because (per the note above) the mingw/gcc bug
// can pass garbage in the this pointer on Windows.
thread_specific_ptr::~thread_specific_ptr() {
    if (verusclhasher_key.ptr)
    {
        verusclhasher_key.reset();
    }
    if (verusclhasher_descr.ptr)
    {
        verusclhasher_descr.reset();
    }
}
|||
#endif |
|||
|
|||
int __cpuverusoptimized = 0x80; |
|||
|
|||
// multiply the length and the some key, no modulo
|
|||
// Carry-less multiply (PCLMULQDQ) of keylength by length; packs both into one
// vector and multiplies the low lane by the high lane. No reduction performed.
static inline __m128i lazyLengthHash(uint64_t keylength, uint64_t length) {
    const __m128i lengthvector = _mm_set_epi64x(keylength,length);
    const __m128i clprod1 = _mm_clmulepi64_si128( lengthvector, lengthvector, 0x10);
    return clprod1;
}
|||
|
|||
// modulo reduction to 64-bit value. The high 64 bits contain garbage, see precompReduction64
|
|||
// Reduce a 128-bit carry-less product to GF(2^64) modulo the irreducible
// polynomial (64,4,3,1,0) named in the comment below. Only the low 64 bits
// of the returned vector are meaningful.
static inline __m128i precompReduction64_si128( __m128i A) {
    //const __m128i C = _mm_set_epi64x(1U,(1U<<4)+(1U<<3)+(1U<<1)+(1U<<0)); // C is the irreducible poly. (64,4,3,1,0)
    const __m128i C = _mm_cvtsi64_si128((1U<<4)+(1U<<3)+(1U<<1)+(1U<<0));
    __m128i Q2 = _mm_clmulepi64_si128( A, C, 0x01);
    // Table lookup (via byte shuffle) folding the top byte of Q2's upper half
    // back through the polynomial.
    __m128i Q3 = _mm_shuffle_epi8(_mm_setr_epi8(0, 27, 54, 45, 108, 119, 90, 65, (char)216, (char)195, (char)238, (char)245, (char)180, (char)175, (char)130, (char)153),
            _mm_srli_si128(Q2,8));
    __m128i Q4 = _mm_xor_si128(Q2,A);
    const __m128i final = _mm_xor_si128(Q3,Q4);
    return final;/// WARNING: HIGH 64 BITS CONTAIN GARBAGE
}
|||
|
|||
// Convenience wrapper: reduce to GF(2^64) and extract the meaningful low 64 bits.
static inline uint64_t precompReduction64( __m128i A) {
    return _mm_cvtsi128_si64(precompReduction64_si128(A));
}
|||
|
|||
// verus intermediate hash extra
|
|||
// Core mixing loop of the Verus intermediate hash. Runs 32 rounds; each round
// uses the low 64 bits of the accumulator ("selector") to choose two key slots
// (prand/prandex) and one of eight mixing strategies (carry-less multiply,
// xor, integer modulo, or AES rounds), mutating and swapping key material as
// it goes. Consensus-critical: the exact operation order must be preserved.
static __m128i __verusclmulwithoutreduction64alignedrepeat(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask)
{
    __m128i const *pbuf;

    // divide key mask by 16 from bytes to __m128i
    keyMask >>= 4;

    // the random buffer must have at least 32 16 byte dwords after the keymask to work with this
    // algorithm. we take the value from the last element inside the keyMask + 2, as that will never
    // be used to xor into the accumulator before it is hashed with other values first
    __m128i acc = _mm_load_si128(randomsource + (keyMask + 2));

    for (int64_t i = 0; i < 32; i++)
    {
        // low 64 bits of the accumulator drive every random choice this round
        const uint64_t selector = _mm_cvtsi128_si64(acc);

        // get two random locations in the key, which will be mutated and swapped
        __m128i *prand = randomsource + ((selector >> 5) & keyMask);
        __m128i *prandex = randomsource + ((selector >> 32) & keyMask);

        // select random start and order of pbuf processing
        pbuf = buf + (selector & 3);

        // Throughout: pbuf - (((selector & 1) << 1) - 1) evaluates to pbuf+1
        // when selector bit 0 is clear and pbuf-1 when it is set, i.e. a
        // neighboring 16-byte block of the input.
        switch (selector & 0x1c)
        {
            case 0:
            {
                // clmul-fold both key slots against two input blocks, mutate
                // prand from acc, then mutate and store into prandex
                const __m128i temp1 = _mm_load_si128(prandex);
                const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
                const __m128i add1 = _mm_xor_si128(temp1, temp2);
                const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
                acc = _mm_xor_si128(clprod1, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
                const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);

                const __m128i temp12 = _mm_load_si128(prand);
                _mm_store_si128(prand, tempa2);

                const __m128i temp22 = _mm_load_si128(pbuf);
                const __m128i add12 = _mm_xor_si128(temp12, temp22);
                const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10);
                acc = _mm_xor_si128(clprod12, acc);

                const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
                const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
                _mm_store_si128(prandex, tempb2);
                break;
            }
            case 4:
            {
                // like case 0 but prand first, with an extra clmul of the
                // input block and a plain xor-fold for the second half
                const __m128i temp1 = _mm_load_si128(prand);
                const __m128i temp2 = _mm_load_si128(pbuf);
                const __m128i add1 = _mm_xor_si128(temp1, temp2);
                const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
                acc = _mm_xor_si128(clprod1, acc);
                const __m128i clprod2 = _mm_clmulepi64_si128(temp2, temp2, 0x10);
                acc = _mm_xor_si128(clprod2, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
                const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);

                const __m128i temp12 = _mm_load_si128(prandex);
                _mm_store_si128(prandex, tempa2);

                const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
                const __m128i add12 = _mm_xor_si128(temp12, temp22);
                acc = _mm_xor_si128(add12, acc);

                const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
                const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
                _mm_store_si128(prand, tempb2);
                break;
            }
            case 8:
            {
                // xor-fold prandex, then clmul-fold prand twice
                const __m128i temp1 = _mm_load_si128(prandex);
                const __m128i temp2 = _mm_load_si128(pbuf);
                const __m128i add1 = _mm_xor_si128(temp1, temp2);
                acc = _mm_xor_si128(add1, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
                const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);

                const __m128i temp12 = _mm_load_si128(prand);
                _mm_store_si128(prand, tempa2);

                const __m128i temp22 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
                const __m128i add12 = _mm_xor_si128(temp12, temp22);
                const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10);
                acc = _mm_xor_si128(clprod12, acc);
                const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10);
                acc = _mm_xor_si128(clprod22, acc);

                const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
                const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
                _mm_store_si128(prandex, tempb2);
                break;
            }
            case 0xc:
            {
                // integer modulo mixing; branch on dividend parity decides
                // whether prand is mutated or the two slots are swapped
                const __m128i temp1 = _mm_load_si128(prand);
                const __m128i temp2 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
                const __m128i add1 = _mm_xor_si128(temp1, temp2);

                // cannot be zero here
                const int32_t divisor = (uint32_t)selector;

                acc = _mm_xor_si128(add1, acc);

                const int64_t dividend = _mm_cvtsi128_si64(acc);
                const __m128i modulo = _mm_cvtsi32_si128(dividend % divisor);
                acc = _mm_xor_si128(modulo, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp1);
                const __m128i tempa2 = _mm_xor_si128(tempa1, temp1);

                if (dividend & 1)
                {
                    const __m128i temp12 = _mm_load_si128(prandex);
                    _mm_store_si128(prandex, tempa2);

                    const __m128i temp22 = _mm_load_si128(pbuf);
                    const __m128i add12 = _mm_xor_si128(temp12, temp22);
                    const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10);
                    acc = _mm_xor_si128(clprod12, acc);
                    const __m128i clprod22 = _mm_clmulepi64_si128(temp22, temp22, 0x10);
                    acc = _mm_xor_si128(clprod22, acc);

                    const __m128i tempb1 = _mm_mulhrs_epi16(acc, temp12);
                    const __m128i tempb2 = _mm_xor_si128(tempb1, temp12);
                    _mm_store_si128(prand, tempb2);
                }
                else
                {
                    // even dividend: swap slots, storing the mutated value
                    const __m128i tempb3 = _mm_load_si128(prandex);
                    _mm_store_si128(prandex, tempa2);
                    _mm_store_si128(prand, tempb3);
                }
                break;
            }
            case 0x10:
            {
                // a few AES operations
                // AES2/MIX2 consume round keys from rc (= prand) via the
                // macros; tmp is scratch for MIX2
                const __m128i *rc = prand;
                __m128i tmp;

                __m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
                __m128i temp2 = _mm_load_si128(pbuf);

                AES2(temp1, temp2, 0);
                MIX2(temp1, temp2);

                AES2(temp1, temp2, 4);
                MIX2(temp1, temp2);

                AES2(temp1, temp2, 8);
                MIX2(temp1, temp2);

                acc = _mm_xor_si128(temp2, _mm_xor_si128(temp1, acc));

                const __m128i tempa1 = _mm_load_si128(prand);
                const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1);
                const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2);

                const __m128i tempa4 = _mm_load_si128(prandex);
                _mm_store_si128(prandex, tempa3);
                _mm_store_si128(prand, tempa4);
                break;
            }
            case 0x14:
            {
                // we'll just call this one the monkins loop, inspired by Chris
                const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1);
                __m128i tmp; // used by MIX2

                uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times
                __m128i *rc = prand;
                uint64_t aesroundoffset = 0;
                __m128i onekey;

                do
                {
                    // per-iteration selector bit picks clmul-fold vs AES round;
                    // rc walks forward through key material either way
                    if (selector & (0x10000000 << rounds))
                    {
                        onekey = _mm_load_si128(rc++);
                        const __m128i temp2 = _mm_load_si128(rounds & 1 ? pbuf : buftmp);
                        const __m128i add1 = _mm_xor_si128(onekey, temp2);
                        const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
                        acc = _mm_xor_si128(clprod1, acc);
                    }
                    else
                    {
                        onekey = _mm_load_si128(rc++);
                        __m128i temp2 = _mm_load_si128(rounds & 1 ? buftmp : pbuf);
                        AES2(onekey, temp2, aesroundoffset);
                        aesroundoffset += 4;
                        MIX2(onekey, temp2);
                        acc = _mm_xor_si128(onekey, acc);
                        acc = _mm_xor_si128(temp2, acc);
                    }
                } while (rounds--);

                const __m128i tempa1 = _mm_load_si128(prand);
                const __m128i tempa2 = _mm_mulhrs_epi16(acc, tempa1);
                const __m128i tempa3 = _mm_xor_si128(tempa1, tempa2);

                const __m128i tempa4 = _mm_load_si128(prandex);
                _mm_store_si128(prandex, tempa3);
                _mm_store_si128(prand, tempa4);
                break;
            }
            case 0x18:
            {
                // clmul-fold prand against an input block, then swap slots
                const __m128i temp1 = _mm_load_si128(pbuf - (((selector & 1) << 1) - 1));
                const __m128i temp2 = _mm_load_si128(prand);
                const __m128i add1 = _mm_xor_si128(temp1, temp2);
                const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
                acc = _mm_xor_si128(clprod1, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp2);
                const __m128i tempa2 = _mm_xor_si128(tempa1, temp2);

                const __m128i tempb3 = _mm_load_si128(prandex);
                _mm_store_si128(prandex, tempa2);
                _mm_store_si128(prand, tempb3);
                break;
            }
            case 0x1c:
            {
                // clmul-fold prandex, mutate prand from acc, fold the old
                // prand value back in, then mutate prandex
                const __m128i temp1 = _mm_load_si128(pbuf);
                const __m128i temp2 = _mm_load_si128(prandex);
                const __m128i add1 = _mm_xor_si128(temp1, temp2);
                const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10);
                acc = _mm_xor_si128(clprod1, acc);

                const __m128i tempa1 = _mm_mulhrs_epi16(acc, temp2);
                const __m128i tempa2 = _mm_xor_si128(tempa1, temp2);

                const __m128i tempa3 = _mm_load_si128(prand);
                _mm_store_si128(prand, tempa2);

                acc = _mm_xor_si128(tempa3, acc);

                const __m128i tempb1 = _mm_mulhrs_epi16(acc, tempa3);
                const __m128i tempb2 = _mm_xor_si128(tempb1, tempa3);
                _mm_store_si128(prandex, tempb2);
                break;
            }
        }
    }
    return acc;
}
|||
|
|||
// hashes 64 bytes only by doing a carryless multiplication and reduction of the repeated 64 byte sequence 16 times,
|
|||
// returning a 64 bit hash value
|
|||
// Intrinsics path of the 64-byte intermediate hash: run the keyed mixing
// loop, fold in a fixed length hash (1024, 64), then reduce the 128-bit
// accumulator to the returned 64-bit value.
uint64_t verusclhash(void * random, const unsigned char buf[64], uint64_t keyMask) {
    __m128i acc = __verusclmulwithoutreduction64alignedrepeat((__m128i *)random, (const __m128i *)buf, keyMask);
    acc = _mm_xor_si128(acc, lazyLengthHash(1024, 64));
    return precompReduction64(acc);
}
|||
|
|||
#ifdef __WIN32 |
|||
#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno) |
|||
#endif |
|||
|
|||
// Allocate a heap buffer aligned for 256-bit vector access (sizeof(__m256i)).
// Returns NULL on failure; the caller owns the returned memory.
void *alloc_aligned_buffer(uint64_t bufSize)
{
    void *ptr = NULL;
    if (posix_memalign(&ptr, sizeof(__m256i), bufSize) != 0)
        return NULL;
    return ptr;
}
@ -0,0 +1,239 @@ |
|||
/*
|
|||
* This uses veriations of the clhash algorithm for Verus Coin, licensed |
|||
* with the Apache-2.0 open source license. |
|||
* |
|||
* Copyright (c) 2018 Michael Toutonghi |
|||
* Distributed under the Apache 2.0 software license, available in the original form for clhash |
|||
* here: https://github.com/lemire/clhash/commit/934da700a2a54d8202929a826e2763831bd43cf7#diff-9879d6db96fd29134fc802214163b95a
|
|||
* |
|||
* CLHash is a very fast hashing function that uses the |
|||
* carry-less multiplication and SSE instructions. |
|||
* |
|||
* Original CLHash code (C) 2017, 2018 Daniel Lemire and Owen Kaser |
|||
* Faster 64-bit universal hashing |
|||
* using carry-less multiplications, Journal of Cryptographic Engineering (to appear) |
|||
* |
|||
* Best used on recent x64 processors (Haswell or better). |
|||
* |
|||
**/ |
|||
|
|||
#ifndef INCLUDE_VERUS_CLHASH_H |
|||
#define INCLUDE_VERUS_CLHASH_H |
|||
|
|||
#ifndef _WIN32 |
|||
#include <cpuid.h> |
|||
#else |
|||
#include <intrin.h> |
|||
#endif // !WIN32
|
|||
|
|||
#include <stdlib.h> |
|||
#include <stdint.h> |
|||
#include <stddef.h> |
|||
#include <assert.h> |
|||
#include <boost/thread.hpp> |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
#ifdef _WIN32 |
|||
#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ?0 :errno) |
|||
typedef unsigned char u_char; |
|||
#endif |
|||
|
|||
enum { |
|||
// Verus Key size must include the equivalent size of a Haraka key
|
|||
// after the first part.
|
|||
// Any excess over a power of 2 will not get mutated, and any excess over
|
|||
// power of 2 + Haraka sized key will not be used
|
|||
VERUSKEYSIZE=1024 * 8 + (40 * 16), |
|||
VERUSHHASH_SOLUTION_VERSION = 1 |
|||
}; |
|||
|
|||
// Describes the per-thread hash key buffer: the seed it was generated from
// and its total size in bytes.
struct verusclhash_descr
{
    uint256 seed;
    uint32_t keySizeInBytes;
};
|||
|
|||
// Minimal owning-pointer stand-in for boost::thread_specific_ptr, meant to be
// declared thread_local. reset() frees the held buffer with std::free.
// NOTE(review): on Windows alloc_aligned_buffer uses _aligned_malloc, whose
// buffers should be released with _aligned_free, not std::free — verify.
struct thread_specific_ptr {
    void *ptr;
    thread_specific_ptr() { ptr = NULL; }
    // Free the currently held buffer (if any, and if different) and take
    // ownership of newptr.
    void reset(void *newptr = NULL)
    {
        if (ptr && ptr != newptr)
        {
            std::free(ptr);
        }
        ptr = newptr;
    }
    void *get() { return ptr; }
#ifdef _WIN32 // horrible MingW and gcc thread local storage bug workaround
    // defined out-of-line; frees the named thread_locals instead of using `this`
    ~thread_specific_ptr();
#else
    ~thread_specific_ptr() {
        this->reset();
    }
#endif
};
|||
|
|||
extern thread_local thread_specific_ptr verusclhasher_key; |
|||
extern thread_local thread_specific_ptr verusclhasher_descr; |
|||
|
|||
extern int __cpuverusoptimized; |
|||
|
|||
// Lazily probe CPUID once for the AVX + AES-NI + PCLMULQDQ features the
// optimized hash path needs. __cpuverusoptimized starts at 0x80, which acts
// as the "not yet probed" sentinel; after probing it holds 0 or 1.
inline bool IsCPUVerusOptimized()
{
    if (__cpuverusoptimized & 0x80)
    {
#ifdef _WIN32
        #define bit_AVX (1 << 28)
        #define bit_AES (1 << 25)
        #define bit_PCLMUL (1 << 1)
        // https://insufficientlycomplicated.wordpress.com/2011/11/07/detecting-intel-advanced-vector-extensions-avx-in-visual-studio/
        // bool cpuAVXSuport = cpuInfo[2] & (1 << 28) || false;

        int cpuInfo[4];
        __cpuid(cpuInfo, 1);
        // ECX (cpuInfo[2]) feature bits from CPUID leaf 1
        __cpuverusoptimized = ((cpuInfo[2] & (bit_AVX | bit_AES | bit_PCLMUL)) == (bit_AVX | bit_AES | bit_PCLMUL));
#else
        unsigned int eax,ebx,ecx,edx;

        // treat CPUID leaf 1 being unavailable as "not optimized"
        if (!__get_cpuid(1,&eax,&ebx,&ecx,&edx))
        {
            __cpuverusoptimized = false;
        }
        else
        {
            __cpuverusoptimized = ((ecx & (bit_AVX | bit_AES | bit_PCLMUL)) == (bit_AVX | bit_AES | bit_PCLMUL));
        }
#endif //WIN32
    }
    return __cpuverusoptimized;
};
|||
|
|||
// Override the CPUID probe (e.g. for testing the portable path); clears the
// 0x80 "not yet probed" sentinel as a side effect.
inline void ForceCPUVerusOptimized(bool trueorfalse)
{
    __cpuverusoptimized = trueorfalse;
};
|||
|
|||
uint64_t verusclhash(void * random, const unsigned char buf[64], uint64_t keyMask); |
|||
uint64_t verusclhash_port(void * random, const unsigned char buf[64], uint64_t keyMask); |
|||
|
|||
void *alloc_aligned_buffer(uint64_t bufSize); |
|||
|
|||
#ifdef __cplusplus |
|||
} // extern "C"
|
|||
#endif |
|||
|
|||
#ifdef __cplusplus |
|||
|
|||
#include <vector> |
|||
#include <string> |
|||
|
|||
// special high speed hasher for VerusHash 2.0
|
|||
struct verusclhasher { |
|||
uint64_t keySizeInBytes; |
|||
uint64_t keyMask; |
|||
uint64_t (*verusclhashfunction)(void * random, const unsigned char buf[64], uint64_t keyMask); |
|||
|
|||
inline uint64_t keymask(uint64_t keysize) |
|||
{ |
|||
int i = 0; |
|||
while (keysize >>= 1) |
|||
{ |
|||
i++; |
|||
} |
|||
return i ? (((uint64_t)1) << i) - 1 : 0; |
|||
} |
|||
|
|||
// align on 256 bit boundary at end
|
|||
verusclhasher(uint64_t keysize=VERUSKEYSIZE) : keySizeInBytes((keysize >> 5) << 5) |
|||
{ |
|||
if (IsCPUVerusOptimized()) |
|||
{ |
|||
verusclhashfunction = &verusclhash; |
|||
} |
|||
else |
|||
{ |
|||
verusclhashfunction = &verusclhash_port; |
|||
} |
|||
|
|||
// if we changed, change it
|
|||
if (verusclhasher_key.get() && keySizeInBytes != ((verusclhash_descr *)verusclhasher_descr.get())->keySizeInBytes) |
|||
{ |
|||
verusclhasher_key.reset(); |
|||
verusclhasher_descr.reset(); |
|||
} |
|||
// get buffer space for mutating and refresh keys
|
|||
void *key = NULL; |
|||
if (!(key = verusclhasher_key.get()) && |
|||
(verusclhasher_key.reset((unsigned char *)alloc_aligned_buffer(keySizeInBytes << 1)), key = verusclhasher_key.get())) |
|||
{ |
|||
verusclhash_descr *pdesc; |
|||
if (verusclhasher_descr.reset(new verusclhash_descr()), pdesc = (verusclhash_descr *)verusclhasher_descr.get()) |
|||
{ |
|||
pdesc->keySizeInBytes = keySizeInBytes; |
|||
} |
|||
else |
|||
{ |
|||
verusclhasher_key.reset(); |
|||
key = NULL; |
|||
} |
|||
} |
|||
if (key) |
|||
{ |
|||
keyMask = keymask(keySizeInBytes); |
|||
} |
|||
else |
|||
{ |
|||
keyMask = 0; |
|||
keySizeInBytes = 0; |
|||
} |
|||
#ifdef VERUSHASHDEBUG |
|||
printf("New hasher, keyMask: %lx, newKeySize: %lx\n", keyMask, keySizeInBytes); |
|||
#endif |
|||
} |
|||
|
|||
// this prepares a key for hashing and mutation by copying it from the original key for this block
|
|||
// WARNING!! this does not check for NULL ptr, so make sure the buffer is allocated
|
|||
inline void *gethashkey() |
|||
{ |
|||
unsigned char *ret = (unsigned char *)verusclhasher_key.get(); |
|||
verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get(); |
|||
memcpy(ret, ret + pdesc->keySizeInBytes, keyMask + 1); |
|||
#ifdef VERUSHASHDEBUG |
|||
// in debug mode, ensure that what should be the same, is
|
|||
assert(memcmp(ret + (keyMask + 1), ret + (pdesc->keySizeInBytes + keyMask + 1), verusclhasher_keySizeInBytes - (keyMask + 1)) == 0); |
|||
#endif |
|||
return ret; |
|||
} |
|||
|
|||
inline void *gethasherrefresh() |
|||
{ |
|||
verusclhash_descr *pdesc = (verusclhash_descr *)verusclhasher_descr.get(); |
|||
return (unsigned char *)verusclhasher_key.get() + pdesc->keySizeInBytes; |
|||
} |
|||
|
|||
inline verusclhash_descr *gethasherdescription() |
|||
{ |
|||
return (verusclhash_descr *)verusclhasher_descr.get(); |
|||
} |
|||
|
|||
inline uint64_t keyrefreshsize() |
|||
{ |
|||
return keyMask + 1; |
|||
} |
|||
|
|||
inline uint64_t operator()(const unsigned char buf[64]) const { |
|||
return (*verusclhashfunction)(verusclhasher_key.get(), buf, keyMask); |
|||
} |
|||
|
|||
inline uint64_t operator()(const unsigned char buf[64], void *key) const { |
|||
return (*verusclhashfunction)(key, buf, keyMask); |
|||
} |
|||
}; |
|||
|
|||
#endif // #ifdef __cplusplus
|
|||
|
|||
#endif // INCLUDE_VERUS_CLHASH_H
|
@ -0,0 +1,591 @@ |
|||
/*
|
|||
* This uses veriations of the clhash algorithm for Verus Coin, licensed |
|||
* with the Apache-2.0 open source license. |
|||
* |
|||
* Copyright (c) 2018 Michael Toutonghi |
|||
* Distributed under the Apache 2.0 software license, available in the original form for clhash |
|||
* here: https://github.com/lemire/clhash/commit/934da700a2a54d8202929a826e2763831bd43cf7#diff-9879d6db96fd29134fc802214163b95a
|
|||
* |
|||
* Original CLHash code and any portions herein, (C) 2017, 2018 Daniel Lemire and Owen Kaser |
|||
* Faster 64-bit universal hashing |
|||
* using carry-less multiplications, Journal of Cryptographic Engineering (to appear) |
|||
* |
|||
* Best used on recent x64 processors (Haswell or better). |
|||
* |
|||
* This implements an intermediate step in the last part of a Verus block hash. The intent of this step |
|||
* is to more effectively equalize FPGAs over GPUs and CPUs. |
|||
* |
|||
**/ |
|||
|
|||
|
|||
#include "verus_hash.h" |
|||
|
|||
#include <assert.h> |
|||
#include <string.h> |
|||
|
|||
#ifdef __APPLE__ |
|||
#include <sys/types.h> |
|||
#endif// APPLE
|
|||
|
|||
#ifdef _WIN32 |
|||
#pragma warning (disable : 4146) |
|||
#include <intrin.h> |
|||
#else |
|||
#include <x86intrin.h> |
|||
#endif //WIN32
|
|||
|
|||
// Software carry-less (GF(2)) multiply of two 64-bit values into a 128-bit
// result r[0] (low) and r[1] (high), using a 4-bit windowed table of
// multiples of b plus a final repair pass for bits shifted out of range.
void clmul64(uint64_t a, uint64_t b, uint64_t* r)
{
    uint8_t s = 4,i; //window size
    uint64_t two_s = 1 << s; //2^s
    uint64_t smask = two_s-1; //s 1 bits
    uint64_t u[16];
    uint64_t tmp;
    uint64_t ifmask;
    //Precomputation
    // u[k] = carry-less product of the 4-bit value k with b
    u[0] = 0;
    u[1] = b;
    for(i = 2 ; i < two_s; i += 2){
        u[i] = u[i >> 1] << 1; //even indices: left shift
        u[i + 1] = u[i] ^ b; //odd indices: xor b
    }
    //Multiply
    // combine table entries for each 4-bit window of a
    r[0] = u[a & smask]; //first window only affects lower word
    r[1] = 0;
    for(i = s ; i < 64 ; i += s){
        tmp = u[a >> i & smask];
        r[0] ^= tmp << i;
        r[1] ^= tmp >> (64 - i);
    }
    //Repair
    // account for product bits of each window that overflowed 64 bits:
    // depends on the high bits of b, which the table shift discarded
    uint64_t m = 0xEEEEEEEEEEEEEEEE; //s=4 => 16 times 1110
    for(i = 1 ; i < s ; i++){
        tmp = ((a & m) >> i);
        m &= m << 1; //shift mask to exclude all bit j': j' mod s = i
        ifmask = -((b >> (64-i)) & 1); //if the (64-i)th bit of b is 1
        r[1] ^= (tmp & ifmask);
    }
}
|||
|
|||
// Portable emulation of _mm_clmulepi64_si128: imm bit 0 selects the 64-bit
// lane of a, imm bit 4 selects the lane of b; the lanes are carry-less
// multiplied via clmul64 into a 128-bit result.
u128 _mm_clmulepi64_si128_emu(const __m128i &a, const __m128i &b, int imm)
{
    uint64_t result[2];
    clmul64(*((uint64_t*)&a + (imm & 1)), *((uint64_t*)&b + ((imm & 0x10) >> 4)), result);

    /*
    // TEST
    const __m128i tmp1 = _mm_load_si128(&a);
    const __m128i tmp2 = _mm_load_si128(&b);
    imm = imm & 0x11;
    const __m128i testresult = (imm == 0x10) ? _mm_clmulepi64_si128(tmp1, tmp2, 0x10) : ((imm == 0x01) ? _mm_clmulepi64_si128(tmp1, tmp2, 0x01) : ((imm == 0x00) ? _mm_clmulepi64_si128(tmp1, tmp2, 0x00) : _mm_clmulepi64_si128(tmp1, tmp2, 0x11)));
    if (!memcmp(&testresult, &result, 16))
    {
        printf("_mm_clmulepi64_si128_emu: Portable version passed!\n");
    }
    else
    {
        printf("_mm_clmulepi64_si128_emu: Portable version failed! a: %lxh %lxl, b: %lxh %lxl, imm: %x, emu: %lxh %lxl, intrin: %lxh %lxl\n",
            *((uint64_t *)&a + 1), *(uint64_t *)&a,
            *((uint64_t *)&b + 1), *(uint64_t *)&b,
            imm,
            *((uint64_t *)result + 1), *(uint64_t *)result,
            *((uint64_t *)&testresult + 1), *(uint64_t *)&testresult);
        return testresult;
    }
    */

    return *(__m128i *)result;
}
|||
|
|||
// Portable emulation of _mm_mulhrs_epi16: for each of the 8 signed 16-bit
// lanes, compute (a*b + 0x4000) >> 15 (multiply, round, take high bits).
u128 _mm_mulhrs_epi16_emu(__m128i _a, __m128i _b)
{
    int16_t result[8];
    int16_t *a = (int16_t*)&_a, *b = (int16_t*)&_b;
    for (int i = 0; i < 8; i ++)
    {
        result[i] = (int16_t)((((int32_t)(a[i]) * (int32_t)(b[i])) + 0x4000) >> 15);
    }

    /*
    const __m128i testresult = _mm_mulhrs_epi16(_a, _b);
    if (!memcmp(&testresult, &result, 16))
    {
        printf("_mm_mulhrs_epi16_emu: Portable version passed!\n");
    }
    else
    {
        printf("_mm_mulhrs_epi16_emu: Portable version failed! a: %lxh %lxl, b: %lxh %lxl, emu: %lxh %lxl, intrin: %lxh %lxl\n",
            *((uint64_t *)&a + 1), *(uint64_t *)&a,
            *((uint64_t *)&b + 1), *(uint64_t *)&b,
            *((uint64_t *)result + 1), *(uint64_t *)result,
            *((uint64_t *)&testresult + 1), *(uint64_t *)&testresult);
    }
    */

    return *(__m128i *)result;
}
|||
|
|||
// Portable _mm_set_epi64x: pack two 64-bit values (lo into the low lane,
// hi into the high lane).
inline u128 _mm_set_epi64x_emu(uint64_t hi, uint64_t lo)
{
    __m128i result;
    ((uint64_t *)&result)[0] = lo;
    ((uint64_t *)&result)[1] = hi;
    return result;
}
|||
|
|||
// Portable _mm_cvtsi64_si128: 64-bit value in the low lane, high lane zeroed.
inline u128 _mm_cvtsi64_si128_emu(uint64_t lo)
{
    __m128i result;
    ((uint64_t *)&result)[0] = lo;
    ((uint64_t *)&result)[1] = 0;
    return result;
}
|||
|
|||
// Portable _mm_cvtsi128_si64: extract the low 64 bits as a signed integer.
inline int64_t _mm_cvtsi128_si64_emu(__m128i &a)
{
    return *(int64_t *)&a;
}
|||
|
|||
// Portable _mm_cvtsi128_si32: extract the low 32 bits as a signed integer.
inline int32_t _mm_cvtsi128_si32_emu(__m128i &a)
{
    return *(int32_t *)&a;
}
|||
|
|||
// Portable _mm_cvtsi32_si128: 32-bit value in the lowest dword, all other
// bits zeroed.
inline u128 _mm_cvtsi32_si128_emu(uint32_t lo)
{
    __m128i result;
    ((uint32_t *)&result)[0] = lo;
    ((uint32_t *)&result)[1] = 0;
    ((uint64_t *)&result)[1] = 0;

    /*
    const __m128i testresult = _mm_cvtsi32_si128(lo);
    if (!memcmp(&testresult, &result, 16))
    {
        printf("_mm_cvtsi32_si128_emu: Portable version passed!\n");
    }
    else
    {
        printf("_mm_cvtsi32_si128_emu: Portable version failed!\n");
    }
    */

    return result;
}
|||
|
|||
// Portable stand-in for _mm_setr_epi8: assemble a 128-bit vector from 16
// bytes given in memory order (c0 is byte 0, c15 is byte 15).
u128 _mm_setr_epi8_emu(u_char c0, u_char c1, u_char c2, u_char c3, u_char c4, u_char c5, u_char c6, u_char c7, u_char c8, u_char c9, u_char c10, u_char c11, u_char c12, u_char c13, u_char c14, u_char c15)
{
    const uint8_t bytes[16] = { c0, c1, c2, c3, c4, c5, c6, c7,
                                c8, c9, c10, c11, c12, c13, c14, c15 };
    __m128i result;
    uint8_t *dst = (uint8_t *)&result;
    for (int i = 0; i < 16; i++)
    {
        dst[i] = bytes[i];
    }
    return result;
}
|||
|
|||
// Portable emulation of _mm_srli_si128 (PSRLDQ): shift the whole 128-bit
// value right by imm8 *bytes*, filling the vacated high bytes with zero.
inline __m128i _mm_srli_si128_emu(__m128i a, int imm8)
{
    // The hardware instruction saturates the count: any shift > 15 clears
    // the result entirely.
    uint8_t shift = imm8 & 0xff;
    if (shift > 15) shift = 16;

    // Build the shifted value inside a union so the returned __m128i is
    // read from 16-byte-aligned storage; the original `*(__m128i *)result`
    // cast of a bare unsigned char[16] array was potentially misaligned
    // and violated strict aliasing rules.
    union { unsigned char b[16]; __m128i v; } result;

    int i;
    for (i = 0; i < (16 - shift); i++)
    {
        result.b[i] = ((unsigned char *)&a)[shift + i];
    }
    for ( ; i < 16; i++)
    {
        result.b[i] = 0;
    }

    return result.v;
}
|||
|
|||
// Portable emulation of _mm_xor_si128: bitwise XOR of two 128-bit values.
inline __m128i _mm_xor_si128_emu(__m128i a, __m128i b)
{
#ifdef _WIN32
    // MSVC provides no vector operator^, so XOR the two quadwords by hand.
    // The union guarantees 16-byte alignment for the returned vector; the
    // original `*(__m128i *)result` cast of a uint64_t[2] array was only
    // 8-byte aligned and could fault on an aligned vector load.
    union { uint64_t q[2]; __m128i v; } result;
    result.q[0] = *(uint64_t *)&a ^ *(uint64_t *)&b;
    result.q[1] = *((uint64_t *)&a + 1) ^ *((uint64_t *)&b + 1);
    return result.v;
#else
    // GCC/Clang support operator^ directly on vector types.
    return a ^ b;
#endif
}
|||
|
|||
// Portable 128-bit load: dereference the source as a vector.
// NOTE(review): assumes p is suitably aligned for __m128i — confirm at
// call sites.
inline __m128i _mm_load_si128_emu(const void *p)
{
    const __m128i *src = (const __m128i *)p;
    return *src;
}
|||
|
|||
// Portable 128-bit store: write the vector through the destination pointer.
// NOTE(review): assumes p is suitably aligned for __m128i — confirm at
// call sites.
inline void _mm_store_si128_emu(void *p, __m128i val)
{
    __m128i *dst = (__m128i *)p;
    *dst = val;
}
|||
|
|||
// Portable emulation of _mm_shuffle_epi8 (SSSE3 PSHUFB).
// Output byte i is zero when the high bit of selector byte i is set;
// otherwise it is the source byte indexed by the selector's low nibble.
__m128i _mm_shuffle_epi8_emu(__m128i a, __m128i b)
{
    __m128i result;
    uint8_t *out = (uint8_t *)&result;
    const uint8_t *src = (const uint8_t *)&a;
    const uint8_t *sel = (const uint8_t *)&b;

    for (int i = 0; i < 16; i++)
    {
        out[i] = (sel[i] & 0x80) ? 0 : src[sel[i] & 0xf];
    }

    return result;
}
|||
|
|||
// portable
|
|||
static inline __m128i lazyLengthHash_port(uint64_t keylength, uint64_t length) { |
|||
const __m128i lengthvector = _mm_set_epi64x_emu(keylength,length); |
|||
const __m128i clprod1 = _mm_clmulepi64_si128_emu( lengthvector, lengthvector, 0x10); |
|||
return clprod1; |
|||
} |
|||
|
|||
// modulo reduction to 64-bit value. The high 64 bits contain garbage, see precompReduction64
//
// Reduces the 128-bit carryless product A modulo the irreducible GF(2)
// polynomial x^64 + x^4 + x^3 + x + 1 (low bits 0x1b), folding the high
// quadword back into the low one. Only the low 64 bits of the return
// value are meaningful.
static inline __m128i precompReduction64_si128_port( __m128i A) {
    //const __m128i C = _mm_set_epi64x(1U,(1U<<4)+(1U<<3)+(1U<<1)+(1U<<0)); // C is the irreducible poly. (64,4,3,1,0)
    const __m128i C = _mm_cvtsi64_si128_emu((1U<<4)+(1U<<3)+(1U<<1)+(1U<<0));
    // First fold: carryless-multiply the high quadword of A by the
    // polynomial's low terms.
    __m128i Q2 = _mm_clmulepi64_si128_emu( A, C, 0x01);
    // Second fold: the shuffle implements a 4-bit-indexed lookup table
    // whose entry i is the carryless product i * 0x1b (0, 27, 54, 45, ...),
    // applied to the high half of Q2 (shifted down by 8 bytes).
    __m128i Q3 = _mm_shuffle_epi8_emu(_mm_setr_epi8_emu(0, 27, 54, 45, 108, 119, 90, 65, (char)216, (char)195, (char)238, (char)245, (char)180, (char)175, (char)130, (char)153),
        _mm_srli_si128_emu(Q2,8));
    __m128i Q4 = _mm_xor_si128_emu(Q2,A);
    // Combine both folds; the reduced value sits in the low 64 bits.
    const __m128i final = _mm_xor_si128_emu(Q3,Q4);
    return final;/// WARNING: HIGH 64 BITS SHOULD BE ASSUMED TO CONTAIN GARBAGE
}
|||
|
|||
static inline uint64_t precompReduction64_port( __m128i A) { |
|||
__m128i tmp = precompReduction64_si128_port(A); |
|||
return _mm_cvtsi128_si64_emu(tmp); |
|||
} |
|||
|
|||
// verus intermediate hash extra
|
|||
static __m128i __verusclmulwithoutreduction64alignedrepeat_port(__m128i *randomsource, const __m128i buf[4], uint64_t keyMask) |
|||
{ |
|||
__m128i const *pbuf; |
|||
|
|||
// divide key mask by 16 from bytes to __m128i
|
|||
keyMask >>= 4; |
|||
|
|||
// the random buffer must have at least 32 16 byte dwords after the keymask to work with this
|
|||
// algorithm. we take the value from the last element inside the keyMask + 2, as that will never
|
|||
// be used to xor into the accumulator before it is hashed with other values first
|
|||
__m128i acc = _mm_load_si128_emu(randomsource + (keyMask + 2)); |
|||
|
|||
for (int64_t i = 0; i < 32; i++) |
|||
{ |
|||
const uint64_t selector = _mm_cvtsi128_si64_emu(acc); |
|||
|
|||
// get two random locations in the key, which will be mutated and swapped
|
|||
__m128i *prand = randomsource + ((selector >> 5) & keyMask); |
|||
__m128i *prandex = randomsource + ((selector >> 32) & keyMask); |
|||
|
|||
// select random start and order of pbuf processing
|
|||
pbuf = buf + (selector & 3); |
|||
|
|||
switch (selector & 0x1c) |
|||
{ |
|||
case 0: |
|||
{ |
|||
const __m128i temp1 = _mm_load_si128_emu(prandex); |
|||
const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); |
|||
const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); |
|||
const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod1, acc); |
|||
|
|||
const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); |
|||
const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); |
|||
|
|||
const __m128i temp12 = _mm_load_si128_emu(prand); |
|||
_mm_store_si128_emu(prand, tempa2); |
|||
|
|||
const __m128i temp22 = _mm_load_si128_emu(pbuf); |
|||
const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); |
|||
const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod12, acc); |
|||
|
|||
const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); |
|||
const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); |
|||
_mm_store_si128_emu(prandex, tempb2); |
|||
break; |
|||
} |
|||
case 4: |
|||
{ |
|||
const __m128i temp1 = _mm_load_si128_emu(prand); |
|||
const __m128i temp2 = _mm_load_si128_emu(pbuf); |
|||
const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); |
|||
const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod1, acc); |
|||
const __m128i clprod2 = _mm_clmulepi64_si128_emu(temp2, temp2, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod2, acc); |
|||
|
|||
const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); |
|||
const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); |
|||
|
|||
const __m128i temp12 = _mm_load_si128_emu(prandex); |
|||
_mm_store_si128_emu(prandex, tempa2); |
|||
|
|||
const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); |
|||
const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); |
|||
acc = _mm_xor_si128_emu(add12, acc); |
|||
|
|||
const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); |
|||
const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); |
|||
_mm_store_si128_emu(prand, tempb2); |
|||
break; |
|||
} |
|||
case 8: |
|||
{ |
|||
const __m128i temp1 = _mm_load_si128_emu(prandex); |
|||
const __m128i temp2 = _mm_load_si128_emu(pbuf); |
|||
const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); |
|||
acc = _mm_xor_si128_emu(add1, acc); |
|||
|
|||
const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); |
|||
const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); |
|||
|
|||
const __m128i temp12 = _mm_load_si128_emu(prand); |
|||
_mm_store_si128_emu(prand, tempa2); |
|||
|
|||
const __m128i temp22 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); |
|||
const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); |
|||
const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod12, acc); |
|||
const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod22, acc); |
|||
|
|||
const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); |
|||
const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); |
|||
_mm_store_si128_emu(prandex, tempb2); |
|||
break; |
|||
} |
|||
case 0xc: |
|||
{ |
|||
const __m128i temp1 = _mm_load_si128_emu(prand); |
|||
const __m128i temp2 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); |
|||
const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); |
|||
|
|||
// cannot be zero here
|
|||
const int32_t divisor = (uint32_t)selector; |
|||
|
|||
acc = _mm_xor_si128(add1, acc); |
|||
|
|||
const int64_t dividend = _mm_cvtsi128_si64_emu(acc); |
|||
const __m128i modulo = _mm_cvtsi32_si128_emu(dividend % divisor); |
|||
acc = _mm_xor_si128_emu(modulo, acc); |
|||
|
|||
const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp1); |
|||
const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp1); |
|||
|
|||
if (dividend & 1) |
|||
{ |
|||
const __m128i temp12 = _mm_load_si128_emu(prandex); |
|||
_mm_store_si128_emu(prandex, tempa2); |
|||
|
|||
const __m128i temp22 = _mm_load_si128_emu(pbuf); |
|||
const __m128i add12 = _mm_xor_si128_emu(temp12, temp22); |
|||
const __m128i clprod12 = _mm_clmulepi64_si128_emu(add12, add12, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod12, acc); |
|||
const __m128i clprod22 = _mm_clmulepi64_si128_emu(temp22, temp22, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod22, acc); |
|||
|
|||
const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, temp12); |
|||
const __m128i tempb2 = _mm_xor_si128_emu(tempb1, temp12); |
|||
_mm_store_si128_emu(prand, tempb2); |
|||
} |
|||
else |
|||
{ |
|||
const __m128i tempb3 = _mm_load_si128_emu(prandex); |
|||
_mm_store_si128_emu(prandex, tempa2); |
|||
_mm_store_si128_emu(prand, tempb3); |
|||
} |
|||
break; |
|||
} |
|||
case 0x10: |
|||
{ |
|||
// a few AES operations
|
|||
const __m128i *rc = prand; |
|||
__m128i tmp; |
|||
|
|||
__m128i temp1 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); |
|||
__m128i temp2 = _mm_load_si128_emu(pbuf); |
|||
|
|||
AES2_EMU(temp1, temp2, 0); |
|||
MIX2_EMU(temp1, temp2); |
|||
|
|||
AES2_EMU(temp1, temp2, 4); |
|||
MIX2_EMU(temp1, temp2); |
|||
|
|||
AES2_EMU(temp1, temp2, 8); |
|||
MIX2_EMU(temp1, temp2); |
|||
|
|||
acc = _mm_xor_si128_emu(temp1, acc); |
|||
acc = _mm_xor_si128_emu(temp2, acc); |
|||
|
|||
const __m128i tempa1 = _mm_load_si128_emu(prand); |
|||
const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1); |
|||
const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2); |
|||
|
|||
const __m128i tempa4 = _mm_load_si128_emu(prandex); |
|||
_mm_store_si128_emu(prandex, tempa3); |
|||
_mm_store_si128_emu(prand, tempa4); |
|||
break; |
|||
} |
|||
case 0x14: |
|||
{ |
|||
// we'll just call this one the monkins loop, inspired by Chris
|
|||
const __m128i *buftmp = pbuf - (((selector & 1) << 1) - 1); |
|||
__m128i tmp; // used by MIX2
|
|||
|
|||
uint64_t rounds = selector >> 61; // loop randomly between 1 and 8 times
|
|||
__m128i *rc = prand; |
|||
uint64_t aesround = 0; |
|||
__m128i onekey; |
|||
|
|||
do |
|||
{ |
|||
if (selector & (0x10000000 << rounds)) |
|||
{ |
|||
onekey = _mm_load_si128_emu(rc++); |
|||
const __m128i temp2 = _mm_load_si128_emu(rounds & 1 ? pbuf : buftmp); |
|||
const __m128i add1 = _mm_xor_si128_emu(onekey, temp2); |
|||
const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod1, acc); |
|||
} |
|||
else |
|||
{ |
|||
onekey = _mm_load_si128_emu(rc++); |
|||
__m128i temp2 = _mm_load_si128_emu(rounds & 1 ? buftmp : pbuf); |
|||
const uint64_t roundidx = aesround++ << 2; |
|||
AES2_EMU(onekey, temp2, roundidx); |
|||
MIX2_EMU(onekey, temp2); |
|||
acc = _mm_xor_si128_emu(onekey, acc); |
|||
acc = _mm_xor_si128_emu(temp2, acc); |
|||
} |
|||
} while (rounds--); |
|||
|
|||
const __m128i tempa1 = _mm_load_si128_emu(prand); |
|||
const __m128i tempa2 = _mm_mulhrs_epi16_emu(acc, tempa1); |
|||
const __m128i tempa3 = _mm_xor_si128_emu(tempa1, tempa2); |
|||
|
|||
const __m128i tempa4 = _mm_load_si128_emu(prandex); |
|||
_mm_store_si128_emu(prandex, tempa3); |
|||
_mm_store_si128_emu(prand, tempa4); |
|||
break; |
|||
} |
|||
case 0x18: |
|||
{ |
|||
const __m128i temp1 = _mm_load_si128_emu(pbuf - (((selector & 1) << 1) - 1)); |
|||
const __m128i temp2 = _mm_load_si128_emu(prand); |
|||
const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); |
|||
const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod1, acc); |
|||
|
|||
const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp2); |
|||
const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp2); |
|||
|
|||
const __m128i tempb3 = _mm_load_si128_emu(prandex); |
|||
_mm_store_si128_emu(prandex, tempa2); |
|||
_mm_store_si128_emu(prand, tempb3); |
|||
break; |
|||
} |
|||
case 0x1c: |
|||
{ |
|||
const __m128i temp1 = _mm_load_si128_emu(pbuf); |
|||
const __m128i temp2 = _mm_load_si128_emu(prandex); |
|||
const __m128i add1 = _mm_xor_si128_emu(temp1, temp2); |
|||
const __m128i clprod1 = _mm_clmulepi64_si128_emu(add1, add1, 0x10); |
|||
acc = _mm_xor_si128_emu(clprod1, acc); |
|||
|
|||
const __m128i tempa1 = _mm_mulhrs_epi16_emu(acc, temp2); |
|||
const __m128i tempa2 = _mm_xor_si128_emu(tempa1, temp2); |
|||
|
|||
const __m128i tempa3 = _mm_load_si128_emu(prand); |
|||
_mm_store_si128_emu(prand, tempa2); |
|||
|
|||
acc = _mm_xor_si128_emu(tempa3, acc); |
|||
|
|||
const __m128i tempb1 = _mm_mulhrs_epi16_emu(acc, tempa3); |
|||
const __m128i tempb2 = _mm_xor_si128_emu(tempb1, tempa3); |
|||
_mm_store_si128_emu(prandex, tempb2); |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
return acc; |
|||
} |
|||
|
|||
// hashes 64 bytes only by doing a carryless multiplication and reduction of the repeated 64 byte sequence 16 times,
|
|||
// returning a 64 bit hash value
|
|||
uint64_t verusclhash_port(void * random, const unsigned char buf[64], uint64_t keyMask) { |
|||
__m128i * rs64 = (__m128i *)random; |
|||
const __m128i * string = (const __m128i *) buf; |
|||
|
|||
__m128i acc = __verusclmulwithoutreduction64alignedrepeat_port(rs64, string, keyMask); |
|||
acc = _mm_xor_si128_emu(acc, lazyLengthHash_port(1024, 64)); |
|||
return precompReduction64_port(acc); |
|||
} |
Loading…
Reference in new issue