Browse Source

Update to RandomX v1.2.1

Commit 102f8acf90a7649ada410de5499a7ec62e49e1da
witness_cache
Duke 6 months ago
parent
commit
6029b3d571
  1. 40
      src/RandomX/CMakeLists.txt
  2. 5
      src/RandomX/README.md
  3. 18
      src/RandomX/doc/tevador.asc
  4. 2
      src/RandomX/src/allocator.cpp
  5. 2
      src/RandomX/src/assembly_generator_x86.cpp
  6. 2
      src/RandomX/src/bytecode_machine.cpp
  7. 7
      src/RandomX/src/common.hpp
  8. 10
      src/RandomX/src/configuration.h
  9. 2
      src/RandomX/src/dataset.cpp
  10. 12
      src/RandomX/src/intrin_portable.h
  11. 42
      src/RandomX/src/jit_compiler.hpp
  12. 74
      src/RandomX/src/jit_compiler_a64.cpp
  13. 2
      src/RandomX/src/jit_compiler_a64.hpp
  14. 98
      src/RandomX/src/jit_compiler_a64_static.S
  15. 1175
      src/RandomX/src/jit_compiler_rv64.cpp
  16. 69
      src/RandomX/src/jit_compiler_rv64.hpp
  17. 1235
      src/RandomX/src/jit_compiler_rv64_static.S
  18. 53
      src/RandomX/src/jit_compiler_rv64_static.hpp
  19. 4
      src/RandomX/src/jit_compiler_x86.cpp
  20. 28
      src/RandomX/src/randomx.cpp
  21. 11
      src/RandomX/src/randomx.h
  22. 34
      src/RandomX/src/reciprocal.c
  23. 4
      src/RandomX/src/reciprocal.h
  24. 32
      src/RandomX/src/tests/benchmark.cpp
  25. 2
      src/RandomX/src/tests/perf-simulation.cpp
  26. 9
      src/RandomX/src/tests/riscv64_zba.s
  27. 9
      src/RandomX/src/tests/riscv64_zbb.s
  28. 24
      src/RandomX/src/tests/tests.cpp
  29. 153
      src/RandomX/src/virtual_memory.c
  30. 26
      src/RandomX/src/virtual_memory.h
  31. 4
      src/RandomX/vcxproj/randomx-dll.vcxproj
  32. 4
      src/RandomX/vcxproj/randomx-dll.vcxproj.filters
  33. 4
      src/RandomX/vcxproj/randomx.vcxproj
  34. 4
      src/RandomX/vcxproj/randomx.vcxproj.filters

40
src/RandomX/CMakeLists.txt

@ -39,7 +39,7 @@ src/bytecode_machine.cpp
src/cpu.cpp
src/dataset.cpp
src/soft_aes.cpp
src/virtual_memory.cpp
src/virtual_memory.c
src/vm_interpreted.cpp
src/allocator.cpp
src/assembly_generator_x86.cpp
@ -96,7 +96,7 @@ function(add_flag flag)
endfunction()
# x86-64
if(ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64")
if ((CMAKE_SIZEOF_VOID_P EQUAL 8) AND (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64"))
list(APPEND randomx_sources
src/jit_compiler_x86.cpp)
@ -173,6 +173,42 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
endif()
endif()
# RISC-V
if(ARCH_ID STREQUAL "riscv64")
list(APPEND randomx_sources
src/jit_compiler_rv64_static.S
src/jit_compiler_rv64.cpp)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
set_property(SOURCE src/jit_compiler_rv64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
# default build uses the RV64GC baseline
set(RVARCH "rv64gc")
# for native builds, enable Zba and Zbb if supported by the CPU
if(ARCH STREQUAL "native")
enable_language(ASM)
try_run(RANDOMX_ZBA_RUN_FAIL
RANDOMX_ZBA_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/tests/riscv64_zba.s
COMPILE_DEFINITIONS "-march=rv64gc_zba")
if (RANDOMX_ZBA_COMPILE_OK AND NOT RANDOMX_ZBA_RUN_FAIL)
set(RVARCH "${RVARCH}_zba")
endif()
try_run(RANDOMX_ZBB_RUN_FAIL
RANDOMX_ZBB_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/tests/riscv64_zbb.s
COMPILE_DEFINITIONS "-march=rv64gc_zbb")
if (RANDOMX_ZBB_COMPILE_OK AND NOT RANDOMX_ZBB_RUN_FAIL)
set(RVARCH "${RVARCH}_zbb")
endif()
endif()
add_flag("-march=${RVARCH}")
endif()
set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path")
add_library(randomx ${randomx_sources})

5
src/RandomX/README.md

@ -37,7 +37,7 @@ RandomX is written in C++11 and builds a static library with a C API provided by
### Linux
Build dependencies: `cmake` (minimum 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended).
Build dependencies: `cmake` (minimum 3.5) and `gcc` (minimum version 4.8, but version 7+ is recommended).
To build optimized binaries for your machine, run:
```
@ -82,7 +82,7 @@ Intel Core i7-8550U|16G DDR4-2400|Windows 10|hw|200 (4T)|1700 (4T)|350 (8T)|
Intel Core i3-3220|4G DDR3-1333|Ubuntu 16.04|soft|42 (4T)|510 (4T)|150 (4T)|
Raspberry Pi 3|1G LPDDR2|Ubuntu 16.04|soft|3.5 (4T)|-|20 (4T)|
Note that RandomX currently includes a JIT compiler for x86-64 and ARM64. Other architectures have to use the portable interpreter, which is much slower.
Note that RandomX currently includes a JIT compiler for x86-64, ARM64 and RISCV64. Other architectures have to use the portable interpreter, which is much slower.
### GPU performance
@ -129,6 +129,7 @@ The reference implementation has been validated on the following platforms:
* ARMv7+VFPv3 (32-bit, little-endian)
* ARMv8 (64-bit, little-endian)
* PPC64 (64-bit, big-endian)
* RISCV64 (64-bit, little-endian)
### Can FPGAs mine RandomX?

18
src/RandomX/doc/tevador.asc

@ -1,13 +1,13 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
mDMEXd+PeBYJKwYBBAHaRw8BAQdAZ0nqJ+nRYoScG2QLX62pl+WO1+Mkv6Yyt2Kb
ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+FiEEMoWj
LVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwMFCQWnqDgFCwkIBwIGFQoJCAsCBBYC
AwECHgECF4AACgkQWijIaue9c6YBFQD+N1XTUqSCZp9jB/yTHQ9ahSaIUMtmuvdT
So2s+quudP4A/R5wLwukpfGN9UZ4cfpmKCJ9jO1HJ2udmlGMsJbQpDAIuDgEXd+P
ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+AhsDBQsJ
CAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAmRP
r8MFCQ/ZS2YACgkQWijIaue9c6bR5gEA0tnQ4Al+yOLoRUBQitAV8FU4FLy8Xx8U
IyyivjJ0UhIA/2jwJfMXmJdMKtar8xfIA5mZLLofkEP6hug4knhitpkBuDgEXd+P
eBIKKwYBBAGXVQEFAQEHQBNbQuPcDojMCkRb5B5u7Ld/AFLClOh+6ElL+u61rIY/
AwEIB4h+BBgWCAAmFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwwFCQWn
qDgACgkQWijIaue9c6YJvgD+IY1Q9mCM1P1iZIoXuafRihXJ7UgVXpQqW2yoaUT3
bfQA/RkisI2eElYoOjdwPszPP6VfL5+SViwDmDuJG2P5llgE
=V4vd
-----END PGP PUBLIC KEY BLOCK-----
AwEIB4h+BBgWCAAmAhsMFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAmRQoAMFCQ/Z
S2YACgkQWijIaue9c6bUfwD9Hw20kGCaZ8rWghz9W3bc645ys1vPQpQW28CD9w3B
cTMBALsV1xpS2pGwTfn1PUimqESZfTrREmNvOjKSQwe0yicI
=D4lm
-----END PGP PUBLIC KEY BLOCK-----

2
src/RandomX/src/allocator.cpp

@ -29,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <new>
#include "allocator.hpp"
#include "intrin_portable.h"
#include "virtual_memory.hpp"
#include "virtual_memory.h"
#include "common.hpp"
namespace randomx {

2
src/RandomX/src/assembly_generator_x86.cpp

@ -445,7 +445,7 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;

2
src/RandomX/src/bytecode_machine.cpp

@ -243,7 +243,7 @@ namespace randomx {
}
if (opcode < ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::IMUL_R;

7
src/RandomX/src/common.hpp

@ -116,12 +116,19 @@ namespace randomx {
#if defined(_M_X64) || defined(__x86_64__)
#define RANDOMX_HAVE_COMPILER 1
#define RANDOMX_COMPILER_X86
class JitCompilerX86;
using JitCompiler = JitCompilerX86;
#elif defined(__aarch64__)
#define RANDOMX_HAVE_COMPILER 1
#define RANDOMX_COMPILER_A64
class JitCompilerA64;
using JitCompiler = JitCompilerA64;
#elif defined(__riscv) && __riscv_xlen == 64
#define RANDOMX_HAVE_COMPILER 1
#define RANDOMX_COMPILER_RV64
class JitCompilerRV64;
using JitCompiler = JitCompilerRV64;
#else
#define RANDOMX_HAVE_COMPILER 0
class JitCompilerFallback;

10
src/RandomX/src/configuration.h

@ -32,13 +32,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_ARGON_MEMORY 262144
//Number of Argon2d iterations for Cache initialization.
#define RANDOMX_ARGON_ITERATIONS 5
#define RANDOMX_ARGON_ITERATIONS 3
//Number of parallel lanes for Cache initialization.
#define RANDOMX_ARGON_LANES 1
//Argon2d salt
#define RANDOMX_ARGON_SALT "RandomXHUSH\x03"
#define RANDOMX_ARGON_SALT "RandomX\x03"
//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8
@ -53,13 +53,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define RANDOMX_DATASET_EXTRA_SIZE 33554368
//Number of instructions in a RandomX program. Must be divisible by 8.
#define RANDOMX_PROGRAM_SIZE 512
#define RANDOMX_PROGRAM_SIZE 256
//Number of iterations during VM execution.
#define RANDOMX_PROGRAM_ITERATIONS 4096
#define RANDOMX_PROGRAM_ITERATIONS 2048
//Number of chained VM executions per hash.
#define RANDOMX_PROGRAM_COUNT 16
#define RANDOMX_PROGRAM_COUNT 8
//Scratchpad L3 size in bytes. Must be a power of 2.
#define RANDOMX_SCRATCHPAD_L3 2097152

2
src/RandomX/src/dataset.cpp

@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.hpp"
#include "dataset.hpp"
#include "virtual_memory.hpp"
#include "virtual_memory.h"
#include "superscalar.hpp"
#include "blake2_generator.hpp"
#include "reciprocal.h"

12
src/RandomX/src/intrin_portable.h

@ -349,7 +349,7 @@ FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *p) {
#if defined(NATIVE_LITTLE_ENDIAN)
return *p;
#else
uint32_t* ptr = (uint32_t*)p;
const uint32_t* ptr = (const uint32_t*)p;
vec_u c;
c.u32[0] = load32(ptr + 0);
c.u32[1] = load32(ptr + 1);
@ -375,8 +375,8 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
vec_u x;
x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0));
x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4));
return (rx_vec_f128)x.d;
}
@ -684,7 +684,7 @@ FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const* p) {
#if defined(NATIVE_LITTLE_ENDIAN)
return *p;
#else
uint32_t* ptr = (uint32_t*)p;
const uint32_t* ptr = (const uint32_t*)p;
rx_vec_i128 c;
c.u32[0] = load32(ptr + 0);
c.u32[1] = load32(ptr + 1);
@ -708,8 +708,8 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
rx_vec_f128 x;
x.lo = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
x.hi = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
x.lo = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0));
x.hi = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4));
return x;
}

42
src/RandomX/src/jit_compiler.hpp

@ -28,10 +28,48 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#if defined(_M_X64) || defined(__x86_64__)
#include "common.hpp"
namespace randomx {
struct CodeBuffer {
uint8_t* code;
int32_t codePos;
int32_t rcpCount;
void emit(const uint8_t* src, int32_t len) {
memcpy(&code[codePos], src, len);
codePos += len;
}
template<typename T>
void emit(T src) {
memcpy(&code[codePos], &src, sizeof(src));
codePos += sizeof(src);
}
void emitAt(int32_t codePos, const uint8_t* src, int32_t len) {
memcpy(&code[codePos], src, len);
}
template<typename T>
void emitAt(int32_t codePos, T src) {
memcpy(&code[codePos], &src, sizeof(src));
}
};
struct CompilerState : public CodeBuffer {
int32_t instructionOffsets[RANDOMX_PROGRAM_SIZE];
int registerUsage[RegistersCount];
};
}
#if defined(RANDOMX_COMPILER_X86)
#include "jit_compiler_x86.hpp"
#elif defined(__aarch64__)
#elif defined(RANDOMX_COMPILER_A64)
#include "jit_compiler_a64.hpp"
#elif defined(RANDOMX_COMPILER_RV64)
#include "jit_compiler_rv64.hpp"
#else
#include "jit_compiler_fallback.hpp"
#endif

74
src/RandomX/src/jit_compiler_a64.cpp

@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "superscalar.hpp"
#include "program.hpp"
#include "reciprocal.h"
#include "virtual_memory.hpp"
#include "virtual_memory.h"
namespace ARMV8A {
@ -130,8 +130,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
@ -149,16 +149,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
}
// Update spMix2
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos);
// and w18, w18, CacheLineAlignMask
// and w20, w20, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 18 | (18 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
emit32(0x121A0000 | 20 | (20 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
// and w10, w10, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
@ -181,8 +181,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
@ -200,8 +200,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
}
// Update spMix2
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
@ -434,7 +434,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
}
else
{
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMovImmediate(tmp_reg, imm, code, k);
// add dst, src, tmp_reg
@ -483,7 +483,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
uint32_t k = codePos;
uint32_t imm = instr.getImm32();
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 19;
imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1);
emitAddImmediate(tmp_reg, src, imm, code, k);
@ -537,7 +537,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// add dst, dst, tmp_reg
@ -575,7 +575,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg
@ -594,7 +594,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
if (src == dst)
{
src = 18;
src = 20;
emitMovImmediate(src, instr.getImm32(), code, k);
}
@ -612,7 +612,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg
@ -643,7 +643,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// umulh dst, dst, tmp_reg
@ -674,7 +674,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// smulh dst, dst, tmp_reg
@ -686,34 +686,24 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
{
const uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (isZeroOrPowerOf2(divisor))
return;
uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63;
const uint64_t q = N / divisor;
const uint64_t r = N % divisor;
#ifdef __GNUC__
const uint64_t shift = 64 - __builtin_clzll(divisor);
#else
uint64_t shift = 32;
for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
--shift;
#endif
const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t);
literalPos -= sizeof(uint64_t);
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
if (literal_id < 13)
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
memcpy(code + literalPos, &reciprocal, sizeof(reciprocal));
if (literal_id < 12)
{
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
// mul dst, dst, literal_reg
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
@ -751,7 +741,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
if (src == dst)
{
src = 18;
src = 20;
emitMovImmediate(src, instr.getImm32(), code, k);
}
@ -769,7 +759,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// eor dst, dst, tmp_reg
@ -807,7 +797,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
if (src != dst)
{
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
// sub tmp_reg, xzr, src
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
@ -835,7 +825,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos;
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
@ -984,7 +974,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t fpcr_tmp_reg = 8;
// ror tmp_reg, src, imm
@ -1008,7 +998,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t tmp_reg = 20;
uint32_t imm = instr.getImm32();

2
src/RandomX/src/jit_compiler_a64.hpp

@ -81,7 +81,7 @@ namespace randomx {
static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos)
{
*(uint64_t*)(code + codePos) = val;
memcpy(code + codePos, &val, sizeof(val));
codePos += sizeof(val);
}

98
src/RandomX/src/jit_compiler_a64_static.S

@ -74,9 +74,9 @@
# x15 -> "r7"
# x16 -> spAddr0
# x17 -> spAddr1
# x18 -> temporary
# x18 -> unused (platform register, don't touch it)
# x19 -> temporary
# x20 -> literal for IMUL_RCP
# x20 -> temporary
# x21 -> literal for IMUL_RCP
# x22 -> literal for IMUL_RCP
# x23 -> literal for IMUL_RCP
@ -111,7 +111,7 @@ DECL(randomx_program_aarch64):
# Save callee-saved registers
sub sp, sp, 192
stp x16, x17, [sp]
stp x18, x19, [sp, 16]
str x19, [sp, 16]
stp x20, x21, [sp, 32]
stp x22, x23, [sp, 48]
stp x24, x25, [sp, 64]
@ -166,7 +166,6 @@ DECL(randomx_program_aarch64):
# Read literals
ldr x0, literal_x0
ldr x11, literal_x11
ldr x20, literal_x20
ldr x21, literal_x21
ldr x22, literal_x22
ldr x23, literal_x23
@ -198,11 +197,11 @@ DECL(randomx_program_aarch64):
DECL(randomx_program_aarch64_main_loop):
# spAddr0 = spMix1 & ScratchpadL3Mask64;
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
lsr x18, x10, 32
lsr x20, x10, 32
# Actual mask will be inserted by JIT compiler
and w16, w10, 1
and w17, w18, 1
and w17, w20, 1
# x16 = scratchpad + spAddr0
# x17 = scratchpad + spAddr1
@ -210,31 +209,31 @@ DECL(randomx_program_aarch64_main_loop):
add x17, x17, x2
# xor integer registers with scratchpad data (spAddr0)
ldp x18, x19, [x16]
eor x4, x4, x18
ldp x20, x19, [x16]
eor x4, x4, x20
eor x5, x5, x19
ldp x18, x19, [x16, 16]
eor x6, x6, x18
ldp x20, x19, [x16, 16]
eor x6, x6, x20
eor x7, x7, x19
ldp x18, x19, [x16, 32]
eor x12, x12, x18
ldp x20, x19, [x16, 32]
eor x12, x12, x20
eor x13, x13, x19
ldp x18, x19, [x16, 48]
eor x14, x14, x18
ldp x20, x19, [x16, 48]
eor x14, x14, x20
eor x15, x15, x19
# Load group F registers (spAddr1)
ldpsw x18, x19, [x17]
ins v16.d[0], x18
ldpsw x20, x19, [x17]
ins v16.d[0], x20
ins v16.d[1], x19
ldpsw x18, x19, [x17, 8]
ins v17.d[0], x18
ldpsw x20, x19, [x17, 8]
ins v17.d[0], x20
ins v17.d[1], x19
ldpsw x18, x19, [x17, 16]
ins v18.d[0], x18
ldpsw x20, x19, [x17, 16]
ins v18.d[0], x20
ins v18.d[1], x19
ldpsw x18, x19, [x17, 24]
ins v19.d[0], x18
ldpsw x20, x19, [x17, 24]
ins v19.d[0], x20
ins v19.d[1], x19
scvtf v16.2d, v16.2d
scvtf v17.2d, v17.2d
@ -242,17 +241,17 @@ DECL(randomx_program_aarch64_main_loop):
scvtf v19.2d, v19.2d
# Load group E registers (spAddr1)
ldpsw x18, x19, [x17, 32]
ins v20.d[0], x18
ldpsw x20, x19, [x17, 32]
ins v20.d[0], x20
ins v20.d[1], x19
ldpsw x18, x19, [x17, 40]
ins v21.d[0], x18
ldpsw x20, x19, [x17, 40]
ins v21.d[0], x20
ins v21.d[1], x19
ldpsw x18, x19, [x17, 48]
ins v22.d[0], x18
ldpsw x20, x19, [x17, 48]
ins v22.d[0], x20
ins v22.d[1], x19
ldpsw x18, x19, [x17, 56]
ins v23.d[0], x18
ldpsw x20, x19, [x17, 56]
ins v23.d[0], x20
ins v23.d[1], x19
scvtf v20.2d, v20.2d
scvtf v21.2d, v21.2d
@ -276,7 +275,6 @@ DECL(randomx_program_aarch64_vm_instructions):
literal_x0: .fill 1,8,0
literal_x11: .fill 1,8,0
literal_x20: .fill 1,8,0
literal_x21: .fill 1,8,0
literal_x22: .fill 1,8,0
literal_x23: .fill 1,8,0
@ -312,17 +310,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
eor x9, x9, x20
# Calculate dataset pointer for dataset prefetch
mov w18, w9
mov w20, w9
DECL(randomx_program_aarch64_cacheline_align_mask1):
# Actual mask will be inserted by JIT compiler
and x18, x18, 1
add x18, x18, x1
and x20, x20, 1
add x20, x20, x1
# Prefetch dataset data
prfm pldl2strm, [x18]
prfm pldl2strm, [x20]
# mx <-> ma
ror x9, x9, 32
@ -335,17 +333,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
DECL(randomx_program_aarch64_xor_with_dataset_line):
rx_program_xor_with_dataset_line:
# xor integer registers with dataset data
ldp x18, x19, [x10]
eor x4, x4, x18
ldp x20, x19, [x10]
eor x4, x4, x20
eor x5, x5, x19
ldp x18, x19, [x10, 16]
eor x6, x6, x18
ldp x20, x19, [x10, 16]
eor x6, x6, x20
eor x7, x7, x19
ldp x18, x19, [x10, 32]
eor x12, x12, x18
ldp x20, x19, [x10, 32]
eor x12, x12, x20
eor x13, x13, x19
ldp x18, x19, [x10, 48]
eor x14, x14, x18
ldp x20, x19, [x10, 48]
eor x14, x14, x20
eor x15, x15, x19
DECL(randomx_program_aarch64_update_spMix1):
@ -388,7 +386,7 @@ DECL(randomx_program_aarch64_update_spMix1):
# Restore callee-saved registers
ldp x16, x17, [sp]
ldp x18, x19, [sp, 16]
ldr x19, [sp, 16]
ldp x20, x21, [sp, 32]
ldp x22, x23, [sp, 48]
ldp x24, x25, [sp, 64]
@ -409,7 +407,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
stp x2, x30, [sp, 80]
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x18
eor x9, x9, x20
# mx <-> ma
ror x9, x9, 32
@ -451,8 +449,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
# x3 -> end item
DECL(randomx_init_dataset_aarch64):
# Save x30 (return address)
str x30, [sp, -16]!
# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
stp x20, x30, [sp, -16]!
# Load pointer to cache memory
ldr x0, [x0]
@ -464,8 +462,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
cmp x2, x3
bne DECL(randomx_init_dataset_aarch64_main_loop)
# Restore x30 (return address)
ldr x30, [sp], 16
# Restore x20 and x30
ldp x20, x30, [sp], 16
ret

1175
src/RandomX/src/jit_compiler_rv64.cpp

File diff suppressed because it is too large

69
src/RandomX/src/jit_compiler_rv64.hpp

@ -0,0 +1,69 @@
/*
Copyright (c) 2023 tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <cstring>
#include <vector>
#include "jit_compiler.hpp"
namespace randomx {
class Program;
struct ProgramConfiguration;
class SuperscalarProgram;
class Instruction;
class JitCompilerRV64 {
public:
JitCompilerRV64();
~JitCompilerRV64();
void generateProgram(Program&, ProgramConfiguration&);
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
void generateSuperscalarHash(SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t>&);
void generateDatasetInitCode() {}
ProgramFunc* getProgramFunc() {
return (ProgramFunc*)entryProgram;
}
DatasetInitFunc* getDatasetInitFunc() {
return (DatasetInitFunc*)entryDataInit;
}
uint8_t* getCode() {
return state.code;
}
size_t getCodeSize();
void enableWriting();
void enableExecution();
void enableAll();
private:
CompilerState state;
void* entryDataInit;
void* entryProgram;
};
}

1235
src/RandomX/src/jit_compiler_rv64_static.S

File diff suppressed because it is too large

53
src/RandomX/src/jit_compiler_rv64_static.hpp

@ -0,0 +1,53 @@
/*
Copyright (c) 2023 tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
extern "C" {
void randomx_riscv64_literals();
void randomx_riscv64_literals_end();
void randomx_riscv64_data_init();
void randomx_riscv64_fix_data_call();
void randomx_riscv64_prologue();
void randomx_riscv64_loop_begin();
void randomx_riscv64_data_read();
void randomx_riscv64_data_read_light();
void randomx_riscv64_fix_loop_call();
void randomx_riscv64_spad_store();
void randomx_riscv64_spad_store_hardaes();
void randomx_riscv64_spad_store_softaes();
void randomx_riscv64_loop_end();
void randomx_riscv64_fix_continue_loop();
void randomx_riscv64_epilogue();
void randomx_riscv64_softaes();
void randomx_riscv64_program_end();
void randomx_riscv64_ssh_init();
void randomx_riscv64_ssh_load();
void randomx_riscv64_ssh_prefetch();
void randomx_riscv64_ssh_end();
}

4
src/RandomX/src/jit_compiler_x86.cpp

@ -34,7 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "superscalar.hpp"
#include "program.hpp"
#include "reciprocal.h"
#include "virtual_memory.hpp"
#include "virtual_memory.h"
namespace randomx {
/*
@ -618,7 +618,7 @@ namespace randomx {
}
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
emit(MOV_RAX_I);

28
src/RandomX/src/randomx.cpp

@ -36,7 +36,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cpu.hpp"
#include <cassert>
#include <limits>
#if defined(__SSE__) || defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP > 0))
#define USE_CSR_INTRINSICS
#include <xmmintrin.h>
#else
#include <cfenv>
#endif
extern "C" {
@ -356,8 +362,14 @@ extern "C" {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
#ifdef USE_CSR_INTRINSICS
const unsigned int fpstate = _mm_getcsr();
#else
fenv_t fpstate;
fegetenv(&fpstate);
#endif
alignas(16) uint64_t tempHash[8];
int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
assert(blakeResult == 0);
@ -370,7 +382,12 @@ extern "C" {
}
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
#ifdef USE_CSR_INTRINSICS
_mm_setcsr(fpstate);
#else
fesetenv(&fpstate);
#endif
}
void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
@ -400,4 +417,15 @@ extern "C" {
machine->run(machine->tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out) {
assert(inputSize == 0 || input != nullptr);
assert(hash_in != nullptr);
assert(com_out != nullptr);
blake2b_state state;
blake2b_init(&state, RANDOMX_HASH_SIZE);
blake2b_update(&state, input, inputSize);
blake2b_update(&state, hash_in, RANDOMX_HASH_SIZE);
blake2b_final(&state, com_out, RANDOMX_HASH_SIZE);
}
}

11
src/RandomX/src/randomx.h

@ -260,6 +260,17 @@ RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);
/**
* Calculate a RandomX commitment from a RandomX hash and its input.
*
* @param input is a pointer to memory that was hashed. Must not be NULL.
* @param inputSize is the number of bytes in the input.
* @param hash_in is the output from randomx_calculate_hash* (RANDOMX_HASH_SIZE bytes).
* @param com_out is a pointer to memory where the commitment will be stored. Must not
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
*/
RANDOMX_EXPORT void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out);
#if defined(__cplusplus)
}
#endif

34
src/RandomX/src/reciprocal.c

@ -44,36 +44,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ret
*/
uint64_t randomx_reciprocal(uint64_t divisor) {
uint64_t randomx_reciprocal(uint32_t divisor) {
assert(divisor != 0);
const uint64_t p2exp63 = 1ULL << 63;
const uint64_t q = p2exp63 / divisor;
const uint64_t r = p2exp63 % divisor;
#ifdef __GNUC__
const uint32_t shift = 64 - __builtin_clzll(divisor);
#else
uint32_t shift = 32;
for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
--shift;
#endif
uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor;
unsigned bsr = 0; //highest set bit in divisor
for (uint64_t bit = divisor; bit > 0; bit >>= 1)
bsr++;
for (unsigned shift = 0; shift < bsr; shift++) {
if (remainder >= divisor - remainder) {
quotient = quotient * 2 + 1;
remainder = remainder * 2 - divisor;
}
else {
quotient = quotient * 2;
remainder = remainder * 2;
}
}
return quotient;
return (q << shift) + ((r << shift) / divisor);
}
#if !RANDOMX_HAVE_FAST_RECIPROCAL
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
uint64_t randomx_reciprocal_fast(uint32_t divisor) {
return randomx_reciprocal(divisor);
}

4
src/RandomX/src/reciprocal.h

@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern "C" {
#endif
uint64_t randomx_reciprocal(uint64_t);
uint64_t randomx_reciprocal_fast(uint64_t);
uint64_t randomx_reciprocal(uint32_t);
uint64_t randomx_reciprocal_fast(uint32_t);
#if defined(__cplusplus)
}

32
src/RandomX/src/tests/benchmark.cpp

@ -96,6 +96,7 @@ void printUsage(const char* executable) {
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
std::cout << " --auto select the best options for the current CPU" << std::endl;
std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl;
std::cout << " --commit calculate commitments instead of hashes (default: hashes)" << std::endl;
}
struct MemoryException : public std::exception {
@ -113,7 +114,7 @@ struct DatasetAllocException : public MemoryException {
using MineFunc = void(randomx_vm * vm, std::atomic<uint32_t> & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid);
template<bool batch>
template<bool batch, bool commit>
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) {
if (cpuid >= 0) {
int rc = set_thread_affinity(cpuid);
@ -138,6 +139,9 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
}
store32(noncePtr, nonce);
(batch ? randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash);
if (commit) {
randomx_calculate_commitment(blockTemplate, sizeof(blockTemplate), &hash, &hash);
}
result.xorWith(hash);
if (!batch) {
nonce = atomicNonce.fetch_add(1);
@ -146,7 +150,7 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
}
int main(int argc, char** argv) {
bool softAes, miningMode, verificationMode, help, largePages, jit, secure;
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, commit;
bool ssse3, avx2, autoFlags, noBatch;
int noncesCount, threadCount, initThreadCount;
uint64_t threadAffinity;
@ -172,10 +176,11 @@ int main(int argc, char** argv) {
readOption("--avx2", argc, argv, avx2);
readOption("--auto", argc, argv, autoFlags);
readOption("--noBatch", argc, argv, noBatch);
readOption("--commit", argc, argv, commit);
store32(&seed, seedValue);
std::cout << "RandomX benchmark v1.1.11" << std::endl;
std::cout << "RandomX benchmark v1.2.1" << std::endl;
if (help) {
printUsage(argv[0]);
@ -280,11 +285,24 @@ int main(int argc, char** argv) {
MineFunc* func;
if (noBatch) {
func = &mine<false>;
if (commit) {
std::cout << " - hash commitments" << std::endl;
func = &mine<false, true>;
}
else {
func = &mine<false, false>;
}
}
else {
func = &mine<true>;
std::cout << " - batch mode" << std::endl;
if (commit) {
//TODO: support batch mode with commitments
std::cout << " - hash commitments" << std::endl;
func = &mine<false, true>;
}
else {
std::cout << " - batch mode" << std::endl;
func = &mine<true, false>;
}
}
std::cout << "Initializing";
@ -376,7 +394,7 @@ int main(int argc, char** argv) {
randomx_release_cache(cache);
std::cout << "Calculated result: ";
result.print(std::cout);
if (noncesCount == 1000 && seedValue == 0)
if (noncesCount == 1000 && seedValue == 0 && !commit)
std::cout << "Reference result: 10b649a3f15c7c7f88277812f2e74b337a0f20ce909af09199cccb960771cfa1" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;

2
src/RandomX/src/tests/perf-simulation.cpp

@ -477,7 +477,7 @@ int analyze(randomx::Program& p) {
}
if (opcode < randomx::ceil_IMUL_RCP) {
uint64_t divisor = instr.getImm32();
const uint32_t divisor = instr.getImm32();
if (!randomx::isZeroOrPowerOf2(divisor)) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_INT;

9
src/RandomX/src/tests/riscv64_zba.s

@ -0,0 +1,9 @@
/* RISC-V - test if the Zba extension is present */
.text
.global main
main:
sh1add x6, x6, x7
li x10, 0
ret

9
src/RandomX/src/tests/riscv64_zbb.s

@ -0,0 +1,9 @@
/* RISC-V - test if the Zbb extension is present */
.text
.global main
main:
ror x6, x6, x7
li x10, 0
ret

24
src/RandomX/src/tests/tests.cpp

@ -34,6 +34,14 @@ void calcStringHash(const char(&key)[K], const char(&input)[H], void* output) {
randomx_calculate_hash(vm, input, H - 1, output);
}
template<size_t K, size_t H>
void calcStringCommitment(const char(&key)[K], const char(&input)[H], void* output) {
initCache(key);
assert(vm != nullptr);
randomx_calculate_hash(vm, input, H - 1, output);
randomx_calculate_commitment(input, H - 1, output, output);
}
template<size_t K, size_t H>
void calcHexHash(const char(&key)[K], const char(&hex)[H], void* output) {
initCache(key);
@ -1082,6 +1090,22 @@ int main() {
assert(rx_get_rounding_mode() == RoundToNearest);
});
if (RANDOMX_HAVE_COMPILER) {
randomx_destroy_vm(vm);
vm = nullptr;
#ifdef RANDOMX_FORCE_SECURE
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
#endif
}
runTest("Commitment test", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
char hash[RANDOMX_HASH_SIZE];
calcStringCommitment("test key 000", "This is a test", &hash);
assert(equalsHex(hash, "d53ccf348b75291b7be76f0a7ac8208bbced734b912f6fca60539ab6f86be919"));
});
randomx_destroy_vm(vm);
vm = nullptr;

153
src/RandomX/src/virtual_memory.cpp → src/RandomX/src/virtual_memory.c

@ -26,28 +26,24 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "virtual_memory.hpp"
#include <stdexcept>
#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
#else
#define _GNU_SOURCE 1 /* needed for MAP_ANONYMOUS on older platforms */
#ifdef __APPLE__
#include <mach/vm_statistics.h>
#include <TargetConditionals.h>
#include <AvailabilityMacros.h>
# if TARGET_OS_OSX
# if TARGET_CPU_ARM64
# define USE_PTHREAD_JIT_WP 1
# else
# undef USE_PTHREAD_JIT_WP
# endif
# define USE_PTHREAD_JIT_WP 1
# include <pthread.h>
# include <sys/utsname.h>
# include <stdio.h>
# endif
#endif
#include <sys/types.h>
#include <sys/mman.h>
#include <errno.h>
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
@ -57,27 +53,50 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define PAGE_EXECUTE_READWRITE (PROT_READ | PROT_WRITE | PROT_EXEC)
#endif
#if defined(_WIN32) || defined(__CYGWIN__)
std::string getErrorMessage(const char* function) {
LPSTR messageBuffer = nullptr;
size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL);
std::string message(messageBuffer, size);
LocalFree(messageBuffer);
return std::string(function) + std::string(": ") + message;
#include "virtual_memory.h"
#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
static int MacOSchecked, MacOSver;
/* This function is used implicitly by clang's __builtin_available() checker.
* When cross-compiling, the library containing this function doesn't exist,
* and linking will fail because the symbol is unresolved. The function here
* is a quick and dirty hack to get close enough to identify MacOSX 11.0.
*/
static int32_t __isOSVersionAtLeast(int32_t major, int32_t minor, int32_t subminor) {
if (!MacOSchecked) {
struct utsname ut;
int mmaj, mmin;
uname(&ut);
sscanf(ut.release, "%d.%d", &mmaj, &mmin);
// The utsname release version is 9 greater than the canonical OS version
mmaj -= 9;
MacOSver = (mmaj << 8) | mmin;
MacOSchecked = 1;
}
return MacOSver >= ((major << 8) | minor);
}
#endif
void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
#if defined(_WIN32) || defined(__CYGWIN__)
#define Fail(func) do {*errfunc = func; return GetLastError();} while(0)
int setPrivilege(const char* pszPrivilege, BOOL bEnable, char **errfunc) {
HANDLE hToken;
TOKEN_PRIVILEGES tp;
BOOL status;
DWORD error;
DWORD error = 0;
*errfunc = NULL;
if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
throw std::runtime_error(getErrorMessage("OpenProcessToken"));
Fail("OpenProcessToken");
if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid))
throw std::runtime_error(getErrorMessage("LookupPrivilegeValue"));
if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid)) {
*errfunc = "LookupPrivilegeValue";
error = GetLastError();
goto out;
}
tp.PrivilegeCount = 1;
@ -89,20 +108,28 @@ void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
error = GetLastError();
if (!status || (error != ERROR_SUCCESS))
throw std::runtime_error(getErrorMessage("AdjustTokenPrivileges"));
if (!status || (error != ERROR_SUCCESS)) {
*errfunc = "AdjustTokenPrivileges";
goto out;
}
if (!CloseHandle(hToken))
throw std::runtime_error(getErrorMessage("CloseHandle"));
out:
if (!CloseHandle(hToken)) {
if (*errfunc == NULL) {
*errfunc = "CloseHandle";
error = GetLastError();
}
}
return error;
}
#else
#define Fail(func) do {*errfunc = func; return errno;} while(0)
#endif
void* allocMemoryPages(std::size_t bytes) {
void* allocMemoryPages(size_t bytes) {
void* mem;
#if defined(_WIN32) || defined(__CYGWIN__)
mem = VirtualAlloc(nullptr, bytes, MEM_COMMIT, PAGE_READWRITE);
if (mem == nullptr)
throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc"));
mem = VirtualAlloc(NULL, bytes, MEM_COMMIT, PAGE_READWRITE);
#else
#if defined(__NetBSD__)
#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
@ -116,89 +143,95 @@ void* allocMemoryPages(std::size_t bytes) {
#define MEXTRA 0
#define PEXTRA 0
#endif
mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
mem = mmap(NULL, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
if (mem == MAP_FAILED)
throw std::runtime_error("allocMemoryPages - mmap failed");
mem = NULL;
#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
if (__builtin_available(macOS 11.0, *)) {
pthread_jit_write_protect_np(false);
pthread_jit_write_protect_np(0);
}
#endif
#endif
return mem;
}
static inline void pageProtect(void* ptr, std::size_t bytes, int rules) {
static inline int pageProtect(void* ptr, size_t bytes, int rules, char **errfunc) {
#if defined(_WIN32) || defined(__CYGWIN__)
DWORD oldp;
if (!VirtualProtect(ptr, bytes, (DWORD)rules, &oldp)) {
throw std::runtime_error(getErrorMessage("VirtualProtect"));
Fail("VirtualProtect");
}
#else
if (-1 == mprotect(ptr, bytes, rules))
throw std::runtime_error("mprotect failed");
Fail("mprotect");
#endif
return 0;
}
void setPagesRW(void* ptr, std::size_t bytes) {
void setPagesRW(void* ptr, size_t bytes) {
char *errfunc;
#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
if (__builtin_available(macOS 11.0, *)) {
pthread_jit_write_protect_np(false);
pthread_jit_write_protect_np(0);
} else {
pageProtect(ptr, bytes, PAGE_READWRITE);
pageProtect(ptr, bytes, PAGE_READWRITE, &errfunc);
}
#else
pageProtect(ptr, bytes, PAGE_READWRITE);
pageProtect(ptr, bytes, PAGE_READWRITE, &errfunc);
#endif
}
void setPagesRX(void* ptr, std::size_t bytes) {
void setPagesRX(void* ptr, size_t bytes) {
char *errfunc;
#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
if (__builtin_available(macOS 11.0, *)) {
pthread_jit_write_protect_np(true);
pthread_jit_write_protect_np(1);
__builtin___clear_cache((char*)ptr, ((char*)ptr) + bytes);
} else {
pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
pageProtect(ptr, bytes, PAGE_EXECUTE_READ, &errfunc);
}
#else
pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
pageProtect(ptr, bytes, PAGE_EXECUTE_READ, &errfunc);
#endif
}
void setPagesRWX(void* ptr, std::size_t bytes) {
pageProtect(ptr, bytes, PAGE_EXECUTE_READWRITE);
void setPagesRWX(void* ptr, size_t bytes) {
char *errfunc;
pageProtect(ptr, bytes, PAGE_EXECUTE_READWRITE, &errfunc);
}
void* allocLargePagesMemory(std::size_t bytes) {
void* allocLargePagesMemory(size_t bytes) {
void* mem;
char *errfunc;
#if defined(_WIN32) || defined(__CYGWIN__)
setPrivilege("SeLockMemoryPrivilege", 1);
auto pageMinimum = GetLargePageMinimum();
if (pageMinimum > 0)
mem = VirtualAlloc(NULL, alignSize(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
else
throw std::runtime_error("allocLargePagesMemory - Large pages are not supported");
if (mem == nullptr)
throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
if (setPrivilege("SeLockMemoryPrivilege", 1, &errfunc))
return NULL;
size_t pageMinimum = GetLargePageMinimum();
if (!pageMinimum) {
errfunc = "No large pages";
return NULL;
}
mem = VirtualAlloc(NULL, alignSize(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
#else
#ifdef __APPLE__
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
#elif defined(__FreeBSD__)
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
#elif defined(__OpenBSD__) || defined(__NetBSD__)
mem = MAP_FAILED; // OpenBSD does not support huge pages
#else
mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
#endif
if (mem == MAP_FAILED)
throw std::runtime_error("allocLargePagesMemory - mmap failed");
mem = NULL;
#endif
return mem;
}
void freePagedMemory(void* ptr, std::size_t bytes) {
void freePagedMemory(void* ptr, size_t bytes) {
#if defined(_WIN32) || defined(__CYGWIN__)
VirtualFree(ptr, 0, MEM_RELEASE);
#else

26
src/RandomX/src/virtual_memory.hpp → src/RandomX/src/virtual_memory.h

@ -28,15 +28,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <cstddef>
#ifdef __cplusplus
extern "C" {
#endif
constexpr std::size_t alignSize(std::size_t pos, std::size_t align) {
return ((pos - 1) / align + 1) * align;
}
#include <stddef.h>
#define alignSize(pos, align) (((pos - 1) / align + 1) * align)
void* allocMemoryPages(std::size_t);
void setPagesRW(void*, std::size_t);
void setPagesRX(void*, std::size_t);
void setPagesRWX(void*, std::size_t);
void* allocLargePagesMemory(std::size_t);
void freePagedMemory(void*, std::size_t);
void* allocMemoryPages(size_t);
void setPagesRW(void*, size_t);
void setPagesRX(void*, size_t);
void setPagesRWX(void*, size_t);
void* allocLargePagesMemory(size_t);
void freePagedMemory(void*, size_t);
#ifdef __cplusplus
}
#endif

4
src/RandomX/vcxproj/randomx-dll.vcxproj

@ -43,7 +43,7 @@
<ClInclude Include="..\src\superscalar.hpp" />
<ClInclude Include="..\src\superscalar_program.hpp" />
<ClInclude Include="..\src\virtual_machine.hpp" />
<ClInclude Include="..\src\virtual_memory.hpp" />
<ClInclude Include="..\src\virtual_memory.h" />
<ClInclude Include="..\src\vm_compiled.hpp" />
<ClInclude Include="..\src\vm_compiled_light.hpp" />
<ClInclude Include="..\src\vm_interpreted.hpp" />
@ -74,7 +74,7 @@
<ClCompile Include="..\src\soft_aes.cpp" />
<ClCompile Include="..\src\superscalar.cpp" />
<ClCompile Include="..\src\virtual_machine.cpp" />
<ClCompile Include="..\src\virtual_memory.cpp" />
<ClCompile Include="..\src\virtual_memory.c" />
<ClCompile Include="..\src\vm_compiled.cpp" />
<ClCompile Include="..\src\vm_compiled_light.cpp" />
<ClCompile Include="..\src\vm_interpreted.cpp" />

4
src/RandomX/vcxproj/randomx-dll.vcxproj.filters

@ -87,7 +87,7 @@
<ClInclude Include="..\src\virtual_machine.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\virtual_memory.hpp">
<ClInclude Include="..\src\virtual_memory.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\vm_compiled.hpp">
@ -151,7 +151,7 @@
<ClCompile Include="..\src\virtual_machine.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\virtual_memory.cpp">
<ClCompile Include="..\src\virtual_memory.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\vm_compiled.cpp">

4
src/RandomX/vcxproj/randomx.vcxproj

@ -156,7 +156,7 @@ SET ERRORLEVEL = 0</Command>
<ClCompile Include="..\src\reciprocal.c" />
<ClCompile Include="..\src\soft_aes.cpp" />
<ClCompile Include="..\src\virtual_machine.cpp" />
<ClCompile Include="..\src\virtual_memory.cpp" />
<ClCompile Include="..\src\virtual_memory.c" />
</ItemGroup>
<ItemGroup>
<MASM Include="..\src\jit_compiler_x86_static.asm" />
@ -198,7 +198,7 @@ SET ERRORLEVEL = 0</Command>
<ClInclude Include="..\src\soft_aes.h" />
<ClInclude Include="..\src\superscalar_program.hpp" />
<ClInclude Include="..\src\virtual_machine.hpp" />
<ClInclude Include="..\src\virtual_memory.hpp" />
<ClInclude Include="..\src\virtual_memory.h" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

4
src/RandomX/vcxproj/randomx.vcxproj.filters

@ -72,7 +72,7 @@
<ClCompile Include="..\src\vm_interpreted.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\virtual_memory.cpp">
<ClCompile Include="..\src\virtual_memory.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\blake2_generator.cpp">
@ -164,7 +164,7 @@
<ClInclude Include="..\src\virtual_machine.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\virtual_memory.hpp">
<ClInclude Include="..\src\virtual_memory.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\superscalar.hpp">

Loading…
Cancel
Save