Compare commits

...

1 Commits

Author SHA1 Message Date
Duke 9c46fcb3fd Use the previous version of randomx on current dev branch 5 months ago
  1. 40
      src/RandomX/CMakeLists.txt
  2. 5
      src/RandomX/README.md
  3. 18
      src/RandomX/doc/tevador.asc
  4. 2
      src/RandomX/src/allocator.cpp
  5. 2
      src/RandomX/src/assembly_generator_x86.cpp
  6. 2
      src/RandomX/src/bytecode_machine.cpp
  7. 7
      src/RandomX/src/common.hpp
  8. 2
      src/RandomX/src/dataset.cpp
  9. 12
      src/RandomX/src/intrin_portable.h
  10. 42
      src/RandomX/src/jit_compiler.hpp
  11. 74
      src/RandomX/src/jit_compiler_a64.cpp
  12. 2
      src/RandomX/src/jit_compiler_a64.hpp
  13. 98
      src/RandomX/src/jit_compiler_a64_static.S
  14. 4
      src/RandomX/src/jit_compiler_x86.cpp
  15. 28
      src/RandomX/src/randomx.cpp
  16. 11
      src/RandomX/src/randomx.h
  17. 34
      src/RandomX/src/reciprocal.c
  18. 4
      src/RandomX/src/reciprocal.h
  19. 32
      src/RandomX/src/tests/benchmark.cpp
  20. 2
      src/RandomX/src/tests/perf-simulation.cpp
  21. 24
      src/RandomX/src/tests/tests.cpp
  22. 207
      src/RandomX/src/virtual_memory.cpp
  23. 42
      src/RandomX/src/virtual_memory.hpp
  24. 4
      src/RandomX/vcxproj/randomx-dll.vcxproj
  25. 4
      src/RandomX/vcxproj/randomx-dll.vcxproj.filters
  26. 4
      src/RandomX/vcxproj/randomx.vcxproj
  27. 4
      src/RandomX/vcxproj/randomx.vcxproj.filters

40
src/RandomX/CMakeLists.txt

@ -39,7 +39,7 @@ src/bytecode_machine.cpp
src/cpu.cpp
src/dataset.cpp
src/soft_aes.cpp
src/virtual_memory.c
src/virtual_memory.cpp
src/vm_interpreted.cpp
src/allocator.cpp
src/assembly_generator_x86.cpp
@ -96,7 +96,7 @@ function(add_flag flag)
endfunction()
# x86-64
if ((CMAKE_SIZEOF_VOID_P EQUAL 8) AND (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64"))
if(ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64")
list(APPEND randomx_sources
src/jit_compiler_x86.cpp)
@ -173,42 +173,6 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
endif()
endif()
# RISC-V
if(ARCH_ID STREQUAL "riscv64")
list(APPEND randomx_sources
src/jit_compiler_rv64_static.S
src/jit_compiler_rv64.cpp)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
set_property(SOURCE src/jit_compiler_rv64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
# default build uses the RV64GC baseline
set(RVARCH "rv64gc")
# for native builds, enable Zba and Zbb if supported by the CPU
if(ARCH STREQUAL "native")
enable_language(ASM)
try_run(RANDOMX_ZBA_RUN_FAIL
RANDOMX_ZBA_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/tests/riscv64_zba.s
COMPILE_DEFINITIONS "-march=rv64gc_zba")
if (RANDOMX_ZBA_COMPILE_OK AND NOT RANDOMX_ZBA_RUN_FAIL)
set(RVARCH "${RVARCH}_zba")
endif()
try_run(RANDOMX_ZBB_RUN_FAIL
RANDOMX_ZBB_COMPILE_OK
${CMAKE_CURRENT_BINARY_DIR}/
${CMAKE_CURRENT_SOURCE_DIR}/src/tests/riscv64_zbb.s
COMPILE_DEFINITIONS "-march=rv64gc_zbb")
if (RANDOMX_ZBB_COMPILE_OK AND NOT RANDOMX_ZBB_RUN_FAIL)
set(RVARCH "${RVARCH}_zbb")
endif()
endif()
add_flag("-march=${RVARCH}")
endif()
set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path")
add_library(randomx ${randomx_sources})

5
src/RandomX/README.md

@ -37,7 +37,7 @@ RandomX is written in C++11 and builds a static library with a C API provided by
### Linux
Build dependencies: `cmake` (minimum 3.5) and `gcc` (minimum version 4.8, but version 7+ is recommended).
Build dependencies: `cmake` (minimum 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended).
To build optimized binaries for your machine, run:
```
@ -82,7 +82,7 @@ Intel Core i7-8550U|16G DDR4-2400|Windows 10|hw|200 (4T)|1700 (4T)|350 (8T)|
Intel Core i3-3220|4G DDR3-1333|Ubuntu 16.04|soft|42 (4T)|510 (4T)|150 (4T)|
Raspberry Pi 3|1G LPDDR2|Ubuntu 16.04|soft|3.5 (4T)|-|20 (4T)|
Note that RandomX currently includes a JIT compiler for x86-64, ARM64 and RISCV64. Other architectures have to use the portable interpreter, which is much slower.
Note that RandomX currently includes a JIT compiler for x86-64 and ARM64. Other architectures have to use the portable interpreter, which is much slower.
### GPU performance
@ -129,7 +129,6 @@ The reference implementation has been validated on the following platforms:
* ARMv7+VFPv3 (32-bit, little-endian)
* ARMv8 (64-bit, little-endian)
* PPC64 (64-bit, big-endian)
* RISCV64 (64-bit, little-endian)
### Can FPGAs mine RandomX?

18
src/RandomX/doc/tevador.asc

@ -1,13 +1,13 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
mDMEXd+PeBYJKwYBBAHaRw8BAQdAZ0nqJ+nRYoScG2QLX62pl+WO1+Mkv6Yyt2Kb
ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+AhsDBQsJ
CAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAmRP
r8MFCQ/ZS2YACgkQWijIaue9c6bR5gEA0tnQ4Al+yOLoRUBQitAV8FU4FLy8Xx8U
IyyivjJ0UhIA/2jwJfMXmJdMKtar8xfIA5mZLLofkEP6hug4knhitpkBuDgEXd+P
ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+FiEEMoWj
LVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwMFCQWnqDgFCwkIBwIGFQoJCAsCBBYC
AwECHgECF4AACgkQWijIaue9c6YBFQD+N1XTUqSCZp9jB/yTHQ9ahSaIUMtmuvdT
So2s+quudP4A/R5wLwukpfGN9UZ4cfpmKCJ9jO1HJ2udmlGMsJbQpDAIuDgEXd+P
eBIKKwYBBAGXVQEFAQEHQBNbQuPcDojMCkRb5B5u7Ld/AFLClOh+6ElL+u61rIY/
AwEIB4h+BBgWCAAmAhsMFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAmRQoAMFCQ/Z
S2YACgkQWijIaue9c6bUfwD9Hw20kGCaZ8rWghz9W3bc645ys1vPQpQW28CD9w3B
cTMBALsV1xpS2pGwTfn1PUimqESZfTrREmNvOjKSQwe0yicI
=D4lm
-----END PGP PUBLIC KEY BLOCK-----
AwEIB4h+BBgWCAAmFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwwFCQWn
qDgACgkQWijIaue9c6YJvgD+IY1Q9mCM1P1iZIoXuafRihXJ7UgVXpQqW2yoaUT3
bfQA/RkisI2eElYoOjdwPszPP6VfL5+SViwDmDuJG2P5llgE
=V4vd
-----END PGP PUBLIC KEY BLOCK-----

2
src/RandomX/src/allocator.cpp

@ -29,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <new>
#include "allocator.hpp"
#include "intrin_portable.h"
#include "virtual_memory.h"
#include "virtual_memory.hpp"
#include "common.hpp"
namespace randomx {

2
src/RandomX/src/assembly_generator_x86.cpp

@ -445,7 +445,7 @@ namespace randomx {
}
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
const uint32_t divisor = instr.getImm32();
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;

2
src/RandomX/src/bytecode_machine.cpp

@ -243,7 +243,7 @@ namespace randomx {
}
if (opcode < ceil_IMUL_RCP) {
const uint32_t divisor = instr.getImm32();
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
auto dst = instr.dst % RegistersCount;
ibc.type = InstructionType::IMUL_R;

7
src/RandomX/src/common.hpp

@ -116,19 +116,12 @@ namespace randomx {
#if defined(_M_X64) || defined(__x86_64__)
#define RANDOMX_HAVE_COMPILER 1
#define RANDOMX_COMPILER_X86
class JitCompilerX86;
using JitCompiler = JitCompilerX86;
#elif defined(__aarch64__)
#define RANDOMX_HAVE_COMPILER 1
#define RANDOMX_COMPILER_A64
class JitCompilerA64;
using JitCompiler = JitCompilerA64;
#elif defined(__riscv) && __riscv_xlen == 64
#define RANDOMX_HAVE_COMPILER 1
#define RANDOMX_COMPILER_RV64
class JitCompilerRV64;
using JitCompiler = JitCompilerRV64;
#else
#define RANDOMX_HAVE_COMPILER 0
class JitCompilerFallback;

2
src/RandomX/src/dataset.cpp

@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.hpp"
#include "dataset.hpp"
#include "virtual_memory.h"
#include "virtual_memory.hpp"
#include "superscalar.hpp"
#include "blake2_generator.hpp"
#include "reciprocal.h"

12
src/RandomX/src/intrin_portable.h

@ -349,7 +349,7 @@ FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *p) {
#if defined(NATIVE_LITTLE_ENDIAN)
return *p;
#else
const uint32_t* ptr = (const uint32_t*)p;
uint32_t* ptr = (uint32_t*)p;
vec_u c;
c.u32[0] = load32(ptr + 0);
c.u32[1] = load32(ptr + 1);
@ -375,8 +375,8 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
vec_u x;
x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0));
x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4));
x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
return (rx_vec_f128)x.d;
}
@ -684,7 +684,7 @@ FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const* p) {
#if defined(NATIVE_LITTLE_ENDIAN)
return *p;
#else
const uint32_t* ptr = (const uint32_t*)p;
uint32_t* ptr = (uint32_t*)p;
rx_vec_i128 c;
c.u32[0] = load32(ptr + 0);
c.u32[1] = load32(ptr + 1);
@ -708,8 +708,8 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
rx_vec_f128 x;
x.lo = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0));
x.hi = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4));
x.lo = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
x.hi = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
return x;
}

42
src/RandomX/src/jit_compiler.hpp

@ -28,48 +28,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include "common.hpp"
namespace randomx {
struct CodeBuffer {
uint8_t* code;
int32_t codePos;
int32_t rcpCount;
void emit(const uint8_t* src, int32_t len) {
memcpy(&code[codePos], src, len);
codePos += len;
}
template<typename T>
void emit(T src) {
memcpy(&code[codePos], &src, sizeof(src));
codePos += sizeof(src);
}
void emitAt(int32_t codePos, const uint8_t* src, int32_t len) {
memcpy(&code[codePos], src, len);
}
template<typename T>
void emitAt(int32_t codePos, T src) {
memcpy(&code[codePos], &src, sizeof(src));
}
};
struct CompilerState : public CodeBuffer {
int32_t instructionOffsets[RANDOMX_PROGRAM_SIZE];
int registerUsage[RegistersCount];
};
}
#if defined(RANDOMX_COMPILER_X86)
#if defined(_M_X64) || defined(__x86_64__)
#include "jit_compiler_x86.hpp"
#elif defined(RANDOMX_COMPILER_A64)
#elif defined(__aarch64__)
#include "jit_compiler_a64.hpp"
#elif defined(RANDOMX_COMPILER_RV64)
#include "jit_compiler_rv64.hpp"
#else
#include "jit_compiler_fallback.hpp"
#endif

74
src/RandomX/src/jit_compiler_a64.cpp

@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "superscalar.hpp"
#include "program.hpp"
#include "reciprocal.h"
#include "virtual_memory.h"
#include "virtual_memory.hpp"
namespace ARMV8A {
@ -130,8 +130,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
@ -149,16 +149,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
}
// Update spMix2
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
emit32(ARMV8A::B | (offset / 4), code, codePos);
// and w20, w20, CacheLineAlignMask
// and w18, w18, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
emit32(0x121A0000 | 20 | (20 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
emit32(0x121A0000 | 18 | (18 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
// and w10, w10, CacheLineAlignMask
codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
@ -181,8 +181,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
// and w16, w10, ScratchpadL3Mask64
emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w20, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
// and w17, w18, ScratchpadL3Mask64
emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
codePos = PrologueSize;
literalPos = ImulRcpLiteralsEnd;
@ -200,8 +200,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
}
// Update spMix2
// eor w20, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// eor w18, config.readReg2, config.readReg3
emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
// Jump back to the main loop
const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
@ -434,7 +434,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
}
else
{
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
emitMovImmediate(tmp_reg, imm, code, k);
// add dst, src, tmp_reg
@ -483,7 +483,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
uint32_t k = codePos;
uint32_t imm = instr.getImm32();
constexpr uint32_t tmp_reg = 19;
constexpr uint32_t tmp_reg = 18;
imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1);
emitAddImmediate(tmp_reg, src, imm, code, k);
@ -537,7 +537,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// add dst, dst, tmp_reg
@ -575,7 +575,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// sub dst, dst, tmp_reg
@ -594,7 +594,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
if (src == dst)
{
src = 20;
src = 18;
emitMovImmediate(src, instr.getImm32(), code, k);
}
@ -612,7 +612,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// mul dst, dst, tmp_reg
@ -643,7 +643,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// umulh dst, dst, tmp_reg
@ -674,7 +674,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// smulh dst, dst, tmp_reg
@ -686,24 +686,34 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
{
const uint32_t divisor = instr.getImm32();
const uint64_t divisor = instr.getImm32();
if (isZeroOrPowerOf2(divisor))
return;
uint32_t k = codePos;
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint64_t N = 1ULL << 63;
const uint64_t q = N / divisor;
const uint64_t r = N % divisor;
#ifdef __GNUC__
const uint64_t shift = 64 - __builtin_clzll(divisor);
#else
uint64_t shift = 32;
for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
--shift;
#endif
const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t);
literalPos -= sizeof(uint64_t);
const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
memcpy(code + literalPos, &reciprocal, sizeof(reciprocal));
literalPos -= sizeof(uint64_t);
*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);
if (literal_id < 12)
if (literal_id < 13)
{
static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };
static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
// mul dst, dst, literal_reg
emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
@ -741,7 +751,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
if (src == dst)
{
src = 20;
src = 18;
emitMovImmediate(src, instr.getImm32(), code, k);
}
@ -759,7 +769,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
emitMemLoad<tmp_reg>(dst, src, instr, code, k);
// eor dst, dst, tmp_reg
@ -797,7 +807,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
if (src != dst)
{
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
// sub tmp_reg, xzr, src
emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
@ -825,7 +835,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos;
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
@ -974,7 +984,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
constexpr uint32_t fpcr_tmp_reg = 8;
// ror tmp_reg, src, imm
@ -998,7 +1008,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap[instr.src];
const uint32_t dst = IntRegMap[instr.dst];
constexpr uint32_t tmp_reg = 20;
constexpr uint32_t tmp_reg = 18;
uint32_t imm = instr.getImm32();

2
src/RandomX/src/jit_compiler_a64.hpp

@ -81,7 +81,7 @@ namespace randomx {
static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos)
{
memcpy(code + codePos, &val, sizeof(val));
*(uint64_t*)(code + codePos) = val;
codePos += sizeof(val);
}

98
src/RandomX/src/jit_compiler_a64_static.S

@ -74,9 +74,9 @@
# x15 -> "r7"
# x16 -> spAddr0
# x17 -> spAddr1
# x18 -> unused (platform register, don't touch it)
# x18 -> temporary
# x19 -> temporary
# x20 -> temporary
# x20 -> literal for IMUL_RCP
# x21 -> literal for IMUL_RCP
# x22 -> literal for IMUL_RCP
# x23 -> literal for IMUL_RCP
@ -111,7 +111,7 @@ DECL(randomx_program_aarch64):
# Save callee-saved registers
sub sp, sp, 192
stp x16, x17, [sp]
str x19, [sp, 16]
stp x18, x19, [sp, 16]
stp x20, x21, [sp, 32]
stp x22, x23, [sp, 48]
stp x24, x25, [sp, 64]
@ -166,6 +166,7 @@ DECL(randomx_program_aarch64):
# Read literals
ldr x0, literal_x0
ldr x11, literal_x11
ldr x20, literal_x20
ldr x21, literal_x21
ldr x22, literal_x22
ldr x23, literal_x23
@ -197,11 +198,11 @@ DECL(randomx_program_aarch64):
DECL(randomx_program_aarch64_main_loop):
# spAddr0 = spMix1 & ScratchpadL3Mask64;
# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
lsr x20, x10, 32
lsr x18, x10, 32
# Actual mask will be inserted by JIT compiler
and w16, w10, 1
and w17, w20, 1
and w17, w18, 1
# x16 = scratchpad + spAddr0
# x17 = scratchpad + spAddr1
@ -209,31 +210,31 @@ DECL(randomx_program_aarch64_main_loop):
add x17, x17, x2
# xor integer registers with scratchpad data (spAddr0)
ldp x20, x19, [x16]
eor x4, x4, x20
ldp x18, x19, [x16]
eor x4, x4, x18
eor x5, x5, x19
ldp x20, x19, [x16, 16]
eor x6, x6, x20
ldp x18, x19, [x16, 16]
eor x6, x6, x18
eor x7, x7, x19
ldp x20, x19, [x16, 32]
eor x12, x12, x20
ldp x18, x19, [x16, 32]
eor x12, x12, x18
eor x13, x13, x19
ldp x20, x19, [x16, 48]
eor x14, x14, x20
ldp x18, x19, [x16, 48]
eor x14, x14, x18
eor x15, x15, x19
# Load group F registers (spAddr1)
ldpsw x20, x19, [x17]
ins v16.d[0], x20
ldpsw x18, x19, [x17]
ins v16.d[0], x18
ins v16.d[1], x19
ldpsw x20, x19, [x17, 8]
ins v17.d[0], x20
ldpsw x18, x19, [x17, 8]
ins v17.d[0], x18
ins v17.d[1], x19
ldpsw x20, x19, [x17, 16]
ins v18.d[0], x20
ldpsw x18, x19, [x17, 16]
ins v18.d[0], x18
ins v18.d[1], x19
ldpsw x20, x19, [x17, 24]
ins v19.d[0], x20
ldpsw x18, x19, [x17, 24]
ins v19.d[0], x18
ins v19.d[1], x19
scvtf v16.2d, v16.2d
scvtf v17.2d, v17.2d
@ -241,17 +242,17 @@ DECL(randomx_program_aarch64_main_loop):
scvtf v19.2d, v19.2d
# Load group E registers (spAddr1)
ldpsw x20, x19, [x17, 32]
ins v20.d[0], x20
ldpsw x18, x19, [x17, 32]
ins v20.d[0], x18
ins v20.d[1], x19
ldpsw x20, x19, [x17, 40]
ins v21.d[0], x20
ldpsw x18, x19, [x17, 40]
ins v21.d[0], x18
ins v21.d[1], x19
ldpsw x20, x19, [x17, 48]
ins v22.d[0], x20
ldpsw x18, x19, [x17, 48]
ins v22.d[0], x18
ins v22.d[1], x19
ldpsw x20, x19, [x17, 56]
ins v23.d[0], x20
ldpsw x18, x19, [x17, 56]
ins v23.d[0], x18
ins v23.d[1], x19
scvtf v20.2d, v20.2d
scvtf v21.2d, v21.2d
@ -275,6 +276,7 @@ DECL(randomx_program_aarch64_vm_instructions):
literal_x0: .fill 1,8,0
literal_x11: .fill 1,8,0
literal_x20: .fill 1,8,0
literal_x21: .fill 1,8,0
literal_x22: .fill 1,8,0
literal_x23: .fill 1,8,0
@ -310,17 +312,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
lsr x10, x9, 32
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x20
eor x9, x9, x18
# Calculate dataset pointer for dataset prefetch
mov w20, w9
mov w18, w9
DECL(randomx_program_aarch64_cacheline_align_mask1):
# Actual mask will be inserted by JIT compiler
and x20, x20, 1
add x20, x20, x1
and x18, x18, 1
add x18, x18, x1
# Prefetch dataset data
prfm pldl2strm, [x20]
prfm pldl2strm, [x18]
# mx <-> ma
ror x9, x9, 32
@ -333,17 +335,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
DECL(randomx_program_aarch64_xor_with_dataset_line):
rx_program_xor_with_dataset_line:
# xor integer registers with dataset data
ldp x20, x19, [x10]
eor x4, x4, x20
ldp x18, x19, [x10]
eor x4, x4, x18
eor x5, x5, x19
ldp x20, x19, [x10, 16]
eor x6, x6, x20
ldp x18, x19, [x10, 16]
eor x6, x6, x18
eor x7, x7, x19
ldp x20, x19, [x10, 32]
eor x12, x12, x20
ldp x18, x19, [x10, 32]
eor x12, x12, x18
eor x13, x13, x19
ldp x20, x19, [x10, 48]
eor x14, x14, x20
ldp x18, x19, [x10, 48]
eor x14, x14, x18
eor x15, x15, x19
DECL(randomx_program_aarch64_update_spMix1):
@ -386,7 +388,7 @@ DECL(randomx_program_aarch64_update_spMix1):
# Restore callee-saved registers
ldp x16, x17, [sp]
ldr x19, [sp, 16]
ldp x18, x19, [sp, 16]
ldp x20, x21, [sp, 32]
ldp x22, x23, [sp, 48]
ldp x24, x25, [sp, 64]
@ -407,7 +409,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
stp x2, x30, [sp, 80]
# mx ^= r[readReg2] ^ r[readReg3];
eor x9, x9, x20
eor x9, x9, x18
# mx <-> ma
ror x9, x9, 32
@ -449,8 +451,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
# x3 -> end item
DECL(randomx_init_dataset_aarch64):
# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
stp x20, x30, [sp, -16]!
# Save x30 (return address)
str x30, [sp, -16]!
# Load pointer to cache memory
ldr x0, [x0]
@ -462,8 +464,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
cmp x2, x3
bne DECL(randomx_init_dataset_aarch64_main_loop)
# Restore x20 and x30
ldp x20, x30, [sp], 16
# Restore x30 (return address)
ldr x30, [sp], 16
ret

4
src/RandomX/src/jit_compiler_x86.cpp

@ -34,7 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "superscalar.hpp"
#include "program.hpp"
#include "reciprocal.h"
#include "virtual_memory.h"
#include "virtual_memory.hpp"
namespace randomx {
/*
@ -618,7 +618,7 @@ namespace randomx {
}
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
const uint32_t divisor = instr.getImm32();
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
emit(MOV_RAX_I);

28
src/RandomX/src/randomx.cpp

@ -36,13 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cpu.hpp"
#include <cassert>
#include <limits>
#if defined(__SSE__) || defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP > 0))
#define USE_CSR_INTRINSICS
#include <xmmintrin.h>
#else
#include <cfenv>
#endif
extern "C" {
@ -362,14 +356,8 @@ extern "C" {
assert(machine != nullptr);
assert(inputSize == 0 || input != nullptr);
assert(output != nullptr);
#ifdef USE_CSR_INTRINSICS
const unsigned int fpstate = _mm_getcsr();
#else
fenv_t fpstate;
fegetenv(&fpstate);
#endif
alignas(16) uint64_t tempHash[8];
int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
assert(blakeResult == 0);
@ -382,12 +370,7 @@ extern "C" {
}
machine->run(&tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
#ifdef USE_CSR_INTRINSICS
_mm_setcsr(fpstate);
#else
fesetenv(&fpstate);
#endif
}
void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
@ -417,15 +400,4 @@ extern "C" {
machine->run(machine->tempHash);
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out) {
assert(inputSize == 0 || input != nullptr);
assert(hash_in != nullptr);
assert(com_out != nullptr);
blake2b_state state;
blake2b_init(&state, RANDOMX_HASH_SIZE);
blake2b_update(&state, input, inputSize);
blake2b_update(&state, hash_in, RANDOMX_HASH_SIZE);
blake2b_final(&state, com_out, RANDOMX_HASH_SIZE);
}
}

11
src/RandomX/src/randomx.h

@ -260,17 +260,6 @@ RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void
RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);
/**
* Calculate a RandomX commitment from a RandomX hash and its input.
*
* @param input is a pointer to memory that was hashed. Must not be NULL.
* @param inputSize is the number of bytes in the input.
* @param hash_in is the output from randomx_calculate_hash* (RANDOMX_HASH_SIZE bytes).
* @param com_out is a pointer to memory where the commitment will be stored. Must not
* be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
*/
RANDOMX_EXPORT void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out);
#if defined(__cplusplus)
}
#endif

34
src/RandomX/src/reciprocal.c

@ -44,28 +44,36 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ret
*/
uint64_t randomx_reciprocal(uint32_t divisor) {
uint64_t randomx_reciprocal(uint64_t divisor) {
assert(divisor != 0);
const uint64_t p2exp63 = 1ULL << 63;
const uint64_t q = p2exp63 / divisor;
const uint64_t r = p2exp63 % divisor;
#ifdef __GNUC__
const uint32_t shift = 64 - __builtin_clzll(divisor);
#else
uint32_t shift = 32;
for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
--shift;
#endif
return (q << shift) + ((r << shift) / divisor);
uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor;
unsigned bsr = 0; //highest set bit in divisor
for (uint64_t bit = divisor; bit > 0; bit >>= 1)
bsr++;
for (unsigned shift = 0; shift < bsr; shift++) {
if (remainder >= divisor - remainder) {
quotient = quotient * 2 + 1;
remainder = remainder * 2 - divisor;
}
else {
quotient = quotient * 2;
remainder = remainder * 2;
}
}
return quotient;
}
#if !RANDOMX_HAVE_FAST_RECIPROCAL
uint64_t randomx_reciprocal_fast(uint32_t divisor) {
uint64_t randomx_reciprocal_fast(uint64_t divisor) {
return randomx_reciprocal(divisor);
}

4
src/RandomX/src/reciprocal.h

@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
extern "C" {
#endif
uint64_t randomx_reciprocal(uint32_t);
uint64_t randomx_reciprocal_fast(uint32_t);
uint64_t randomx_reciprocal(uint64_t);
uint64_t randomx_reciprocal_fast(uint64_t);
#if defined(__cplusplus)
}

32
src/RandomX/src/tests/benchmark.cpp

@ -96,7 +96,6 @@ void printUsage(const char* executable) {
std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl;
std::cout << " --auto select the best options for the current CPU" << std::endl;
std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl;
std::cout << " --commit calculate commitments instead of hashes (default: hashes)" << std::endl;
}
struct MemoryException : public std::exception {
@ -114,7 +113,7 @@ struct DatasetAllocException : public MemoryException {
using MineFunc = void(randomx_vm * vm, std::atomic<uint32_t> & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid);
template<bool batch, bool commit>
template<bool batch>
void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) {
if (cpuid >= 0) {
int rc = set_thread_affinity(cpuid);
@ -139,9 +138,6 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
}
store32(noncePtr, nonce);
(batch ? randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash);
if (commit) {
randomx_calculate_commitment(blockTemplate, sizeof(blockTemplate), &hash, &hash);
}
result.xorWith(hash);
if (!batch) {
nonce = atomicNonce.fetch_add(1);
@ -150,7 +146,7 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
}
int main(int argc, char** argv) {
bool softAes, miningMode, verificationMode, help, largePages, jit, secure, commit;
bool softAes, miningMode, verificationMode, help, largePages, jit, secure;
bool ssse3, avx2, autoFlags, noBatch;
int noncesCount, threadCount, initThreadCount;
uint64_t threadAffinity;
@ -176,11 +172,10 @@ int main(int argc, char** argv) {
readOption("--avx2", argc, argv, avx2);
readOption("--auto", argc, argv, autoFlags);
readOption("--noBatch", argc, argv, noBatch);
readOption("--commit", argc, argv, commit);
store32(&seed, seedValue);
std::cout << "RandomX benchmark v1.2.1" << std::endl;
std::cout << "RandomX benchmark v1.1.11" << std::endl;
if (help) {
printUsage(argv[0]);
@ -285,24 +280,11 @@ int main(int argc, char** argv) {
MineFunc* func;
if (noBatch) {
if (commit) {
std::cout << " - hash commitments" << std::endl;
func = &mine<false, true>;
}
else {
func = &mine<false, false>;
}
func = &mine<false>;
}
else {
if (commit) {
//TODO: support batch mode with commitments
std::cout << " - hash commitments" << std::endl;
func = &mine<false, true>;
}
else {
std::cout << " - batch mode" << std::endl;
func = &mine<true, false>;
}
func = &mine<true>;
std::cout << " - batch mode" << std::endl;
}
std::cout << "Initializing";
@ -394,7 +376,7 @@ int main(int argc, char** argv) {
randomx_release_cache(cache);
std::cout << "Calculated result: ";
result.print(std::cout);
if (noncesCount == 1000 && seedValue == 0 && !commit)
if (noncesCount == 1000 && seedValue == 0)
std::cout << "Reference result: 10b649a3f15c7c7f88277812f2e74b337a0f20ce909af09199cccb960771cfa1" << std::endl;
if (!miningMode) {
std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;

2
src/RandomX/src/tests/perf-simulation.cpp

@ -477,7 +477,7 @@ int analyze(randomx::Program& p) {
}
if (opcode < randomx::ceil_IMUL_RCP) {
const uint32_t divisor = instr.getImm32();
uint64_t divisor = instr.getImm32();
if (!randomx::isZeroOrPowerOf2(divisor)) {
instr.dst = instr.dst % randomx::RegistersCount;
instr.opcode |= DST_INT;

24
src/RandomX/src/tests/tests.cpp

@ -34,14 +34,6 @@ void calcStringHash(const char(&key)[K], const char(&input)[H], void* output) {
randomx_calculate_hash(vm, input, H - 1, output);
}
template<size_t K, size_t H>
void calcStringCommitment(const char(&key)[K], const char(&input)[H], void* output) {
initCache(key);
assert(vm != nullptr);
randomx_calculate_hash(vm, input, H - 1, output);
randomx_calculate_commitment(input, H - 1, output, output);
}
template<size_t K, size_t H>
void calcHexHash(const char(&key)[K], const char(&hex)[H], void* output) {
initCache(key);
@ -1090,22 +1082,6 @@ int main() {
assert(rx_get_rounding_mode() == RoundToNearest);
});
if (RANDOMX_HAVE_COMPILER) {
randomx_destroy_vm(vm);
vm = nullptr;
#ifdef RANDOMX_FORCE_SECURE
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
#else
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
#endif
}
runTest("Commitment test", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
char hash[RANDOMX_HASH_SIZE];
calcStringCommitment("test key 000", "This is a test", &hash);
assert(equalsHex(hash, "d53ccf348b75291b7be76f0a7ac8208bbced734b912f6fca60539ab6f86be919"));
});
randomx_destroy_vm(vm);
vm = nullptr;

207
src/RandomX/src/virtual_memory.cpp

@ -0,0 +1,207 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "virtual_memory.hpp"
#include <stdexcept>
#include <string>
#if defined(_WIN32) || defined(__CYGWIN__)
#include <windows.h>
#else
#ifdef __APPLE__
#include <mach/vm_statistics.h>
#include <TargetConditionals.h>
#include <AvailabilityMacros.h>
# if TARGET_OS_OSX
# if TARGET_CPU_ARM64
# define USE_PTHREAD_JIT_WP 1
# else
# undef USE_PTHREAD_JIT_WP
# endif
# include <pthread.h>
# endif
#endif
#include <sys/types.h>
#include <sys/mman.h>
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#define PAGE_READONLY PROT_READ
#define PAGE_READWRITE (PROT_READ | PROT_WRITE)
#define PAGE_EXECUTE_READ (PROT_READ | PROT_EXEC)
#define PAGE_EXECUTE_READWRITE (PROT_READ | PROT_WRITE | PROT_EXEC)
#endif
#if defined(_WIN32) || defined(__CYGWIN__)
std::string getErrorMessage(const char* function) {
LPSTR messageBuffer = nullptr;
size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL);
std::string message(messageBuffer, size);
LocalFree(messageBuffer);
return std::string(function) + std::string(": ") + message;
}
void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
HANDLE hToken;
TOKEN_PRIVILEGES tp;
BOOL status;
DWORD error;
if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
throw std::runtime_error(getErrorMessage("OpenProcessToken"));
if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid))
throw std::runtime_error(getErrorMessage("LookupPrivilegeValue"));
tp.PrivilegeCount = 1;
if (bEnable)
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
else
tp.Privileges[0].Attributes = 0;
status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
error = GetLastError();
if (!status || (error != ERROR_SUCCESS))
throw std::runtime_error(getErrorMessage("AdjustTokenPrivileges"));
if (!CloseHandle(hToken))
throw std::runtime_error(getErrorMessage("CloseHandle"));
}
#endif
// Allocates `bytes` of anonymous, private, read/write memory from the OS.
//
// Windows/Cygwin: VirtualAlloc with MEM_COMMIT.
// Elsewhere:      mmap; NetBSD additionally reserves the right to add
//                 PROT_EXEC later, and when USE_PTHREAD_JIT_WP is defined the
//                 mapping is created with MAP_JIT and PROT_EXEC.
// Returns the base address of the mapping.
// Throws std::runtime_error if the allocation fails.
void* allocMemoryPages(std::size_t bytes) {
#if defined(_WIN32) || defined(__CYGWIN__)
	void* const pages = VirtualAlloc(nullptr, bytes, MEM_COMMIT, PAGE_READWRITE);
	if (pages == nullptr)
		throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc"));
	return pages;
#else
	#if defined(__NetBSD__)
		#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
	#else
		#define RESERVED_FLAGS 0
	#endif
	#ifdef USE_PTHREAD_JIT_WP
		#define MEXTRA MAP_JIT
		#define PEXTRA PROT_EXEC
	#else
		#define MEXTRA 0
		#define PEXTRA 0
	#endif
	const int protection = PAGE_READWRITE | RESERVED_FLAGS | PEXTRA;
	const int mappingFlags = MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA;
	void* const pages = mmap(nullptr, bytes, protection, mappingFlags, -1, 0);
	if (pages == MAP_FAILED)
		throw std::runtime_error("allocMemoryPages - mmap failed");
	#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
		&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
	// Turn JIT write protection off for the calling thread right after mapping.
	if (__builtin_available(macOS 11.0, *)) {
		pthread_jit_write_protect_np(false);
	}
	#endif
	return pages;
#endif
}
// Applies the protection flags `rules` to the `bytes`-long region at `ptr`.
//
// Windows/Cygwin: VirtualProtect (rules is a PAGE_* constant).
// Elsewhere:      mprotect (rules is a PROT_* combination).
// Throws std::runtime_error if the OS call reports failure.
static inline void pageProtect(void* ptr, std::size_t bytes, int rules) {
#if defined(_WIN32) || defined(__CYGWIN__)
	DWORD previousProtection; // required out-parameter; value is not used
	const BOOL changed = VirtualProtect(ptr, bytes, (DWORD)rules, &previousProtection);
	if (changed)
		return;
	throw std::runtime_error(getErrorMessage("VirtualProtect"));
#else
	const int status = mprotect(ptr, bytes, rules);
	if (status == -1)
		throw std::runtime_error("mprotect failed");
#endif
}
// Makes the region [ptr, ptr + bytes) readable and writable.
//
// When USE_PTHREAD_JIT_WP is defined and macOS 11.0+ is available at run
// time, this switches off JIT write protection for the calling thread via
// pthread_jit_write_protect_np instead of changing page protection.
// Otherwise it calls pageProtect, which throws std::runtime_error on failure.
void setPagesRW(void* ptr, std::size_t bytes) {
#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
	if (__builtin_available(macOS 11.0, *)) {
		pthread_jit_write_protect_np(false);
		return;
	}
#endif
	pageProtect(ptr, bytes, PAGE_READWRITE);
}
// Makes the region [ptr, ptr + bytes) readable and executable.
//
// When USE_PTHREAD_JIT_WP is defined and macOS 11.0+ is available at run
// time, this switches on JIT write protection for the calling thread via
// pthread_jit_write_protect_np instead of changing page protection.
// Otherwise it calls pageProtect, which throws std::runtime_error on failure.
void setPagesRX(void* ptr, std::size_t bytes) {
#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
	if (__builtin_available(macOS 11.0, *)) {
		pthread_jit_write_protect_np(true);
		return;
	}
#endif
	pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
}
// Makes the region [ptr, ptr + bytes) readable, writable and executable.
// Delegates to pageProtect, which throws std::runtime_error on failure.
void setPagesRWX(void* ptr, std::size_t bytes) {
	const int rules = PAGE_EXECUTE_READWRITE;
	pageProtect(ptr, bytes, rules);
}
// Allocates `bytes` of read/write memory, requesting large pages from the OS.
//
// Windows/Cygwin: enables SeLockMemoryPrivilege, rounds the size up to the
//                 large-page minimum and calls VirtualAlloc with MEM_LARGE_PAGES.
// Apple:          anonymous mmap with VM_FLAGS_SUPERPAGE_SIZE_2MB passed in
//                 the fd argument.
// FreeBSD:        mmap with MAP_ALIGNED_SUPER.
// OpenBSD/NetBSD: no mapping is attempted; the call always fails.
// Other systems:  mmap with MAP_HUGETLB | MAP_POPULATE.
// Returns the base address of the mapping.
// Throws std::runtime_error if large pages are unsupported or the
// allocation fails.
void* allocLargePagesMemory(std::size_t bytes) {
	void* region;
#if defined(_WIN32) || defined(__CYGWIN__)
	setPrivilege("SeLockMemoryPrivilege", 1);
	const auto largePageSize = GetLargePageMinimum();
	if (!(largePageSize > 0))
		throw std::runtime_error("allocLargePagesMemory - Large pages are not supported");
	region = VirtualAlloc(NULL, alignSize(bytes, largePageSize), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
	if (region == nullptr)
		throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
#else
	#if defined(__OpenBSD__) || defined(__NetBSD__)
	// No large-page mapping is requested on OpenBSD or NetBSD; report failure.
	region = MAP_FAILED;
	#elif defined(__APPLE__)
	region = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
	#elif defined(__FreeBSD__)
	region = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
	#else
	region = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
	#endif
	if (region == MAP_FAILED)
		throw std::runtime_error("allocLargePagesMemory - mmap failed");
#endif
	return region;
}
// Releases a region previously obtained from allocMemoryPages or
// allocLargePagesMemory.
//
// ptr:   base address of the region.
// bytes: size of the region; passed to munmap, not used by VirtualFree.
// The result of the OS call is not checked.
void freePagedMemory(void* ptr, std::size_t bytes) {
#if !defined(_WIN32) && !defined(__CYGWIN__)
	munmap(ptr, bytes);
#else
	// MEM_RELEASE frees the whole allocation and requires a size of 0.
	VirtualFree(ptr, 0, MEM_RELEASE);
#endif
}

42
src/RandomX/src/virtual_memory.hpp

@ -0,0 +1,42 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstddef>
// Rounds `pos` up to the nearest multiple of `align`.
//
// pos:   value to round up; 0 is returned unchanged.
// align: alignment step; must be non-zero (division by zero otherwise).
// Returns the smallest multiple of `align` that is >= `pos`.
//
// Zero is handled explicitly because `pos - 1` would wrap around for an
// unsigned 0 and yield a wrong result when `align` is not a power of two.
// Kept as a single return statement so it stays a valid C++11 constexpr.
constexpr std::size_t alignSize(std::size_t pos, std::size_t align) {
	return pos == 0 ? 0 : ((pos - 1) / align + 1) * align;
}
// Allocates the given number of bytes of read/write memory from the OS
// (VirtualAlloc on Windows/Cygwin, mmap elsewhere).
// Throws std::runtime_error on failure.
void* allocMemoryPages(std::size_t);
// Makes the region (pointer, size in bytes) readable and writable.
// Throws std::runtime_error if changing the page protection fails.
void setPagesRW(void*, std::size_t);
// Makes the region (pointer, size in bytes) readable and executable.
// Throws std::runtime_error if changing the page protection fails.
void setPagesRX(void*, std::size_t);
// Makes the region (pointer, size in bytes) readable, writable and executable.
// Throws std::runtime_error if changing the page protection fails.
void setPagesRWX(void*, std::size_t);
// Allocates the given number of bytes of read/write memory, requesting
// large pages from the OS.
// Throws std::runtime_error if large pages are unavailable or allocation fails.
void* allocLargePagesMemory(std::size_t);
// Releases a region (pointer, size in bytes) obtained from one of the
// allocation functions above. The size is used by munmap only; the Windows
// path releases the whole allocation regardless of it.
void freePagedMemory(void*, std::size_t);

4
src/RandomX/vcxproj/randomx-dll.vcxproj

@ -43,7 +43,7 @@
<ClInclude Include="..\src\superscalar.hpp" />
<ClInclude Include="..\src\superscalar_program.hpp" />
<ClInclude Include="..\src\virtual_machine.hpp" />
<ClInclude Include="..\src\virtual_memory.h" />
<ClInclude Include="..\src\virtual_memory.hpp" />
<ClInclude Include="..\src\vm_compiled.hpp" />
<ClInclude Include="..\src\vm_compiled_light.hpp" />
<ClInclude Include="..\src\vm_interpreted.hpp" />
@ -74,7 +74,7 @@
<ClCompile Include="..\src\soft_aes.cpp" />
<ClCompile Include="..\src\superscalar.cpp" />
<ClCompile Include="..\src\virtual_machine.cpp" />
<ClCompile Include="..\src\virtual_memory.c" />
<ClCompile Include="..\src\virtual_memory.cpp" />
<ClCompile Include="..\src\vm_compiled.cpp" />
<ClCompile Include="..\src\vm_compiled_light.cpp" />
<ClCompile Include="..\src\vm_interpreted.cpp" />

4
src/RandomX/vcxproj/randomx-dll.vcxproj.filters

@ -87,7 +87,7 @@
<ClInclude Include="..\src\virtual_machine.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\virtual_memory.h">
<ClInclude Include="..\src\virtual_memory.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\vm_compiled.hpp">
@ -151,7 +151,7 @@
<ClCompile Include="..\src\virtual_machine.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\virtual_memory.c">
<ClCompile Include="..\src\virtual_memory.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\vm_compiled.cpp">

4
src/RandomX/vcxproj/randomx.vcxproj

@ -156,7 +156,7 @@ SET ERRORLEVEL = 0</Command>
<ClCompile Include="..\src\reciprocal.c" />
<ClCompile Include="..\src\soft_aes.cpp" />
<ClCompile Include="..\src\virtual_machine.cpp" />
<ClCompile Include="..\src\virtual_memory.c" />
<ClCompile Include="..\src\virtual_memory.cpp" />
</ItemGroup>
<ItemGroup>
<MASM Include="..\src\jit_compiler_x86_static.asm" />
@ -198,7 +198,7 @@ SET ERRORLEVEL = 0</Command>
<ClInclude Include="..\src\soft_aes.h" />
<ClInclude Include="..\src\superscalar_program.hpp" />
<ClInclude Include="..\src\virtual_machine.hpp" />
<ClInclude Include="..\src\virtual_memory.h" />
<ClInclude Include="..\src\virtual_memory.hpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

4
src/RandomX/vcxproj/randomx.vcxproj.filters

@ -72,7 +72,7 @@
<ClCompile Include="..\src\vm_interpreted.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\virtual_memory.c">
<ClCompile Include="..\src\virtual_memory.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\src\blake2_generator.cpp">
@ -164,7 +164,7 @@
<ClInclude Include="..\src\virtual_machine.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\virtual_memory.h">
<ClInclude Include="..\src\virtual_memory.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\src\superscalar.hpp">

Loading…
Cancel
Save