diff --git a/src/RandomX/CMakeLists.txt b/src/RandomX/CMakeLists.txt index ebbdff2b6..f41f606b9 100644 --- a/src/RandomX/CMakeLists.txt +++ b/src/RandomX/CMakeLists.txt @@ -39,7 +39,7 @@ src/bytecode_machine.cpp src/cpu.cpp src/dataset.cpp src/soft_aes.cpp -src/virtual_memory.c +src/virtual_memory.cpp src/vm_interpreted.cpp src/allocator.cpp src/assembly_generator_x86.cpp @@ -96,7 +96,7 @@ function(add_flag flag) endfunction() # x86-64 -if ((CMAKE_SIZEOF_VOID_P EQUAL 8) AND (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64")) +if(ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64") list(APPEND randomx_sources src/jit_compiler_x86.cpp) @@ -173,42 +173,6 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv endif() endif() -# RISC-V -if(ARCH_ID STREQUAL "riscv64") - list(APPEND randomx_sources - src/jit_compiler_rv64_static.S - src/jit_compiler_rv64.cpp) - # cheat because cmake and ccache hate each other - set_property(SOURCE src/jit_compiler_rv64_static.S PROPERTY LANGUAGE C) - set_property(SOURCE src/jit_compiler_rv64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm) - - # default build uses the RV64GC baseline - set(RVARCH "rv64gc") - - # for native builds, enable Zba and Zbb if supported by the CPU - if(ARCH STREQUAL "native") - enable_language(ASM) - try_run(RANDOMX_ZBA_RUN_FAIL - RANDOMX_ZBA_COMPILE_OK - ${CMAKE_CURRENT_BINARY_DIR}/ - ${CMAKE_CURRENT_SOURCE_DIR}/src/tests/riscv64_zba.s - COMPILE_DEFINITIONS "-march=rv64gc_zba") - if (RANDOMX_ZBA_COMPILE_OK AND NOT RANDOMX_ZBA_RUN_FAIL) - set(RVARCH "${RVARCH}_zba") - endif() - try_run(RANDOMX_ZBB_RUN_FAIL - RANDOMX_ZBB_COMPILE_OK - ${CMAKE_CURRENT_BINARY_DIR}/ - ${CMAKE_CURRENT_SOURCE_DIR}/src/tests/riscv64_zbb.s - COMPILE_DEFINITIONS "-march=rv64gc_zbb") - if (RANDOMX_ZBB_COMPILE_OK AND NOT RANDOMX_ZBB_RUN_FAIL) - set(RVARCH "${RVARCH}_zbb") - endif() - endif() - - add_flag("-march=${RVARCH}") -endif() - set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path") add_library(randomx ${randomx_sources}) diff --git a/src/RandomX/README.md b/src/RandomX/README.md index 2c9bdd318..4c1dabb65 100644 --- a/src/RandomX/README.md +++ b/src/RandomX/README.md @@ -37,7 +37,7 @@ RandomX is written in C++11 and builds a static library with a C API provided by ### Linux -Build dependencies: `cmake` (minimum 3.5) and `gcc` (minimum version 4.8, but version 7+ is recommended). +Build dependencies: `cmake` (minimum 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended). To build optimized binaries for your machine, run: ``` @@ -82,7 +82,7 @@ Intel Core i7-8550U|16G DDR4-2400|Windows 10|hw|200 (4T)|1700 (4T)|350 (8T)| Intel Core i3-3220|4G DDR3-1333|Ubuntu 16.04|soft|42 (4T)|510 (4T)|150 (4T)| Raspberry Pi 3|1G LPDDR2|Ubuntu 16.04|soft|3.5 (4T)|-|20 (4T)| -Note that RandomX currently includes a JIT compiler for x86-64, ARM64 and RISCV64. Other architectures have to use the portable interpreter, which is much slower. +Note that RandomX currently includes a JIT compiler for x86-64 and ARM64. Other architectures have to use the portable interpreter, which is much slower. ### GPU performance @@ -129,7 +129,6 @@ The reference implementation has been validated on the following platforms: * ARMv7+VFPv3 (32-bit, little-endian) * ARMv8 (64-bit, little-endian) * PPC64 (64-bit, big-endian) -* RISCV64 (64-bit, little-endian) ### Can FPGAs mine RandomX? 
diff --git a/src/RandomX/doc/tevador.asc b/src/RandomX/doc/tevador.asc index 8bada54bb..b998f1ef2 100644 --- a/src/RandomX/doc/tevador.asc +++ b/src/RandomX/doc/tevador.asc @@ -1,13 +1,13 @@ -----BEGIN PGP PUBLIC KEY BLOCK----- mDMEXd+PeBYJKwYBBAHaRw8BAQdAZ0nqJ+nRYoScG2QLX62pl+WO1+Mkv6Yyt2Kb -ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+AhsDBQsJ -CAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAmRP -r8MFCQ/ZS2YACgkQWijIaue9c6bR5gEA0tnQ4Al+yOLoRUBQitAV8FU4FLy8Xx8U -IyyivjJ0UhIA/2jwJfMXmJdMKtar8xfIA5mZLLofkEP6hug4knhitpkBuDgEXd+P +ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+FiEEMoWj +LVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwMFCQWnqDgFCwkIBwIGFQoJCAsCBBYC +AwECHgECF4AACgkQWijIaue9c6YBFQD+N1XTUqSCZp9jB/yTHQ9ahSaIUMtmuvdT +So2s+quudP4A/R5wLwukpfGN9UZ4cfpmKCJ9jO1HJ2udmlGMsJbQpDAIuDgEXd+P eBIKKwYBBAGXVQEFAQEHQBNbQuPcDojMCkRb5B5u7Ld/AFLClOh+6ElL+u61rIY/ -AwEIB4h+BBgWCAAmAhsMFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAmRQoAMFCQ/Z -S2YACgkQWijIaue9c6bUfwD9Hw20kGCaZ8rWghz9W3bc645ys1vPQpQW28CD9w3B -cTMBALsV1xpS2pGwTfn1PUimqESZfTrREmNvOjKSQwe0yicI -=D4lm ------END PGP PUBLIC KEY BLOCK----- \ No newline at end of file +AwEIB4h+BBgWCAAmFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwwFCQWn +qDgACgkQWijIaue9c6YJvgD+IY1Q9mCM1P1iZIoXuafRihXJ7UgVXpQqW2yoaUT3 +bfQA/RkisI2eElYoOjdwPszPP6VfL5+SViwDmDuJG2P5llgE +=V4vd +-----END PGP PUBLIC KEY BLOCK----- diff --git a/src/RandomX/src/allocator.cpp b/src/RandomX/src/allocator.cpp index bcee0f6b6..6b48a7e70 100644 --- a/src/RandomX/src/allocator.cpp +++ b/src/RandomX/src/allocator.cpp @@ -29,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "allocator.hpp" #include "intrin_portable.h" -#include "virtual_memory.h" +#include "virtual_memory.hpp" #include "common.hpp" namespace randomx { diff --git a/src/RandomX/src/assembly_generator_x86.cpp b/src/RandomX/src/assembly_generator_x86.cpp index 1ce31dd55..e7e5258b7 100644 --- a/src/RandomX/src/assembly_generator_x86.cpp +++ b/src/RandomX/src/assembly_generator_x86.cpp @@ -445,7 +445,7 @@ namespace randomx { } void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) { - const uint32_t divisor = instr.getImm32(); + uint64_t divisor = instr.getImm32(); if (!isZeroOrPowerOf2(divisor)) { registerUsage[instr.dst] = i; asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl; diff --git a/src/RandomX/src/bytecode_machine.cpp b/src/RandomX/src/bytecode_machine.cpp index 1d00d0959..7d8e902d2 100644 --- a/src/RandomX/src/bytecode_machine.cpp +++ b/src/RandomX/src/bytecode_machine.cpp @@ -243,7 +243,7 @@ namespace randomx { } if (opcode < ceil_IMUL_RCP) { - const uint32_t divisor = instr.getImm32(); + uint64_t divisor = instr.getImm32(); if (!isZeroOrPowerOf2(divisor)) { auto dst = instr.dst % RegistersCount; ibc.type = InstructionType::IMUL_R; diff --git a/src/RandomX/src/common.hpp b/src/RandomX/src/common.hpp index f4b85342a..a77feb3bf 100644 --- a/src/RandomX/src/common.hpp +++ b/src/RandomX/src/common.hpp @@ -116,19 +116,12 @@ namespace randomx { #if defined(_M_X64) || defined(__x86_64__) #define RANDOMX_HAVE_COMPILER 1 - #define RANDOMX_COMPILER_X86 class JitCompilerX86; using JitCompiler = JitCompilerX86; #elif defined(__aarch64__) #define RANDOMX_HAVE_COMPILER 1 - #define RANDOMX_COMPILER_A64 class JitCompilerA64; using JitCompiler = JitCompilerA64; -#elif defined(__riscv) && __riscv_xlen == 64 - #define RANDOMX_HAVE_COMPILER 1 - #define RANDOMX_COMPILER_RV64 - class JitCompilerRV64; - using JitCompiler = JitCompilerRV64; #else #define 
RANDOMX_HAVE_COMPILER 0 class JitCompilerFallback; diff --git a/src/RandomX/src/dataset.cpp b/src/RandomX/src/dataset.cpp index 7ebf1bca4..675c5abc5 100644 --- a/src/RandomX/src/dataset.cpp +++ b/src/RandomX/src/dataset.cpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.hpp" #include "dataset.hpp" -#include "virtual_memory.h" +#include "virtual_memory.hpp" #include "superscalar.hpp" #include "blake2_generator.hpp" #include "reciprocal.h" diff --git a/src/RandomX/src/intrin_portable.h b/src/RandomX/src/intrin_portable.h index 50020c3e2..8c09ae885 100644 --- a/src/RandomX/src/intrin_portable.h +++ b/src/RandomX/src/intrin_portable.h @@ -349,7 +349,7 @@ FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *p) { #if defined(NATIVE_LITTLE_ENDIAN) return *p; #else - const uint32_t* ptr = (const uint32_t*)p; + uint32_t* ptr = (uint32_t*)p; vec_u c; c.u32[0] = load32(ptr + 0); c.u32[1] = load32(ptr + 1); @@ -375,8 +375,8 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) { FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { vec_u x; - x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0)); - x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4)); + x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); + x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); return (rx_vec_f128)x.d; } @@ -684,7 +684,7 @@ FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const* p) { #if defined(NATIVE_LITTLE_ENDIAN) return *p; #else - const uint32_t* ptr = (const uint32_t*)p; + uint32_t* ptr = (uint32_t*)p; rx_vec_i128 c; c.u32[0] = load32(ptr + 0); c.u32[1] = load32(ptr + 1); @@ -708,8 +708,8 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) { FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) { rx_vec_f128 x; - x.lo = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0)); - x.hi = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4)); + x.lo = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0)); + x.hi = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4)); return x; } diff --git a/src/RandomX/src/jit_compiler.hpp b/src/RandomX/src/jit_compiler.hpp index 5b76fa5f9..17fdad4e3 100644 --- a/src/RandomX/src/jit_compiler.hpp +++ b/src/RandomX/src/jit_compiler.hpp @@ -28,48 +28,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#pragma once -#include "common.hpp" - -namespace randomx { - - struct CodeBuffer { - uint8_t* code; - int32_t codePos; - int32_t rcpCount; - - void emit(const uint8_t* src, int32_t len) { - memcpy(&code[codePos], src, len); - codePos += len; - } - - template - void emit(T src) { - memcpy(&code[codePos], &src, sizeof(src)); - codePos += sizeof(src); - } - - void emitAt(int32_t codePos, const uint8_t* src, int32_t len) { - memcpy(&code[codePos], src, len); - } - - template - void emitAt(int32_t codePos, T src) { - memcpy(&code[codePos], &src, sizeof(src)); - } - }; - - struct CompilerState : public CodeBuffer { - int32_t instructionOffsets[RANDOMX_PROGRAM_SIZE]; - int registerUsage[RegistersCount]; - }; -} - -#if defined(RANDOMX_COMPILER_X86) +#if defined(_M_X64) || defined(__x86_64__) #include "jit_compiler_x86.hpp" -#elif defined(RANDOMX_COMPILER_A64) +#elif defined(__aarch64__) #include "jit_compiler_a64.hpp" -#elif defined(RANDOMX_COMPILER_RV64) -#include "jit_compiler_rv64.hpp" #else #include "jit_compiler_fallback.hpp" #endif diff --git a/src/RandomX/src/jit_compiler_a64.cpp b/src/RandomX/src/jit_compiler_a64.cpp index 5be8f6e42..fc4634868 100644 --- a/src/RandomX/src/jit_compiler_a64.cpp +++ b/src/RandomX/src/jit_compiler_a64.cpp @@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "superscalar.hpp" #include "program.hpp" #include "reciprocal.h" -#include "virtual_memory.h" +#include "virtual_memory.hpp" namespace ARMV8A { @@ -130,8 +130,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con // and w16, w10, ScratchpadL3Mask64 emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); - // and w17, w20, ScratchpadL3Mask64 - emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); + // and w17, w18, ScratchpadL3Mask64 + emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); codePos = PrologueSize; literalPos = ImulRcpLiteralsEnd; @@ -149,16 +149,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con } // Update spMix2 - // eor w20, config.readReg2, config.readReg3 - emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); + // eor w18, config.readReg2, config.readReg3 + emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); // Jump back to the main loop const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos; emit32(ARMV8A::B | (offset / 4), code, codePos); - // and w20, w20, CacheLineAlignMask + // and w18, w18, CacheLineAlignMask codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64)); - emit32(0x121A0000 | 20 | (20 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos); + emit32(0x121A0000 | 18 | (18 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos); // and w10, w10, CacheLineAlignMask codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64)); @@ -181,8 +181,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration // and w16, w10, ScratchpadL3Mask64 emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); - // and w17, w20, ScratchpadL3Mask64 - emit32(0x121A0000 | 17 | (20 
<< 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); + // and w17, w18, ScratchpadL3Mask64 + emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos); codePos = PrologueSize; literalPos = ImulRcpLiteralsEnd; @@ -200,8 +200,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration } // Update spMix2 - // eor w20, config.readReg2, config.readReg3 - emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); + // eor w18, config.readReg2, config.readReg3 + emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos); // Jump back to the main loop const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos; @@ -434,7 +434,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm, } else { - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; emitMovImmediate(tmp_reg, imm, code, k); // add dst, src, tmp_reg @@ -483,7 +483,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co uint32_t k = codePos; uint32_t imm = instr.getImm32(); - constexpr uint32_t tmp_reg = 19; + constexpr uint32_t tmp_reg = 18; imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1); emitAddImmediate(tmp_reg, src, imm, code, k); @@ -537,7 +537,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos) const uint32_t src = IntRegMap[instr.src]; const uint32_t dst = IntRegMap[instr.dst]; - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; emitMemLoad(dst, src, instr, code, k); // add dst, dst, tmp_reg @@ -575,7 +575,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos) const uint32_t src = IntRegMap[instr.src]; const uint32_t dst = IntRegMap[instr.dst]; - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; emitMemLoad(dst, src, instr, code, k); // sub dst, dst, tmp_reg @@ -594,7 +594,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos) if (src == dst) { - src = 20; + src = 18; emitMovImmediate(src, instr.getImm32(), code, k); } @@ -612,7 +612,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos) const uint32_t src = IntRegMap[instr.src]; const uint32_t dst = IntRegMap[instr.dst]; - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; emitMemLoad(dst, src, instr, code, k); // sub dst, dst, tmp_reg @@ -643,7 +643,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos) const uint32_t src = IntRegMap[instr.src]; const uint32_t dst = IntRegMap[instr.dst]; - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; emitMemLoad(dst, src, instr, code, k); // umulh dst, dst, tmp_reg @@ -674,7 +674,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos) const uint32_t src = IntRegMap[instr.src]; const uint32_t dst = IntRegMap[instr.dst]; - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; emitMemLoad(dst, src, instr, code, k); // smulh dst, dst, tmp_reg @@ -686,24 +686,34 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos) void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos) { - const uint32_t divisor = instr.getImm32(); + const uint64_t divisor = instr.getImm32(); if (isZeroOrPowerOf2(divisor)) return; uint32_t k = codePos; - constexpr uint32_t 
tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; const uint32_t dst = IntRegMap[instr.dst]; + constexpr uint64_t N = 1ULL << 63; + const uint64_t q = N / divisor; + const uint64_t r = N % divisor; +#ifdef __GNUC__ + const uint64_t shift = 64 - __builtin_clzll(divisor); +#else + uint64_t shift = 32; + for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1) + --shift; +#endif + const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t); - literalPos -= sizeof(uint64_t); - const uint64_t reciprocal = randomx_reciprocal_fast(divisor); - memcpy(code + literalPos, &reciprocal, sizeof(reciprocal)); + literalPos -= sizeof(uint64_t); + *(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor); - if (literal_id < 12) + if (literal_id < 13) { - static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 }; + static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 }; // mul dst, dst, literal_reg emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k); @@ -741,7 +751,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos) if (src == dst) { - src = 20; + src = 18; emitMovImmediate(src, instr.getImm32(), code, k); } @@ -759,7 +769,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos) const uint32_t src = IntRegMap[instr.src]; const uint32_t dst = IntRegMap[instr.dst]; - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; emitMemLoad(dst, src, instr, code, k); // eor dst, dst, tmp_reg @@ -797,7 +807,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos) if (src != dst) { - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; // sub tmp_reg, xzr, src emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k); @@ -825,7 +835,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos) uint32_t k = codePos; - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k); emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k); emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k); @@ -974,7 +984,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos) const uint32_t src = IntRegMap[instr.src]; - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; constexpr uint32_t fpcr_tmp_reg = 8; // ror tmp_reg, src, imm @@ -998,7 +1008,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos) const uint32_t src = IntRegMap[instr.src]; const uint32_t dst = IntRegMap[instr.dst]; - constexpr uint32_t tmp_reg = 20; + constexpr uint32_t tmp_reg = 18; uint32_t imm = instr.getImm32(); diff --git a/src/RandomX/src/jit_compiler_a64.hpp b/src/RandomX/src/jit_compiler_a64.hpp index f8484c083..a02824ffb 100644 --- a/src/RandomX/src/jit_compiler_a64.hpp +++ b/src/RandomX/src/jit_compiler_a64.hpp @@ -81,7 +81,7 @@ namespace randomx { static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos) { - memcpy(code + codePos, &val, sizeof(val)); + *(uint64_t*)(code + codePos) = val; codePos += sizeof(val); } diff --git a/src/RandomX/src/jit_compiler_a64_static.S b/src/RandomX/src/jit_compiler_a64_static.S index bc146133a..4886fcf3c 100644 --- a/src/RandomX/src/jit_compiler_a64_static.S +++ b/src/RandomX/src/jit_compiler_a64_static.S @@ -74,9 +74,9 @@ # 
x15 -> "r7" # x16 -> spAddr0 # x17 -> spAddr1 -# x18 -> unused (platform register, don't touch it) +# x18 -> temporary # x19 -> temporary -# x20 -> temporary +# x20 -> literal for IMUL_RCP # x21 -> literal for IMUL_RCP # x22 -> literal for IMUL_RCP # x23 -> literal for IMUL_RCP @@ -111,7 +111,7 @@ DECL(randomx_program_aarch64): # Save callee-saved registers sub sp, sp, 192 stp x16, x17, [sp] - str x19, [sp, 16] + stp x18, x19, [sp, 16] stp x20, x21, [sp, 32] stp x22, x23, [sp, 48] stp x24, x25, [sp, 64] @@ -166,6 +166,7 @@ DECL(randomx_program_aarch64): # Read literals ldr x0, literal_x0 ldr x11, literal_x11 + ldr x20, literal_x20 ldr x21, literal_x21 ldr x22, literal_x22 ldr x23, literal_x23 @@ -197,11 +198,11 @@ DECL(randomx_program_aarch64): DECL(randomx_program_aarch64_main_loop): # spAddr0 = spMix1 & ScratchpadL3Mask64; # spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64; - lsr x20, x10, 32 + lsr x18, x10, 32 # Actual mask will be inserted by JIT compiler and w16, w10, 1 - and w17, w20, 1 + and w17, w18, 1 # x16 = scratchpad + spAddr0 # x17 = scratchpad + spAddr1 @@ -209,31 +210,31 @@ DECL(randomx_program_aarch64_main_loop): add x17, x17, x2 # xor integer registers with scratchpad data (spAddr0) - ldp x20, x19, [x16] - eor x4, x4, x20 + ldp x18, x19, [x16] + eor x4, x4, x18 eor x5, x5, x19 - ldp x20, x19, [x16, 16] - eor x6, x6, x20 + ldp x18, x19, [x16, 16] + eor x6, x6, x18 eor x7, x7, x19 - ldp x20, x19, [x16, 32] - eor x12, x12, x20 + ldp x18, x19, [x16, 32] + eor x12, x12, x18 eor x13, x13, x19 - ldp x20, x19, [x16, 48] - eor x14, x14, x20 + ldp x18, x19, [x16, 48] + eor x14, x14, x18 eor x15, x15, x19 # Load group F registers (spAddr1) - ldpsw x20, x19, [x17] - ins v16.d[0], x20 + ldpsw x18, x19, [x17] + ins v16.d[0], x18 ins v16.d[1], x19 - ldpsw x20, x19, [x17, 8] - ins v17.d[0], x20 + ldpsw x18, x19, [x17, 8] + ins v17.d[0], x18 ins v17.d[1], x19 - ldpsw x20, x19, [x17, 16] - ins v18.d[0], x20 + ldpsw x18, x19, [x17, 16] + ins v18.d[0], x18 ins v18.d[1], x19 - ldpsw x20, x19, [x17, 24] - ins v19.d[0], x20 + ldpsw x18, x19, [x17, 24] + ins v19.d[0], x18 ins v19.d[1], x19 scvtf v16.2d, v16.2d scvtf v17.2d, v17.2d @@ -241,17 +242,17 @@ DECL(randomx_program_aarch64_main_loop): scvtf v19.2d, v19.2d # Load group E registers (spAddr1) - ldpsw x20, x19, [x17, 32] - ins v20.d[0], x20 + ldpsw x18, x19, [x17, 32] + ins v20.d[0], x18 ins v20.d[1], x19 - ldpsw x20, x19, [x17, 40] - ins v21.d[0], x20 + ldpsw x18, x19, [x17, 40] + ins v21.d[0], x18 ins v21.d[1], x19 - ldpsw x20, x19, [x17, 48] - ins v22.d[0], x20 + ldpsw x18, x19, [x17, 48] + ins v22.d[0], x18 ins v22.d[1], x19 - ldpsw x20, x19, [x17, 56] - ins v23.d[0], x20 + ldpsw x18, x19, [x17, 56] + ins v23.d[0], x18 ins v23.d[1], x19 scvtf v20.2d, v20.2d scvtf v21.2d, v21.2d @@ -275,6 +276,7 @@ DECL(randomx_program_aarch64_vm_instructions): literal_x0: .fill 1,8,0 literal_x11: .fill 1,8,0 +literal_x20: .fill 1,8,0 literal_x21: .fill 1,8,0 literal_x22: .fill 1,8,0 literal_x23: .fill 1,8,0 @@ -310,17 +312,17 @@ DECL(randomx_program_aarch64_vm_instructions_end): lsr x10, x9, 32 # mx ^= r[readReg2] ^ r[readReg3]; - eor x9, x9, x20 + eor x9, x9, x18 # Calculate dataset pointer for dataset prefetch - mov w20, w9 + mov w18, w9 DECL(randomx_program_aarch64_cacheline_align_mask1): # Actual mask will be inserted by JIT compiler - and x20, x20, 1 - add x20, x20, x1 + and x18, x18, 1 + add x18, x18, x1 # Prefetch dataset data - prfm pldl2strm, [x20] + prfm pldl2strm, [x18] # mx <-> ma ror x9, x9, 32 @@ -333,17 +335,17 @@ 
DECL(randomx_program_aarch64_cacheline_align_mask2): DECL(randomx_program_aarch64_xor_with_dataset_line): rx_program_xor_with_dataset_line: # xor integer registers with dataset data - ldp x20, x19, [x10] - eor x4, x4, x20 + ldp x18, x19, [x10] + eor x4, x4, x18 eor x5, x5, x19 - ldp x20, x19, [x10, 16] - eor x6, x6, x20 + ldp x18, x19, [x10, 16] + eor x6, x6, x18 eor x7, x7, x19 - ldp x20, x19, [x10, 32] - eor x12, x12, x20 + ldp x18, x19, [x10, 32] + eor x12, x12, x18 eor x13, x13, x19 - ldp x20, x19, [x10, 48] - eor x14, x14, x20 + ldp x18, x19, [x10, 48] + eor x14, x14, x18 eor x15, x15, x19 DECL(randomx_program_aarch64_update_spMix1): @@ -386,7 +388,7 @@ DECL(randomx_program_aarch64_update_spMix1): # Restore callee-saved registers ldp x16, x17, [sp] - ldr x19, [sp, 16] + ldp x18, x19, [sp, 16] ldp x20, x21, [sp, 32] ldp x22, x23, [sp, 48] ldp x24, x25, [sp, 64] @@ -407,7 +409,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light): stp x2, x30, [sp, 80] # mx ^= r[readReg2] ^ r[readReg3]; - eor x9, x9, x20 + eor x9, x9, x18 # mx <-> ma ror x9, x9, 32 @@ -449,8 +451,8 @@ DECL(randomx_program_aarch64_light_dataset_offset): # x3 -> end item DECL(randomx_init_dataset_aarch64): - # Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address) - stp x20, x30, [sp, -16]! + # Save x30 (return address) + str x30, [sp, -16]! # Load pointer to cache memory ldr x0, [x0] @@ -462,8 +464,8 @@ DECL(randomx_init_dataset_aarch64_main_loop): cmp x2, x3 bne DECL(randomx_init_dataset_aarch64_main_loop) - # Restore x20 and x30 - ldp x20, x30, [sp], 16 + # Restore x30 (return address) + ldr x30, [sp], 16 ret diff --git a/src/RandomX/src/jit_compiler_x86.cpp b/src/RandomX/src/jit_compiler_x86.cpp index 785ce5f59..5587e6afb 100644 --- a/src/RandomX/src/jit_compiler_x86.cpp +++ b/src/RandomX/src/jit_compiler_x86.cpp @@ -34,7 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "superscalar.hpp" #include "program.hpp" #include "reciprocal.h" -#include "virtual_memory.h" +#include "virtual_memory.hpp" namespace randomx { /* @@ -618,7 +618,7 @@ namespace randomx { } void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) { - const uint32_t divisor = instr.getImm32(); + uint64_t divisor = instr.getImm32(); if (!isZeroOrPowerOf2(divisor)) { registerUsage[instr.dst] = i; emit(MOV_RAX_I); diff --git a/src/RandomX/src/randomx.cpp b/src/RandomX/src/randomx.cpp index a08968e6a..7daaa46df 100644 --- a/src/RandomX/src/randomx.cpp +++ b/src/RandomX/src/randomx.cpp @@ -36,13 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "cpu.hpp" #include #include - -#if defined(__SSE__) || defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP > 0)) -#define USE_CSR_INTRINSICS -#include -#else #include -#endif extern "C" { @@ -362,14 +356,8 @@ extern "C" { assert(machine != nullptr); assert(inputSize == 0 || input != nullptr); assert(output != nullptr); - -#ifdef USE_CSR_INTRINSICS - const unsigned int fpstate = _mm_getcsr(); -#else fenv_t fpstate; fegetenv(&fpstate); -#endif - alignas(16) uint64_t tempHash[8]; int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0); assert(blakeResult == 0); @@ -382,12 +370,7 @@ extern "C" { } machine->run(&tempHash); machine->getFinalResult(output, RANDOMX_HASH_SIZE); - -#ifdef USE_CSR_INTRINSICS - _mm_setcsr(fpstate); -#else fesetenv(&fpstate); -#endif } void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) { @@ -417,15 +400,4 @@ extern "C" { machine->run(machine->tempHash); machine->getFinalResult(output, RANDOMX_HASH_SIZE); } - - void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out) { - assert(inputSize == 0 || input != nullptr); - assert(hash_in != nullptr); - assert(com_out != nullptr); - blake2b_state state; - blake2b_init(&state, RANDOMX_HASH_SIZE); - blake2b_update(&state, input, inputSize); - blake2b_update(&state, hash_in, RANDOMX_HASH_SIZE); - blake2b_final(&state, com_out, RANDOMX_HASH_SIZE); - } } diff --git a/src/RandomX/src/randomx.h b/src/RandomX/src/randomx.h index 313bcd2e0..64d18068b 100644 --- a/src/RandomX/src/randomx.h +++ b/src/RandomX/src/randomx.h @@ -260,17 +260,6 @@ RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output); RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output); -/** - * Calculate a RandomX commitment from a RandomX hash and its input. - * - * @param input is a pointer to memory that was hashed. Must not be NULL. - * @param inputSize is the number of bytes in the input. - * @param hash_in is the output from randomx_calculate_hash* (RANDOMX_HASH_SIZE bytes). - * @param com_out is a pointer to memory where the commitment will be stored. Must not - * be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing. -*/ -RANDOMX_EXPORT void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out); - #if defined(__cplusplus) } #endif diff --git a/src/RandomX/src/reciprocal.c b/src/RandomX/src/reciprocal.c index 074d1846b..22620f53a 100644 --- a/src/RandomX/src/reciprocal.c +++ b/src/RandomX/src/reciprocal.c @@ -44,28 +44,36 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
ret */ -uint64_t randomx_reciprocal(uint32_t divisor) { +uint64_t randomx_reciprocal(uint64_t divisor) { assert(divisor != 0); const uint64_t p2exp63 = 1ULL << 63; - const uint64_t q = p2exp63 / divisor; - const uint64_t r = p2exp63 % divisor; - -#ifdef __GNUC__ - const uint32_t shift = 64 - __builtin_clzll(divisor); -#else - uint32_t shift = 32; - for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1) - --shift; -#endif - return (q << shift) + ((r << shift) / divisor); + uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor; + + unsigned bsr = 0; //highest set bit in divisor + + for (uint64_t bit = divisor; bit > 0; bit >>= 1) + bsr++; + + for (unsigned shift = 0; shift < bsr; shift++) { + if (remainder >= divisor - remainder) { + quotient = quotient * 2 + 1; + remainder = remainder * 2 - divisor; + } + else { + quotient = quotient * 2; + remainder = remainder * 2; + } + } + + return quotient; } #if !RANDOMX_HAVE_FAST_RECIPROCAL -uint64_t randomx_reciprocal_fast(uint32_t divisor) { +uint64_t randomx_reciprocal_fast(uint64_t divisor) { return randomx_reciprocal(divisor); } diff --git a/src/RandomX/src/reciprocal.h b/src/RandomX/src/reciprocal.h index 90bd9b6be..8858df2b8 100644 --- a/src/RandomX/src/reciprocal.h +++ b/src/RandomX/src/reciprocal.h @@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. extern "C" { #endif -uint64_t randomx_reciprocal(uint32_t); -uint64_t randomx_reciprocal_fast(uint32_t); +uint64_t randomx_reciprocal(uint64_t); +uint64_t randomx_reciprocal_fast(uint64_t); #if defined(__cplusplus) } diff --git a/src/RandomX/src/tests/benchmark.cpp b/src/RandomX/src/tests/benchmark.cpp index 148521a51..36b0259b6 100644 --- a/src/RandomX/src/tests/benchmark.cpp +++ b/src/RandomX/src/tests/benchmark.cpp @@ -96,7 +96,6 @@ void printUsage(const char* executable) { std::cout << " --avx2 use optimized Argon2 for AVX2 CPUs" << std::endl; std::cout << " --auto select the best options for the current CPU" << std::endl; std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl; - std::cout << " --commit calculate commitments instead of hashes (default: hashes)" << std::endl; } struct MemoryException : public std::exception { @@ -114,7 +113,7 @@ struct DatasetAllocException : public MemoryException { using MineFunc = void(randomx_vm * vm, std::atomic & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid); -template +template void mine(randomx_vm* vm, std::atomic& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) { if (cpuid >= 0) { int rc = set_thread_affinity(cpuid); @@ -139,9 +138,6 @@ void mine(randomx_vm* vm, std::atomic& atomicNonce, AtomicHash& result } store32(noncePtr, nonce); (batch ? 
randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash); - if (commit) { - randomx_calculate_commitment(blockTemplate, sizeof(blockTemplate), &hash, &hash); - } result.xorWith(hash); if (!batch) { nonce = atomicNonce.fetch_add(1); @@ -150,7 +146,7 @@ void mine(randomx_vm* vm, std::atomic& atomicNonce, AtomicHash& result } int main(int argc, char** argv) { - bool softAes, miningMode, verificationMode, help, largePages, jit, secure, commit; + bool softAes, miningMode, verificationMode, help, largePages, jit, secure; bool ssse3, avx2, autoFlags, noBatch; int noncesCount, threadCount, initThreadCount; uint64_t threadAffinity; @@ -176,11 +172,10 @@ int main(int argc, char** argv) { readOption("--avx2", argc, argv, avx2); readOption("--auto", argc, argv, autoFlags); readOption("--noBatch", argc, argv, noBatch); - readOption("--commit", argc, argv, commit); store32(&seed, seedValue); - std::cout << "RandomX benchmark v1.2.1" << std::endl; + std::cout << "RandomX benchmark v1.1.11" << std::endl; if (help) { printUsage(argv[0]); @@ -285,24 +280,11 @@ int main(int argc, char** argv) { MineFunc* func; if (noBatch) { - if (commit) { - std::cout << " - hash commitments" << std::endl; - func = &mine; - } - else { - func = &mine; - } + func = &mine; } else { - if (commit) { - //TODO: support batch mode with commitments - std::cout << " - hash commitments" << std::endl; - func = &mine; - } - else { - std::cout << " - batch mode" << std::endl; - func = &mine; - } + func = &mine; + std::cout << " - batch mode" << std::endl; } std::cout << "Initializing"; @@ -394,7 +376,7 @@ int main(int argc, char** argv) { randomx_release_cache(cache); std::cout << "Calculated result: "; result.print(std::cout); - if (noncesCount == 1000 && seedValue == 0 && !commit) + if (noncesCount == 1000 && seedValue == 0) std::cout << "Reference result: 10b649a3f15c7c7f88277812f2e74b337a0f20ce909af09199cccb960771cfa1" << std::endl; if (!miningMode) { std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl; diff --git a/src/RandomX/src/tests/perf-simulation.cpp b/src/RandomX/src/tests/perf-simulation.cpp index 27f34d8c4..1068a40ef 100644 --- a/src/RandomX/src/tests/perf-simulation.cpp +++ b/src/RandomX/src/tests/perf-simulation.cpp @@ -477,7 +477,7 @@ int analyze(randomx::Program& p) { } if (opcode < randomx::ceil_IMUL_RCP) { - const uint32_t divisor = instr.getImm32(); + uint64_t divisor = instr.getImm32(); if (!randomx::isZeroOrPowerOf2(divisor)) { instr.dst = instr.dst % randomx::RegistersCount; instr.opcode |= DST_INT; diff --git a/src/RandomX/src/tests/tests.cpp b/src/RandomX/src/tests/tests.cpp index 5e1b41a38..412585b1d 100644 --- a/src/RandomX/src/tests/tests.cpp +++ b/src/RandomX/src/tests/tests.cpp @@ -34,14 +34,6 @@ void calcStringHash(const char(&key)[K], const char(&input)[H], void* output) { randomx_calculate_hash(vm, input, H - 1, output); } -template -void calcStringCommitment(const char(&key)[K], const char(&input)[H], void* output) { - initCache(key); - assert(vm != nullptr); - randomx_calculate_hash(vm, input, H - 1, output); - randomx_calculate_commitment(input, H - 1, output, output); -} - template void calcHexHash(const char(&key)[K], const char(&hex)[H], void* output) { initCache(key); @@ -1090,22 +1082,6 @@ int main() { assert(rx_get_rounding_mode() == RoundToNearest); }); - if (RANDOMX_HAVE_COMPILER) { - randomx_destroy_vm(vm); - vm = nullptr; -#ifdef RANDOMX_FORCE_SECURE - vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | 
RANDOMX_FLAG_SECURE, cache, nullptr); -#else - vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr); -#endif - } - - runTest("Commitment test", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() { - char hash[RANDOMX_HASH_SIZE]; - calcStringCommitment("test key 000", "This is a test", &hash); - assert(equalsHex(hash, "d53ccf348b75291b7be76f0a7ac8208bbced734b912f6fca60539ab6f86be919")); - }); - randomx_destroy_vm(vm); vm = nullptr; diff --git a/src/RandomX/src/virtual_memory.cpp b/src/RandomX/src/virtual_memory.cpp new file mode 100644 index 000000000..248d3a2c4 --- /dev/null +++ b/src/RandomX/src/virtual_memory.cpp @@ -0,0 +1,207 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "virtual_memory.hpp" + +#include + +#if defined(_WIN32) || defined(__CYGWIN__) +#include +#else +#ifdef __APPLE__ +#include +#include +#include +# if TARGET_OS_OSX +# if TARGET_CPU_ARM64 +# define USE_PTHREAD_JIT_WP 1 +# else +# undef USE_PTHREAD_JIT_WP +# endif +# include +# endif +#endif +#include +#include +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif +#define PAGE_READONLY PROT_READ +#define PAGE_READWRITE (PROT_READ | PROT_WRITE) +#define PAGE_EXECUTE_READ (PROT_READ | PROT_EXEC) +#define PAGE_EXECUTE_READWRITE (PROT_READ | PROT_WRITE | PROT_EXEC) +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) +std::string getErrorMessage(const char* function) { + LPSTR messageBuffer = nullptr; + size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL); + std::string message(messageBuffer, size); + LocalFree(messageBuffer); + return std::string(function) + std::string(": ") + message; +} + +void setPrivilege(const char* pszPrivilege, BOOL bEnable) { + HANDLE hToken; + TOKEN_PRIVILEGES tp; + BOOL status; + DWORD error; + + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken)) + throw std::runtime_error(getErrorMessage("OpenProcessToken")); + + if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid)) + throw std::runtime_error(getErrorMessage("LookupPrivilegeValue")); + + tp.PrivilegeCount = 1; + + if (bEnable) + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + else + tp.Privileges[0].Attributes = 0; + + status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + + error = GetLastError(); + if (!status || (error != ERROR_SUCCESS)) + throw std::runtime_error(getErrorMessage("AdjustTokenPrivileges")); + + if (!CloseHandle(hToken)) + throw std::runtime_error(getErrorMessage("CloseHandle")); +} +#endif + +void* allocMemoryPages(std::size_t bytes) { + void* mem; +#if defined(_WIN32) || defined(__CYGWIN__) + mem = VirtualAlloc(nullptr, bytes, MEM_COMMIT, PAGE_READWRITE); + if (mem == nullptr) + throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc")); +#else + #if defined(__NetBSD__) + #define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC) + #else + #define RESERVED_FLAGS 0 + #endif + #ifdef USE_PTHREAD_JIT_WP + #define MEXTRA MAP_JIT + #define PEXTRA PROT_EXEC + #else + #define MEXTRA 0 + #define PEXTRA 0 + #endif + mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0); + if (mem == MAP_FAILED) + throw std::runtime_error("allocMemoryPages - mmap failed"); +#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \ + && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0 + if (__builtin_available(macOS 11.0, *)) { + pthread_jit_write_protect_np(false); + } +#endif +#endif + return mem; +} + +static inline void pageProtect(void* ptr, std::size_t bytes, int rules) { +#if defined(_WIN32) || defined(__CYGWIN__) + DWORD oldp; + if (!VirtualProtect(ptr, bytes, (DWORD)rules, &oldp)) { + throw std::runtime_error(getErrorMessage("VirtualProtect")); + } +#else + if (-1 == mprotect(ptr, bytes, rules)) + throw std::runtime_error("mprotect failed"); +#endif +} + +void setPagesRW(void* ptr, std::size_t bytes) { +#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \ + && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0 + if (__builtin_available(macOS 
11.0, *)) { + pthread_jit_write_protect_np(false); + } else { + pageProtect(ptr, bytes, PAGE_READWRITE); + } +#else + pageProtect(ptr, bytes, PAGE_READWRITE); +#endif +} + +void setPagesRX(void* ptr, std::size_t bytes) { +#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \ + && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0 + if (__builtin_available(macOS 11.0, *)) { + pthread_jit_write_protect_np(true); + } else { + pageProtect(ptr, bytes, PAGE_EXECUTE_READ); + } +#else + pageProtect(ptr, bytes, PAGE_EXECUTE_READ); +#endif +} + +void setPagesRWX(void* ptr, std::size_t bytes) { + pageProtect(ptr, bytes, PAGE_EXECUTE_READWRITE); +} + +void* allocLargePagesMemory(std::size_t bytes) { + void* mem; +#if defined(_WIN32) || defined(__CYGWIN__) + setPrivilege("SeLockMemoryPrivilege", 1); + auto pageMinimum = GetLargePageMinimum(); + if (pageMinimum > 0) + mem = VirtualAlloc(NULL, alignSize(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE); + else + throw std::runtime_error("allocLargePagesMemory - Large pages are not supported"); + if (mem == nullptr) + throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc")); +#else +#ifdef __APPLE__ + mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0); +#elif defined(__FreeBSD__) + mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0); +#elif defined(__OpenBSD__) || defined(__NetBSD__) + mem = MAP_FAILED; // OpenBSD does not support huge pages +#else + mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0); +#endif + if (mem == MAP_FAILED) + throw std::runtime_error("allocLargePagesMemory - mmap failed"); +#endif + return mem; +} + +void freePagedMemory(void* ptr, std::size_t bytes) { +#if defined(_WIN32) || defined(__CYGWIN__) + VirtualFree(ptr, 0, MEM_RELEASE); +#else + munmap(ptr, bytes); +#endif +} diff --git a/src/RandomX/src/virtual_memory.hpp b/src/RandomX/src/virtual_memory.hpp new file mode 100644 index 000000000..9e8bc29ab --- /dev/null +++ b/src/RandomX/src/virtual_memory.hpp @@ -0,0 +1,42 @@ +/* +Copyright (c) 2018-2019, tevador + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#pragma once + +#include + +constexpr std::size_t alignSize(std::size_t pos, std::size_t align) { + return ((pos - 1) / align + 1) * align; +} + +void* allocMemoryPages(std::size_t); +void setPagesRW(void*, std::size_t); +void setPagesRX(void*, std::size_t); +void setPagesRWX(void*, std::size_t); +void* allocLargePagesMemory(std::size_t); +void freePagedMemory(void*, std::size_t); diff --git a/src/RandomX/vcxproj/randomx-dll.vcxproj b/src/RandomX/vcxproj/randomx-dll.vcxproj index 4eaae9bed..8b8ea8c08 100644 --- a/src/RandomX/vcxproj/randomx-dll.vcxproj +++ b/src/RandomX/vcxproj/randomx-dll.vcxproj @@ -43,7 +43,7 @@ - + @@ -74,7 +74,7 @@ - + diff --git a/src/RandomX/vcxproj/randomx-dll.vcxproj.filters b/src/RandomX/vcxproj/randomx-dll.vcxproj.filters index 5b51f9f72..68e1b8559 100644 --- a/src/RandomX/vcxproj/randomx-dll.vcxproj.filters +++ b/src/RandomX/vcxproj/randomx-dll.vcxproj.filters @@ -87,7 +87,7 @@ Header Files - + Header Files @@ -151,7 +151,7 @@ Source Files - + Source Files diff --git a/src/RandomX/vcxproj/randomx.vcxproj b/src/RandomX/vcxproj/randomx.vcxproj index cefdc8fb3..e0625c88b 100644 --- a/src/RandomX/vcxproj/randomx.vcxproj +++ b/src/RandomX/vcxproj/randomx.vcxproj @@ -156,7 +156,7 @@ SET ERRORLEVEL = 0 - + @@ -198,7 +198,7 @@ SET ERRORLEVEL = 0 - + diff --git a/src/RandomX/vcxproj/randomx.vcxproj.filters b/src/RandomX/vcxproj/randomx.vcxproj.filters index 7f055b5b8..eb4462a59 100644 --- a/src/RandomX/vcxproj/randomx.vcxproj.filters +++ b/src/RandomX/vcxproj/randomx.vcxproj.filters @@ -72,7 +72,7 @@ Source Files - + Source Files @@ -164,7 +164,7 @@ Header Files - + Header Files
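
---

Note on the IMUL_RCP hunks (assembly_generator_x86.cpp, bytecode_machine.cpp, jit_compiler_x86.cpp, jit_compiler_a64.cpp, perf-simulation.cpp, reciprocal.c/.h): the divisor is widened to `uint64_t` to match the new `randomx_reciprocal(uint64_t)` signature, and reciprocal.c swaps the clz-based formula for a shift-and-subtract long division (jit_compiler_a64.cpp now inlines the same computation instead of calling `randomx_reciprocal_fast`). Both forms compute `floor(2^(63 + n) / divisor)`, where `n` is the bit length of the divisor. A minimal standalone sketch for illustration only — the function names and sample divisors are mine, and the clz variant assumes a GCC/Clang `__builtin_clzll`, just as the removed code did behind `#ifdef __GNUC__`:

```cpp
#include <cassert>
#include <cstdint>

// clz-based form (the one this patch removes from reciprocal.c):
// floor(2^(63 + n) / divisor), where n is the bit length of the divisor.
static uint64_t reciprocal_clz(uint64_t divisor) {
    const uint64_t p2exp63 = 1ULL << 63;
    const uint64_t q = p2exp63 / divisor;
    const uint64_t r = p2exp63 % divisor;
    const uint64_t shift = 64 - __builtin_clzll(divisor); // bit length of divisor
    return (q << shift) + ((r << shift) / divisor);
}

// Loop-based form (the one this patch adds): the same value obtained by
// shift-and-subtract long division, producing one quotient bit per iteration.
static uint64_t reciprocal_loop(uint64_t divisor) {
    const uint64_t p2exp63 = 1ULL << 63;
    uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor;
    unsigned bsr = 0; // bit length of the divisor
    for (uint64_t bit = divisor; bit > 0; bit >>= 1)
        ++bsr;
    for (unsigned i = 0; i < bsr; ++i) {
        if (remainder >= divisor - remainder) { // i.e. 2 * remainder >= divisor
            quotient = quotient * 2 + 1;
            remainder = remainder * 2 - divisor;
        } else {
            quotient = quotient * 2;
            remainder = remainder * 2;
        }
    }
    return quotient;
}

int main() {
    // IMUL_RCP only ever feeds in non-zero, non-power-of-two 32-bit immediates.
    const uint64_t samples[] = { 3, 5, 1000000007, 0xFFFFFFFFu };
    for (uint64_t divisor : samples) {
        assert(reciprocal_clz(divisor) == reciprocal_loop(divisor));
    }
    return 0;
}
```

Running the sketch asserts that the two variants agree on a few non-power-of-two 32-bit immediates, the only range IMUL_RCP supplies after `getImm32()` and the `isZeroOrPowerOf2` filter.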
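Note on the removal of the commitment API (randomx.h, randomx.cpp, tests.cpp and benchmark.cpp hunks): after this patch the library no longer exports `randomx_calculate_commitment`, and the benchmark loses its `--commit` option. The removed body, visible in the randomx.cpp hunk, is a BLAKE2b-256 over the hashed input followed by the 32-byte hash, so a caller that still needs commitments can compute them outside the library. A sketch under that assumption — the wrapper name is mine, and the include paths assume the in-tree RandomX source layout:

```cpp
#include <cstddef>

#include "blake2/blake2.h"  // bundled BLAKE2b implementation
#include "randomx.h"        // RANDOMX_HASH_SIZE

// External equivalent of the removed randomx_calculate_commitment():
// commitment = BLAKE2b-256(input || hash_in).
static void calculate_commitment(const void* input, size_t inputSize,
                                 const void* hash_in, void* com_out) {
    blake2b_state state;
    blake2b_init(&state, RANDOMX_HASH_SIZE);
    blake2b_update(&state, input, inputSize);
    blake2b_update(&state, hash_in, RANDOMX_HASH_SIZE);
    blake2b_final(&state, com_out, RANDOMX_HASH_SIZE);
}
```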
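Note on the new src/virtual_memory.cpp/.hpp: they expose `allocMemoryPages`, `setPagesRW`, `setPagesRX`, `setPagesRWX`, `allocLargePagesMemory`, `freePagedMemory` and `alignSize`, with a `pthread_jit_write_protect_np` path for macOS on arm64. A JIT buffer is expected to cycle through RW for emission and RX for execution. A minimal usage sketch under my own assumptions — x86-64 only (it emits a single `ret` instruction as a stand-in for a generated program) and compiled and linked together with src/virtual_memory.cpp:

```cpp
#include <cstddef>
#include <cstdint>

#include "virtual_memory.hpp"

int main() {
    const std::size_t codeSize = 4096;

    // 1. Reserve page-aligned read/write memory (throws std::runtime_error on failure).
    uint8_t* code = static_cast<uint8_t*>(allocMemoryPages(codeSize));

    // 2. Emit machine code while the pages are writable: a single x86-64 `ret`.
    code[0] = 0xC3;

    // 3. Drop write permission before executing; W^X platforms reject pages
    //    that are writable and executable at the same time.
    setPagesRX(code, codeSize);
    reinterpret_cast<void (*)()>(code)();  // conditionally-supported cast; fine on GCC/Clang/MSVC

    // 4. To regenerate the program, flip the buffer back to read/write.
    setPagesRW(code, codeSize);

    // 5. Unmap when done.
    freePagedMemory(code, codeSize);
    return 0;
}
```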