Update to RandomX v1.2.1

Commit 102f8acf90a7649ada410de5499a7ec62e49e1da
6 months ago · 6029b3d571
34 changed files with 2966 additions and 234 deletions
--- a/src/RandomX/CMakeLists.txt
+++ b/src/RandomX/CMakeLists.txt
@ -39,7 +39,7 @@ src/bytecode_machine.cpp
 src/cpu.cpp
 src/dataset.cpp
 src/soft_aes.cpp
-src/virtual_memory.cpp
+src/virtual_memory.c
 src/vm_interpreted.cpp
 src/allocator.cpp
 src/assembly_generator_x86.cpp
@ -96,7 +96,7 @@ function(add_flag flag)
 endfunction()

 # x86-64
-if(ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64")
+if ((CMAKE_SIZEOF_VOID_P EQUAL 8) AND (ARCH_ID STREQUAL "x86_64" OR ARCH_ID STREQUAL "x86-64" OR ARCH_ID STREQUAL "amd64"))
  list(APPEND randomx_sources
    src/jit_compiler_x86.cpp)

@ -173,6 +173,42 @@ if(ARM_ID STREQUAL "aarch64" OR ARM_ID STREQUAL "arm64" OR ARM_ID STREQUAL "armv
  endif()
 endif()

+# RISC-V (riscv64 only): add the RV64 JIT compiler sources and choose the -march value
+if(ARCH_ID STREQUAL "riscv64")
+  list(APPEND randomx_sources
+    src/jit_compiler_rv64_static.S
+    src/jit_compiler_rv64.cpp)
+  # workaround for cmake/ccache friction: have the .S file treated as a C source instead of an ASM source
+  set_property(SOURCE src/jit_compiler_rv64_static.S PROPERTY LANGUAGE C)
+  set_property(SOURCE src/jit_compiler_rv64_static.S PROPERTY XCODE_EXPLICIT_FILE_TYPE sourcecode.asm)
+
+  # RVARCH holds the -march value; the default build uses the RV64GC baseline
+  set(RVARCH "rv64gc")
+
+  # ARCH=native only: append _zba / _zbb when the matching probe both builds with that -march and exits with status 0 here
+  if(ARCH STREQUAL "native")
+    enable_language(ASM)
+    try_run(RANDOMX_ZBA_RUN_FAIL
+        RANDOMX_ZBA_COMPILE_OK
+        ${CMAKE_CURRENT_BINARY_DIR}/
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/tests/riscv64_zba.s
+        COMPILE_DEFINITIONS "-march=rv64gc_zba")
+    if (RANDOMX_ZBA_COMPILE_OK AND NOT RANDOMX_ZBA_RUN_FAIL)
+      set(RVARCH "${RVARCH}_zba")
+    endif()
+    try_run(RANDOMX_ZBB_RUN_FAIL
+        RANDOMX_ZBB_COMPILE_OK
+        ${CMAKE_CURRENT_BINARY_DIR}/
+        ${CMAKE_CURRENT_SOURCE_DIR}/src/tests/riscv64_zbb.s
+        COMPILE_DEFINITIONS "-march=rv64gc_zbb")
+    if (RANDOMX_ZBB_COMPILE_OK AND NOT RANDOMX_ZBB_RUN_FAIL)
+      set(RVARCH "${RVARCH}_zbb")
+    endif()
+  endif()
+
+  add_flag("-march=${RVARCH}")  # rv64gc by default; rv64gc_zba_zbb when both probes succeeded
+endif()
+
 set(RANDOMX_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src" CACHE STRING "RandomX Include path")

 add_library(randomx ${randomx_sources})
--- a/src/RandomX/README.md
+++ b/src/RandomX/README.md
@ -37,7 +37,7 @@ RandomX is written in C++11 and builds a static library with a C API provided by

 ### Linux

-Build dependencies: `cmake` (minimum 2.8.7) and `gcc` (minimum version 4.8, but version 7+ is recommended).
+Build dependencies: `cmake` (minimum 3.5) and `gcc` (minimum version 4.8, but version 7+ is recommended).

 To build optimized binaries for your machine, run:
 ```
@ -82,7 +82,7 @@ Intel Core i7-8550U|16G DDR4-2400|Windows 10|hw|200 (4T)|1700  (4T)|350 (8T)|
 Intel Core i3-3220|4G DDR3-1333|Ubuntu 16.04|soft|42 (4T)|510 (4T)|150 (4T)|
 Raspberry Pi 3|1G LPDDR2|Ubuntu 16.04|soft|3.5 (4T)|-|20 (4T)|

-Note that RandomX currently includes a JIT compiler for x86-64 and ARM64. Other architectures have to use the portable interpreter, which is much slower.
+Note that RandomX currently includes a JIT compiler for x86-64, ARM64 and RISCV64. Other architectures have to use the portable interpreter, which is much slower.

 ### GPU performance

@ -129,6 +129,7 @@ The reference implementation has been validated on the following platforms:
 * ARMv7+VFPv3 (32-bit, little-endian)
 * ARMv8 (64-bit, little-endian)
 * PPC64 (64-bit, big-endian)
+* RISCV64 (64-bit, little-endian)

 ### Can FPGAs mine RandomX?

--- a/src/RandomX/doc/tevador.asc
+++ b/src/RandomX/doc/tevador.asc
@ -1,13 +1,13 @@
 -----BEGIN PGP PUBLIC KEY BLOCK-----

 mDMEXd+PeBYJKwYBBAHaRw8BAQdAZ0nqJ+nRYoScG2QLX62pl+WO1+Mkv6Yyt2Kb
-ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+FiEEMoWj
-LVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwMFCQWnqDgFCwkIBwIGFQoJCAsCBBYC
-AwECHgECF4AACgkQWijIaue9c6YBFQD+N1XTUqSCZp9jB/yTHQ9ahSaIUMtmuvdT
-So2s+quudP4A/R5wLwukpfGN9UZ4cfpmKCJ9jO1HJ2udmlGMsJbQpDAIuDgEXd+P
+ntGUuLq0G3RldmFkb3IgPHRldmFkb3JAZ21haWwuY29tPoiWBBMWCAA+AhsDBQsJ
+CAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAmRP
+r8MFCQ/ZS2YACgkQWijIaue9c6bR5gEA0tnQ4Al+yOLoRUBQitAV8FU4FLy8Xx8U
+IyyivjJ0UhIA/2jwJfMXmJdMKtar8xfIA5mZLLofkEP6hug4knhitpkBuDgEXd+P
 eBIKKwYBBAGXVQEFAQEHQBNbQuPcDojMCkRb5B5u7Ld/AFLClOh+6ElL+u61rIY/
-AwEIB4h+BBgWCAAmFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAl3fj3gCGwwFCQWn
-qDgACgkQWijIaue9c6YJvgD+IY1Q9mCM1P1iZIoXuafRihXJ7UgVXpQqW2yoaUT3
-bfQA/RkisI2eElYoOjdwPszPP6VfL5+SViwDmDuJG2P5llgE
-=V4vd
-----END PGP PUBLIC KEY BLOCK-----
+AwEIB4h+BBgWCAAmAhsMFiEEMoWjLVEwdmMs6CUQWijIaue9c6YFAmRQoAMFCQ/Z
+S2YACgkQWijIaue9c6bUfwD9Hw20kGCaZ8rWghz9W3bc645ys1vPQpQW28CD9w3B
+cTMBALsV1xpS2pGwTfn1PUimqESZfTrREmNvOjKSQwe0yicI
+=D4lm
+-----END PGP PUBLIC KEY BLOCK-----
--- a/src/RandomX/src/allocator.cpp
+++ b/src/RandomX/src/allocator.cpp
@ -29,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <new>
 #include "allocator.hpp"
 #include "intrin_portable.h"
-#include "virtual_memory.hpp"
+#include "virtual_memory.h"
 #include "common.hpp"

 namespace randomx {
--- a/src/RandomX/src/assembly_generator_x86.cpp
+++ b/src/RandomX/src/assembly_generator_x86.cpp
@ -445,7 +445,7 @@ namespace randomx {
 	}

 	void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
-		uint64_t divisor = instr.getImm32();
+		const uint32_t divisor = instr.getImm32();
 		if (!isZeroOrPowerOf2(divisor)) {
 			registerUsage[instr.dst] = i;
 			asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;
--- a/src/RandomX/src/bytecode_machine.cpp
+++ b/src/RandomX/src/bytecode_machine.cpp
@ -243,7 +243,7 @@ namespace randomx {
 		}

 		if (opcode < ceil_IMUL_RCP) {
-			uint64_t divisor = instr.getImm32();
+			const uint32_t divisor = instr.getImm32();
 			if (!isZeroOrPowerOf2(divisor)) {
 				auto dst = instr.dst % RegistersCount;
 				ibc.type = InstructionType::IMUL_R;
--- a/src/RandomX/src/common.hpp
+++ b/src/RandomX/src/common.hpp
@ -116,12 +116,19 @@ namespace randomx {

 #if defined(_M_X64) || defined(__x86_64__)
 	#define RANDOMX_HAVE_COMPILER 1
+	#define RANDOMX_COMPILER_X86
 	class JitCompilerX86;
 	using JitCompiler = JitCompilerX86;
 #elif defined(__aarch64__)
 	#define RANDOMX_HAVE_COMPILER 1
+	#define RANDOMX_COMPILER_A64
 	class JitCompilerA64;
 	using JitCompiler = JitCompilerA64;
+#elif defined(__riscv) && __riscv_xlen == 64
+	#define RANDOMX_HAVE_COMPILER 1
+	#define RANDOMX_COMPILER_RV64
+	class JitCompilerRV64;
+	using JitCompiler = JitCompilerRV64;
 #else
 	#define RANDOMX_HAVE_COMPILER 0
 	class JitCompilerFallback;
--- a/src/RandomX/src/configuration.h
+++ b/src/RandomX/src/configuration.h
@ -32,13 +32,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define RANDOMX_ARGON_MEMORY       262144

 //Number of Argon2d iterations for Cache initialization.
-#define RANDOMX_ARGON_ITERATIONS   5
+#define RANDOMX_ARGON_ITERATIONS   3

 //Number of parallel lanes for Cache initialization.
 #define RANDOMX_ARGON_LANES        1

 //Argon2d salt
-#define RANDOMX_ARGON_SALT         "RandomXHUSH\x03"
+#define RANDOMX_ARGON_SALT         "RandomX\x03"

 //Number of random Cache accesses per Dataset item. Minimum is 2.
 #define RANDOMX_CACHE_ACCESSES     8
@ -53,13 +53,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define RANDOMX_DATASET_EXTRA_SIZE 33554368

 //Number of instructions in a RandomX program. Must be divisible by 8.
-#define RANDOMX_PROGRAM_SIZE       512
+#define RANDOMX_PROGRAM_SIZE       256

 //Number of iterations during VM execution.
-#define RANDOMX_PROGRAM_ITERATIONS 4096
+#define RANDOMX_PROGRAM_ITERATIONS 2048

 //Number of chained VM executions per hash.
-#define RANDOMX_PROGRAM_COUNT      16
+#define RANDOMX_PROGRAM_COUNT      8

 //Scratchpad L3 size in bytes. Must be a power of 2.
 #define RANDOMX_SCRATCHPAD_L3      2097152
--- a/src/RandomX/src/dataset.cpp
+++ b/src/RandomX/src/dataset.cpp
@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include "common.hpp"
 #include "dataset.hpp"
-#include "virtual_memory.hpp"
+#include "virtual_memory.h"
 #include "superscalar.hpp"
 #include "blake2_generator.hpp"
 #include "reciprocal.h"
--- a/src/RandomX/src/intrin_portable.h
+++ b/src/RandomX/src/intrin_portable.h
@ -349,7 +349,7 @@ FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *p) {
 #if defined(NATIVE_LITTLE_ENDIAN)
 	return *p;
 #else
-	uint32_t* ptr = (uint32_t*)p;
+	const uint32_t* ptr = (const uint32_t*)p;
 	vec_u c;
 	c.u32[0] = load32(ptr + 0);
 	c.u32[1] = load32(ptr + 1);
@ -375,8 +375,8 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {

 FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
 	vec_u x;
-	x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
-	x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
+	x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0));
+	x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4));
 	return (rx_vec_f128)x.d;
 }

@ -684,7 +684,7 @@ FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const* p) {
 #if defined(NATIVE_LITTLE_ENDIAN)
 	return *p;
 #else
-	uint32_t* ptr = (uint32_t*)p;
+	const uint32_t* ptr = (const uint32_t*)p;
 	rx_vec_i128 c;
 	c.u32[0] = load32(ptr + 0);
 	c.u32[1] = load32(ptr + 1);
@ -708,8 +708,8 @@ FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *p, rx_vec_i128 b) {

 FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
 	rx_vec_f128 x;
-	x.lo = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
-	x.hi = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
+	x.lo = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 0));
+	x.hi = (double)unsigned32ToSigned2sCompl(load32((const uint8_t*)addr + 4));
 	return x;
 }

--- a/src/RandomX/src/jit_compiler.hpp
+++ b/src/RandomX/src/jit_compiler.hpp
@ -28,10 +28,48 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #pragma once

-#if defined(_M_X64) || defined(__x86_64__)
+#include "common.hpp"
+
+namespace randomx {
+
+	struct CodeBuffer { // Write cursor over a byte buffer that receives emitted code/data; the struct itself never allocates or frees the buffer.
+		uint8_t* code; // start of the destination buffer
+		int32_t codePos; // byte offset into code where the next emit() writes
+		int32_t rcpCount; // not touched by any method here; presumably counts reciprocal literals emitted so far - confirm in the JIT sources
+
+		void emit(const uint8_t* src, int32_t len) { // append len bytes from src at codePos, then advance codePos by len; no bounds check is done
+			memcpy(&code[codePos], src, len);
+			codePos += len;
+		}
+
+		template<typename T>
+		void emit(T src) { // append the sizeof(T) bytes of src's object representation and advance codePos by sizeof(T)
+			memcpy(&code[codePos], &src, sizeof(src));
+			codePos += sizeof(src);
+		}
+
+		void emitAt(int32_t codePos, const uint8_t* src, int32_t len) { // write len bytes at an explicit offset; the parameter shadows the member codePos, which is left unchanged
+			memcpy(&code[codePos], src, len);
+		}
+
+		template<typename T>
+		void emitAt(int32_t codePos, T src) { // write sizeof(T) bytes of src at an explicit offset; the member codePos is left unchanged
+			memcpy(&code[codePos], &src, sizeof(src));
+		}
+	};
+
+	struct CompilerState : public CodeBuffer { // CodeBuffer extended with two fixed-size bookkeeping arrays
+		int32_t instructionOffsets[RANDOMX_PROGRAM_SIZE]; // one slot per program instruction; presumably the code offset where each instruction was emitted - confirm
+		int registerUsage[RegistersCount]; // one slot per register; presumably the index of the last instruction that modified it - confirm
+	};
+}
+
+#if defined(RANDOMX_COMPILER_X86)
 #include "jit_compiler_x86.hpp"
-#elif defined(__aarch64__)
+#elif defined(RANDOMX_COMPILER_A64)
 #include "jit_compiler_a64.hpp"
+#elif defined(RANDOMX_COMPILER_RV64)
+#include "jit_compiler_rv64.hpp"
 #else
 #include "jit_compiler_fallback.hpp"
 #endif
--- a/src/RandomX/src/jit_compiler_a64.cpp
+++ b/src/RandomX/src/jit_compiler_a64.cpp
@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "superscalar.hpp"
 #include "program.hpp"
 #include "reciprocal.h"
-#include "virtual_memory.hpp"
+#include "virtual_memory.h"

 namespace ARMV8A {

@ -130,8 +130,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
 	// and w16, w10, ScratchpadL3Mask64
 	emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);

-	// and w17, w18, ScratchpadL3Mask64
-	emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
+	// and w17, w20, ScratchpadL3Mask64
+	emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);

 	codePos = PrologueSize;
 	literalPos = ImulRcpLiteralsEnd;
@ -149,16 +149,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
 	}

 	// Update spMix2
-	// eor w18, config.readReg2, config.readReg3
-	emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
+	// eor w20, config.readReg2, config.readReg3
+	emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);

 	// Jump back to the main loop
 	const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end) - ((uint8_t*)randomx_program_aarch64)) - codePos;
 	emit32(ARMV8A::B | (offset / 4), code, codePos);

-	// and w18, w18, CacheLineAlignMask
+	// and w20, w20, CacheLineAlignMask
 	codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask1) - ((uint8_t*)randomx_program_aarch64));
-	emit32(0x121A0000 | 18 | (18 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);
+	emit32(0x121A0000 | 20 | (20 << 5) | ((Log2(RANDOMX_DATASET_BASE_SIZE) - 7) << 10), code, codePos);

 	// and w10, w10, CacheLineAlignMask
 	codePos = (((uint8_t*)randomx_program_aarch64_cacheline_align_mask2) - ((uint8_t*)randomx_program_aarch64));
@ -181,8 +181,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
 	// and w16, w10, ScratchpadL3Mask64
 	emit32(0x121A0000 | 16 | (10 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);

-	// and w17, w18, ScratchpadL3Mask64
-	emit32(0x121A0000 | 17 | (18 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);
+	// and w17, w20, ScratchpadL3Mask64
+	emit32(0x121A0000 | 17 | (20 << 5) | ((Log2(RANDOMX_SCRATCHPAD_L3) - 7) << 10), code, codePos);

 	codePos = PrologueSize;
 	literalPos = ImulRcpLiteralsEnd;
@ -200,8 +200,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
 	}

 	// Update spMix2
-	// eor w18, config.readReg2, config.readReg3
-	emit32(ARMV8A::EOR32 | 18 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);
+	// eor w20, config.readReg2, config.readReg3
+	emit32(ARMV8A::EOR32 | 20 | (IntRegMap[config.readReg2] << 5) | (IntRegMap[config.readReg3] << 16), code, codePos);

 	// Jump back to the main loop
 	const uint32_t offset = (((uint8_t*)randomx_program_aarch64_vm_instructions_end_light) - ((uint8_t*)randomx_program_aarch64)) - codePos;
@ -434,7 +434,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
 	}
 	else
 	{
-		constexpr uint32_t tmp_reg = 18;
+		constexpr uint32_t tmp_reg = 20;
 		emitMovImmediate(tmp_reg, imm, code, k);

 		// add dst, src, tmp_reg
@ -483,7 +483,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
 	uint32_t k = codePos;

 	uint32_t imm = instr.getImm32();
-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 19;

 	imm &= instr.getModMem() ? (RANDOMX_SCRATCHPAD_L1 - 1) : (RANDOMX_SCRATCHPAD_L2 - 1);
 	emitAddImmediate(tmp_reg, src, imm, code, k);
@ -537,7 +537,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// add dst, dst, tmp_reg
@ -575,7 +575,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// sub dst, dst, tmp_reg
@ -594,7 +594,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)

 	if (src == dst)
 	{
-		src = 18;
+		src = 20;
 		emitMovImmediate(src, instr.getImm32(), code, k);
 	}

@ -612,7 +612,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// sub dst, dst, tmp_reg
@ -643,7 +643,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// umulh dst, dst, tmp_reg
@ -674,7 +674,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// smulh dst, dst, tmp_reg
@ -686,34 +686,24 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)

 void JitCompilerA64::h_IMUL_RCP(Instruction& instr, uint32_t& codePos)
 {
-	const uint64_t divisor = instr.getImm32();
+	const uint32_t divisor = instr.getImm32();
 	if (isZeroOrPowerOf2(divisor))
 		return;

 	uint32_t k = codePos;

-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint64_t N = 1ULL << 63;
-	const uint64_t q = N / divisor;
-	const uint64_t r = N % divisor;
-#ifdef __GNUC__
-	const uint64_t shift = 64 - __builtin_clzll(divisor);
-#else
-	uint64_t shift = 32;
-	for (uint64_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
-		--shift;
-#endif
-
 	const uint32_t literal_id = (ImulRcpLiteralsEnd - literalPos) / sizeof(uint64_t);
-
 	literalPos -= sizeof(uint64_t);
-	*(uint64_t*)(code + literalPos) = (q << shift) + ((r << shift) / divisor);

-	if (literal_id < 13)
+	const uint64_t reciprocal = randomx_reciprocal_fast(divisor);
+	memcpy(code + literalPos, &reciprocal, sizeof(reciprocal));
+
+	if (literal_id < 12)
 	{
-		static constexpr uint32_t literal_regs[13] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 20 << 16, 11 << 16, 0 };
+		static constexpr uint32_t literal_regs[12] = { 30 << 16, 29 << 16, 28 << 16, 27 << 16, 26 << 16, 25 << 16, 24 << 16, 23 << 16, 22 << 16, 21 << 16, 11 << 16, 0 };

 		// mul dst, dst, literal_reg
 		emit32(ARMV8A::MUL | dst | (dst << 5) | literal_regs[literal_id], code, k);
@ -751,7 +741,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)

 	if (src == dst)
 	{
-		src = 18;
+		src = 20;
 		emitMovImmediate(src, instr.getImm32(), code, k);
 	}

@ -769,7 +759,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];

-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;
 	emitMemLoad<tmp_reg>(dst, src, instr, code, k);

 	// eor dst, dst, tmp_reg
@ -807,7 +797,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)

 	if (src != dst)
 	{
-		constexpr uint32_t tmp_reg = 18;
+		constexpr uint32_t tmp_reg = 20;

 		// sub tmp_reg, xzr, src
 		emit32(ARMV8A::SUB | tmp_reg | (31 << 5) | (src << 16), code, k);
@ -835,7 +825,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)

 	uint32_t k = codePos;

-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;
 	emit32(ARMV8A::MOV_REG | tmp_reg | (dst << 16), code, k);
 	emit32(ARMV8A::MOV_REG | dst | (src << 16), code, k);
 	emit32(ARMV8A::MOV_REG | src | (tmp_reg << 16), code, k);
@ -984,7 +974,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)

 	const uint32_t src = IntRegMap[instr.src];

-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;
 	constexpr uint32_t fpcr_tmp_reg = 8;

 	// ror tmp_reg, src, imm
@ -1008,7 +998,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)

 	const uint32_t src = IntRegMap[instr.src];
 	const uint32_t dst = IntRegMap[instr.dst];
-	constexpr uint32_t tmp_reg = 18;
+	constexpr uint32_t tmp_reg = 20;

 	uint32_t imm = instr.getImm32();

--- a/src/RandomX/src/jit_compiler_a64.hpp
+++ b/src/RandomX/src/jit_compiler_a64.hpp
@ -81,7 +81,7 @@ namespace randomx {

 		static void emit64(uint64_t val, uint8_t* code, uint32_t& codePos)
 		{
-			*(uint64_t*)(code + codePos) = val;
+			memcpy(code + codePos, &val, sizeof(val));
 			codePos += sizeof(val);
 		}

--- a/src/RandomX/src/jit_compiler_a64_static.S
+++ b/src/RandomX/src/jit_compiler_a64_static.S
@ -74,9 +74,9 @@
 # x15 -> "r7"
 # x16 -> spAddr0
 # x17 -> spAddr1
-# x18 -> temporary
+# x18 -> unused (platform register, don't touch it)
 # x19 -> temporary
-# x20 -> literal for IMUL_RCP
+# x20 -> temporary
 # x21 -> literal for IMUL_RCP
 # x22 -> literal for IMUL_RCP
 # x23 -> literal for IMUL_RCP
@ -111,7 +111,7 @@ DECL(randomx_program_aarch64):
 	# Save callee-saved registers
 	sub	sp, sp, 192
 	stp	x16, x17, [sp]
-	stp	x18, x19, [sp, 16]
+	str	x19, [sp, 16]
 	stp	x20, x21, [sp, 32]
 	stp	x22, x23, [sp, 48]
 	stp	x24, x25, [sp, 64]
@ -166,7 +166,6 @@ DECL(randomx_program_aarch64):
 	# Read literals
 	ldr	x0, literal_x0
 	ldr	x11, literal_x11
-	ldr	x20, literal_x20
 	ldr	x21, literal_x21
 	ldr	x22, literal_x22
 	ldr	x23, literal_x23
@ -198,11 +197,11 @@ DECL(randomx_program_aarch64):
 DECL(randomx_program_aarch64_main_loop):
 	# spAddr0 = spMix1 & ScratchpadL3Mask64;
 	# spAddr1 = (spMix1 >> 32) & ScratchpadL3Mask64;
-	lsr	x18, x10, 32
+	lsr	x20, x10, 32

 	# Actual mask will be inserted by JIT compiler
 	and	w16, w10, 1
-	and	w17, w18, 1
+	and	w17, w20, 1

 	# x16 = scratchpad + spAddr0
 	# x17 = scratchpad + spAddr1
@ -210,31 +209,31 @@ DECL(randomx_program_aarch64_main_loop):
 	add	x17, x17, x2

 	# xor integer registers with scratchpad data (spAddr0)
-	ldp	x18, x19, [x16]
-	eor	x4, x4, x18
+	ldp	x20, x19, [x16]
+	eor	x4, x4, x20
 	eor	x5, x5, x19
-	ldp	x18, x19, [x16, 16]
-	eor	x6, x6, x18
+	ldp	x20, x19, [x16, 16]
+	eor	x6, x6, x20
 	eor	x7, x7, x19
-	ldp	x18, x19, [x16, 32]
-	eor	x12, x12, x18
+	ldp	x20, x19, [x16, 32]
+	eor	x12, x12, x20
 	eor	x13, x13, x19
-	ldp	x18, x19, [x16, 48]
-	eor	x14, x14, x18
+	ldp	x20, x19, [x16, 48]
+	eor	x14, x14, x20
 	eor	x15, x15, x19

 	# Load group F registers (spAddr1)
-	ldpsw	x18, x19, [x17]
-	ins	v16.d[0], x18
+	ldpsw	x20, x19, [x17]
+	ins	v16.d[0], x20
 	ins	v16.d[1], x19
-	ldpsw	x18, x19, [x17, 8]
-	ins	v17.d[0], x18
+	ldpsw	x20, x19, [x17, 8]
+	ins	v17.d[0], x20
 	ins	v17.d[1], x19
-	ldpsw	x18, x19, [x17, 16]
-	ins	v18.d[0], x18
+	ldpsw	x20, x19, [x17, 16]
+	ins	v18.d[0], x20
 	ins	v18.d[1], x19
-	ldpsw	x18, x19, [x17, 24]
-	ins	v19.d[0], x18
+	ldpsw	x20, x19, [x17, 24]
+	ins	v19.d[0], x20
 	ins	v19.d[1], x19
 	scvtf	v16.2d, v16.2d
 	scvtf	v17.2d, v17.2d
@ -242,17 +241,17 @@ DECL(randomx_program_aarch64_main_loop):
 	scvtf	v19.2d, v19.2d

 	# Load group E registers (spAddr1)
-	ldpsw	x18, x19, [x17, 32]
-	ins	v20.d[0], x18
+	ldpsw	x20, x19, [x17, 32]
+	ins	v20.d[0], x20
 	ins	v20.d[1], x19
-	ldpsw	x18, x19, [x17, 40]
-	ins	v21.d[0], x18
+	ldpsw	x20, x19, [x17, 40]
+	ins	v21.d[0], x20
 	ins	v21.d[1], x19
-	ldpsw	x18, x19, [x17, 48]
-	ins	v22.d[0], x18
+	ldpsw	x20, x19, [x17, 48]
+	ins	v22.d[0], x20
 	ins	v22.d[1], x19
-	ldpsw	x18, x19, [x17, 56]
-	ins	v23.d[0], x18
+	ldpsw	x20, x19, [x17, 56]
+	ins	v23.d[0], x20
 	ins	v23.d[1], x19
 	scvtf	v20.2d, v20.2d
 	scvtf	v21.2d, v21.2d
@ -276,7 +275,6 @@ DECL(randomx_program_aarch64_vm_instructions):

 literal_x0:  .fill 1,8,0
 literal_x11: .fill 1,8,0
-literal_x20: .fill 1,8,0
 literal_x21: .fill 1,8,0
 literal_x22: .fill 1,8,0
 literal_x23: .fill 1,8,0
@ -312,17 +310,17 @@ DECL(randomx_program_aarch64_vm_instructions_end):
 	lsr	x10, x9, 32

 	# mx ^= r[readReg2] ^ r[readReg3];
-	eor	x9, x9, x18
+	eor	x9, x9, x20

 	# Calculate dataset pointer for dataset prefetch
-	mov	w18, w9
+	mov	w20, w9
 DECL(randomx_program_aarch64_cacheline_align_mask1):
 	# Actual mask will be inserted by JIT compiler
-	and	x18, x18, 1
-	add	x18, x18, x1
+	and	x20, x20, 1
+	add	x20, x20, x1

 	# Prefetch dataset data
-	prfm	pldl2strm, [x18]
+	prfm	pldl2strm, [x20]

 	# mx <-> ma
 	ror	x9, x9, 32
@ -335,17 +333,17 @@ DECL(randomx_program_aarch64_cacheline_align_mask2):
 DECL(randomx_program_aarch64_xor_with_dataset_line):
 rx_program_xor_with_dataset_line:
 	# xor integer registers with dataset data
-	ldp	x18, x19, [x10]
-	eor	x4, x4, x18
+	ldp	x20, x19, [x10]
+	eor	x4, x4, x20
 	eor	x5, x5, x19
-	ldp	x18, x19, [x10, 16]
-	eor	x6, x6, x18
+	ldp	x20, x19, [x10, 16]
+	eor	x6, x6, x20
 	eor	x7, x7, x19
-	ldp	x18, x19, [x10, 32]
-	eor	x12, x12, x18
+	ldp	x20, x19, [x10, 32]
+	eor	x12, x12, x20
 	eor	x13, x13, x19
-	ldp	x18, x19, [x10, 48]
-	eor	x14, x14, x18
+	ldp	x20, x19, [x10, 48]
+	eor	x14, x14, x20
 	eor	x15, x15, x19

 DECL(randomx_program_aarch64_update_spMix1):
@ -388,7 +386,7 @@ DECL(randomx_program_aarch64_update_spMix1):

 	# Restore callee-saved registers
 	ldp	x16, x17, [sp]
-	ldp	x18, x19, [sp, 16]
+	ldr	x19, [sp, 16]
 	ldp	x20, x21, [sp, 32]
 	ldp	x22, x23, [sp, 48]
 	ldp	x24, x25, [sp, 64]
@ -409,7 +407,7 @@ DECL(randomx_program_aarch64_vm_instructions_end_light):
 	stp	x2, x30, [sp, 80]

 	# mx ^= r[readReg2] ^ r[readReg3];
-	eor	x9, x9, x18
+	eor	x9, x9, x20

 	# mx <-> ma
 	ror	x9, x9, 32
@ -451,8 +449,8 @@ DECL(randomx_program_aarch64_light_dataset_offset):
 # x3 -> end item

 DECL(randomx_init_dataset_aarch64):
-	# Save x30 (return address)
-	str	x30, [sp, -16]!
+	# Save x20 (used as temporary, but must be saved to not break ABI) and x30 (return address)
+	stp	x20, x30, [sp, -16]!

 	# Load pointer to cache memory
 	ldr	x0, [x0]
@ -464,8 +462,8 @@ DECL(randomx_init_dataset_aarch64_main_loop):
 	cmp	x2, x3
 	bne	DECL(randomx_init_dataset_aarch64_main_loop)

-	# Restore x30 (return address)
-	ldr	x30, [sp], 16
+	# Restore x20 and x30
+	ldp	x20, x30, [sp], 16

 	ret

--- a/src/RandomX/src/jit_compiler_rv64.cpp
+++ b/src/RandomX/src/jit_compiler_rv64.cpp
--- a/src/RandomX/src/jit_compiler_rv64.hpp
+++ b/src/RandomX/src/jit_compiler_rv64.hpp
@ -0,0 +1,69 @@
+/*
+Copyright (c) 2023 tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <vector>
+#include "jit_compiler.hpp"
+
+namespace randomx {
+
+	class Program;
+	struct ProgramConfiguration;
+	class SuperscalarProgram;
+	class Instruction;
+
+	class JitCompilerRV64 { // JIT compiler interface for 64-bit RISC-V; only getters and one no-op are defined inline, the rest is defined elsewhere
+	public:
+		JitCompilerRV64();
+		~JitCompilerRV64();
+		void generateProgram(Program&, ProgramConfiguration&);
+		void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
+		void generateSuperscalarHash(SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t>&);
+		void generateDatasetInitCode() {} // no-op: the body is empty
+		ProgramFunc* getProgramFunc() { // returns entryProgram reinterpreted as a ProgramFunc pointer
+			return (ProgramFunc*)entryProgram;
+		}
+		DatasetInitFunc* getDatasetInitFunc() { // returns entryDataInit reinterpreted as a DatasetInitFunc pointer
+			return (DatasetInitFunc*)entryDataInit;
+		}
+		uint8_t* getCode() { // start of the code buffer held in state
+			return state.code;
+		}
+		size_t getCodeSize();
+		void enableWriting();
+		void enableExecution();
+		void enableAll();
+	private:
+		CompilerState state; // code buffer plus the bookkeeping arrays of CompilerState
+		void* entryDataInit; // handed out by getDatasetInitFunc(); assigned outside this header
+		void* entryProgram; // handed out by getProgramFunc(); assigned outside this header
+	};
+}
--- a/src/RandomX/src/jit_compiler_rv64_static.S
+++ b/src/RandomX/src/jit_compiler_rv64_static.S
--- a/src/RandomX/src/jit_compiler_rv64_static.hpp
+++ b/src/RandomX/src/jit_compiler_rv64_static.hpp
@ -0,0 +1,53 @@
+/*
+Copyright (c) 2023 tevador <tevador@gmail.com>
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+	* Redistributions of source code must retain the above copyright
+	  notice, this list of conditions and the following disclaimer.
+	* Redistributions in binary form must reproduce the above copyright
+	  notice, this list of conditions and the following disclaimer in the
+	  documentation and/or other materials provided with the distribution.
+	* Neither the name of the copyright holder nor the
+	  names of its contributors may be used to endorse or promote products
+	  derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#pragma once
+
+extern "C" { // C linkage, so the names are not mangled; presumably these are labels defined in jit_compiler_rv64_static.S, declared as functions so their addresses can be taken - confirm
+	void randomx_riscv64_literals();
+	void randomx_riscv64_literals_end();
+	void randomx_riscv64_data_init();
+	void randomx_riscv64_fix_data_call();
+	void randomx_riscv64_prologue();
+	void randomx_riscv64_loop_begin();
+	void randomx_riscv64_data_read();
+	void randomx_riscv64_data_read_light();
+	void randomx_riscv64_fix_loop_call();
+	void randomx_riscv64_spad_store();
+	void randomx_riscv64_spad_store_hardaes();
+	void randomx_riscv64_spad_store_softaes();
+	void randomx_riscv64_loop_end();
+	void randomx_riscv64_fix_continue_loop();
+	void randomx_riscv64_epilogue();
+	void randomx_riscv64_softaes();
+	void randomx_riscv64_program_end();
+	void randomx_riscv64_ssh_init();
+	void randomx_riscv64_ssh_load();
+	void randomx_riscv64_ssh_prefetch();
+	void randomx_riscv64_ssh_end();
+}
--- a/src/RandomX/src/jit_compiler_x86.cpp
+++ b/src/RandomX/src/jit_compiler_x86.cpp
@ -34,7 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "superscalar.hpp"
 #include "program.hpp"
 #include "reciprocal.h"
-#include "virtual_memory.hpp"
+#include "virtual_memory.h"

 namespace randomx {
 	/*
@ -618,7 +618,7 @@ namespace randomx {
 	}

 	void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
-		uint64_t divisor = instr.getImm32();
+		const uint32_t divisor = instr.getImm32();
 		if (!isZeroOrPowerOf2(divisor)) {
 			registerUsage[instr.dst] = i;
 			emit(MOV_RAX_I);
--- a/src/RandomX/src/randomx.cpp
+++ b/src/RandomX/src/randomx.cpp
@ -36,7 +36,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "cpu.hpp"
 #include <cassert>
 #include <limits>
+
+#if defined(__SSE__) || defined(__SSE2__) || (defined(_M_IX86_FP) && (_M_IX86_FP > 0))
+#define USE_CSR_INTRINSICS
+#include <xmmintrin.h>
+#else
 #include <cfenv>
+#endif

 extern "C" {

@ -356,8 +362,14 @@ extern "C" {
 		assert(machine != nullptr);
 		assert(inputSize == 0 || input != nullptr);
 		assert(output != nullptr);
+
+#ifdef USE_CSR_INTRINSICS
+		const unsigned int fpstate = _mm_getcsr();
+#else
 		fenv_t fpstate;
 		fegetenv(&fpstate);
+#endif
+
 		alignas(16) uint64_t tempHash[8];
 		int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
 		assert(blakeResult == 0);
@ -370,7 +382,12 @@ extern "C" {
 		}
 		machine->run(&tempHash);
 		machine->getFinalResult(output, RANDOMX_HASH_SIZE);
+
+#ifdef USE_CSR_INTRINSICS
+		_mm_setcsr(fpstate);
+#else
 		fesetenv(&fpstate);
+#endif
 	}

 	void randomx_calculate_hash_first(randomx_vm* machine, const void* input, size_t inputSize) {
@ -400,4 +417,15 @@ extern "C" {
 		machine->run(machine->tempHash);
 		machine->getFinalResult(output, RANDOMX_HASH_SIZE);
 	}
+
+	void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out) {
+		assert(inputSize == 0 || input != nullptr);
+		assert(hash_in != nullptr);
+		assert(com_out != nullptr);
+		blake2b_state state;
+		blake2b_init(&state, RANDOMX_HASH_SIZE);
+		blake2b_update(&state, input, inputSize);
+		blake2b_update(&state, hash_in, RANDOMX_HASH_SIZE);
+		blake2b_final(&state, com_out, RANDOMX_HASH_SIZE);
+	}
 }
--- a/src/RandomX/src/randomx.h
+++ b/src/RandomX/src/randomx.h
@ -260,6 +260,17 @@ RANDOMX_EXPORT void randomx_calculate_hash_first(randomx_vm* machine, const void
 RANDOMX_EXPORT void randomx_calculate_hash_next(randomx_vm* machine, const void* nextInput, size_t nextInputSize, void* output);
 RANDOMX_EXPORT void randomx_calculate_hash_last(randomx_vm* machine, void* output);

+/**
+ * Calculate a RandomX commitment from a RandomX hash and its input.
+ *
+ * @param input is a pointer to memory that was hashed. Must not be NULL.
+ * @param inputSize is the number of bytes in the input.
+ * @param hash_in is the output from randomx_calculate_hash* (RANDOMX_HASH_SIZE bytes).
+ * @param com_out is a pointer to memory where the commitment will be stored. Must not
+ *        be NULL and at least RANDOMX_HASH_SIZE bytes must be available for writing.
+*/
+RANDOMX_EXPORT void randomx_calculate_commitment(const void* input, size_t inputSize, const void* hash_in, void* com_out);
+
 #if defined(__cplusplus)
 }
 #endif
--- a/src/RandomX/src/reciprocal.c
+++ b/src/RandomX/src/reciprocal.c
@ -44,36 +44,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 	ret

 */
-uint64_t randomx_reciprocal(uint64_t divisor) {
+uint64_t randomx_reciprocal(uint32_t divisor) {

 	assert(divisor != 0);

 	const uint64_t p2exp63 = 1ULL << 63;
+	const uint64_t q = p2exp63 / divisor;
+	const uint64_t r = p2exp63 % divisor;
+
+#ifdef __GNUC__
+	const uint32_t shift = 64 - __builtin_clzll(divisor);
+#else
+	uint32_t shift = 32;
+	for (uint32_t k = 1U << 31; (k & divisor) == 0; k >>= 1)
+		--shift;
+#endif

-	uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor;
-
-	unsigned bsr = 0; //highest set bit in divisor
-
-	for (uint64_t bit = divisor; bit > 0; bit >>= 1)
-		bsr++;
-
-	for (unsigned shift = 0; shift < bsr; shift++) {
-		if (remainder >= divisor - remainder) {
-			quotient = quotient * 2 + 1;
-			remainder = remainder * 2 - divisor;
-		}
-		else {
-			quotient = quotient * 2;
-			remainder = remainder * 2;
-		}
-	}
-
-	return quotient;
+	return (q << shift) + ((r << shift) / divisor);
 }

 #if !RANDOMX_HAVE_FAST_RECIPROCAL

-uint64_t randomx_reciprocal_fast(uint64_t divisor) {
+uint64_t randomx_reciprocal_fast(uint32_t divisor) {
 	return randomx_reciprocal(divisor);
 }

--- a/src/RandomX/src/reciprocal.h
+++ b/src/RandomX/src/reciprocal.h
@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 extern "C" {
 #endif

-uint64_t randomx_reciprocal(uint64_t);
-uint64_t randomx_reciprocal_fast(uint64_t);
+uint64_t randomx_reciprocal(uint32_t);
+uint64_t randomx_reciprocal_fast(uint32_t);

 #if defined(__cplusplus)
 }
--- a/src/RandomX/src/tests/benchmark.cpp
+++ b/src/RandomX/src/tests/benchmark.cpp
@ -96,6 +96,7 @@ void printUsage(const char* executable) {
 	std::cout << "  --avx2        use optimized Argon2 for AVX2 CPUs" << std::endl;
 	std::cout << "  --auto        select the best options for the current CPU" << std::endl;
 	std::cout << "  --noBatch     calculate hashes one by one (default: batch)" << std::endl;
+	std::cout << "  --commit      calculate commitments instead of hashes (default: hashes)" << std::endl;
 }

 struct MemoryException : public std::exception {
@ -113,7 +114,7 @@ struct DatasetAllocException : public MemoryException {

 using MineFunc = void(randomx_vm * vm, std::atomic<uint32_t> & atomicNonce, AtomicHash & result, uint32_t noncesCount, int thread, int cpuid);

-template<bool batch>
+template<bool batch, bool commit>
 void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result, uint32_t noncesCount, int thread, int cpuid = -1) {
 	if (cpuid >= 0) {
 		int rc = set_thread_affinity(cpuid);
@ -138,6 +139,9 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
 		}
 		store32(noncePtr, nonce);
 		(batch ? randomx_calculate_hash_next : randomx_calculate_hash)(vm, blockTemplate, sizeof(blockTemplate), &hash);
+		if (commit) {
+			randomx_calculate_commitment(blockTemplate, sizeof(blockTemplate), &hash, &hash);
+		}
 		result.xorWith(hash);
 		if (!batch) {
 			nonce = atomicNonce.fetch_add(1);
@ -146,7 +150,7 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
 }

 int main(int argc, char** argv) {
-	bool softAes, miningMode, verificationMode, help, largePages, jit, secure;
+	bool softAes, miningMode, verificationMode, help, largePages, jit, secure, commit;
 	bool ssse3, avx2, autoFlags, noBatch;
 	int noncesCount, threadCount, initThreadCount;
 	uint64_t threadAffinity;
@ -172,10 +176,11 @@ int main(int argc, char** argv) {
 	readOption("--avx2", argc, argv, avx2);
 	readOption("--auto", argc, argv, autoFlags);
 	readOption("--noBatch", argc, argv, noBatch);
+	readOption("--commit", argc, argv, commit);

 	store32(&seed, seedValue);

-	std::cout << "RandomX benchmark v1.1.11" << std::endl;
+	std::cout << "RandomX benchmark v1.2.1" << std::endl;

 	if (help) {
 		printUsage(argv[0]);
@ -280,11 +285,24 @@ int main(int argc, char** argv) {
 	MineFunc* func;

 	if (noBatch) {
-		func = &mine<false>;
+		if (commit) {
+			std::cout << " - hash commitments" << std::endl;
+			func = &mine<false, true>;
+		}
+		else {
+			func = &mine<false, false>;
+		}
 	}
 	else {
-		func = &mine<true>;
-		std::cout << " - batch mode" << std::endl;
+		if (commit) {
+			//TODO: support batch mode with commitments
+			std::cout << " - hash commitments" << std::endl;
+			func = &mine<false, true>;
+		}
+		else {
+			std::cout << " - batch mode" << std::endl;
+			func = &mine<true, false>;
+		}
 	}

 	std::cout << "Initializing";
@ -376,7 +394,7 @@ int main(int argc, char** argv) {
 			randomx_release_cache(cache);
 		std::cout << "Calculated result: ";
 		result.print(std::cout);
-		if (noncesCount == 1000 && seedValue == 0)
+		if (noncesCount == 1000 && seedValue == 0 && !commit)
 			std::cout << "Reference result:  10b649a3f15c7c7f88277812f2e74b337a0f20ce909af09199cccb960771cfa1" << std::endl;
 		if (!miningMode) {
 			std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
--- a/src/RandomX/src/tests/perf-simulation.cpp
+++ b/src/RandomX/src/tests/perf-simulation.cpp
@ -477,7 +477,7 @@ int analyze(randomx::Program& p) {
 		}

 		if (opcode < randomx::ceil_IMUL_RCP) {
-			uint64_t divisor = instr.getImm32();
+			const uint32_t divisor = instr.getImm32();
 			if (!randomx::isZeroOrPowerOf2(divisor)) {
 				instr.dst = instr.dst % randomx::RegistersCount;
 				instr.opcode |= DST_INT;
--- a/src/RandomX/src/tests/riscv64_zba.s
+++ b/src/RandomX/src/tests/riscv64_zba.s
@ -0,0 +1,9 @@
+/* RISC-V - test if the Zba extension is present */
+
+.text
+.global main
+
+main:
+    sh1add x6, x6, x7
+    li x10, 0
+    ret
--- a/src/RandomX/src/tests/riscv64_zbb.s
+++ b/src/RandomX/src/tests/riscv64_zbb.s
@ -0,0 +1,9 @@
+/* RISC-V - test if the Zbb extension is present */
+
+.text
+.global main
+
+main:
+    ror x6, x6, x7
+    li x10, 0
+    ret
--- a/src/RandomX/src/tests/tests.cpp
+++ b/src/RandomX/src/tests/tests.cpp
@ -34,6 +34,14 @@ void calcStringHash(const char(&key)[K], const char(&input)[H], void* output) {
 	randomx_calculate_hash(vm, input, H - 1, output);
 }

+template<size_t K, size_t H>
+void calcStringCommitment(const char(&key)[K], const char(&input)[H], void* output) {
+	initCache(key);
+	assert(vm != nullptr);
+	randomx_calculate_hash(vm, input, H - 1, output);
+	randomx_calculate_commitment(input, H - 1, output, output);
+}
+
 template<size_t K, size_t H>
 void calcHexHash(const char(&key)[K], const char(&hex)[H], void* output) {
 	initCache(key);
@ -1082,6 +1090,22 @@ int main() {
 		assert(rx_get_rounding_mode() == RoundToNearest);
 	});

+	if (RANDOMX_HAVE_COMPILER) {
+		randomx_destroy_vm(vm);
+		vm = nullptr;
+#ifdef RANDOMX_FORCE_SECURE
+		vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT | RANDOMX_FLAG_SECURE, cache, nullptr);
+#else
+		vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
+#endif
+	}
+
+	runTest("Commitment test", stringsEqual(RANDOMX_ARGON_SALT, "RandomX\x03"), []() {
+		char hash[RANDOMX_HASH_SIZE];
+		calcStringCommitment("test key 000", "This is a test", &hash);
+		assert(equalsHex(hash, "d53ccf348b75291b7be76f0a7ac8208bbced734b912f6fca60539ab6f86be919"));
+	});
+
 	randomx_destroy_vm(vm);
 	vm = nullptr;

--- a/src/RandomX/src/virtual_memory.cpp
+++ b/src/RandomX/src/virtual_memory.cpp
@ -26,28 +26,24 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include "virtual_memory.hpp"
-
-#include <stdexcept>
-
 #if defined(_WIN32) || defined(__CYGWIN__)
 #include <windows.h>
 #else
+#define _GNU_SOURCE	1	/* needed for MAP_ANONYMOUS on older platforms */
 #ifdef __APPLE__
 #include <mach/vm_statistics.h>
 #include <TargetConditionals.h>
 #include <AvailabilityMacros.h>
 # if TARGET_OS_OSX
-#  if TARGET_CPU_ARM64
-#   define USE_PTHREAD_JIT_WP    1
-#  else
-#   undef USE_PTHREAD_JIT_WP
-#  endif
+#  define USE_PTHREAD_JIT_WP	1
 #  include <pthread.h>
+#  include <sys/utsname.h>
+#  include <stdio.h>
 # endif
 #endif
 #include <sys/types.h>
 #include <sys/mman.h>
+#include <errno.h>
 #ifndef MAP_ANONYMOUS
 #define MAP_ANONYMOUS MAP_ANON
 #endif
@ -57,27 +53,50 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define PAGE_EXECUTE_READWRITE (PROT_READ | PROT_WRITE | PROT_EXEC)
 #endif

-#if defined(_WIN32) || defined(__CYGWIN__)
-std::string getErrorMessage(const char* function) {
-	LPSTR messageBuffer = nullptr;
-	size_t size = FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
-		NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&messageBuffer, 0, NULL);
-	std::string message(messageBuffer, size);
-	LocalFree(messageBuffer);
-	return std::string(function) + std::string(": ") + message;
+#include "virtual_memory.h"
+
+#if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
+	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
+static int MacOSchecked, MacOSver;
+/* This function is used implicitly by clang's __builtin_available() checker.
+ * When cross-compiling, the library containing this function doesn't exist,
+ * and linking will fail because the symbol is unresolved. The function here
+ * is a quick and dirty hack to get close enough to identify MacOSX 11.0.
+ */
+static int32_t __isOSVersionAtLeast(int32_t major, int32_t minor, int32_t subminor) {
+	if (!MacOSchecked) {
+		struct utsname ut;
+		int mmaj = 0, mmin = 0;
+		/* If uname/sscanf fail, MacOSver stays 0 (older than any real release); subminor is ignored. */
+		if (uname(&ut) == 0 && sscanf(ut.release, "%d.%d", &mmaj, &mmin) == 2 && mmaj > 9) {
+			/* The utsname release version is 9 greater than the canonical OS version */
+			MacOSver = ((mmaj - 9) << 8) | mmin;
+		}
+		MacOSchecked = 1;
+	}
+	return MacOSver >= ((major << 8) | minor);
+}
+#endif
+

-void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
+#if defined(_WIN32) || defined(__CYGWIN__)
+#define Fail(func)	do  {*errfunc = func; return GetLastError();} while(0)
+int setPrivilege(const char* pszPrivilege, BOOL bEnable, char **errfunc) {
 	HANDLE           hToken;
 	TOKEN_PRIVILEGES tp;
 	BOOL             status;
-	DWORD            error;
+	DWORD            error = 0;
+
+	*errfunc = NULL;

 	if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
-		throw std::runtime_error(getErrorMessage("OpenProcessToken"));
+		Fail("OpenProcessToken");

-	if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid))
-		throw std::runtime_error(getErrorMessage("LookupPrivilegeValue"));
+	if (!LookupPrivilegeValue(NULL, pszPrivilege, &tp.Privileges[0].Luid)) {
+		*errfunc = "LookupPrivilegeValue";
+		error = GetLastError();
+		goto out;
+	}

 	tp.PrivilegeCount = 1;

@ -89,20 +108,28 @@ void setPrivilege(const char* pszPrivilege, BOOL bEnable) {
 	status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);

 	error = GetLastError();
-	if (!status || (error != ERROR_SUCCESS))
-		throw std::runtime_error(getErrorMessage("AdjustTokenPrivileges"));
+	if (!status || (error != ERROR_SUCCESS)) {
+		*errfunc = "AdjustTokenPrivileges";
+		goto out;
+	}

-	if (!CloseHandle(hToken))
-		throw std::runtime_error(getErrorMessage("CloseHandle"));
+out:
+	if (!CloseHandle(hToken)) {
+		if (*errfunc == NULL) {
+			*errfunc = "CloseHandle";
+			error = GetLastError();
+		}
+	}
+	return error;
 }
+#else
+#define Fail(func)	do  {*errfunc = func; return errno;} while(0)
 #endif

-void* allocMemoryPages(std::size_t bytes) {
+void* allocMemoryPages(size_t bytes) {
 	void* mem;
 #if defined(_WIN32) || defined(__CYGWIN__)
-	mem = VirtualAlloc(nullptr, bytes, MEM_COMMIT, PAGE_READWRITE);
-	if (mem == nullptr)
-		throw std::runtime_error(getErrorMessage("allocMemoryPages - VirtualAlloc"));
+	mem = VirtualAlloc(NULL, bytes, MEM_COMMIT, PAGE_READWRITE);
 #else
 	#if defined(__NetBSD__)
 		#define RESERVED_FLAGS PROT_MPROTECT(PROT_EXEC)
@ -116,89 +143,95 @@ void* allocMemoryPages(std::size_t bytes) {
 		#define MEXTRA 0
 		#define PEXTRA	0
 	#endif
-	mem = mmap(nullptr, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
+	mem = mmap(NULL, bytes, PAGE_READWRITE | RESERVED_FLAGS | PEXTRA, MAP_ANONYMOUS | MAP_PRIVATE | MEXTRA, -1, 0);
 	if (mem == MAP_FAILED)
-		throw std::runtime_error("allocMemoryPages - mmap failed");
+		mem = NULL;
 #if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
 	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
 	if (__builtin_available(macOS 11.0, *)) {
-		pthread_jit_write_protect_np(false);
+		pthread_jit_write_protect_np(0);
 	}
 #endif
 #endif
 	return mem;
 }

-static inline void pageProtect(void* ptr, std::size_t bytes, int rules) {
+static inline int pageProtect(void* ptr, size_t bytes, int rules, char **errfunc) {
 #if defined(_WIN32) || defined(__CYGWIN__)
 	DWORD oldp;
 	if (!VirtualProtect(ptr, bytes, (DWORD)rules, &oldp)) {
-		throw std::runtime_error(getErrorMessage("VirtualProtect"));
+		Fail("VirtualProtect");
 	}
 #else
 	if (-1 == mprotect(ptr, bytes, rules))
-		throw std::runtime_error("mprotect failed");
+		Fail("mprotect");
 #endif
+	return 0;
 }

-void setPagesRW(void* ptr, std::size_t bytes) {
+void setPagesRW(void* ptr, size_t bytes) {
+	char *errfunc;
 #if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
 	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
 	if (__builtin_available(macOS 11.0, *)) {
-		pthread_jit_write_protect_np(false);
+		pthread_jit_write_protect_np(0);
 	} else {
-		pageProtect(ptr, bytes, PAGE_READWRITE);
+		pageProtect(ptr, bytes, PAGE_READWRITE, &errfunc);
 	}
 #else
-	pageProtect(ptr, bytes, PAGE_READWRITE);
+	pageProtect(ptr, bytes, PAGE_READWRITE, &errfunc);
 #endif
 }

-void setPagesRX(void* ptr, std::size_t bytes) {
+void setPagesRX(void* ptr, size_t bytes) {
+	char *errfunc;
 #if defined(USE_PTHREAD_JIT_WP) && defined(MAC_OS_VERSION_11_0) \
 	&& MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_VERSION_11_0
 	if (__builtin_available(macOS 11.0, *)) {
-		pthread_jit_write_protect_np(true);
+		pthread_jit_write_protect_np(1);
+		__builtin___clear_cache((char*)ptr, ((char*)ptr) + bytes);
 	} else {
-		pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
+		pageProtect(ptr, bytes, PAGE_EXECUTE_READ, &errfunc);
 	}
 #else
-	pageProtect(ptr, bytes, PAGE_EXECUTE_READ);
+	pageProtect(ptr, bytes, PAGE_EXECUTE_READ, &errfunc);
 #endif
 }

-void setPagesRWX(void* ptr, std::size_t bytes) {
-	pageProtect(ptr, bytes, PAGE_EXECUTE_READWRITE);
+void setPagesRWX(void* ptr, size_t bytes) {
+	char *errfunc;
+	pageProtect(ptr, bytes, PAGE_EXECUTE_READWRITE, &errfunc);
 }

-void* allocLargePagesMemory(std::size_t bytes) {
+void* allocLargePagesMemory(size_t bytes) {
 	void* mem;
+	char *errfunc;
 #if defined(_WIN32) || defined(__CYGWIN__)
-	setPrivilege("SeLockMemoryPrivilege", 1);
-	auto pageMinimum = GetLargePageMinimum();
-	if (pageMinimum > 0)
-		mem = VirtualAlloc(NULL, alignSize(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
-	else
-		throw std::runtime_error("allocLargePagesMemory - Large pages are not supported");
-	if (mem == nullptr)
-		throw std::runtime_error(getErrorMessage("allocLargePagesMemory - VirtualAlloc"));
+	if (setPrivilege("SeLockMemoryPrivilege", 1, &errfunc))
+		return NULL;
+	size_t pageMinimum = GetLargePageMinimum();
+	if (!pageMinimum) {
+		errfunc = "No large pages";
+		return NULL;
+	}
+	mem = VirtualAlloc(NULL, alignSize(bytes, pageMinimum), MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
 #else
 #ifdef __APPLE__
-	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
+	mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
 #elif defined(__FreeBSD__)
-	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
+	mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER, -1, 0);
 #elif defined(__OpenBSD__) || defined(__NetBSD__)
 	mem = MAP_FAILED; // OpenBSD does not support huge pages
 #else
-	mem = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
+	mem = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1, 0);
 #endif
 	if (mem == MAP_FAILED)
-		throw std::runtime_error("allocLargePagesMemory - mmap failed");
+		mem = NULL;
 #endif
 	return mem;
 }

-void freePagedMemory(void* ptr, std::size_t bytes) {
+void freePagedMemory(void* ptr, size_t bytes) {
 #if defined(_WIN32) || defined(__CYGWIN__)
 	VirtualFree(ptr, 0, MEM_RELEASE);
 #else
--- a/src/RandomX/src/virtual_memory.hpp
+++ b/src/RandomX/src/virtual_memory.hpp
@ -28,15 +28,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #pragma once

-#include <cstddef>
+#ifdef __cplusplus
+extern "C" {
+#endif

-constexpr std::size_t alignSize(std::size_t pos, std::size_t align) {
-	return ((pos - 1) / align + 1) * align;
-}
+#include <stddef.h>
+
+#define alignSize(pos, align) ((((pos) - 1) / (align) + 1) * (align)) /* round pos (>= 1) up to a multiple of align; align is evaluated twice */

-void* allocMemoryPages(std::size_t);
-void setPagesRW(void*, std::size_t);
-void setPagesRX(void*, std::size_t);
-void setPagesRWX(void*, std::size_t);
-void* allocLargePagesMemory(std::size_t);
-void freePagedMemory(void*, std::size_t);
+void* allocMemoryPages(size_t);
+void setPagesRW(void*, size_t);
+void setPagesRX(void*, size_t);
+void setPagesRWX(void*, size_t);
+void* allocLargePagesMemory(size_t);
+void freePagedMemory(void*, size_t);
+
+#ifdef __cplusplus
+}
+#endif
--- a/src/RandomX/vcxproj/randomx-dll.vcxproj
+++ b/src/RandomX/vcxproj/randomx-dll.vcxproj
@ -43,7 +43,7 @@
    <ClInclude Include="..\src\superscalar.hpp" />
    <ClInclude Include="..\src\superscalar_program.hpp" />
    <ClInclude Include="..\src\virtual_machine.hpp" />
-    <ClInclude Include="..\src\virtual_memory.hpp" />
+    <ClInclude Include="..\src\virtual_memory.h" />
    <ClInclude Include="..\src\vm_compiled.hpp" />
    <ClInclude Include="..\src\vm_compiled_light.hpp" />
    <ClInclude Include="..\src\vm_interpreted.hpp" />
@ -74,7 +74,7 @@
    <ClCompile Include="..\src\soft_aes.cpp" />
    <ClCompile Include="..\src\superscalar.cpp" />
    <ClCompile Include="..\src\virtual_machine.cpp" />
-    <ClCompile Include="..\src\virtual_memory.cpp" />
+    <ClCompile Include="..\src\virtual_memory.c" />
    <ClCompile Include="..\src\vm_compiled.cpp" />
    <ClCompile Include="..\src\vm_compiled_light.cpp" />
    <ClCompile Include="..\src\vm_interpreted.cpp" />
--- a/src/RandomX/vcxproj/randomx-dll.vcxproj.filters
+++ b/src/RandomX/vcxproj/randomx-dll.vcxproj.filters
@ -87,7 +87,7 @@
    <ClInclude Include="..\src\virtual_machine.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
-    <ClInclude Include="..\src\virtual_memory.hpp">
+    <ClInclude Include="..\src\virtual_memory.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="..\src\vm_compiled.hpp">
@ -151,7 +151,7 @@
    <ClCompile Include="..\src\virtual_machine.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
-    <ClCompile Include="..\src\virtual_memory.cpp">
+    <ClCompile Include="..\src\virtual_memory.c">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\vm_compiled.cpp">
--- a/src/RandomX/vcxproj/randomx.vcxproj
+++ b/src/RandomX/vcxproj/randomx.vcxproj
@ -156,7 +156,7 @@ SET ERRORLEVEL = 0</Command>
    <ClCompile Include="..\src\reciprocal.c" />
    <ClCompile Include="..\src\soft_aes.cpp" />
    <ClCompile Include="..\src\virtual_machine.cpp" />
-    <ClCompile Include="..\src\virtual_memory.cpp" />
+    <ClCompile Include="..\src\virtual_memory.c" />
  </ItemGroup>
  <ItemGroup>
    <MASM Include="..\src\jit_compiler_x86_static.asm" />
@ -198,7 +198,7 @@ SET ERRORLEVEL = 0</Command>
    <ClInclude Include="..\src\soft_aes.h" />
    <ClInclude Include="..\src\superscalar_program.hpp" />
    <ClInclude Include="..\src\virtual_machine.hpp" />
-    <ClInclude Include="..\src\virtual_memory.hpp" />
+    <ClInclude Include="..\src\virtual_memory.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
--- a/src/RandomX/vcxproj/randomx.vcxproj.filters
+++ b/src/RandomX/vcxproj/randomx.vcxproj.filters
@ -72,7 +72,7 @@
    <ClCompile Include="..\src\vm_interpreted.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
-    <ClCompile Include="..\src\virtual_memory.cpp">
+    <ClCompile Include="..\src\virtual_memory.c">
      <Filter>Source Files</Filter>
    </ClCompile>
    <ClCompile Include="..\src\blake2_generator.cpp">
@ -164,7 +164,7 @@
    <ClInclude Include="..\src\virtual_machine.hpp">
      <Filter>Header Files</Filter>
    </ClInclude>
-    <ClInclude Include="..\src\virtual_memory.hpp">
+    <ClInclude Include="..\src\virtual_memory.h">
      <Filter>Header Files</Filter>
    </ClInclude>
    <ClInclude Include="..\src\superscalar.hpp">