@ -31,7 +31,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# include "superscalar.hpp"
# include "program.hpp"
# include "reciprocal.h"
# include "virtual_memory.h"
# include "virtual_memory.hpp "
namespace ARMV8A {
@ -130,8 +130,8 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
// and w16, w10, ScratchpadL3Mask64
emit32 ( 0x121A0000 | 16 | ( 10 < < 5 ) | ( ( Log2 ( RANDOMX_SCRATCHPAD_L3 ) - 7 ) < < 10 ) , code , codePos ) ;
// and w17, w20 , ScratchpadL3Mask64
emit32 ( 0x121A0000 | 17 | ( 20 < < 5 ) | ( ( Log2 ( RANDOMX_SCRATCHPAD_L3 ) - 7 ) < < 10 ) , code , codePos ) ;
// and w17, w18 , ScratchpadL3Mask64
emit32 ( 0x121A0000 | 17 | ( 18 < < 5 ) | ( ( Log2 ( RANDOMX_SCRATCHPAD_L3 ) - 7 ) < < 10 ) , code , codePos ) ;
codePos = PrologueSize ;
literalPos = ImulRcpLiteralsEnd ;
@ -149,16 +149,16 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
}
// Update spMix2
// eor w20 , config.readReg2, config.readReg3
emit32 ( ARMV8A : : EOR32 | 20 | ( IntRegMap [ config . readReg2 ] < < 5 ) | ( IntRegMap [ config . readReg3 ] < < 16 ) , code , codePos ) ;
// eor w18 , config.readReg2, config.readReg3
emit32 ( ARMV8A : : EOR32 | 18 | ( IntRegMap [ config . readReg2 ] < < 5 ) | ( IntRegMap [ config . readReg3 ] < < 16 ) , code , codePos ) ;
// Jump back to the main loop
const uint32_t offset = ( ( ( uint8_t * ) randomx_program_aarch64_vm_instructions_end ) - ( ( uint8_t * ) randomx_program_aarch64 ) ) - codePos ;
emit32 ( ARMV8A : : B | ( offset / 4 ) , code , codePos ) ;
// and w20, w20 , CacheLineAlignMask
// and w18, w18 , CacheLineAlignMask
codePos = ( ( ( uint8_t * ) randomx_program_aarch64_cacheline_align_mask1 ) - ( ( uint8_t * ) randomx_program_aarch64 ) ) ;
emit32 ( 0x121A0000 | 20 | ( 20 < < 5 ) | ( ( Log2 ( RANDOMX_DATASET_BASE_SIZE ) - 7 ) < < 10 ) , code , codePos ) ;
emit32 ( 0x121A0000 | 18 | ( 18 < < 5 ) | ( ( Log2 ( RANDOMX_DATASET_BASE_SIZE ) - 7 ) < < 10 ) , code , codePos ) ;
// and w10, w10, CacheLineAlignMask
codePos = ( ( ( uint8_t * ) randomx_program_aarch64_cacheline_align_mask2 ) - ( ( uint8_t * ) randomx_program_aarch64 ) ) ;
@ -181,8 +181,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
// and w16, w10, ScratchpadL3Mask64
emit32 ( 0x121A0000 | 16 | ( 10 < < 5 ) | ( ( Log2 ( RANDOMX_SCRATCHPAD_L3 ) - 7 ) < < 10 ) , code , codePos ) ;
// and w17, w20 , ScratchpadL3Mask64
emit32 ( 0x121A0000 | 17 | ( 20 < < 5 ) | ( ( Log2 ( RANDOMX_SCRATCHPAD_L3 ) - 7 ) < < 10 ) , code , codePos ) ;
// and w17, w18 , ScratchpadL3Mask64
emit32 ( 0x121A0000 | 17 | ( 18 < < 5 ) | ( ( Log2 ( RANDOMX_SCRATCHPAD_L3 ) - 7 ) < < 10 ) , code , codePos ) ;
codePos = PrologueSize ;
literalPos = ImulRcpLiteralsEnd ;
@ -200,8 +200,8 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
}
// Update spMix2
// eor w20 , config.readReg2, config.readReg3
emit32 ( ARMV8A : : EOR32 | 20 | ( IntRegMap [ config . readReg2 ] < < 5 ) | ( IntRegMap [ config . readReg3 ] < < 16 ) , code , codePos ) ;
// eor w18 , config.readReg2, config.readReg3
emit32 ( ARMV8A : : EOR32 | 18 | ( IntRegMap [ config . readReg2 ] < < 5 ) | ( IntRegMap [ config . readReg3 ] < < 16 ) , code , codePos ) ;
// Jump back to the main loop
const uint32_t offset = ( ( ( uint8_t * ) randomx_program_aarch64_vm_instructions_end_light ) - ( ( uint8_t * ) randomx_program_aarch64 ) ) - codePos ;
@ -434,7 +434,7 @@ void JitCompilerA64::emitAddImmediate(uint32_t dst, uint32_t src, uint32_t imm,
}
else
{
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
emitMovImmediate ( tmp_reg , imm , code , k ) ;
// add dst, src, tmp_reg
@ -483,7 +483,7 @@ void JitCompilerA64::emitMemLoadFP(uint32_t src, Instruction& instr, uint8_t* co
uint32_t k = codePos ;
uint32_t imm = instr . getImm32 ( ) ;
constexpr uint32_t tmp_reg = 19 ;
constexpr uint32_t tmp_reg = 18 ;
imm & = instr . getModMem ( ) ? ( RANDOMX_SCRATCHPAD_L1 - 1 ) : ( RANDOMX_SCRATCHPAD_L2 - 1 ) ;
emitAddImmediate ( tmp_reg , src , imm , code , k ) ;
@ -537,7 +537,7 @@ void JitCompilerA64::h_IADD_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap [ instr . src ] ;
const uint32_t dst = IntRegMap [ instr . dst ] ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
emitMemLoad < tmp_reg > ( dst , src , instr , code , k ) ;
// add dst, dst, tmp_reg
@ -575,7 +575,7 @@ void JitCompilerA64::h_ISUB_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap [ instr . src ] ;
const uint32_t dst = IntRegMap [ instr . dst ] ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
emitMemLoad < tmp_reg > ( dst , src , instr , code , k ) ;
// sub dst, dst, tmp_reg
@ -594,7 +594,7 @@ void JitCompilerA64::h_IMUL_R(Instruction& instr, uint32_t& codePos)
if ( src = = dst )
{
src = 20 ;
src = 18 ;
emitMovImmediate ( src , instr . getImm32 ( ) , code , k ) ;
}
@ -612,7 +612,7 @@ void JitCompilerA64::h_IMUL_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap [ instr . src ] ;
const uint32_t dst = IntRegMap [ instr . dst ] ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
emitMemLoad < tmp_reg > ( dst , src , instr , code , k ) ;
// mul dst, dst, tmp_reg
@ -643,7 +643,7 @@ void JitCompilerA64::h_IMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap [ instr . src ] ;
const uint32_t dst = IntRegMap [ instr . dst ] ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
emitMemLoad < tmp_reg > ( dst , src , instr , code , k ) ;
// umulh dst, dst, tmp_reg
@ -674,7 +674,7 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap [ instr . src ] ;
const uint32_t dst = IntRegMap [ instr . dst ] ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
emitMemLoad < tmp_reg > ( dst , src , instr , code , k ) ;
// smulh dst, dst, tmp_reg
@ -686,24 +686,34 @@ void JitCompilerA64::h_ISMULH_M(Instruction& instr, uint32_t& codePos)
void JitCompilerA64 : : h_IMUL_RCP ( Instruction & instr , uint32_t & codePos )
{
const uint32 _t divisor = instr . getImm32 ( ) ;
const uint64 _t divisor = instr . getImm32 ( ) ;
if ( isZeroOrPowerOf2 ( divisor ) )
return ;
uint32_t k = codePos ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
const uint32_t dst = IntRegMap [ instr . dst ] ;
constexpr uint64_t N = 1ULL < < 63 ;
const uint64_t q = N / divisor ;
const uint64_t r = N % divisor ;
# ifdef __GNUC__
const uint64_t shift = 64 - __builtin_clzll ( divisor ) ;
# else
uint64_t shift = 32 ;
for ( uint64_t k = 1U < < 31 ; ( k & divisor ) = = 0 ; k > > = 1 )
- - shift ;
# endif
const uint32_t literal_id = ( ImulRcpLiteralsEnd - literalPos ) / sizeof ( uint64_t ) ;
literalPos - = sizeof ( uint64_t ) ;
const uint64_t reciprocal = randomx_reciprocal_fast ( divisor ) ;
memcpy ( code + literalPos , & reciprocal , sizeof ( reciprocal ) ) ;
literalPos - = sizeof ( uint64_t ) ;
* ( uint64_t * ) ( code + literalPos ) = ( q < < shift ) + ( ( r < < shift ) / divisor ) ;
if ( literal_id < 12 )
if ( literal_id < 13 )
{
static constexpr uint32_t literal_regs [ 12 ] = { 30 < < 16 , 29 < < 16 , 28 < < 16 , 27 < < 16 , 26 < < 16 , 25 < < 16 , 24 < < 16 , 23 < < 16 , 22 < < 16 , 21 < < 16 , 11 < < 16 , 0 } ;
static constexpr uint32_t literal_regs [ 13 ] = { 30 < < 16 , 29 < < 16 , 28 < < 16 , 27 < < 16 , 26 < < 16 , 25 < < 16 , 24 < < 16 , 23 < < 16 , 22 < < 16 , 21 < < 16 , 20 < < 16 , 11 < < 16 , 0 } ;
// mul dst, dst, literal_reg
emit32 ( ARMV8A : : MUL | dst | ( dst < < 5 ) | literal_regs [ literal_id ] , code , k ) ;
@ -741,7 +751,7 @@ void JitCompilerA64::h_IXOR_R(Instruction& instr, uint32_t& codePos)
if ( src = = dst )
{
src = 20 ;
src = 18 ;
emitMovImmediate ( src , instr . getImm32 ( ) , code , k ) ;
}
@ -759,7 +769,7 @@ void JitCompilerA64::h_IXOR_M(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap [ instr . src ] ;
const uint32_t dst = IntRegMap [ instr . dst ] ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
emitMemLoad < tmp_reg > ( dst , src , instr , code , k ) ;
// eor dst, dst, tmp_reg
@ -797,7 +807,7 @@ void JitCompilerA64::h_IROL_R(Instruction& instr, uint32_t& codePos)
if ( src ! = dst )
{
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
// sub tmp_reg, xzr, src
emit32 ( ARMV8A : : SUB | tmp_reg | ( 31 < < 5 ) | ( src < < 16 ) , code , k ) ;
@ -825,7 +835,7 @@ void JitCompilerA64::h_ISWAP_R(Instruction& instr, uint32_t& codePos)
uint32_t k = codePos ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
emit32 ( ARMV8A : : MOV_REG | tmp_reg | ( dst < < 16 ) , code , k ) ;
emit32 ( ARMV8A : : MOV_REG | dst | ( src < < 16 ) , code , k ) ;
emit32 ( ARMV8A : : MOV_REG | src | ( tmp_reg < < 16 ) , code , k ) ;
@ -974,7 +984,7 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap [ instr . src ] ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
constexpr uint32_t fpcr_tmp_reg = 8 ;
// ror tmp_reg, src, imm
@ -998,7 +1008,7 @@ void JitCompilerA64::h_ISTORE(Instruction& instr, uint32_t& codePos)
const uint32_t src = IntRegMap [ instr . src ] ;
const uint32_t dst = IntRegMap [ instr . dst ] ;
constexpr uint32_t tmp_reg = 20 ;
constexpr uint32_t tmp_reg = 18 ;
uint32_t imm = instr . getImm32 ( ) ;