![support@xmrig.com](/assets/img/avatar_default.png)
15 changed files with 3748 additions and 60 deletions
@ -0,0 +1,146 @@ |
|||
/* XMRig
|
|||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com> |
|||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org> |
|||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
|||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
|||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com> |
|||
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
|||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
|||
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
|
|||
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
|||
* |
|||
* This program is free software: you can redistribute it and/or modify |
|||
* it under the terms of the GNU General Public License as published by |
|||
* the Free Software Foundation, either version 3 of the License, or |
|||
* (at your option) any later version. |
|||
* |
|||
* This program is distributed in the hope that it will be useful, |
|||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
* GNU General Public License for more details. |
|||
* |
|||
* You should have received a copy of the GNU General Public License |
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
*/ |
|||
|
|||
#include <string.h> |
|||
|
|||
#include "algo/cryptonight/cryptonight_monero.h" |
|||
#include "crypto/asm/CryptonightR_template.h" |
|||
#include "persistent_memory.h" |
|||
|
|||
|
|||
/*
 * Append the bytes located between two code labels to the output buffer.
 *
 * p  - in/out write cursor; advanced by the number of bytes copied.
 * p1 - address of the first byte to copy.
 * p2 - address one past the last byte to copy.
 *
 * When p2 does not lie above p1 nothing is copied and the cursor is unchanged.
 */
static inline void add_code(uint8_t **p, void (*p1)(), void (*p2)())
{
    const uint8_t *first = (const uint8_t *) p1;
    const uint8_t *last  = (const uint8_t *) p2;
    const ptrdiff_t length = last - first;

    if (length <= 0) {
        return;
    }

    memcpy(*p, first, length);
    *p += length;
}
|||
|
|||
|
|||
static inline void add_random_math(uint8_t **p, const struct V4_Instruction* code, int code_size, const void_func* instructions, const void_func* instructions_mov, bool is_64_bit, enum Assembly ASM) |
|||
{ |
|||
uint32_t prev_rot_src = (uint32_t)(-1); |
|||
|
|||
for (int i = 0;; ++i) { |
|||
const struct V4_Instruction inst = code[i]; |
|||
if (inst.opcode == RET) { |
|||
break; |
|||
} |
|||
|
|||
uint8_t opcode = (inst.opcode == MUL) ? inst.opcode : (inst.opcode + 2); |
|||
uint8_t dst_index = inst.dst_index; |
|||
uint8_t src_index = inst.src_index; |
|||
|
|||
const uint32_t a = inst.dst_index; |
|||
const uint32_t b = inst.src_index; |
|||
const uint8_t c = opcode | (dst_index << V4_OPCODE_BITS) | (((src_index == 8) ? dst_index : src_index) << (V4_OPCODE_BITS + V4_DST_INDEX_BITS)); |
|||
|
|||
switch (inst.opcode) { |
|||
case ROR: |
|||
case ROL: |
|||
if (b != prev_rot_src) { |
|||
prev_rot_src = b; |
|||
add_code(p, instructions_mov[c], instructions_mov[c + 1]); |
|||
} |
|||
break; |
|||
} |
|||
|
|||
if (a == prev_rot_src) { |
|||
prev_rot_src = (uint32_t)(-1); |
|||
} |
|||
|
|||
void_func begin = instructions[c]; |
|||
|
|||
if ((ASM = ASM_BULLDOZER) && (inst.opcode == MUL) && !is_64_bit) { |
|||
// AMD Bulldozer has latency 4 for 32-bit IMUL and 6 for 64-bit IMUL
|
|||
// Always use 32-bit IMUL for AMD Bulldozer in 32-bit mode - skip prefix 0x48 and change 0x49 to 0x41
|
|||
uint8_t* prefix = (uint8_t*) begin; |
|||
|
|||
if (*prefix == 0x49) { |
|||
**p = 0x41; |
|||
*p += 1; |
|||
} |
|||
|
|||
begin = (void_func)(prefix + 1); |
|||
} |
|||
|
|||
add_code(p, begin, instructions[c + 1]); |
|||
|
|||
if (inst.opcode == ADD) { |
|||
*(uint32_t*)(*p - sizeof(uint32_t) - (is_64_bit ? 3 : 0)) = inst.C; |
|||
if (is_64_bit) { |
|||
prev_rot_src = (uint32_t)(-1); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
|
|||
void v4_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM) |
|||
{ |
|||
uint8_t* p0 = machine_code; |
|||
uint8_t* p = p0; |
|||
|
|||
add_code(&p, CryptonightR_template_part1, CryptonightR_template_part2); |
|||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM); |
|||
add_code(&p, CryptonightR_template_part2, CryptonightR_template_part3); |
|||
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_mainloop) - ((const uint8_t*)CryptonightR_template_part1)) - (p - p0)); |
|||
add_code(&p, CryptonightR_template_part3, CryptonightR_template_end); |
|||
|
|||
flush_instruction_cache(machine_code, p - p0); |
|||
} |
|||
|
|||
|
|||
void v4_compile_code_double(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM) |
|||
{ |
|||
uint8_t* p0 = (uint8_t*) machine_code; |
|||
uint8_t* p = p0; |
|||
|
|||
add_code(&p, CryptonightR_template_double_part1, CryptonightR_template_double_part2); |
|||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM); |
|||
add_code(&p, CryptonightR_template_double_part2, CryptonightR_template_double_part3); |
|||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM); |
|||
add_code(&p, CryptonightR_template_double_part3, CryptonightR_template_double_part4); |
|||
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_template_double_mainloop) - ((const uint8_t*)CryptonightR_template_double_part1)) - (p - p0)); |
|||
add_code(&p, CryptonightR_template_double_part4, CryptonightR_template_double_end); |
|||
|
|||
flush_instruction_cache(machine_code, p - p0); |
|||
} |
|||
|
|||
|
|||
void v4_soft_aes_compile_code(const struct V4_Instruction* code, int code_size, void* machine_code, enum Assembly ASM) |
|||
{ |
|||
uint8_t* p0 = machine_code; |
|||
uint8_t* p = p0; |
|||
|
|||
add_code(&p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2); |
|||
add_random_math(&p, code, code_size, instructions, instructions_mov, false, ASM); |
|||
add_code(&p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3); |
|||
*(int*)(p - 4) = (int)((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0)); |
|||
add_code(&p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end); |
|||
|
|||
flush_instruction_cache(machine_code, p - p0); |
|||
} |
@ -0,0 +1,279 @@ |
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1) |
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop) |
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2) |
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3) |
|||
PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end) |
|||
|
|||
ALIGN(64) |
|||
FN_PREFIX(CryptonightR_soft_aes_template_part1): |
|||
mov QWORD PTR [rsp+8], rcx |
|||
push rbx |
|||
push rbp |
|||
push rsi |
|||
push rdi |
|||
push r12 |
|||
push r13 |
|||
push r14 |
|||
push r15 |
|||
sub rsp, 232 |
|||
|
|||
mov eax, [rcx+96] |
|||
mov ebx, [rcx+100] |
|||
mov esi, [rcx+104] |
|||
mov edx, [rcx+108] |
|||
mov [rsp+144], eax |
|||
mov [rsp+148], ebx |
|||
mov [rsp+152], esi |
|||
mov [rsp+156], edx |
|||
|
|||
mov rax, QWORD PTR [rcx+48] |
|||
mov r10, rcx |
|||
xor rax, QWORD PTR [rcx+16] |
|||
mov r8, QWORD PTR [rcx+32] |
|||
xor r8, QWORD PTR [rcx] |
|||
mov r9, QWORD PTR [rcx+40] |
|||
xor r9, QWORD PTR [rcx+8] |
|||
movq xmm4, rax |
|||
mov rdx, QWORD PTR [rcx+56] |
|||
xor rdx, QWORD PTR [rcx+24] |
|||
mov r11, QWORD PTR [rcx+224] |
|||
mov rcx, QWORD PTR [rcx+88] |
|||
xor rcx, QWORD PTR [r10+72] |
|||
mov rax, QWORD PTR [r10+80] |
|||
movq xmm0, rdx |
|||
xor rax, QWORD PTR [r10+64] |
|||
|
|||
movaps XMMWORD PTR [rsp+16], xmm6 |
|||
movaps XMMWORD PTR [rsp+32], xmm7 |
|||
movaps XMMWORD PTR [rsp+48], xmm8 |
|||
movaps XMMWORD PTR [rsp+64], xmm9 |
|||
movaps XMMWORD PTR [rsp+80], xmm10 |
|||
movaps XMMWORD PTR [rsp+96], xmm11 |
|||
movaps XMMWORD PTR [rsp+112], xmm12 |
|||
movaps XMMWORD PTR [rsp+128], xmm13 |
|||
|
|||
movq xmm5, rax |
|||
|
|||
mov rax, r8 |
|||
punpcklqdq xmm4, xmm0 |
|||
and eax, 2097136 |
|||
movq xmm10, QWORD PTR [r10+96] |
|||
movq xmm0, rcx |
|||
mov rcx, QWORD PTR [r10+104] |
|||
xorps xmm9, xmm9 |
|||
mov QWORD PTR [rsp+328], rax |
|||
movq xmm12, r11 |
|||
mov QWORD PTR [rsp+320], r9 |
|||
punpcklqdq xmm5, xmm0 |
|||
movq xmm13, rcx |
|||
mov r12d, 524288 |
|||
|
|||
ALIGN(64) |
|||
FN_PREFIX(CryptonightR_soft_aes_template_mainloop): |
|||
movd xmm11, r12d |
|||
mov r12, QWORD PTR [r10+272] |
|||
lea r13, QWORD PTR [rax+r11] |
|||
mov esi, DWORD PTR [r13] |
|||
movq xmm0, r9 |
|||
mov r10d, DWORD PTR [r13+4] |
|||
movq xmm7, r8 |
|||
mov ebp, DWORD PTR [r13+12] |
|||
mov r14d, DWORD PTR [r13+8] |
|||
mov rdx, QWORD PTR [rsp+328] |
|||
movzx ecx, sil |
|||
shr esi, 8 |
|||
punpcklqdq xmm7, xmm0 |
|||
mov r15d, DWORD PTR [r12+rcx*4] |
|||
movzx ecx, r10b |
|||
shr r10d, 8 |
|||
mov edi, DWORD PTR [r12+rcx*4] |
|||
movzx ecx, r14b |
|||
shr r14d, 8 |
|||
mov ebx, DWORD PTR [r12+rcx*4] |
|||
movzx ecx, bpl |
|||
shr ebp, 8 |
|||
mov r9d, DWORD PTR [r12+rcx*4] |
|||
movzx ecx, r10b |
|||
shr r10d, 8 |
|||
xor r15d, DWORD PTR [r12+rcx*4+1024] |
|||
movzx ecx, r14b |
|||
shr r14d, 8 |
|||
mov eax, r14d |
|||
shr eax, 8 |
|||
xor edi, DWORD PTR [r12+rcx*4+1024] |
|||
add eax, 256 |
|||
movzx ecx, bpl |
|||
shr ebp, 8 |
|||
xor ebx, DWORD PTR [r12+rcx*4+1024] |
|||
movzx ecx, sil |
|||
shr esi, 8 |
|||
xor r9d, DWORD PTR [r12+rcx*4+1024] |
|||
add r12, 2048 |
|||
movzx ecx, r10b |
|||
shr r10d, 8 |
|||
add r10d, 256 |
|||
mov r11d, DWORD PTR [r12+rax*4] |
|||
xor r11d, DWORD PTR [r12+rcx*4] |
|||
xor r11d, r9d |
|||
movzx ecx, sil |
|||
mov r10d, DWORD PTR [r12+r10*4] |
|||
shr esi, 8 |
|||
add esi, 256 |
|||
xor r10d, DWORD PTR [r12+rcx*4] |
|||
movzx ecx, bpl |
|||
xor r10d, ebx |
|||
shr ebp, 8 |
|||
movd xmm1, r11d |
|||
add ebp, 256 |
|||
movq r11, xmm12 |
|||
mov r9d, DWORD PTR [r12+rcx*4] |
|||
xor r9d, DWORD PTR [r12+rsi*4] |
|||
mov eax, DWORD PTR [r12+rbp*4] |
|||
xor r9d, edi |
|||
movzx ecx, r14b |
|||
movd xmm0, r10d |
|||
movd xmm2, r9d |
|||
xor eax, DWORD PTR [r12+rcx*4] |
|||
mov rcx, rdx |
|||
xor eax, r15d |
|||
punpckldq xmm2, xmm1 |
|||
xor rcx, 16 |
|||
movd xmm6, eax |
|||
mov rax, rdx |
|||
punpckldq xmm6, xmm0 |
|||
xor rax, 32 |
|||
punpckldq xmm6, xmm2 |
|||
xor rdx, 48 |
|||
movdqu xmm2, XMMWORD PTR [rcx+r11] |
|||
pxor xmm6, xmm2 |
|||
pxor xmm6, xmm7 |
|||
paddq xmm2, xmm4 |
|||
movdqu xmm1, XMMWORD PTR [rax+r11] |
|||
movdqu xmm0, XMMWORD PTR [rdx+r11] |
|||
pxor xmm6, xmm1 |
|||
pxor xmm6, xmm0 |
|||
paddq xmm0, xmm5 |
|||
movdqu XMMWORD PTR [rcx+r11], xmm0 |
|||
movdqu XMMWORD PTR [rax+r11], xmm2 |
|||
movq rcx, xmm13 |
|||
paddq xmm1, xmm7 |
|||
movdqu XMMWORD PTR [rdx+r11], xmm1 |
|||
movq rdi, xmm6 |
|||
mov r10, rdi |
|||
and r10d, 2097136 |
|||
movdqa xmm0, xmm6 |
|||
pxor xmm0, xmm4 |
|||
movdqu XMMWORD PTR [r13], xmm0 |
|||
|
|||
mov ebx, [rsp+144] |
|||
mov ebp, [rsp+152] |
|||
add ebx, [rsp+148] |
|||
add ebp, [rsp+156] |
|||
shl rbp, 32 |
|||
or rbx, rbp |
|||
|
|||
xor rbx, QWORD PTR [r10+r11] |
|||
lea r14, QWORD PTR [r10+r11] |
|||
mov rbp, QWORD PTR [r14+8] |
|||
|
|||
mov [rsp+160], rbx |
|||
mov [rsp+168], rdi |
|||
mov [rsp+176], rbp |
|||
mov [rsp+184], r10 |
|||
mov r10, rsp |
|||
|
|||
mov ebx, [rsp+144] |
|||
mov esi, [rsp+148] |
|||
mov edi, [rsp+152] |
|||
mov ebp, [rsp+156] |
|||
|
|||
movd esp, xmm7 |
|||
movaps xmm0, xmm7 |
|||
psrldq xmm0, 8 |
|||
movd r15d, xmm0 |
|||
movd eax, xmm4 |
|||
movd edx, xmm5 |
|||
movaps xmm0, xmm5 |
|||
psrldq xmm0, 8 |
|||
movd r9d, xmm0 |
|||
|
|||
FN_PREFIX(CryptonightR_soft_aes_template_part2): |
|||
mov rsp, r10 |
|||
mov [rsp+144], ebx |
|||
mov [rsp+148], esi |
|||
mov [rsp+152], edi |
|||
mov [rsp+156], ebp |
|||
|
|||
mov edi, edi |
|||
shl rbp, 32 |
|||
or rbp, rdi |
|||
xor r8, rbp |
|||
|
|||
mov ebx, ebx |
|||
shl rsi, 32 |
|||
or rsi, rbx |
|||
xor QWORD PTR [rsp+320], rsi |
|||
|
|||
mov rbx, [rsp+160] |
|||
mov rdi, [rsp+168] |
|||
mov rbp, [rsp+176] |
|||
mov r10, [rsp+184] |
|||
|
|||
mov r9, r10 |
|||
xor r9, 16 |
|||
mov rcx, r10 |
|||
xor rcx, 32 |
|||
xor r10, 48 |
|||
mov rax, rbx |
|||
mul rdi |
|||
movdqu xmm2, XMMWORD PTR [r9+r11] |
|||
movdqu xmm1, XMMWORD PTR [rcx+r11] |
|||
pxor xmm6, xmm2 |
|||
pxor xmm6, xmm1 |
|||
paddq xmm1, xmm7 |
|||
add r8, rdx |
|||
movdqu xmm0, XMMWORD PTR [r10+r11] |
|||
pxor xmm6, xmm0 |
|||
paddq xmm0, xmm5 |
|||
paddq xmm2, xmm4 |
|||
movdqu XMMWORD PTR [r9+r11], xmm0 |
|||
movdqa xmm5, xmm4 |
|||
mov r9, QWORD PTR [rsp+320] |
|||
movdqa xmm4, xmm6 |
|||
add r9, rax |
|||
movdqu XMMWORD PTR [rcx+r11], xmm2 |
|||
movdqu XMMWORD PTR [r10+r11], xmm1 |
|||
mov r10, QWORD PTR [rsp+304] |
|||
movd r12d, xmm11 |
|||
mov QWORD PTR [r14], r8 |
|||
xor r8, rbx |
|||
mov rax, r8 |
|||
mov QWORD PTR [r14+8], r9 |
|||
and eax, 2097136 |
|||
xor r9, rbp |
|||
mov QWORD PTR [rsp+320], r9 |
|||
mov QWORD PTR [rsp+328], rax |
|||
sub r12d, 1 |
|||
jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop) |
|||
|
|||
FN_PREFIX(CryptonightR_soft_aes_template_part3): |
|||
movaps xmm6, XMMWORD PTR [rsp+16] |
|||
movaps xmm7, XMMWORD PTR [rsp+32] |
|||
movaps xmm8, XMMWORD PTR [rsp+48] |
|||
movaps xmm9, XMMWORD PTR [rsp+64] |
|||
movaps xmm10, XMMWORD PTR [rsp+80] |
|||
movaps xmm11, XMMWORD PTR [rsp+96] |
|||
movaps xmm12, XMMWORD PTR [rsp+112] |
|||
movaps xmm13, XMMWORD PTR [rsp+128] |
|||
|
|||
add rsp, 232 |
|||
pop r15 |
|||
pop r14 |
|||
pop r13 |
|||
pop r12 |
|||
pop rdi |
|||
pop rsi |
|||
pop rbp |
|||
pop rbx |
|||
ret |
|||
FN_PREFIX(CryptonightR_soft_aes_template_end): |
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,529 @@ |
|||
PUBLIC FN_PREFIX(CryptonightR_template_part1) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_mainloop) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_part2) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_part3) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_end) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_double_part1) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_double_part2) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_double_part3) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_double_part4) |
|||
PUBLIC FN_PREFIX(CryptonightR_template_double_end) |
|||
|
|||
ALIGN(64) |
|||
FN_PREFIX(CryptonightR_template_part1): |
|||
mov QWORD PTR [rsp+16], rbx |
|||
mov QWORD PTR [rsp+24], rbp |
|||
mov QWORD PTR [rsp+32], rsi |
|||
push r10 |
|||
push r11 |
|||
push r12 |
|||
push r13 |
|||
push r14 |
|||
push r15 |
|||
push rdi |
|||
sub rsp, 64 |
|||
mov r12, rcx |
|||
mov r8, QWORD PTR [r12+32] |
|||
mov rdx, r12 |
|||
xor r8, QWORD PTR [r12] |
|||
mov r15, QWORD PTR [r12+40] |
|||
mov r9, r8 |
|||
xor r15, QWORD PTR [r12+8] |
|||
mov r11, QWORD PTR [r12+224] |
|||
mov r12, QWORD PTR [r12+56] |
|||
xor r12, QWORD PTR [rdx+24] |
|||
mov rax, QWORD PTR [rdx+48] |
|||
xor rax, QWORD PTR [rdx+16] |
|||
movaps XMMWORD PTR [rsp+48], xmm6 |
|||
movq xmm0, r12 |
|||
movaps XMMWORD PTR [rsp+32], xmm7 |
|||
movaps XMMWORD PTR [rsp+16], xmm8 |
|||
movaps XMMWORD PTR [rsp], xmm9 |
|||
mov r12, QWORD PTR [rdx+88] |
|||
xor r12, QWORD PTR [rdx+72] |
|||
movq xmm6, rax |
|||
mov rax, QWORD PTR [rdx+80] |
|||
xor rax, QWORD PTR [rdx+64] |
|||
punpcklqdq xmm6, xmm0 |
|||
and r9d, 2097136 |
|||
movq xmm0, r12 |
|||
movq xmm7, rax |
|||
punpcklqdq xmm7, xmm0 |
|||
mov r10d, r9d |
|||
movq xmm9, rsp |
|||
mov rsp, r8 |
|||
mov r8d, 524288 |
|||
|
|||
mov ebx, [rdx+96] |
|||
mov esi, [rdx+100] |
|||
mov edi, [rdx+104] |
|||
mov ebp, [rdx+108] |
|||
|
|||
ALIGN(64) |
|||
FN_PREFIX(CryptonightR_template_mainloop): |
|||
movdqa xmm5, XMMWORD PTR [r9+r11] |
|||
movq xmm0, r15 |
|||
movq xmm4, rsp |
|||
punpcklqdq xmm4, xmm0 |
|||
lea rdx, QWORD PTR [r9+r11] |
|||
|
|||
aesenc xmm5, xmm4 |
|||
|
|||
mov r12d, r9d |
|||
mov eax, r9d |
|||
xor r9d, 48 |
|||
xor r12d, 16 |
|||
xor eax, 32 |
|||
movdqu xmm0, XMMWORD PTR [r9+r11] |
|||
movaps xmm3, xmm0 |
|||
movdqu xmm2, XMMWORD PTR [r12+r11] |
|||
movdqu xmm1, XMMWORD PTR [rax+r11] |
|||
pxor xmm0, xmm2 |
|||
pxor xmm5, xmm1 |
|||
pxor xmm5, xmm0 |
|||
paddq xmm3, xmm7 |
|||
paddq xmm2, xmm6 |
|||
paddq xmm1, xmm4 |
|||
movdqu XMMWORD PTR [r12+r11], xmm3 |
|||
movdqu XMMWORD PTR [rax+r11], xmm2 |
|||
movdqu XMMWORD PTR [r9+r11], xmm1 |
|||
|
|||
movq r12, xmm5 |
|||
movd r10d, xmm5 |
|||
and r10d, 2097136 |
|||
|
|||
movdqa xmm0, xmm5 |
|||
pxor xmm0, xmm6 |
|||
movdqu XMMWORD PTR [rdx], xmm0 |
|||
|
|||
lea r13d, [ebx+esi] |
|||
lea edx, [edi+ebp] |
|||
shl rdx, 32 |
|||
or r13, rdx |
|||
|
|||
xor r13, QWORD PTR [r10+r11] |
|||
mov r14, QWORD PTR [r10+r11+8] |
|||
|
|||
movd eax, xmm6 |
|||
movd edx, xmm7 |
|||
pextrd r9d, xmm7, 2 |
|||
|
|||
FN_PREFIX(CryptonightR_template_part2): |
|||
mov eax, edi |
|||
mov edx, ebp |
|||
shl rdx, 32 |
|||
or rax, rdx |
|||
xor rsp, rax |
|||
|
|||
mov eax, ebx |
|||
mov edx, esi |
|||
shl rdx, 32 |
|||
or rax, rdx |
|||
xor r15, rax |
|||
|
|||
mov rax, r13 |
|||
mul r12 |
|||
|
|||
mov r9d, r10d |
|||
mov r12d, r10d |
|||
xor r9d, 16 |
|||
xor r12d, 32 |
|||
xor r10d, 48 |
|||
movdqa xmm1, XMMWORD PTR [r12+r11] |
|||
movaps xmm3, xmm1 |
|||
movdqa xmm2, XMMWORD PTR [r9+r11] |
|||
movdqa xmm0, XMMWORD PTR [r10+r11] |
|||
pxor xmm1, xmm2 |
|||
pxor xmm5, xmm0 |
|||
pxor xmm5, xmm1 |
|||
paddq xmm3, xmm4 |
|||
paddq xmm2, xmm6 |
|||
paddq xmm0, xmm7 |
|||
movdqu XMMWORD PTR [r9+r11], xmm0 |
|||
movdqu XMMWORD PTR [r12+r11], xmm2 |
|||
movdqu XMMWORD PTR [r10+r11], xmm3 |
|||
|
|||
movdqa xmm7, xmm6 |
|||
add r15, rax |
|||
add rsp, rdx |
|||
xor r10, 48 |
|||
mov QWORD PTR [r10+r11], rsp |
|||
xor rsp, r13 |
|||
mov r9d, esp |
|||
mov QWORD PTR [r10+r11+8], r15 |
|||
and r9d, 2097136 |
|||
xor r15, r14 |
|||
movdqa xmm6, xmm5 |
|||
dec r8d |
|||
jnz FN_PREFIX(CryptonightR_template_mainloop) |
|||
|
|||
FN_PREFIX(CryptonightR_template_part3): |
|||
movq rsp, xmm9 |
|||
|
|||
mov rbx, QWORD PTR [rsp+136] |
|||
mov rbp, QWORD PTR [rsp+144] |
|||
mov rsi, QWORD PTR [rsp+152] |
|||
movaps xmm6, XMMWORD PTR [rsp+48] |
|||
movaps xmm7, XMMWORD PTR [rsp+32] |
|||
movaps xmm8, XMMWORD PTR [rsp+16] |
|||
movaps xmm9, XMMWORD PTR [rsp] |
|||
add rsp, 64 |
|||
pop rdi |
|||
pop r15 |
|||
pop r14 |
|||
pop r13 |
|||
pop r12 |
|||
pop r11 |
|||
pop r10 |
|||
ret 0 |
|||
FN_PREFIX(CryptonightR_template_end): |
|||
|
|||
ALIGN(64) |
|||
FN_PREFIX(CryptonightR_template_double_part1): |
|||
mov QWORD PTR [rsp+24], rbx |
|||
push rbp |
|||
push rsi |
|||
push rdi |
|||
push r12 |
|||
push r13 |
|||
push r14 |
|||
push r15 |
|||
sub rsp, 320 |
|||
mov r14, QWORD PTR [rcx+32] |
|||
mov r8, rcx |
|||
xor r14, QWORD PTR [rcx] |
|||
mov r12, QWORD PTR [rcx+40] |
|||
mov ebx, r14d |
|||
mov rsi, QWORD PTR [rcx+224] |
|||
and ebx, 2097136 |
|||
xor r12, QWORD PTR [rcx+8] |
|||
mov rcx, QWORD PTR [rcx+56] |
|||
xor rcx, QWORD PTR [r8+24] |
|||
mov rax, QWORD PTR [r8+48] |
|||
xor rax, QWORD PTR [r8+16] |
|||
mov r15, QWORD PTR [rdx+32] |
|||
xor r15, QWORD PTR [rdx] |
|||
movq xmm0, rcx |
|||
mov rcx, QWORD PTR [r8+88] |
|||
xor rcx, QWORD PTR [r8+72] |
|||
mov r13, QWORD PTR [rdx+40] |
|||
mov rdi, QWORD PTR [rdx+224] |
|||
xor r13, QWORD PTR [rdx+8] |
|||
movaps XMMWORD PTR [rsp+160], xmm6 |
|||
movaps XMMWORD PTR [rsp+176], xmm7 |
|||
movaps XMMWORD PTR [rsp+192], xmm8 |
|||
movaps XMMWORD PTR [rsp+208], xmm9 |
|||
movaps XMMWORD PTR [rsp+224], xmm10 |
|||
movaps XMMWORD PTR [rsp+240], xmm11 |
|||
movaps XMMWORD PTR [rsp+256], xmm12 |
|||
movaps XMMWORD PTR [rsp+272], xmm13 |
|||
movaps XMMWORD PTR [rsp+288], xmm14 |
|||
movaps XMMWORD PTR [rsp+304], xmm15 |
|||
movq xmm7, rax |
|||
mov rax, QWORD PTR [r8+80] |
|||
xor rax, QWORD PTR [r8+64] |
|||
|
|||
movaps xmm1, XMMWORD PTR [rdx+96] |
|||
movaps xmm2, XMMWORD PTR [r8+96] |
|||
movaps XMMWORD PTR [rsp], xmm1 |
|||
movaps XMMWORD PTR [rsp+16], xmm2 |
|||
|
|||
mov r8d, r15d |
|||
punpcklqdq xmm7, xmm0 |
|||
movq xmm0, rcx |
|||
mov rcx, QWORD PTR [rdx+56] |
|||
xor rcx, QWORD PTR [rdx+24] |
|||
movq xmm9, rax |
|||
mov QWORD PTR [rsp+128], rsi |
|||
mov rax, QWORD PTR [rdx+48] |
|||
xor rax, QWORD PTR [rdx+16] |
|||
punpcklqdq xmm9, xmm0 |
|||
movq xmm0, rcx |
|||
mov rcx, QWORD PTR [rdx+88] |
|||
xor rcx, QWORD PTR [rdx+72] |
|||
movq xmm8, rax |
|||
mov QWORD PTR [rsp+136], rdi |
|||
mov rax, QWORD PTR [rdx+80] |
|||
xor rax, QWORD PTR [rdx+64] |
|||
punpcklqdq xmm8, xmm0 |
|||
and r8d, 2097136 |
|||
movq xmm0, rcx |
|||
mov r11d, 524288 |
|||
movq xmm10, rax |
|||
punpcklqdq xmm10, xmm0 |
|||
|
|||
movq xmm14, QWORD PTR [rsp+128] |
|||
movq xmm15, QWORD PTR [rsp+136] |
|||
|
|||
ALIGN(64) |
|||
FN_PREFIX(CryptonightR_template_double_mainloop): |
|||
movdqu xmm6, XMMWORD PTR [rbx+rsi] |
|||
movq xmm0, r12 |
|||
mov ecx, ebx |
|||
movq xmm3, r14 |
|||
punpcklqdq xmm3, xmm0 |
|||
xor ebx, 16 |
|||
aesenc xmm6, xmm3 |
|||
movq xmm4, r15 |
|||
movdqu xmm0, XMMWORD PTR [rbx+rsi] |
|||
pxor xmm6, xmm0 |
|||
xor ebx, 48 |
|||
paddq xmm0, xmm7 |
|||
movdqu xmm1, XMMWORD PTR [rbx+rsi] |
|||
pxor xmm6, xmm1 |
|||
movdqu XMMWORD PTR [rbx+rsi], xmm0 |
|||
paddq xmm1, xmm3 |
|||
xor ebx, 16 |
|||
mov eax, ebx |
|||
xor rax, 32 |
|||
movdqu xmm0, XMMWORD PTR [rbx+rsi] |
|||
pxor xmm6, xmm0 |
|||
movq rdx, xmm6 |
|||
movdqu XMMWORD PTR [rbx+rsi], xmm1 |
|||
paddq xmm0, xmm9 |
|||
movdqu XMMWORD PTR [rax+rsi], xmm0 |
|||
movdqa xmm0, xmm6 |
|||
pxor xmm0, xmm7 |
|||
movdqu XMMWORD PTR [rcx+rsi], xmm0 |
|||
mov esi, edx |
|||
movdqu xmm5, XMMWORD PTR [r8+rdi] |
|||
and esi, 2097136 |
|||
mov ecx, r8d |
|||
movq xmm0, r13 |
|||
punpcklqdq xmm4, xmm0 |
|||
xor r8d, 16 |
|||
aesenc xmm5, xmm4 |
|||
movdqu xmm0, XMMWORD PTR [r8+rdi] |
|||
pxor xmm5, xmm0 |
|||
xor r8d, 48 |
|||
paddq xmm0, xmm8 |
|||
movdqu xmm1, XMMWORD PTR [r8+rdi] |
|||
pxor xmm5, xmm1 |
|||
movdqu XMMWORD PTR [r8+rdi], xmm0 |
|||
paddq xmm1, xmm4 |
|||
xor r8d, 16 |
|||
mov eax, r8d |
|||
xor rax, 32 |
|||
movdqu xmm0, XMMWORD PTR [r8+rdi] |
|||
pxor xmm5, xmm0 |
|||
movdqu XMMWORD PTR [r8+rdi], xmm1 |
|||
paddq xmm0, xmm10 |
|||
movdqu XMMWORD PTR [rax+rdi], xmm0 |
|||
movdqa xmm0, xmm5 |
|||
pxor xmm0, xmm8 |
|||
movdqu XMMWORD PTR [rcx+rdi], xmm0 |
|||
movq rdi, xmm5 |
|||
movq rcx, xmm14 |
|||
mov ebp, edi |
|||
mov r8, QWORD PTR [rcx+rsi] |
|||
mov r10, QWORD PTR [rcx+rsi+8] |
|||
lea r9, QWORD PTR [rcx+rsi] |
|||
xor esi, 16 |
|||
|
|||
movq xmm0, rsp |
|||
movq xmm1, rsi |
|||
movq xmm2, rdi |
|||
movq xmm11, rbp |
|||
movq xmm12, r15 |
|||
movq xmm13, rdx |
|||
mov [rsp+104], rcx |
|||
mov [rsp+112], r9 |
|||
|
|||
mov ebx, DWORD PTR [rsp+16] |
|||
mov esi, DWORD PTR [rsp+20] |
|||
mov edi, DWORD PTR [rsp+24] |
|||
mov ebp, DWORD PTR [rsp+28] |
|||
|
|||
lea eax, [ebx+esi] |
|||
lea edx, [edi+ebp] |
|||
shl rdx, 32 |
|||
or rax, rdx |
|||
xor r8, rax |
|||
|
|||
movd esp, xmm3 |
|||
pextrd r15d, xmm3, 2 |
|||
movd eax, xmm7 |
|||
movd edx, xmm9 |
|||
pextrd r9d, xmm9, 2 |
|||
|
|||
FN_PREFIX(CryptonightR_template_double_part2): |
|||
|
|||
mov eax, edi |
|||
mov edx, ebp |
|||
shl rdx, 32 |
|||
or rax, rdx |
|||
xor r14, rax |
|||
|
|||
mov eax, ebx |
|||
mov edx, esi |
|||
shl rdx, 32 |
|||
or rax, rdx |
|||
xor r12, rax |
|||
|
|||
movq rsp, xmm0 |
|||
mov DWORD PTR [rsp+16], ebx |
|||
mov DWORD PTR [rsp+20], esi |
|||
mov DWORD PTR [rsp+24], edi |
|||
mov DWORD PTR [rsp+28], ebp |
|||
|
|||
movq rsi, xmm1 |
|||
movq rdi, xmm2 |
|||
movq rbp, xmm11 |
|||
movq r15, xmm12 |
|||
movq rdx, xmm13 |
|||
mov rcx, [rsp+104] |
|||
mov r9, [rsp+112] |
|||
|
|||
mov rbx, r8 |
|||
mov rax, r8 |
|||
mul rdx |
|||
and ebp, 2097136 |
|||
mov r8, rax |
|||
movdqu xmm1, XMMWORD PTR [rcx+rsi] |
|||
pxor xmm6, xmm1 |
|||
xor esi, 48 |
|||
paddq xmm1, xmm7 |
|||
movdqu xmm2, XMMWORD PTR [rsi+rcx] |
|||
pxor xmm6, xmm2 |
|||
paddq xmm2, xmm3 |
|||
movdqu XMMWORD PTR [rsi+rcx], xmm1 |
|||
xor esi, 16 |
|||
mov eax, esi |
|||
mov rsi, rcx |
|||
movdqu xmm0, XMMWORD PTR [rax+rcx] |
|||
pxor xmm6, xmm0 |
|||
movdqu XMMWORD PTR [rax+rcx], xmm2 |
|||
paddq xmm0, xmm9 |
|||
add r12, r8 |
|||
xor rax, 32 |
|||
add r14, rdx |
|||
movdqa xmm9, xmm7 |
|||
movdqa xmm7, xmm6 |
|||
movdqu XMMWORD PTR [rax+rcx], xmm0 |
|||
mov QWORD PTR [r9+8], r12 |
|||
xor r12, r10 |
|||
mov QWORD PTR [r9], r14 |
|||
movq rcx, xmm15 |
|||
xor r14, rbx |
|||
mov r10d, ebp |
|||
mov ebx, r14d |
|||
xor ebp, 16 |
|||
and ebx, 2097136 |
|||
mov r8, QWORD PTR [r10+rcx] |
|||
mov r9, QWORD PTR [r10+rcx+8] |
|||
|
|||
movq xmm0, rsp |
|||
movq xmm1, rbx |
|||
movq xmm2, rsi |
|||
movq xmm11, rdi |
|||
movq xmm12, rbp |
|||
movq xmm13, r15 |
|||
mov [rsp+104], rcx |
|||
mov [rsp+112], r9 |
|||
|
|||
mov ebx, DWORD PTR [rsp] |
|||
mov esi, DWORD PTR [rsp+4] |
|||
mov edi, DWORD PTR [rsp+8] |
|||
mov ebp, DWORD PTR [rsp+12] |
|||
|
|||
lea eax, [ebx+esi] |
|||
lea edx, [edi+ebp] |
|||
shl rdx, 32 |
|||
or rax, rdx |
|||
|
|||
xor r8, rax |
|||
movq xmm3, r8 |
|||
|
|||
movd esp, xmm4 |
|||
pextrd r15d, xmm4, 2 |
|||
movd eax, xmm8 |
|||
movd edx, xmm10 |
|||
pextrd r9d, xmm10, 2 |
|||
|
|||
FN_PREFIX(CryptonightR_template_double_part3): |
|||
|
|||
movq r15, xmm13 |
|||
|
|||
mov eax, edi |
|||
mov edx, ebp |
|||
shl rdx, 32 |
|||
or rax, rdx |
|||
xor r15, rax |
|||
|
|||
mov eax, ebx |
|||
mov edx, esi |
|||
shl rdx, 32 |
|||
or rax, rdx |
|||
xor r13, rax |
|||
|
|||
movq rsp, xmm0 |
|||
mov DWORD PTR [rsp], ebx |
|||
mov DWORD PTR [rsp+4], esi |
|||
mov DWORD PTR [rsp+8], edi |
|||
mov DWORD PTR [rsp+12], ebp |
|||
|
|||
movq rbx, xmm1 |
|||
movq rsi, xmm2 |
|||
movq rdi, xmm11 |
|||
movq rbp, xmm12 |
|||
mov rcx, [rsp+104] |
|||
mov r9, [rsp+112] |
|||
|
|||
mov rax, r8 |
|||
mul rdi |
|||
mov rdi, rcx |
|||
mov r8, rax |
|||
movdqu xmm1, XMMWORD PTR [rbp+rcx] |
|||
pxor xmm5, xmm1 |
|||
xor ebp, 48 |
|||
paddq xmm1, xmm8 |
|||
add r13, r8 |
|||
movdqu xmm2, XMMWORD PTR [rbp+rcx] |
|||
pxor xmm5, xmm2 |
|||
add r15, rdx |
|||
movdqu XMMWORD PTR [rbp+rcx], xmm1 |
|||
paddq xmm2, xmm4 |
|||
xor ebp, 16 |
|||
mov eax, ebp |
|||
xor rax, 32 |
|||
movdqu xmm0, XMMWORD PTR [rbp+rcx] |
|||
pxor xmm5, xmm0 |
|||
movdqu XMMWORD PTR [rbp+rcx], xmm2 |
|||
paddq xmm0, xmm10 |
|||
movdqu XMMWORD PTR [rax+rcx], xmm0 |
|||
movq rax, xmm3 |
|||
movdqa xmm10, xmm8 |
|||
mov QWORD PTR [r10+rcx], r15 |
|||
movdqa xmm8, xmm5 |
|||
xor r15, rax |
|||
mov QWORD PTR [r10+rcx+8], r13 |
|||
mov r8d, r15d |
|||
xor r13, r9 |
|||
and r8d, 2097136 |
|||
dec r11d |
|||
jnz FN_PREFIX(CryptonightR_template_double_mainloop) |
|||
|
|||
FN_PREFIX(CryptonightR_template_double_part4): |
|||
|
|||
mov rbx, QWORD PTR [rsp+400] |
|||
movaps xmm6, XMMWORD PTR [rsp+160] |
|||
movaps xmm7, XMMWORD PTR [rsp+176] |
|||
movaps xmm8, XMMWORD PTR [rsp+192] |
|||
movaps xmm9, XMMWORD PTR [rsp+208] |
|||
movaps xmm10, XMMWORD PTR [rsp+224] |
|||
movaps xmm11, XMMWORD PTR [rsp+240] |
|||
movaps xmm12, XMMWORD PTR [rsp+256] |
|||
movaps xmm13, XMMWORD PTR [rsp+272] |
|||
movaps xmm14, XMMWORD PTR [rsp+288] |
|||
movaps xmm15, XMMWORD PTR [rsp+304] |
|||
add rsp, 320 |
|||
pop r15 |
|||
pop r14 |
|||
pop r13 |
|||
pop r12 |
|||
pop rdi |
|||
pop rsi |
|||
pop rbp |
|||
ret 0 |
|||
FN_PREFIX(CryptonightR_template_double_end): |
@ -1,36 +0,0 @@ |
|||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE |
|||
PUBLIC cnv2_mainloop_ivybridge_asm |
|||
PUBLIC cnv2_mainloop_ryzen_asm |
|||
PUBLIC cnv2_mainloop_bulldozer_asm |
|||
PUBLIC cnv2_double_mainloop_sandybridge_asm |
|||
|
|||
ALIGN(64) |
|||
cnv2_mainloop_ivybridge_asm PROC |
|||
INCLUDE cn2/cnv2_main_loop_ivybridge.inc |
|||
ret 0 |
|||
mov eax, 3735929054 |
|||
cnv2_mainloop_ivybridge_asm ENDP |
|||
|
|||
ALIGN(64) |
|||
cnv2_mainloop_ryzen_asm PROC |
|||
INCLUDE cn2/cnv2_main_loop_ryzen.inc |
|||
ret 0 |
|||
mov eax, 3735929054 |
|||
cnv2_mainloop_ryzen_asm ENDP |
|||
|
|||
ALIGN(64) |
|||
cnv2_mainloop_bulldozer_asm PROC |
|||
INCLUDE cn2/cnv2_main_loop_bulldozer.inc |
|||
ret 0 |
|||
mov eax, 3735929054 |
|||
cnv2_mainloop_bulldozer_asm ENDP |
|||
|
|||
ALIGN(64) |
|||
cnv2_double_mainloop_sandybridge_asm PROC |
|||
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc |
|||
ret 0 |
|||
mov eax, 3735929054 |
|||
cnv2_double_mainloop_sandybridge_asm ENDP |
|||
|
|||
_TEXT_CNV2_MAINLOOP ENDS |
|||
END |
Loading…
Reference in new issue