SChernykh
4 years ago
19 changed files with 390 additions and 481 deletions
@ -0,0 +1,100 @@ |
|||
/* XMRig
|
|||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
|||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
|||
* |
|||
* This program is free software: you can redistribute it and/or modify |
|||
* it under the terms of the GNU General Public License as published by |
|||
* the Free Software Foundation, either version 3 of the License, or |
|||
* (at your option) any later version. |
|||
* |
|||
* This program is distributed in the hope that it will be useful, |
|||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
* GNU General Public License for more details. |
|||
* |
|||
* You should have received a copy of the GNU General Public License |
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
*/ |
|||
|
|||
|
|||
#include "base/tools/Profiler.h" |
|||
#include "base/io/log/Log.h" |
|||
#include "base/io/log/Tags.h" |
|||
#include <sstream> |
|||
#include <thread> |
|||
#include <chrono> |
|||
#include <algorithm> |
|||
|
|||
|
|||
#ifdef XMRIG_FEATURE_PROFILING |
|||
|
|||
|
|||
ProfileScopeData* ProfileScopeData::s_data[MAX_DATA_COUNT] = {}; |
|||
volatile long ProfileScopeData::s_dataCount = 0; |
|||
double ProfileScopeData::s_tscSpeed = 0.0; |
|||
|
|||
|
|||
// NOINLINE: compiler-specific "never inline this function" marker.
// Expands to nothing on compilers that are neither GCC-compatible nor MSVC.
// Compiler detection uses defined(...) throughout so an undefined _MSC_VER is
// never evaluated as an expression (no -Wundef warning), matching how
// FORCE_INLINE is selected in Profiler.h.
#ifndef NOINLINE
#   if defined(__GNUC__)
#       define NOINLINE __attribute__ ((noinline))
#   elif defined(_MSC_VER)
#       define NOINLINE __declspec(noinline)
#   else
#       define NOINLINE
#   endif
#endif
|||
|
|||
|
|||
static std::string get_thread_id() |
|||
{ |
|||
std::stringstream ss; |
|||
ss << std::this_thread::get_id(); |
|||
|
|||
std::string s = ss.str(); |
|||
if (s.length() > ProfileScopeData::MAX_THREAD_ID_LENGTH) { |
|||
s.resize(ProfileScopeData::MAX_THREAD_ID_LENGTH); |
|||
} |
|||
|
|||
return s; |
|||
} |
|||
|
|||
|
|||
// Adds `data` to the global s_data table and stamps it with the textual id of
// the calling thread.
//
// A slot index is reserved with an atomic increment of s_dataCount. When the
// table already holds MAX_DATA_COUNT entries the record is silently left
// unregistered (s_dataCount is still incremented in that case).
//
// data: record to register; must be non-null. The pointer itself is stored, so
//       the record has to stay valid for as long as s_data is read.
//       NOTE(review): PROFILE_SCOPE passes thread_local records - confirm that
//       readers of s_data cannot outlive the registering threads.
NOINLINE void ProfileScopeData::Register(ProfileScopeData* data)
{
#ifdef _MSC_VER
    // _InterlockedIncrement returns the incremented value; subtract one to get
    // the index that was reserved.
    const long id = _InterlockedIncrement(&s_dataCount) - 1;
#else
    const long id = __sync_fetch_and_add(&s_dataCount, 1);
#endif

    // The unsigned comparison also rejects a negative index.
    if (static_cast<unsigned long>(id) >= MAX_DATA_COUNT) {
        return;
    }

    // Fill in the thread id before the record becomes reachable through
    // s_data, so the table never exposes a record whose id is still pending.
    const std::string tid = get_thread_id();

    // Clamp locally as well: the copy stays inside m_threadId even if the
    // helper ever returned a longer string.
    const std::string::size_type len =
        std::min<std::string::size_type>(tid.length(), MAX_THREAD_ID_LENGTH);

    std::copy_n(tid.c_str(), len, data->m_threadId);
    data->m_threadId[len] = '\0';

    s_data[id] = data;
}
|||
|
|||
|
|||
// Calibrates the time-stamp counter: spins for slightly more than one second,
// then stores the measured rate (TSC ticks per second) in s_tscSpeed and logs
// it in GHz.
//
// This call blocks the calling thread, busy-waiting, for about one second.
NOINLINE void ProfileScopeData::Init()
{
    using namespace std::chrono;

    // steady_clock is monotonic, so the elapsed time can never go backwards if
    // the wall clock is adjusted while the calibration is running.
    const steady_clock::time_point start = steady_clock::now();
    const uint64_t tscStart = ReadTSC();

    for (;;) {
        const steady_clock::time_point current = steady_clock::now();
        const uint64_t tscNow = ReadTSC();

        const nanoseconds::rep elapsedNs = duration_cast<nanoseconds>(current - start).count();

        // Keep sampling until more than one second (1e9 ns) has passed.
        if (elapsedNs > 1000000000) {
            // ticks / nanoseconds * 1e9 = ticks per second
            s_tscSpeed = static_cast<double>(tscNow - tscStart) * 1e9 / static_cast<double>(elapsedNs);
            LOG_INFO("%s TSC speed = %.3f GHz", xmrig::Tags::profiler(), s_tscSpeed / 1e9);
            return;
        }
    }
}
|||
|
|||
|
|||
#endif /* XMRIG_FEATURE_PROFILING */ |
@ -0,0 +1,132 @@ |
|||
/* XMRig
|
|||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
|||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
|||
* |
|||
* This program is free software: you can redistribute it and/or modify |
|||
* it under the terms of the GNU General Public License as published by |
|||
* the Free Software Foundation, either version 3 of the License, or |
|||
* (at your option) any later version. |
|||
* |
|||
* This program is distributed in the hope that it will be useful, |
|||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|||
* GNU General Public License for more details. |
|||
* |
|||
* You should have received a copy of the GNU General Public License |
|||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
*/ |
|||
|
|||
#ifndef XMRIG_PROFILER_H |
|||
#define XMRIG_PROFILER_H |
|||
|
|||
|
|||
// FORCE_INLINE: the strongest "inline this" spelling available for the
// detected compiler. A definition supplied before this point is respected.
// On unrecognised compilers it expands to nothing.
#if !defined(FORCE_INLINE)
#   if defined(_MSC_VER)
#       define FORCE_INLINE __forceinline
#   elif defined(__GNUC__)
#       define FORCE_INLINE __attribute__((always_inline)) inline
#   elif defined(__clang__)
#       define FORCE_INLINE __inline__
#   else
#       define FORCE_INLINE
#   endif
#endif
|||
|
|||
|
|||
#ifdef XMRIG_FEATURE_PROFILING |
|||
|
|||
|
|||
#include <cstdint> |
|||
#include <type_traits> |
|||
|
|||
#if defined(_MSC_VER) |
|||
#include <intrin.h> |
|||
#endif |
|||
|
|||
|
|||
// Returns the current value of the CPU time-stamp counter.
// MSVC builds use the __rdtsc() intrinsic; every other compiler goes through
// inline assembly for the x86 `rdtsc` instruction, which delivers the counter
// as two 32-bit halves (EAX = low, EDX = high).
static FORCE_INLINE uint64_t ReadTSC()
{
#if defined(_MSC_VER)
    return __rdtsc();
#else
    uint32_t high;
    uint32_t low;
    __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));

    return (static_cast<uint64_t>(high) << 32) | low;
#endif
}
|||
|
|||
|
|||
// Accumulated timing data for one PROFILE_SCOPE site.
//
// The struct is kept trivial and at most 32 bytes (both enforced by the
// static_asserts below). m_name must remain the first data member because
// PROFILE_SCOPE initialises records as `ProfileScopeData{"name"}`.
struct ProfileScopeData
{
    enum
    {
        MAX_THREAD_ID_LENGTH = 11,  // characters of the thread id kept, terminator excluded
        MAX_SAMPLE_COUNT = 128,
        MAX_DATA_COUNT = 1024       // capacity of s_data
    };

    const char* m_name;             // scope name (string literal supplied by PROFILE_SCOPE)
    uint64_t m_totalCycles;         // sum of TSC ticks over all finished ProfileScope objects
    uint32_t m_totalSamples;        // number of finished ProfileScope objects

    char m_threadId[MAX_THREAD_ID_LENGTH + 1];  // NUL-terminated id text written by Register()

    static ProfileScopeData* s_data[MAX_DATA_COUNT];    // registered records
    static volatile long s_dataCount;                   // slot reservations made by Register()
    static double s_tscSpeed;                           // TSC ticks per second, set by Init()

    // Stores `data` in s_data (if a slot is free) and records the calling thread's id in it.
    static void Register(ProfileScopeData* data);

    // Measures the TSC frequency and stores it in s_tscSpeed.
    static void Init();
};


static_assert(std::is_trivial<ProfileScopeData>::value, "ProfileScopeData must be a trivial struct");
static_assert(sizeof(ProfileScopeData) <= 32, "ProfileScopeData struct is too big");
|||
|
|||
|
|||
class ProfileScope |
|||
{ |
|||
public: |
|||
FORCE_INLINE ProfileScope(ProfileScopeData& data) |
|||
: m_data(data) |
|||
{ |
|||
if (m_data.m_totalCycles == 0) { |
|||
ProfileScopeData::Register(&data); |
|||
} |
|||
|
|||
m_startCounter = ReadTSC(); |
|||
} |
|||
|
|||
FORCE_INLINE ~ProfileScope() |
|||
{ |
|||
m_data.m_totalCycles += ReadTSC() - m_startCounter; |
|||
++m_data.m_totalSamples; |
|||
} |
|||
|
|||
private: |
|||
ProfileScopeData& m_data; |
|||
uint64_t m_startCounter; |
|||
}; |
|||
|
|||
|
|||
// Times the remainder of the enclosing scope under the name `x`.
// Declares a per-thread record `x_data` whose name is the stringified `x`,
// plus a ProfileScope object called `x` that updates that record when the
// scope is left. The expansion already ends with a semicolon.
#define PROFILE_SCOPE(x)                                \
    static thread_local ProfileScopeData x##_data{#x};  \
    ProfileScope x(x##_data);
|||
|
|||
|
|||
#else /* XMRIG_FEATURE_PROFILING */ |
|||
#define PROFILE_SCOPE(x) |
|||
#endif /* XMRIG_FEATURE_PROFILING */ |
|||
|
|||
|
|||
#include "crypto/randomx/blake2/blake2.h" |
|||
|
|||
|
|||
struct rx_blake2b_wrapper |
|||
{ |
|||
FORCE_INLINE static void run(void* out, size_t outlen, const void* in, size_t inlen) |
|||
{ |
|||
PROFILE_SCOPE(RandomX_Blake2b); |
|||
rx_blake2b(out, outlen, in, inlen); |
|||
} |
|||
}; |
|||
|
|||
|
|||
#endif /* XMRIG_PROFILER_H */ |
@ -1,402 +0,0 @@ |
|||
/*
|
|||
BLAKE2 reference source code package - optimized C implementations |
|||
|
|||
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the |
|||
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at |
|||
your option. The terms of these licenses can be found at: |
|||
|
|||
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
|
|||
- OpenSSL license : https://www.openssl.org/source/license.html
|
|||
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
|
|||
|
|||
More information about the BLAKE2 hash function can be found at |
|||
https://blake2.net.
|
|||
*/ |
|||
#ifndef BLAKE2B_LOAD_SSE41_H
#define BLAKE2B_LOAD_SSE41_H

/*
 * Message-load macros for the SSE4.1 BLAKE2b code path.
 *
 * LOAD_MSG_<round>_<step>(b0, b1) assigns two vectors to b0 and b1, built from
 * the variables m0..m7, which must be visible at the point of expansion.
 * Each macro is a single `do { ... } while (0)` statement; the caller supplies
 * the terminating semicolon.
 */

/* ---- round 0 ---- */
#define LOAD_MSG_0_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m0, m1); \
        b1 = _mm_unpacklo_epi64(m2, m3); \
    } while (0)

#define LOAD_MSG_0_2(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m0, m1); \
        b1 = _mm_unpackhi_epi64(m2, m3); \
    } while (0)

#define LOAD_MSG_0_3(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m4, m5); \
        b1 = _mm_unpacklo_epi64(m6, m7); \
    } while (0)

#define LOAD_MSG_0_4(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m4, m5); \
        b1 = _mm_unpackhi_epi64(m6, m7); \
    } while (0)

/* ---- round 1 ---- */
#define LOAD_MSG_1_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m7, m2); \
        b1 = _mm_unpackhi_epi64(m4, m6); \
    } while (0)

#define LOAD_MSG_1_2(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m5, m4); \
        b1 = _mm_alignr_epi8(m3, m7, 8); \
    } while (0)

#define LOAD_MSG_1_3(b0, b1) \
    do { \
        b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
        b1 = _mm_unpackhi_epi64(m5, m2); \
    } while (0)

#define LOAD_MSG_1_4(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m6, m1); \
        b1 = _mm_unpackhi_epi64(m3, m1); \
    } while (0)

/* ---- round 2 ---- */
#define LOAD_MSG_2_1(b0, b1) \
    do { \
        b0 = _mm_alignr_epi8(m6, m5, 8); \
        b1 = _mm_unpackhi_epi64(m2, m7); \
    } while (0)

#define LOAD_MSG_2_2(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m4, m0); \
        b1 = _mm_blend_epi16(m1, m6, 0xF0); \
    } while (0)

#define LOAD_MSG_2_3(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m5, m1, 0xF0); \
        b1 = _mm_unpackhi_epi64(m3, m4); \
    } while (0)

#define LOAD_MSG_2_4(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m7, m3); \
        b1 = _mm_alignr_epi8(m2, m0, 8); \
    } while (0)

/* ---- round 3 ---- */
#define LOAD_MSG_3_1(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m3, m1); \
        b1 = _mm_unpackhi_epi64(m6, m5); \
    } while (0)

#define LOAD_MSG_3_2(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m4, m0); \
        b1 = _mm_unpacklo_epi64(m6, m7); \
    } while (0)

#define LOAD_MSG_3_3(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m1, m2, 0xF0); \
        b1 = _mm_blend_epi16(m2, m7, 0xF0); \
    } while (0)

#define LOAD_MSG_3_4(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m3, m5); \
        b1 = _mm_unpacklo_epi64(m0, m4); \
    } while (0)

/* ---- round 4 ---- */
#define LOAD_MSG_4_1(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m4, m2); \
        b1 = _mm_unpacklo_epi64(m1, m5); \
    } while (0)

#define LOAD_MSG_4_2(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m0, m3, 0xF0); \
        b1 = _mm_blend_epi16(m2, m7, 0xF0); \
    } while (0)

#define LOAD_MSG_4_3(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m7, m5, 0xF0); \
        b1 = _mm_blend_epi16(m3, m1, 0xF0); \
    } while (0)

#define LOAD_MSG_4_4(b0, b1) \
    do { \
        b0 = _mm_alignr_epi8(m6, m0, 8); \
        b1 = _mm_blend_epi16(m4, m6, 0xF0); \
    } while (0)

/* ---- round 5 ---- */
#define LOAD_MSG_5_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m1, m3); \
        b1 = _mm_unpacklo_epi64(m0, m4); \
    } while (0)

#define LOAD_MSG_5_2(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m6, m5); \
        b1 = _mm_unpackhi_epi64(m5, m1); \
    } while (0)

#define LOAD_MSG_5_3(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m2, m3, 0xF0); \
        b1 = _mm_unpackhi_epi64(m7, m0); \
    } while (0)

#define LOAD_MSG_5_4(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m6, m2); \
        b1 = _mm_blend_epi16(m7, m4, 0xF0); \
    } while (0)

/* ---- round 6 ---- */
#define LOAD_MSG_6_1(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m6, m0, 0xF0); \
        b1 = _mm_unpacklo_epi64(m7, m2); \
    } while (0)

#define LOAD_MSG_6_2(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m2, m7); \
        b1 = _mm_alignr_epi8(m5, m6, 8); \
    } while (0)

#define LOAD_MSG_6_3(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m0, m3); \
        b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
    } while (0)

#define LOAD_MSG_6_4(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m3, m1); \
        b1 = _mm_blend_epi16(m1, m5, 0xF0); \
    } while (0)

/* ---- round 7 ---- */
#define LOAD_MSG_7_1(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m6, m3); \
        b1 = _mm_blend_epi16(m6, m1, 0xF0); \
    } while (0)

#define LOAD_MSG_7_2(b0, b1) \
    do { \
        b0 = _mm_alignr_epi8(m7, m5, 8); \
        b1 = _mm_unpackhi_epi64(m0, m4); \
    } while (0)

#define LOAD_MSG_7_3(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m2, m7); \
        b1 = _mm_unpacklo_epi64(m4, m1); \
    } while (0)

#define LOAD_MSG_7_4(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m0, m2); \
        b1 = _mm_unpacklo_epi64(m3, m5); \
    } while (0)

/* ---- round 8 ---- */
#define LOAD_MSG_8_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m3, m7); \
        b1 = _mm_alignr_epi8(m0, m5, 8); \
    } while (0)

#define LOAD_MSG_8_2(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m7, m4); \
        b1 = _mm_alignr_epi8(m4, m1, 8); \
    } while (0)

#define LOAD_MSG_8_3(b0, b1) \
    do { \
        b0 = m6; \
        b1 = _mm_alignr_epi8(m5, m0, 8); \
    } while (0)

#define LOAD_MSG_8_4(b0, b1) \
    do { \
        b0 = _mm_blend_epi16(m1, m3, 0xF0); \
        b1 = m2; \
    } while (0)

/* ---- round 9 ---- */
#define LOAD_MSG_9_1(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m5, m4); \
        b1 = _mm_unpackhi_epi64(m3, m0); \
    } while (0)

#define LOAD_MSG_9_2(b0, b1) \
    do { \
        b0 = _mm_unpacklo_epi64(m1, m2); \
        b1 = _mm_blend_epi16(m3, m2, 0xF0); \
    } while (0)

#define LOAD_MSG_9_3(b0, b1) \
    do { \
        b0 = _mm_unpackhi_epi64(m7, m4); \
        b1 = _mm_unpackhi_epi64(m1, m6); \
    } while (0)

#define LOAD_MSG_9_4(b0, b1) \
    do { \
        b0 = _mm_alignr_epi8(m7, m5, 8); \
        b1 = _mm_unpacklo_epi64(m6, m0); \
    } while (0)

/* ---- rounds 10 and 11 load exactly the same words as rounds 0 and 1 ---- */
#define LOAD_MSG_10_1(b0, b1) LOAD_MSG_0_1(b0, b1)
#define LOAD_MSG_10_2(b0, b1) LOAD_MSG_0_2(b0, b1)
#define LOAD_MSG_10_3(b0, b1) LOAD_MSG_0_3(b0, b1)
#define LOAD_MSG_10_4(b0, b1) LOAD_MSG_0_4(b0, b1)

#define LOAD_MSG_11_1(b0, b1) LOAD_MSG_1_1(b0, b1)
#define LOAD_MSG_11_2(b0, b1) LOAD_MSG_1_2(b0, b1)
#define LOAD_MSG_11_3(b0, b1) LOAD_MSG_1_3(b0, b1)
#define LOAD_MSG_11_4(b0, b1) LOAD_MSG_1_4(b0, b1)

#endif
Loading…
Reference in new issue