Browse Source

Restored OclCache.

pull/1185/head
XMRig 5 years ago
parent
commit
ec1839d580
  1. 1
      CMakeLists.txt
  2. 6
      src/3rdparty/base32/base32.h
  3. 6
      src/backend/common/Thread.h
  4. 14
      src/backend/common/Workers.cpp
  5. 4
      src/backend/cpu/CpuWorker.cpp
  6. 2
      src/backend/cpu/CpuWorker.h
  7. 2
      src/backend/opencl/OclBackend.cpp
  8. 158
      src/backend/opencl/OclCache.cpp
  9. 13
      src/backend/opencl/OclCache.h
  10. 2
      src/backend/opencl/OclCache_unix.cpp
  11. 2
      src/backend/opencl/OclCache_win.cpp
  12. 4
      src/backend/opencl/OclConfig.cpp
  13. 2
      src/backend/opencl/OclConfig.h
  14. 3
      src/backend/opencl/OclLaunchData.cpp
  15. 4
      src/backend/opencl/OclLaunchData.h
  16. 12
      src/backend/opencl/OclWorker.cpp
  17. 2
      src/backend/opencl/OclWorker.h
  18. 10
      src/backend/opencl/cl/cn/cryptonight.cl
  19. 6
      src/backend/opencl/cl/cn/cryptonight2.cl
  20. 4
      src/backend/opencl/cl/cn/cryptonight_gpu.cl
  21. 2
      src/backend/opencl/cl/cn/cryptonight_r.cl
  22. 6
      src/backend/opencl/interfaces/IOclRunner.h
  23. 6
      src/backend/opencl/opencl.cmake
  24. 41
      src/backend/opencl/runners/OclBaseRunner.cpp
  25. 16
      src/backend/opencl/runners/OclBaseRunner.h
  26. 12
      src/backend/opencl/runners/OclCnRunner.cpp
  27. 22
      src/backend/opencl/wrappers/OclLib.cpp

1
CMakeLists.txt

@ -15,6 +15,7 @@ option(WITH_TLS "Enable OpenSSL support" ON)
option(WITH_ASM "Enable ASM PoW implementations" ON)
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
option(WITH_OPENCL "Enable OpenCL backend" OFF)
option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
option(BUILD_STATIC "Build static binary" OFF)
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)

6
src/3rdparty/base32/base32.h

@ -25,8 +25,8 @@
// All functions return the number of output bytes or -1 on error. If the
// output buffer is too small, the result will silently be truncated.
#ifndef _BASE32_H_
#define _BASE32_H_
#ifndef XMRIG_BASE32_H
#define XMRIG_BASE32_H
#include <stdint.h>
@ -65,4 +65,4 @@ int base32_encode(const uint8_t *data, int length, uint8_t *result, int bufSize)
}
#endif /* _BASE32_H_ */
#endif /* XMRIG_BASE32_H */

6
src/backend/common/Thread.h

@ -42,18 +42,18 @@ template<class T>
class Thread
{
public:
inline Thread(IBackend *backend, size_t index, const T &config) : m_index(index), m_config(config), m_backend(backend) {}
inline Thread(IBackend *backend, size_t id, const T &config) : m_id(id), m_config(config), m_backend(backend) {}
inline ~Thread() { m_thread.join(); delete m_worker; }
inline const T &config() const { return m_config; }
inline IBackend *backend() const { return m_backend; }
inline IWorker *worker() const { return m_worker; }
inline size_t index() const { return m_index; }
inline size_t id() const { return m_id; }
inline void setWorker(IWorker *worker) { m_worker = worker; }
inline void start(void (*callback) (void *)) { m_thread = std::thread(callback, this); }
private:
const size_t m_index = 0;
const size_t m_id = 0;
const T m_config;
IBackend *m_backend;
IWorker *m_worker = nullptr;

14
src/backend/common/Workers.cpp

@ -134,7 +134,7 @@ void xmrig::Workers<T>::tick(uint64_t)
return;
}
d_ptr->hashrate->add(handle->index(), handle->worker()->hashCount(), handle->worker()->timestamp());
d_ptr->hashrate->add(handle->id(), handle->worker()->hashCount(), handle->worker()->timestamp());
}
d_ptr->hashrate->updateHighest();
@ -175,19 +175,19 @@ xmrig::IWorker *xmrig::Workers<CpuLaunchData>::create(Thread<CpuLaunchData> *han
{
switch (handle->config().intensity) {
case 1:
return new CpuWorker<1>(handle->index(), handle->config());
return new CpuWorker<1>(handle->id(), handle->config());
case 2:
return new CpuWorker<2>(handle->index(), handle->config());
return new CpuWorker<2>(handle->id(), handle->config());
case 3:
return new CpuWorker<3>(handle->index(), handle->config());
return new CpuWorker<3>(handle->id(), handle->config());
case 4:
return new CpuWorker<4>(handle->index(), handle->config());
return new CpuWorker<4>(handle->id(), handle->config());
case 5:
return new CpuWorker<5>(handle->index(), handle->config());
return new CpuWorker<5>(handle->id(), handle->config());
}
return nullptr;
@ -201,7 +201,7 @@ template class Workers<CpuLaunchData>;
template<>
xmrig::IWorker *xmrig::Workers<OclLaunchData>::create(Thread<OclLaunchData> *handle)
{
return new OclWorker(handle->index(), handle->config());
return new OclWorker(handle->id(), handle->config());
}

4
src/backend/cpu/CpuWorker.cpp

@ -53,8 +53,8 @@ static constexpr uint32_t kReserveCount = 4096;
template<size_t N>
xmrig::CpuWorker<N>::CpuWorker(size_t index, const CpuLaunchData &data) :
Worker(index, data.affinity, data.priority),
xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
Worker(id, data.affinity, data.priority),
m_algorithm(data.algorithm),
m_assembly(data.assembly),
m_hwAES(data.hwAES),

2
src/backend/cpu/CpuWorker.h

@ -43,7 +43,7 @@ template<size_t N>
class CpuWorker : public Worker
{
public:
CpuWorker(size_t index, const CpuLaunchData &data);
CpuWorker(size_t id, const CpuLaunchData &data);
~CpuWorker() override;
protected:

2
src/backend/opencl/OclBackend.cpp

@ -260,7 +260,7 @@ void xmrig::OclBackend::setJob(const Job &job)
const OclConfig &cl = d_ptr->controller->config()->cl();
std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->devices, tag);
std::vector<OclLaunchData> threads = cl.get(d_ptr->controller->miner(), job.algorithm(), d_ptr->platform, d_ptr->devices, tag);
if (!d_ptr->threads.empty() && d_ptr->threads.size() == threads.size() && std::equal(d_ptr->threads.begin(), d_ptr->threads.end(), threads.begin())) {
return;
}

158
src/backend/opencl/OclCache.cpp

@ -23,4 +23,162 @@
*/
#include <fstream>
#include <map>
#include <mutex>
#include <sstream>
#include "3rdparty/base32/base32.h"
#include "backend/opencl/interfaces/IOclRunner.h"
#include "backend/opencl/OclCache.h"
#include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/log/Log.h"
#include "base/tools/Chrono.h"
#include "crypto/common/keccak.h"
namespace xmrig {
static const char *tag = MAGENTA_BG_BOLD(WHITE_BOLD_S " ocl ");
static std::mutex mutex;
static cl_program createFromSource(const IOclRunner *runner)
{
LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " YELLOW_BOLD("compiling..."), tag, runner->data().device.index());
cl_int ret;
cl_device_id device = runner->data().device.id();
const char *source = runner->source();
const uint64_t ts = Chrono::steadyMSecs();
cl_program program = OclLib::createProgramWithSource(runner->data().ctx, 1, &source, nullptr, &ret);
if (ret != CL_SUCCESS) {
return nullptr;
}
if (OclLib::buildProgram(program, 1, &device, runner->buildOptions()) != CL_SUCCESS) {
printf("BUILD LOG:\n%s\n", OclLib::getProgramBuildLog(program, device).data());
OclLib::releaseProgram(program);
return nullptr;
}
LOG_INFO("%s GPU " WHITE_BOLD("#%zu") " " GREEN_BOLD("compilation completed") BLACK_BOLD( " (%.3fs)"),
tag, runner->data().device.index(), (Chrono::steadyMSecs() - ts) / 1000.0);
return program;
}
static cl_program createFromBinary(const IOclRunner *runner, const std::string &fileName)
{
std::ifstream file(fileName, std::ofstream::in | std::ofstream::binary);
if (!file.good()) {
return nullptr;
}
std::ostringstream ss;
ss << file.rdbuf();
const std::string s = ss.str();
const size_t bin_size = s.size();
auto data_ptr = s.data();
cl_device_id device = runner->data().device.id();
cl_int clStatus;
cl_int ret;
cl_program program = OclLib::createProgramWithBinary(runner->data().ctx, 1, &device, &bin_size, reinterpret_cast<const unsigned char **>(&data_ptr), &clStatus, &ret);
if (ret != CL_SUCCESS) {
return nullptr;
}
if (OclLib::buildProgram(program, 1, &device) != CL_SUCCESS) {
OclLib::releaseProgram(program);
return nullptr;
}
return program;
}
} // namespace xmrig
cl_program xmrig::OclCache::build(const IOclRunner *runner)
{
std::lock_guard<std::mutex> lock(mutex);
if (Nonce::sequence(Nonce::OPENCL) == 0) {
return nullptr;
}
std::string fileName;
if (runner->data().cache) {
# ifdef _WIN32
fileName = prefix() + "\\xmrig\\.cache\\" + cacheKey(runner) + ".bin";
# else
fileName = prefix() + "/.cache/" + cacheKey(runner) + ".bin";
# endif
cl_program program = createFromBinary(runner, fileName);
if (program) {
return program;
}
}
cl_program program = createFromSource(runner);
if (runner->data().cache && program) {
save(program, fileName);
}
return program;
}
std::string xmrig::OclCache::cacheKey(const char *deviceKey, const char *options, const char *source)
{
std::string in(source);
in += options;
in += deviceKey;
uint8_t hash[200];
keccak(in.c_str(), in.size(), hash);
uint8_t result[32] = { 0 };
base32_encode(hash, 12, result, sizeof(result));
return reinterpret_cast<char *>(result);
}
std::string xmrig::OclCache::cacheKey(const IOclRunner *runner)
{
return cacheKey(runner->deviceKey(), runner->buildOptions(), runner->source());
}
void xmrig::OclCache::save(cl_program program, const std::string &fileName)
{
size_t size = 0;
if (OclLib::getProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size), &size) != CL_SUCCESS) {
return;
}
std::vector<char> binary(size);
char *data = binary.data();
if (OclLib::getProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(char *), &data) != CL_SUCCESS) {
return;
}
createDirectory();
std::ofstream file_stream;
file_stream.open(fileName, std::ofstream::out | std::ofstream::binary);
file_stream.write(binary.data(), static_cast<int64_t>(binary.size()));
file_stream.close();
}

13
src/backend/opencl/OclCache.h

@ -29,17 +29,26 @@
#include <string>
typedef struct _cl_program *cl_program;
namespace xmrig {
class IOclRunner;
class OclCache
{
public:
static cl_program build(const IOclRunner *runner);
static std::string cacheKey(const char *deviceKey, const char *options, const char *source);
static std::string cacheKey(const IOclRunner *runner);
private:
void createDirectory() const;
static std::string prefix();
static void createDirectory();
static void save(cl_program program, const std::string &fileName);
};

2
src/backend/opencl/OclCache_unix.cpp

@ -29,7 +29,7 @@
#include "backend/opencl/OclCache.h"
void xmrig::OclCache::createDirectory() const
void xmrig::OclCache::createDirectory()
{
std::string path = prefix() + "/.cache";
mkdir(path.c_str(), 0744);

2
src/backend/opencl/OclCache_win.cpp

@ -31,7 +31,7 @@
#include "backend/opencl/OclCache.h"
void xmrig::OclCache::createDirectory() const
void xmrig::OclCache::createDirectory()
{
std::string path = prefix() + "/xmrig";
_mkdir(path.c_str());

4
src/backend/opencl/OclConfig.cpp

@ -149,7 +149,7 @@ rapidjson::Value xmrig::OclConfig::toJSON(rapidjson::Document &doc) const
}
std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const std::vector<OclDevice> &devices, const char *tag) const
std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const
{
std::vector<OclLaunchData> out;
const OclThreads &threads = m_threads.get(algorithm);
@ -166,7 +166,7 @@ std::vector<xmrig::OclLaunchData> xmrig::OclConfig::get(const Miner *miner, cons
continue;
}
out.emplace_back(miner, algorithm, *this, thread, devices[thread.index()]);
out.emplace_back(miner, algorithm, *this, platform, thread, devices[thread.index()]);
}
return out;

2
src/backend/opencl/OclConfig.h

@ -42,7 +42,7 @@ public:
OclPlatform platform() const;
rapidjson::Value toJSON(rapidjson::Document &doc) const;
std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const std::vector<OclDevice> &devices, const char *tag) const;
std::vector<OclLaunchData> get(const Miner *miner, const Algorithm &algorithm, const OclPlatform &platform, const std::vector<OclDevice> &devices, const char *tag) const;
void read(const rapidjson::Value &value);
inline bool isCacheEnabled() const { return m_cache; }

3
src/backend/opencl/OclLaunchData.cpp

@ -28,11 +28,12 @@
#include "backend/opencl/OclConfig.h"
xmrig::OclLaunchData::OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclThread &thread, const OclDevice &device) :
xmrig::OclLaunchData::OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclPlatform &platform, const OclThread &thread, const OclDevice &device) :
algorithm(algorithm),
cache(config.isCacheEnabled()),
miner(miner),
device(device),
platform(platform),
thread(thread)
{
}

4
src/backend/opencl/OclLaunchData.h

@ -29,6 +29,7 @@
#include "backend/opencl/OclThread.h"
#include "backend/opencl/wrappers/OclDevice.h"
#include "backend/opencl/wrappers/OclPlatform.h"
#include "crypto/common/Algorithm.h"
#include "crypto/common/Nonce.h"
@ -46,7 +47,7 @@ class Miner;
class OclLaunchData
{
public:
OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclThread &thread, const OclDevice &device);
OclLaunchData(const Miner *miner, const Algorithm &algorithm, const OclConfig &config, const OclPlatform &platform, const OclThread &thread, const OclDevice &device);
bool isEqual(const OclLaunchData &other) const;
@ -60,6 +61,7 @@ public:
const bool cache;
const Miner *miner;
const OclDevice device;
const OclPlatform platform;
const OclThread thread;
};

12
src/backend/opencl/OclWorker.cpp

@ -48,15 +48,15 @@ static constexpr uint32_t kReserveCount = 4096;
xmrig::OclWorker::OclWorker(size_t index, const OclLaunchData &data) :
Worker(index, data.thread.affinity(), -1),
xmrig::OclWorker::OclWorker(size_t id, const OclLaunchData &data) :
Worker(id, data.thread.affinity(), -1),
m_algorithm(data.algorithm),
m_miner(data.miner)
{
switch (m_algorithm.family()) {
case Algorithm::RANDOM_X:
# ifdef XMRIG_ALGO_RANDOMX
m_runner = new OclRxRunner(index, data);
m_runner = new OclRxRunner(id, data);
# endif
break;
@ -67,9 +67,13 @@ xmrig::OclWorker::OclWorker(size_t index, const OclLaunchData &data) :
break;
default:
m_runner = new OclCnRunner(index, data);
m_runner = new OclCnRunner(id, data);
break;
}
if (m_runner) {
m_runner->build();
}
}

2
src/backend/opencl/OclWorker.h

@ -42,7 +42,7 @@ class IOclRunner;
class OclWorker : public Worker
{
public:
OclWorker(size_t index, const OclLaunchData &data);
OclWorker(size_t id, const OclLaunchData &data);
~OclWorker() override;
protected:

10
src/backend/opencl/cl/cn/cryptonight.cl

@ -573,7 +573,7 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, uin
if (gIdx < Threads)
# endif
{
#pragma unroll UNROLL_FACTOR
#pragma unroll CN_UNROLL
for (int i = 0; i < ITERATIONS; ++i) {
ulong c[2];
@ -686,7 +686,7 @@ __kernel void cn1_v2_monero(__global uint4 *Scratchpad, __global ulong *states,
uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR
#pragma unroll CN_UNROLL
for(int i = 0; i < ITERATIONS; ++i)
{
# ifdef __NV_CL_C_VERSION
@ -846,7 +846,7 @@ __kernel void cn1_v2_half(__global uint4 *Scratchpad, __global ulong *states, ui
uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR
#pragma unroll CN_UNROLL
for(int i = 0; i < 0x40000; ++i)
{
# ifdef __NV_CL_C_VERSION
@ -1074,7 +1074,7 @@ __kernel void cn1_tube(__global uint4 *Scratchpad, __global ulong *states, uint
{
uint idx0 = a[0];
#pragma unroll UNROLL_FACTOR
#pragma unroll CN_UNROLL
for (int i = 0; i < ITERATIONS; ++i) {
ulong c[2];
@ -1171,7 +1171,7 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
{
uint idx0 = a[0];
#pragma unroll UNROLL_FACTOR
#pragma unroll CN_UNROLL
for (int i = 0; i < ITERATIONS; ++i) {
ulong c[2];

6
src/backend/opencl/cl/cn/cryptonight2.cl

@ -75,7 +75,7 @@ __kernel void cn1_v2_rwz(__global uint4 *Scratchpad, __global ulong *states, uin
uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR
#pragma unroll CN_UNROLL
for(int i = 0; i < 0x60000; ++i)
{
# ifdef __NV_CL_C_VERSION
@ -235,7 +235,7 @@ __kernel void cn1_v2_zls(__global uint4 *Scratchpad, __global ulong *states, uin
uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR
#pragma unroll CN_UNROLL
for(int i = 0; i < 0x60000; ++i)
{
# ifdef __NV_CL_C_VERSION
@ -395,7 +395,7 @@ __kernel void cn1_v2_double(__global uint4 *Scratchpad, __global ulong *states,
uint2 division_result = as_uint2(states[12]);
uint sqrt_result = as_uint2(states[13]).s0;
#pragma unroll UNROLL_FACTOR
#pragma unroll CN_UNROLL
for(int i = 0; i < 0x100000; ++i)
{
# ifdef __NV_CL_C_VERSION

4
src/backend/opencl/cl/cn/cryptonight_gpu.cl

@ -199,7 +199,7 @@ struct SharedMemChunk
float4 va[16];
};
__attribute__((reqd_work_group_size(WORKSIZE_GPU * 16, 1, 1)))
__attribute__((reqd_work_group_size(WORKSIZE * 16, 1, 1)))
__kernel void cn1_cn_gpu(__global int *lpad_in, __global int *spad, uint numThreads)
{
const uint gIdx = getIdx();
@ -214,7 +214,7 @@ __kernel void cn1_cn_gpu(__global int *lpad_in, __global int *spad, uint numThre
__global int* lpad = (__global int*)((__global char*)lpad_in + MEMORY * (gIdx/16));
__local struct SharedMemChunk smem_in[WORKSIZE_GPU];
__local struct SharedMemChunk smem_in[WORKSIZE];
__local struct SharedMemChunk* smem = smem_in + chunk;
uint tid = get_local_id(0) % 16;

2
src/backend/opencl/cl/cn/cryptonight_r.cl

@ -66,7 +66,7 @@ __kernel void cn1_cryptonight_r_N(__global uint4 *Scratchpad, __global ulong *st
uint r2 = as_uint2(states[13]).s0;
uint r3 = as_uint2(states[13]).s1;
#pragma unroll UNROLL_FACTOR
#pragma unroll CN_UNROLL
for(int i = 0; i < ITERATIONS; ++i)
{
# ifdef __NV_CL_C_VERSION

6
src/backend/opencl/interfaces/IOclRunner.h

@ -33,6 +33,7 @@ namespace xmrig {
class Job;
class OclLaunchData;
class IOclRunner
@ -42,6 +43,11 @@ public:
virtual bool selfTest() const = 0;
virtual const char *buildOptions() const = 0;
virtual const char *deviceKey() const = 0;
virtual const char *source() const = 0;
virtual const OclLaunchData &data() const = 0;
virtual size_t threadId() const = 0;
virtual void build() = 0;
virtual void run(uint32_t *hashOutput) = 0;
virtual void set(const Job &job) = 0;
};

6
src/backend/opencl/opencl.cmake

@ -51,6 +51,12 @@ if (WITH_OPENCL)
list(APPEND HEADERS_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.h)
list(APPEND SOURCES_BACKEND_OPENCL src/backend/opencl/runners/OclRxRunner.cpp)
endif()
if (WITH_STRICT_CACHE)
add_definitions(/DXMRIG_STRICT_OPENCL_CACHE)
else()
remove_definitions(/DXMRIG_STRICT_OPENCL_CACHE)
endif()
else()
remove_definitions(/DXMRIG_FEATURE_OPENCL)

41
src/backend/opencl/runners/OclBaseRunner.cpp

@ -23,29 +23,49 @@
*/
#include "backend/opencl/cl/OclSource.h"
#include "backend/opencl/OclCache.h"
#include "backend/opencl/OclLaunchData.h"
#include "backend/opencl/runners/OclBaseRunner.h"
#include "backend/opencl/wrappers/OclLib.h"
#include "base/io/log/Log.h"
#include "base/net/stratum/Job.h"
xmrig::OclBaseRunner::OclBaseRunner(size_t, const OclLaunchData &data) :
xmrig::OclBaseRunner::OclBaseRunner(size_t id, const OclLaunchData &data) :
m_algorithm(data.algorithm),
m_ctx(data.ctx)
m_source(OclSource::get(data.algorithm)),
m_data(data),
m_threadId(id)
{
cl_int ret;
m_queue = OclLib::createCommandQueue(m_ctx, data.device.id(), &ret);
m_queue = OclLib::createCommandQueue(data.ctx, data.device.id(), &ret);
if (ret != CL_SUCCESS) {
return;
}
m_input = OclLib::createBuffer(m_ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize, nullptr, &ret);
m_output = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, nullptr, &ret);
m_input = OclLib::createBuffer(data.ctx, CL_MEM_READ_ONLY, Job::kMaxBlobSize, nullptr, &ret);
m_output = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * 0x100, nullptr, &ret);
m_deviceKey = data.device.name();
# ifdef XMRIG_STRICT_OPENCL_CACHE
m_deviceKey += ":";
m_deviceKey += data.platform.version();
m_deviceKey += ":";
m_deviceKey += OclLib::getDeviceString(data.device.id(), CL_DRIVER_VERSION);
# endif
# if defined(__x86_64__) || defined(_M_AMD64) || defined (__arm64__) || defined (__aarch64__)
m_deviceKey += ":64";
# endif
}
xmrig::OclBaseRunner::~OclBaseRunner()
{
OclLib::releaseProgram(m_program);
OclLib::releaseMemObject(m_input);
OclLib::releaseMemObject(m_output);
@ -55,14 +75,17 @@ xmrig::OclBaseRunner::~OclBaseRunner()
bool xmrig::OclBaseRunner::selfTest() const
{
return m_queue != nullptr && m_input != nullptr && m_output != nullptr && !m_options.empty();
return m_queue != nullptr && m_input != nullptr && m_output != nullptr && !m_options.empty() && m_source != nullptr;
}
const char *xmrig::OclBaseRunner::buildOptions() const
void xmrig::OclBaseRunner::build()
{
return m_options.c_str();
if (!selfTest()) {
return;
}
m_program = OclCache::build(this);
}

16
src/backend/opencl/runners/OclBaseRunner.h

@ -43,21 +43,31 @@ class OclLaunchData;
class OclBaseRunner : public IOclRunner
{
public:
OclBaseRunner(size_t index, const OclLaunchData &data);
OclBaseRunner(size_t id, const OclLaunchData &data);
~OclBaseRunner() override;
protected:
inline const char *buildOptions() const override { return m_options.c_str(); }
inline const char *deviceKey() const override { return m_deviceKey.c_str(); }
inline const char *source() const override { return m_source; }
inline const OclLaunchData &data() const override { return m_data; }
inline size_t threadId() const override { return m_threadId; }
bool selfTest() const override;
const char *buildOptions() const override;
void build() override;
void run(uint32_t *hashOutput) override;
void set(const Job &job) override;
protected:
Algorithm m_algorithm;
cl_command_queue m_queue = nullptr;
cl_context m_ctx;
cl_mem m_input = nullptr;
cl_mem m_output = nullptr;
cl_program m_program = nullptr;
const char *m_source;
const OclLaunchData &m_data;
const size_t m_threadId;
std::string m_deviceKey;
std::string m_options;
};

12
src/backend/opencl/runners/OclCnRunner.cpp

@ -38,16 +38,16 @@ xmrig::OclCnRunner::OclCnRunner(size_t index, const OclLaunchData &data) : OclBa
const size_t g_thd = data.thread.intensity();
cl_int ret;
m_scratchpads = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, data.algorithm.l3() * g_thd, nullptr, &ret);
m_scratchpads = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, data.algorithm.l3() * g_thd, nullptr, &ret);
if (ret != CL_SUCCESS) {
return;
}
m_states = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, 200 * g_thd, nullptr, &ret);
m_blake256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_groestl256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_jh256 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_skein512 = OclLib::createBuffer(m_ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_states = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, 200 * g_thd, nullptr, &ret);
m_blake256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_groestl256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_jh256 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
m_skein512 = OclLib::createBuffer(data.ctx, CL_MEM_READ_WRITE, sizeof(cl_uint) * (g_thd + 2), nullptr, &ret);
uint32_t stridedIndex = data.thread.stridedIndex();
if (data.device.vendorId() == OCL_VENDOR_NVIDIA) {

22
src/backend/opencl/wrappers/OclLib.cpp

@ -375,23 +375,13 @@ cl_int xmrig::OclLib::releaseCommandQueue(cl_command_queue command_queue)
assert(pReleaseCommandQueue != nullptr);
assert(pGetCommandQueueInfo != nullptr);
finish(command_queue);
cl_int ret = pReleaseCommandQueue(command_queue);
if (ret != CL_SUCCESS) {
LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseCommandQueue);
}
cl_uint refs = 0;
ret = pGetCommandQueueInfo(command_queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refs), &refs, nullptr);
if (ret == CL_SUCCESS && refs > 0) {
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
# ifndef NDEBUG
ret = pGetCommandQueueInfo(command_queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refs), &refs, nullptr);
assert(ret == CL_SUCCESS);
assert(refs == 0);
# endif
return ret;
}
@ -447,6 +437,10 @@ cl_int xmrig::OclLib::releaseProgram(cl_program program)
{
assert(pReleaseProgram != nullptr);
if (program == nullptr) {
return CL_SUCCESS;
}
const cl_int ret = pReleaseProgram(program);
if (ret != CL_SUCCESS) {
LOG_ERR(kErrorTemplate, OclError::toString(ret), kReleaseProgram);
@ -500,6 +494,8 @@ cl_program xmrig::OclLib::createProgramWithBinary(cl_context context, cl_uint nu
auto result = pCreateProgramWithBinary(context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret);
if (*errcode_ret != CL_SUCCESS) {
LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithBinary);
return nullptr;
}
return result;
@ -513,6 +509,8 @@ cl_program xmrig::OclLib::createProgramWithSource(cl_context context, cl_uint co
auto result = pCreateProgramWithSource(context, count, strings, lengths, errcode_ret);
if (*errcode_ret != CL_SUCCESS) {
LOG_ERR(kErrorTemplate, OclError::toString(*errcode_ret), kCreateProgramWithSource);
return nullptr;
}
return result;

Loading…
Cancel
Save