Use external script to prepare OpenCL source.

5 years ago · fcfb738ded
17 changed files with 2949 additions and 694 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,4 @@
 /build
 /CMakeLists.txt.user
+/.idea
+/src/backend/opencl/cl/cn/cryptonight_gen.cl
--- a/package.json
+++ b/package.json
@ -0,0 +1,23 @@
+{
+  "name": "xmrig",
+  "version": "3.0.0",
+  "description": "RandomX, CryptoNight and Argon2 miner",
+  "main": "index.js",
+  "directories": {
+    "doc": "doc"
+  },
+  "scripts": {
+    "build": "node scripts/generate_cl.js"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/xmrig/xmrig.git"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "GPLv3",
+  "bugs": {
+    "url": "https://github.com/xmrig/xmrig/issues"
+  },
+  "homepage": "https://github.com/xmrig/xmrig#readme"
+}
--- a/scripts/generate_cl.js
+++ b/scripts/generate_cl.js
@ -0,0 +1,32 @@
+#!/usr/bin/env node
+
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+const { text2h, addIncludes } = require('./js/opencl');
+const cwd = process.cwd();
+
+
+function cn()
+{
+    process.chdir(cwd);
+    process.chdir(path.resolve('src/backend/opencl/cl/cn'));
+
+    const cn = addIncludes('cryptonight.cl', [
+        'algorithm.cl',
+        'wolf-aes.cl',
+        'wolf-skein.cl',
+        'jh.cl',
+        'blake256.cl',
+        'groestl256.cl',
+        'fast_int_math_v2.cl',
+        'fast_div_heavy.cl'
+    ]);
+
+    //fs.writeFileSync('cryptonight_gen.cl', cn);
+    fs.writeFileSync('cryptonight_cl.h', text2h(cn, 'xmrig', 'cryptonight_cl'));
+}
+
+
+cn();
--- a/scripts/js/opencl.js
+++ b/scripts/js/opencl.js
@ -0,0 +1,72 @@
+'use strict';
+
+const fs = require('fs');
+
+
+function bin2h(buf, namespace, name)
+{
+    const size = buf.byteLength;
+    let out    = `#pragma once\n\nnamespace ${namespace} {\n\nstatic unsigned char ${name}[${size}] = {\n    `;
+
+    let b = 32;
+    for (let i = 0; i < size; i++) {
+        out += `0x${buf.readUInt8(i).toString(16).padStart(2, '0')}${size - i > 1 ? ',' : ''}`;
+
+        if (--b === 0) {
+            b = 32;
+            out += '\n    ';
+        }
+    }
+
+    out += `\n};\n\n} // namespace ${namespace}\n`;
+
+    return out;
+}
+
+
+function text2h(text, namespace, name)
+{
+    const buf  = Buffer.from(text);
+    const size = buf.byteLength;
+    let out    = `#pragma once\n\nnamespace ${namespace} {\n\nstatic char ${name}[${size + 1}] = {\n    `;
+
+    let b = 32;
+    for (let i = 0; i < size; i++) {
+        out += `0x${buf.readUInt8(i).toString(16).padStart(2, '0')},`;
+
+        if (--b === 0) {
+            b = 32;
+            out += '\n    ';
+        }
+    }
+
+    out += '0x00';
+
+    out += `\n};\n\n} // namespace ${namespace}\n`;
+
+    return out;
+}
+
+
+function addInclude(input, name)
+{
+    return input.replace(`#include "${name}"`, fs.readFileSync(name, 'utf8'));
+}
+
+
+function addIncludes(inputFileName, names)
+{
+    let data = fs.readFileSync(inputFileName, 'utf8');
+
+    for (let name of names) {
+        data = addInclude(data, name);
+    }
+
+    return data;
+}
+
+
+module.exports.bin2h        = bin2h;
+module.exports.text2h       = text2h;
+module.exports.addInclude   = addInclude;
+module.exports.addIncludes  = addIncludes;
--- a/src/backend/opencl/OclBackend.cpp
+++ b/src/backend/opencl/OclBackend.cpp
@ -29,7 +29,6 @@
 #include "backend/common/Hashrate.h"
 #include "backend/common/interfaces/IWorker.h"
 #include "backend/common/Workers.h"
-#include "backend/opencl/cl/OclSource.h"
 #include "backend/opencl/OclBackend.h"
 #include "backend/opencl/OclConfig.h"
 #include "backend/opencl/OclLaunchData.h"
@ -96,8 +95,6 @@ public:
    inline OclBackendPrivate(Controller *controller) :
        controller(controller)
    {
-        OclSource::init();
-
        init(controller->config()->cl());
    }

--- a/src/backend/opencl/cl/OclSource.cpp
+++ b/src/backend/opencl/cl/OclSource.cpp
@ -23,75 +23,16 @@
 */


-#include <string>
-#include <regex>
-
-
+#include "backend/opencl/cl/cn/cryptonight_cl.h"
 #include "backend/opencl/cl/OclSource.h"
 #include "crypto/common/Algorithm.h"


-namespace xmrig {
-
-
-static std::string cn_source;
-
-
-} // namespace xmrig
-
-
-
 const char *xmrig::OclSource::get(const Algorithm &algorithm)
 {
    if (algorithm.family() == Algorithm::RANDOM_X) {
        return nullptr; // FIXME
    }

-    return cn_source.c_str();
-}
-
-
-void xmrig::OclSource::init()
-{
-    const char *cryptonightCL =
-        #include "./cn/cryptonight.cl"
-    ;
-    const char *cryptonightCL2 =
-        #include "./cn/cryptonight2.cl"
-    ;
-    const char *blake256CL =
-        #include "./cn/blake256.cl"
-    ;
-    const char *groestl256CL =
-        #include "./cn/groestl256.cl"
-    ;
-    const char *jhCL =
-        #include "./cn/jh.cl"
-    ;
-    const char *wolfAesCL =
-        #include "./cn/wolf-aes.cl"
-    ;
-    const char *wolfSkeinCL =
-        #include "./cn/wolf-skein.cl"
-    ;
-    const char *fastIntMathV2CL =
-        #include "./cn/fast_int_math_v2.cl"
-    ;
-    const char *fastDivHeavyCL =
-        #include "./cn/fast_div_heavy.cl"
-    ;
-    const char *cryptonight_gpu =
-        #include "./cn/cryptonight_gpu.cl"
-    ;
-
-    cn_source.append(cryptonightCL);
-    cn_source.append(cryptonightCL2);
-    cn_source = std::regex_replace(cn_source, std::regex("XMRIG_INCLUDE_WOLF_AES"),         wolfAesCL);
-    cn_source = std::regex_replace(cn_source, std::regex("XMRIG_INCLUDE_WOLF_SKEIN"),       wolfSkeinCL);
-    cn_source = std::regex_replace(cn_source, std::regex("XMRIG_INCLUDE_JH"),               jhCL);
-    cn_source = std::regex_replace(cn_source, std::regex("XMRIG_INCLUDE_BLAKE256"),         blake256CL);
-    cn_source = std::regex_replace(cn_source, std::regex("XMRIG_INCLUDE_GROESTL256"),       groestl256CL);
-    cn_source = std::regex_replace(cn_source, std::regex("XMRIG_INCLUDE_FAST_INT_MATH_V2"), fastIntMathV2CL);
-    cn_source = std::regex_replace(cn_source, std::regex("XMRIG_INCLUDE_FAST_DIV_HEAVY"),   fastDivHeavyCL);
-    cn_source = std::regex_replace(cn_source, std::regex("XMRIG_INCLUDE_CN_GPU"),           cryptonight_gpu);
+    return cryptonight_cl;
 }
--- a/src/backend/opencl/cl/cn/algorithm.cl
+++ b/src/backend/opencl/cl/cn/algorithm.cl
@ -0,0 +1,27 @@
+enum Algorithm {
+    ALGO_INVALID = -1,
+    ALGO_CN_0,          // "cn/0"             CryptoNight (original).
+    ALGO_CN_1,          // "cn/1"             CryptoNight variant 1 also known as Monero7 and CryptoNightV7.
+    ALGO_CN_2,          // "cn/2"             CryptoNight variant 2.
+    ALGO_CN_R,          // "cn/r"             CryptoNightR (Monero's variant 4).
+    ALGO_CN_FAST,       // "cn/fast"          CryptoNight variant 1 with half iterations.
+    ALGO_CN_HALF,       // "cn/half"          CryptoNight variant 2 with half iterations (Masari/Torque).
+    ALGO_CN_XAO,        // "cn/xao"           CryptoNight variant 0 (modified, Alloy only).
+    ALGO_CN_RTO,        // "cn/rto"           CryptoNight variant 1 (modified, Arto only).
+    ALGO_CN_RWZ,        // "cn/rwz"           CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft).
+    ALGO_CN_ZLS,        // "cn/zls"           CryptoNight variant 2 with 3/4 iterations (Zelerius).
+    ALGO_CN_DOUBLE,     // "cn/double"        CryptoNight variant 2 with double iterations (X-CASH).
+    ALGO_CN_GPU,        // "cn/gpu"           CryptoNight-GPU (Ryo).
+    ALGO_CN_LITE_0,     // "cn-lite/0"        CryptoNight-Lite variant 0.
+    ALGO_CN_LITE_1,     // "cn-lite/1"        CryptoNight-Lite variant 1.
+    ALGO_CN_HEAVY_0,    // "cn-heavy/0"       CryptoNight-Heavy (4 MB).
+    ALGO_CN_HEAVY_TUBE, // "cn-heavy/tube"    CryptoNight-Heavy (modified, TUBE only).
+    ALGO_CN_HEAVY_XHV,  // "cn-heavy/xhv"     CryptoNight-Heavy (modified, Haven Protocol only).
+    ALGO_CN_PICO_0,     // "cn-pico"          CryptoNight Turtle (TRTL)
+    ALGO_RX_0,          // "rx/0"             RandomX (reference configuration).
+    ALGO_RX_WOW,        // "rx/wow"           RandomWOW (Wownero).
+    ALGO_RX_LOKI,       // "rx/loki"          RandomXL (Loki).
+    ALGO_AR2_CHUKWA,    // "argon2/chukwa"    Argon2id (Chukwa).
+    ALGO_AR2_WRKZ,      // "argon2/wrkz"      Argon2id (WRKZ)
+    ALGO_MAX
+};
--- a/src/backend/opencl/cl/cn/blake256.cl
+++ b/src/backend/opencl/cl/cn/blake256.cl
@ -1,4 +1,3 @@
-R"===(
 /*
 * blake256 kernel implementation.
 *
@ -90,4 +89,3 @@ __constant static const sph_u32  c_u256[16] = {
    v[b] ^= v[c]; \
 	v[b] = rotate(v[b], 25U); \
 }
-)==="
--- a/src/backend/opencl/cl/cn/cryptonight.cl
+++ b/src/backend/opencl/cl/cn/cryptonight.cl
@ -1,4 +1,3 @@
-R"===(
 /*
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@ -19,39 +18,16 @@ R"===(
 #   pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
 #endif

-//#include "opencl/wolf-aes.cl"
-XMRIG_INCLUDE_WOLF_AES
-//#include "opencl/wolf-skein.cl"
-XMRIG_INCLUDE_WOLF_SKEIN
-//#include "opencl/jh.cl"
-XMRIG_INCLUDE_JH
-//#include "opencl/blake256.cl"
-XMRIG_INCLUDE_BLAKE256
-//#include "opencl/groestl256.cl"
-XMRIG_INCLUDE_GROESTL256
-//#include "fast_int_math_v2.cl"
-XMRIG_INCLUDE_FAST_INT_MATH_V2
-//#include "fast_div_heavy.cl"
-XMRIG_INCLUDE_FAST_DIV_HEAVY
-
-
-#define VARIANT_0    0  // Original CryptoNight or CryptoNight-Heavy
-#define VARIANT_1    1  // CryptoNight variant 1 also known as Monero7 and CryptoNightV7
-#define VARIANT_TUBE 2  // Modified CryptoNight Lite variant 1 with XOR (IPBC/TUBE only)
-#define VARIANT_XTL  3  // Modified CryptoNight variant 1 (Stellite only)
-#define VARIANT_MSR  4  // Modified CryptoNight variant 1 (Masari only)
-#define VARIANT_XHV  5  // Modified CryptoNight-Heavy (Haven Protocol only)
-#define VARIANT_XAO  6  // Modified CryptoNight variant 0 (Alloy only)
-#define VARIANT_RTO  7  // Modified CryptoNight variant 1 (Arto only)
-#define VARIANT_2    8  // CryptoNight variant 2
-#define VARIANT_HALF 9  // CryptoNight variant 2 with half iterations (Masari/Stellite)
-#define VARIANT_TRTL 10 // CryptoNight Turtle (TRTL)
-#define VARIANT_GPU  11 // CryptoNight-GPU (Ryo)
-
-#define CRYPTONIGHT       0 /* CryptoNight (2 MB) */
-#define CRYPTONIGHT_LITE  1 /* CryptoNight (1 MB) */
-#define CRYPTONIGHT_HEAVY 2 /* CryptoNight (4 MB) */
-#define CRYPTONIGHT_PICO  3 /* CryptoNight (256 KB) */
+
+#include "algorithm.cl"
+#include "wolf-aes.cl"
+#include "wolf-skein.cl"
+#include "jh.cl"
+#include "blake256.cl"
+#include "groestl256.cl"
+#include "fast_int_math_v2.cl"
+#include "fast_div_heavy.cl"
+

 #if defined(__NV_CL_C_VERSION) && STRIDED_INDEX != 0
 #   undef STRIDED_INDEX
@ -71,6 +47,7 @@ static const __constant ulong keccakf_rndc[24] =
    0x8000000000008080, 0x0000000080000001, 0x8000000080008008
 };

+
 static const __constant uchar sbox[256] =
 {
    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
@ -92,75 +69,27 @@ static const __constant uchar sbox[256] =
 };


-void keccakf1600(ulong *s)
-{
-    for(int i = 0; i < 24; ++i)
-    {
-        ulong bc[5], tmp1, tmp2;
-        bc[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20] ^ rotate(s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22], 1UL);
-        bc[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21] ^ rotate(s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23], 1UL);
-        bc[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22] ^ rotate(s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24], 1UL);
-        bc[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23] ^ rotate(s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20], 1UL);
-        bc[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24] ^ rotate(s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21], 1UL);
-
-        tmp1 = s[1] ^ bc[0];
-
-        s[0] ^= bc[4];
-        s[1] = rotate(s[6] ^ bc[0], 44UL);
-        s[6] = rotate(s[9] ^ bc[3], 20UL);
-        s[9] = rotate(s[22] ^ bc[1], 61UL);
-        s[22] = rotate(s[14] ^ bc[3], 39UL);
-        s[14] = rotate(s[20] ^ bc[4], 18UL);
-        s[20] = rotate(s[2] ^ bc[1], 62UL);
-        s[2] = rotate(s[12] ^ bc[1], 43UL);
-        s[12] = rotate(s[13] ^ bc[2], 25UL);
-        s[13] = rotate(s[19] ^ bc[3], 8UL);
-        s[19] = rotate(s[23] ^ bc[2], 56UL);
-        s[23] = rotate(s[15] ^ bc[4], 41UL);
-        s[15] = rotate(s[4] ^ bc[3], 27UL);
-        s[4] = rotate(s[24] ^ bc[3], 14UL);
-        s[24] = rotate(s[21] ^ bc[0], 2UL);
-        s[21] = rotate(s[8] ^ bc[2], 55UL);
-        s[8] = rotate(s[16] ^ bc[0], 35UL);
-        s[16] = rotate(s[5] ^ bc[4], 36UL);
-        s[5] = rotate(s[3] ^ bc[2], 28UL);
-        s[3] = rotate(s[18] ^ bc[2], 21UL);
-        s[18] = rotate(s[17] ^ bc[1], 15UL);
-        s[17] = rotate(s[11] ^ bc[0], 10UL);
-        s[11] = rotate(s[7] ^ bc[1], 6UL);
-        s[7] = rotate(s[10] ^ bc[4], 3UL);
-        s[10] = rotate(tmp1, 1UL);
-
-        tmp1 = s[0]; tmp2 = s[1]; s[0] = bitselect(s[0] ^ s[2], s[0], s[1]); s[1] = bitselect(s[1] ^ s[3], s[1], s[2]); s[2] = bitselect(s[2] ^ s[4], s[2], s[3]); s[3] = bitselect(s[3] ^ tmp1, s[3], s[4]); s[4] = bitselect(s[4] ^ tmp2, s[4], tmp1);
-        tmp1 = s[5]; tmp2 = s[6]; s[5] = bitselect(s[5] ^ s[7], s[5], s[6]); s[6] = bitselect(s[6] ^ s[8], s[6], s[7]); s[7] = bitselect(s[7] ^ s[9], s[7], s[8]); s[8] = bitselect(s[8] ^ tmp1, s[8], s[9]); s[9] = bitselect(s[9] ^ tmp2, s[9], tmp1);
-        tmp1 = s[10]; tmp2 = s[11]; s[10] = bitselect(s[10] ^ s[12], s[10], s[11]); s[11] = bitselect(s[11] ^ s[13], s[11], s[12]); s[12] = bitselect(s[12] ^ s[14], s[12], s[13]); s[13] = bitselect(s[13] ^ tmp1, s[13], s[14]); s[14] = bitselect(s[14] ^ tmp2, s[14], tmp1);
-        tmp1 = s[15]; tmp2 = s[16]; s[15] = bitselect(s[15] ^ s[17], s[15], s[16]); s[16] = bitselect(s[16] ^ s[18], s[16], s[17]); s[17] = bitselect(s[17] ^ s[19], s[17], s[18]); s[18] = bitselect(s[18] ^ tmp1, s[18], s[19]); s[19] = bitselect(s[19] ^ tmp2, s[19], tmp1);
-        tmp1 = s[20]; tmp2 = s[21]; s[20] = bitselect(s[20] ^ s[22], s[20], s[21]); s[21] = bitselect(s[21] ^ s[23], s[21], s[22]); s[22] = bitselect(s[22] ^ s[24], s[22], s[23]); s[23] = bitselect(s[23] ^ tmp1, s[23], s[24]); s[24] = bitselect(s[24] ^ tmp2, s[24], tmp1);
-        s[0] ^= keccakf_rndc[i];
-    }
-}
-
 static const __constant uint keccakf_rotc[24] =
 {
    1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
    27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
 };

+
 static const __constant uint keccakf_piln[24] =
 {
    10, 7,  11, 17, 18, 3, 5,  16, 8,  21, 24, 4,
    15, 23, 19, 13, 12, 2, 20, 14, 22, 9,  6,  1
 };

+
 void keccakf1600_1(ulong *st)
 {
    int i, round;
    ulong t, bc[5];

    #pragma unroll 1
-    for(round = 0; round < 24; ++round)
-    {
-
+    for (round = 0; round < 24; ++round) {
        // Theta
        bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
        bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
@ -180,7 +109,7 @@ void keccakf1600_1(ulong *st)

        // Rho Pi
        t = st[1];
-        #pragma unroll
+        #pragma unroll 1
        for (i = 0; i < 24; ++i) {
            bc[0] = st[keccakf_piln[i]];
            st[keccakf_piln[i]] = rotate(t, (ulong)keccakf_rotc[i]);
@ -188,16 +117,18 @@ void keccakf1600_1(ulong *st)
        }

        #pragma unroll 1
-        for(int i = 0; i < 25; i += 5)
-        {
+        for (int i = 0; i < 25; i += 5) {
            ulong tmp[5];

            #pragma unroll 1
-            for(int x = 0; x < 5; ++x)
+            for (int x = 0; x < 5; ++x) {
                tmp[x] = bitselect(st[i + x] ^ st[i + ((x + 2) % 5)], st[i + x], st[i + ((x + 1) % 5)]);
+            }

            #pragma unroll 1
-            for(int x = 0; x < 5; ++x) st[i + x] = tmp[x];
+            for (int x = 0; x < 5; ++x) {
+                st[i + x] = tmp[x];
+            }
        }

        //  Iota
@ -205,8 +136,6 @@ void keccakf1600_1(ulong *st)
    }
 }

-)==="
-R"===(

 void keccakf1600_2(__local ulong *st)
 {
@ -214,56 +143,54 @@ void keccakf1600_2(__local ulong *st)
    ulong t, bc[5];

    #pragma unroll 1
-    for (round = 0; round < 24; ++round)
-    {
+    for (round = 0; round < 24; ++round) {
        bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20] ^ rotate(st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22], 1UL);
        bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21] ^ rotate(st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23], 1UL);
        bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22] ^ rotate(st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24], 1UL);
        bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23] ^ rotate(st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20], 1UL);
        bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24] ^ rotate(st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21], 1UL);

-        st[0] ^= bc[4];
-        st[5] ^= bc[4];
+        st[0]  ^= bc[4];
+        st[5]  ^= bc[4];
        st[10] ^= bc[4];
        st[15] ^= bc[4];
        st[20] ^= bc[4];

-        st[1] ^= bc[0];
-        st[6] ^= bc[0];
+        st[1]  ^= bc[0];
+        st[6]  ^= bc[0];
        st[11] ^= bc[0];
        st[16] ^= bc[0];
        st[21] ^= bc[0];

-        st[2] ^= bc[1];
-        st[7] ^= bc[1];
+        st[2]  ^= bc[1];
+        st[7]  ^= bc[1];
        st[12] ^= bc[1];
        st[17] ^= bc[1];
        st[22] ^= bc[1];

-        st[3] ^= bc[2];
-        st[8] ^= bc[2];
+        st[3]  ^= bc[2];
+        st[8]  ^= bc[2];
        st[13] ^= bc[2];
        st[18] ^= bc[2];
        st[23] ^= bc[2];

-        st[4] ^= bc[3];
-        st[9] ^= bc[3];
+        st[4]  ^= bc[3];
+        st[9]  ^= bc[3];
        st[14] ^= bc[3];
        st[19] ^= bc[3];
        st[24] ^= bc[3];

        // Rho Pi
        t = st[1];
-        #pragma unroll
+        #pragma unroll 1
        for (i = 0; i < 24; ++i) {
            bc[0] = st[keccakf_piln[i]];
            st[keccakf_piln[i]] = rotate(t, (ulong)keccakf_rotc[i]);
            t = bc[0];
        }

-        #pragma unroll
-        for(int i = 0; i < 25; i += 5)
-        {
+        #pragma unroll 1
+        for (int i = 0; i < 25; i += 5) {
            ulong tmp1 = st[i], tmp2 = st[i + 1];

            st[i] = bitselect(st[i] ^ st[i + 2], st[i], st[i + 1]);
@ -278,40 +205,17 @@ void keccakf1600_2(__local ulong *st)
    }
 }

-)==="
-R"===(
-
-void CNKeccak(ulong *output, ulong *input)
-{
-    ulong st[25];
-
-    // Copy 72 bytes
-    for(int i = 0; i < 9; ++i) st[i] = input[i];
-
-    // Last four and '1' bit for padding
-    //st[9] = as_ulong((uint2)(((uint *)input)[18], 0x00000001U));
-
-    st[9] = (input[9] & 0x00000000FFFFFFFFUL) | 0x0000000100000000UL;
-
-    for(int i = 10; i < 25; ++i) st[i] = 0x00UL;

-    // Last bit of padding
-    st[16] = 0x8000000000000000UL;
-
-    keccakf1600_1(st);
+static const __constant uchar rcon[8] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40 };

-    for(int i = 0; i < 25; ++i) output[i] = st[i];
-}

-static const __constant uchar rcon[8] = { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40 };
+#define SubWord(inw) ((sbox[BYTE(inw, 3)] << 24) | (sbox[BYTE(inw, 2)] << 16) | (sbox[BYTE(inw, 1)] << 8) | sbox[BYTE(inw, 0)])

-#define SubWord(inw)        ((sbox[BYTE(inw, 3)] << 24) | (sbox[BYTE(inw, 2)] << 16) | (sbox[BYTE(inw, 1)] << 8) | sbox[BYTE(inw, 0)])

 void AESExpandKey256(uint *keybuf)
 {
    //#pragma unroll 4
-    for(uint c = 8, i = 1; c < 40; ++c)
-    {
+    for (uint c = 8, i = 1; c < 40; ++c) {
        // For 256-bit keys, an sbox permutation is done every other 4th uint generated, AND every 8th
        uint t = ((!(c & 7)) || ((c & 7) == 4)) ? SubWord(keybuf[c - 1]) : keybuf[c - 1];

@ -322,8 +226,10 @@ void AESExpandKey256(uint *keybuf)
    }
 }

+
 #define MEM_CHUNK (1 << MEM_CHUNK_EXPONENT)

+
 #if (STRIDED_INDEX == 0)
 #   define IDX(x)   (x)
 #elif (STRIDED_INDEX == 1)
@ -336,6 +242,7 @@ void AESExpandKey256(uint *keybuf)
 #   define IDX(x)   (((x) % MEM_CHUNK) + ((x) / MEM_CHUNK) * WORKSIZE * MEM_CHUNK)
 #endif

+
 inline ulong getIdx()
 {
 #   if (STRIDED_INDEX == 0 || STRIDED_INDEX == 1 || STRIDED_INDEX == 2)
@ -343,11 +250,13 @@ inline ulong getIdx()
 #   endif
 }

+
 //#include "opencl/cryptonight_gpu.cl"
-XMRIG_INCLUDE_CN_GPU
+//XMRIG_INCLUDE_CN_GPU

 #define mix_and_propagate(xin) (xin)[(get_local_id(1)) % 8][get_local_id(0)] ^ (xin)[(get_local_id(1) + 1) % 8][get_local_id(0)]

+
 __attribute__((reqd_work_group_size(8, 8, 1)))
 __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ulong *states, uint Threads)
 {
@ -388,8 +297,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
        Scratchpad += (gIdx / WORKSIZE) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * (gIdx % WORKSIZE);
 #       endif

-        if (get_local_id(1) == 0)
-        {
+        if (get_local_id(1) == 0) {
            __local ulong* State = State_buf + get_local_id(0) * 25;

            ((__local ulong8 *)State)[0] = vload8(0, input);
@ -421,7 +329,7 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul

            keccakf1600_2(State);

-            #pragma unroll
+            #pragma unroll 1
            for (int i = 0; i < 25; ++i) {
                states[i] = State[i];
            }
@ -495,25 +403,20 @@ __kernel void cn0(__global ulong *input, __global uint4 *Scratchpad, __global ul
            Scratchpad[IDX(i + local_id1)] = text;
        }
    }
+
    mem_fence(CLK_GLOBAL_MEM_FENCE);
 }

-)==="
-R"===(

 #define VARIANT1_1(p) \
        uint table = 0x75310U; \
        uint index = (((p).s2 >> 26) & 12) | (((p).s2 >> 23) & 2); \
        (p).s2 ^= ((table >> index) & 0x30U) << 24

-#define VARIANT1_1_XTL(p) \
-        uint table  = 0x75310U; \
-        uint offset = variant == VARIANT_XTL ? 27 : 26; \
-        uint index  = (((p).s2 >> offset) & 12) | (((p).s2 >> 23) & 2); \
-        (p).s2 ^= ((table >> index) & 0x30U) << 24

 #define VARIANT1_2(p) ((uint2 *)&(p))[0] ^= tweak1_2_0

+
 #define VARIANT1_INIT() \
        tweak1_2 = as_uint2(input[4]); \
        tweak1_2.s0 >>= 24; \
@ -521,8 +424,9 @@ R"===(
        tweak1_2.s1 = (uint) get_global_id(0); \
        tweak1_2 ^= as_uint2(states[24])

+
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
+__kernel void cn1_v1(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
 {
    ulong a[2], b[2];
    __local uint AES0[256], AES1[256];
@ -581,7 +485,7 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, uin
            ((uint4 *)c)[0] = AES_Round_Two_Tables(AES0, AES1, ((uint4 *)c)[0], ((uint4 *)a)[0]);

            b_x ^= ((uint4 *)c)[0];
-            VARIANT1_1_XTL(b_x);
+            VARIANT1_1(b_x);
            Scratchpad[IDX((as_uint2(a[0]).s0 & MASK) >> 4)] = b_x;

            uint4 tmp;
@ -591,9 +495,9 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, uin
            a[0] += mul_hi(c[0], as_ulong2(tmp).s0);

            uint2 tweak1_2_0 = tweak1_2;
-            if (variant == VARIANT_RTO) {
-                tweak1_2_0 ^= ((uint2 *)&(a[0]))[0];
-            }
+#           if ALGO == ALGO_CN_RTO
+            tweak1_2_0 ^= ((uint2 *)&(a[0]))[0];
+#           endif

            VARIANT1_2(a[1]);
            Scratchpad[IDX((as_uint2(c[0]).s0 & MASK) >> 4)] = ((uint4 *)a)[0];
@ -604,15 +508,13 @@ __kernel void cn1_monero(__global uint4 *Scratchpad, __global ulong *states, uin
            b_x = ((uint4 *)c)[0];
        }
    }
+
    mem_fence(CLK_GLOBAL_MEM_FENCE);
 }


-)==="
-R"===(
-
 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void cn1_v2_monero(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
+__kernel void cn1_v2(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
 {
 #   if (ALGO == CRYPTONIGHT || ALGO == CRYPTONIGHT_PICO)
    ulong a[2], b[4];
@ -768,356 +670,6 @@ __kernel void cn1_v2_monero(__global uint4 *Scratchpad, __global ulong *states,
 #   endif
 }

-)==="
-R"===(
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void cn1_v2_half(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
-{
-#   if (ALGO == CRYPTONIGHT)
-    ulong a[2], b[4];
-    __local uint AES0[256], AES1[256], AES2[256], AES3[256];
-
-    const ulong gIdx = getIdx();
-
-    for(int i = get_local_id(0); i < 256; i += WORKSIZE)
-    {
-        const uint tmp = AES0_C[i];
-        AES0[i] = tmp;
-        AES1[i] = rotate(tmp, 8U);
-        AES2[i] = rotate(tmp, 16U);
-        AES3[i] = rotate(tmp, 24U);
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-#   if (COMP_MODE == 1)
-    // do not use early return here
-    if (gIdx < Threads)
-#   endif
-    {
-        states += 25 * gIdx;
-
-#       if defined(__NV_CL_C_VERSION)
-            Scratchpad += gIdx * (0x40000 >> 2);
-#       else
-#           if (STRIDED_INDEX == 0)
-                Scratchpad += gIdx * (MEMORY >> 4);
-#           elif (STRIDED_INDEX == 1)
-                Scratchpad += gIdx;
-#           elif (STRIDED_INDEX == 2)
-                Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0);
-#           endif
-#       endif
-
-        a[0] = states[0] ^ states[4];
-        a[1] = states[1] ^ states[5];
-
-        b[0] = states[2] ^ states[6];
-        b[1] = states[3] ^ states[7];
-        b[2] = states[8] ^ states[10];
-        b[3] = states[9] ^ states[11];
-    }
-
-    ulong2 bx0 = ((ulong2 *)b)[0];
-    ulong2 bx1 = ((ulong2 *)b)[1];
-
-    mem_fence(CLK_LOCAL_MEM_FENCE);
-
-#   ifdef __NV_CL_C_VERSION
-        __local uint16 scratchpad_line_buf[WORKSIZE];
-        __local uint16* scratchpad_line = scratchpad_line_buf + get_local_id(0);
-#       define SCRATCHPAD_CHUNK(N) (*(__local uint4*)((__local uchar*)(scratchpad_line) + (idx1 ^ (N << 4))))
-#   else
-#       if (STRIDED_INDEX == 0)
-#           define SCRATCHPAD_CHUNK(N) (*(__global uint4*)((__global uchar*)(Scratchpad) + (idx ^ (N << 4))))
-#       elif (STRIDED_INDEX == 1)
-#           define SCRATCHPAD_CHUNK(N) (*(__global uint4*)((__global uchar*)(Scratchpad) + mul24(as_uint(idx ^ (N << 4)), Threads)))
-#       elif (STRIDED_INDEX == 2)
-#           define SCRATCHPAD_CHUNK(N) (*(__global uint4*)((__global uchar*)(Scratchpad) + (((idx ^ (N << 4)) % (MEM_CHUNK << 4)) + ((idx ^ (N << 4)) / (MEM_CHUNK << 4)) * WORKSIZE * (MEM_CHUNK << 4))))
-#       endif
-#   endif
-
-#   if (COMP_MODE == 1)
-    // do not use early return here
-    if (gIdx < Threads)
-#   endif
-    {
-    uint2 division_result = as_uint2(states[12]);
-    uint sqrt_result = as_uint2(states[13]).s0;
-
-    #pragma unroll CN_UNROLL
-    for(int i = 0; i < 0x40000; ++i)
-    {
-#       ifdef __NV_CL_C_VERSION
-            uint idx  = a[0] & 0x1FFFC0;
-            uint idx1 = a[0] & 0x30;
-
-            *scratchpad_line = *(__global uint16*)((__global uchar*)(Scratchpad) + idx);
-#       else
-            uint idx = a[0] & MASK;
-#       endif
-
-        uint4 c = SCRATCHPAD_CHUNK(0);
-        c = AES_Round(AES0, AES1, AES2, AES3, c, ((uint4 *)a)[0]);
-
-        {
-            const ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(1));
-            const ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
-            const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3));
-
-            SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + bx1);
-            SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + bx0);
-            SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
-        }
-
-        SCRATCHPAD_CHUNK(0) = as_uint4(bx0) ^ c;
-
-#       ifdef __NV_CL_C_VERSION
-            *(__global uint16*)((__global uchar*)(Scratchpad) + idx) = *scratchpad_line;
-
-            idx = as_ulong2(c).s0 & 0x1FFFC0;
-            idx1 = as_ulong2(c).s0 & 0x30;
-
-            *scratchpad_line = *(__global uint16*)((__global uchar*)(Scratchpad) + idx);
-#       else
-            idx = as_ulong2(c).s0 & MASK;
-#       endif
-
-        uint4 tmp = SCRATCHPAD_CHUNK(0);
-
-        {
-            tmp.s0 ^= division_result.s0;
-            tmp.s1 ^= division_result.s1 ^ sqrt_result;
-
-            division_result = fast_div_v2(as_ulong2(c).s1, (c.s0 + (sqrt_result << 1)) | 0x80000001UL);
-            sqrt_result = fast_sqrt_v2(as_ulong2(c).s0 + as_ulong(division_result));
-        }
-
-        ulong2 t;
-        t.s0 = mul_hi(as_ulong2(c).s0, as_ulong2(tmp).s0);
-        t.s1 = as_ulong2(c).s0 * as_ulong2(tmp).s0;
-        {
-            const ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(1)) ^ t;
-            const ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2));
-            t ^= chunk2;
-            const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3));
-
-            SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + bx1);
-            SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + bx0);
-            SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]);
-        }
-
-        a[1] += t.s1;
-        a[0] += t.s0;
-
-        SCRATCHPAD_CHUNK(0) = ((uint4 *)a)[0];
-
-#       ifdef __NV_CL_C_VERSION
-            *(__global uint16*)((__global uchar*)(Scratchpad) + idx) = *scratchpad_line;
-#       endif
-
-        ((uint4 *)a)[0] ^= tmp;
-        bx1 = bx0;
-        bx0 = as_ulong2(c);
-    }
-
-#   undef SCRATCHPAD_CHUNK
-    }
-    mem_fence(CLK_GLOBAL_MEM_FENCE);
-#   endif
-}
-
-)==="
-R"===(
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void cn1_msr(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
-{
-#   if (ALGO == CRYPTONIGHT)
-    ulong a[2], b[2];
-    __local uint AES0[256], AES1[256];
-
-    const ulong gIdx = getIdx();
-
-    for (int i = get_local_id(0); i < 256; i += WORKSIZE) {
-        const uint tmp = AES0_C[i];
-        AES0[i] = tmp;
-        AES1[i] = rotate(tmp, 8U);
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    uint2 tweak1_2;
-    uint4 b_x;
-#   if (COMP_MODE == 1)
-    // do not use early return here
-    if (gIdx < Threads)
-#   endif
-    {
-        states += 25 * gIdx;
-#       if (STRIDED_INDEX == 0)
-        Scratchpad += gIdx * (MEMORY >> 4);
-#       elif (STRIDED_INDEX == 1)
-#       if (ALGO == CRYPTONIGHT_HEAVY)
-            Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + get_local_id(0);
-#       else
-            Scratchpad += gIdx;
-#       endif
-#       elif (STRIDED_INDEX == 2)
-        Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0);
-#       endif
-
-        a[0] = states[0] ^ states[4];
-        b[0] = states[2] ^ states[6];
-        a[1] = states[1] ^ states[5];
-        b[1] = states[3] ^ states[7];
-
-        b_x = ((uint4 *)b)[0];
-        VARIANT1_INIT();
-    }
-
-    mem_fence(CLK_LOCAL_MEM_FENCE);
-
-#   if (COMP_MODE == 1)
-    // do not use early return here
-    if (gIdx < Threads)
-#   endif
-    {
-        #pragma unroll 8
-        for (int i = 0; i < 0x40000; ++i) {
-            ulong c[2];
-
-            ((uint4 *)c)[0] = Scratchpad[IDX((as_uint2(a[0]).s0 & MASK) >> 4)];
-            ((uint4 *)c)[0] = AES_Round_Two_Tables(AES0, AES1, ((uint4 *)c)[0], ((uint4 *)a)[0]);
-
-            b_x ^= ((uint4 *)c)[0];
-            VARIANT1_1(b_x);
-            Scratchpad[IDX((as_uint2(a[0]).s0 & MASK) >> 4)] = b_x;
-
-            uint4 tmp;
-            tmp = Scratchpad[IDX((as_uint2(c[0]).s0 & MASK) >> 4)];
-
-            a[1] += c[0] * as_ulong2(tmp).s0;
-            a[0] += mul_hi(c[0], as_ulong2(tmp).s0);
-
-            uint2 tweak1_2_0 = tweak1_2;
-
-            VARIANT1_2(a[1]);
-            Scratchpad[IDX((as_uint2(c[0]).s0 & MASK) >> 4)] = ((uint4 *)a)[0];
-            VARIANT1_2(a[1]);
-
-            ((uint4 *)a)[0] ^= tmp;
-
-            b_x = ((uint4 *)c)[0];
-        }
-    }
-    mem_fence(CLK_GLOBAL_MEM_FENCE);
-#   endif
-}
-
-)==="
-R"===(
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void cn1_tube(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
-{
-#   if (ALGO == CRYPTONIGHT_HEAVY)
-    ulong a[2], b[2];
-    __local uint AES0[256], AES1[256];
-
-    const ulong gIdx = getIdx();
-
-    for (int i = get_local_id(0); i < 256; i += WORKSIZE) {
-        const uint tmp = AES0_C[i];
-        AES0[i] = tmp;
-        AES1[i] = rotate(tmp, 8U);
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    uint2 tweak1_2;
-    uint4 b_x;
-#   if (COMP_MODE == 1)
-    // do not use early return here
-    if (gIdx < Threads)
-#   endif
-    {
-        states += 25 * gIdx;
-#       if (STRIDED_INDEX == 0)
-        Scratchpad += gIdx * (MEMORY >> 4);
-#       elif (STRIDED_INDEX == 1)
-#       if (ALGO == CRYPTONIGHT_HEAVY)
-            Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + get_local_id(0);
-#       else
-            Scratchpad += gIdx;
-#       endif
-#       elif (STRIDED_INDEX == 2)
-        Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0);
-#       endif
-
-        a[0] = states[0] ^ states[4];
-        b[0] = states[2] ^ states[6];
-        a[1] = states[1] ^ states[5];
-        b[1] = states[3] ^ states[7];
-
-        b_x = ((uint4 *)b)[0];
-        VARIANT1_INIT();
-    }
-
-    mem_fence(CLK_LOCAL_MEM_FENCE);
-
-#   if (COMP_MODE == 1)
-    // do not use early return here
-    if (gIdx < Threads)
-#   endif
-    {
-        uint idx0 = a[0];
-
-        #pragma unroll CN_UNROLL
-        for (int i = 0; i < ITERATIONS; ++i) {
-            ulong c[2];
-
-            ((uint4 *)c)[0] = Scratchpad[IDX((idx0 & MASK) >> 4)];
-            ((uint4 *)c)[0] = AES_Round_bittube2(AES0, AES1, ((uint4 *)c)[0], ((uint4 *)a)[0]);
-
-            b_x ^= ((uint4 *)c)[0];
-            VARIANT1_1(b_x);
-            Scratchpad[IDX((idx0 & MASK) >> 4)] = b_x;
-
-            uint4 tmp;
-            tmp = Scratchpad[IDX((as_uint2(c[0]).s0 & MASK) >> 4)];
-
-            a[1] += c[0] * as_ulong2(tmp).s0;
-            a[0] += mul_hi(c[0], as_ulong2(tmp).s0);
-
-            uint2 tweak1_2_0 = tweak1_2;
-            tweak1_2_0 ^= ((uint2 *)&(a[0]))[0];
-
-            VARIANT1_2(a[1]);
-            Scratchpad[IDX((as_uint2(c[0]).s0 & MASK) >> 4)] = ((uint4 *)a)[0];
-            VARIANT1_2(a[1]);
-
-            ((uint4 *)a)[0] ^= tmp;
-            idx0 = a[0];
-
-            b_x = ((uint4 *)c)[0];
-
-            {
-                long n = *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4))));
-                int d  = ((__global int*)(Scratchpad + (IDX((idx0 & MASK) >> 4))))[2];
-                long q = fast_div_heavy(n, d | 0x5);
-                *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))) = n ^ q;
-                idx0 = d ^ q;
-            }
-        }
-    }
-    mem_fence(CLK_GLOBAL_MEM_FENCE);
-#   endif
-}
-
-)==="
-R"===(

 __attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
 __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
@ -1211,91 +763,6 @@ __kernel void cn1(__global uint4 *Scratchpad, __global ulong *states, uint varia
    mem_fence(CLK_GLOBAL_MEM_FENCE);
 }

-)==="
-R"===(
-
-__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
-__kernel void cn1_xao(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads)
-{
-#   if (ALGO == CRYPTONIGHT)
-    ulong a[2], b[2];
-    __local uint AES0[256], AES1[256];
-
-    const ulong gIdx = getIdx();
-
-    for (int i = get_local_id(0); i < 256; i += WORKSIZE) {
-        const uint tmp = AES0_C[i];
-        AES0[i] = tmp;
-        AES1[i] = rotate(tmp, 8U);
-    }
-
-    barrier(CLK_LOCAL_MEM_FENCE);
-
-    uint4 b_x;
-#   if (COMP_MODE == 1)
-    // do not use early return here
-    if (gIdx < Threads)
-#   endif
-    {
-        states += 25 * gIdx;
-#       if (STRIDED_INDEX == 0)
-        Scratchpad += gIdx * (MEMORY >> 4);
-#       elif (STRIDED_INDEX == 1)
-#       if (ALGO == CRYPTONIGHT_HEAVY)
-            Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + get_local_id(0);
-#       else
-            Scratchpad += gIdx;
-#       endif
-#       elif(STRIDED_INDEX == 2)
-        Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0);
-#       endif
-
-        a[0] = states[0] ^ states[4];
-        b[0] = states[2] ^ states[6];
-        a[1] = states[1] ^ states[5];
-        b[1] = states[3] ^ states[7];
-
-        b_x = ((uint4 *)b)[0];
-    }
-
-    mem_fence(CLK_LOCAL_MEM_FENCE);
-
-#   if (COMP_MODE == 1)
-    // do not use early return here
-    if (gIdx < Threads)
-#   endif
-    {
-        uint idx0 = a[0];
-
-        #pragma unroll 8
-        for (int i = 0; i < 0x100000; ++i) {
-            ulong c[2];
-
-            ((uint4 *)c)[0] = Scratchpad[IDX((idx0 & MASK) >> 4)];
-            ((uint4 *)c)[0] = AES_Round_Two_Tables(AES0, AES1, ((uint4 *)c)[0], ((uint4 *)a)[0]);
-
-            Scratchpad[IDX((idx0 & MASK) >> 4)] = b_x ^ ((uint4 *)c)[0];
-
-            uint4 tmp;
-            tmp = Scratchpad[IDX((as_uint2(c[0]).s0 & MASK) >> 4)];
-
-            a[1] += c[0] * as_ulong2(tmp).s0;
-            a[0] += mul_hi(c[0], as_ulong2(tmp).s0);
-
-            Scratchpad[IDX((as_uint2(c[0]).s0 & MASK) >> 4)] = ((uint4 *)a)[0];
-
-            ((uint4 *)a)[0] ^= tmp;
-            idx0 = a[0];
-
-            b_x = ((uint4 *)c)[0];
-        }
-    }
-    mem_fence(CLK_GLOBAL_MEM_FENCE);
-#   endif
-}
-
-)==="
-R"===(

 __attribute__((reqd_work_group_size(8, 8, 1)))
 __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global uint *Branch0, __global uint *Branch1, __global uint *Branch2, __global uint *Branch3, uint Threads)
@ -1463,15 +930,15 @@ __kernel void cn2(__global uint4 *Scratchpad, __global ulong *states, __global u
    mem_fence(CLK_GLOBAL_MEM_FENCE);
 }

-)==="
-R"===(

 #define VSWAP8(x)   (((x) >> 56) | (((x) >> 40) & 0x000000000000FF00UL) | (((x) >> 24) & 0x0000000000FF0000UL) \
          | (((x) >>  8) & 0x00000000FF000000UL) | (((x) <<  8) & 0x000000FF00000000UL) \
          | (((x) << 24) & 0x0000FF0000000000UL) | (((x) << 40) & 0x00FF000000000000UL) | (((x) << 56) & 0xFF00000000000000UL))

+
 #define VSWAP4(x)   ((((x) >> 24) & 0xFFU) | (((x) >> 8) & 0xFF00U) | (((x) << 8) & 0xFF0000U) | (((x) << 24) & 0xFF000000U))

+
 __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
    const uint idx = get_global_id(0) - get_global_offset(0);
@ -1529,7 +996,9 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
    mem_fence(CLK_GLOBAL_MEM_FENCE);
 }

-#define SWAP8(x)    as_ulong(as_uchar8(x).s76543210)
+
+#define SWAP8(x) as_ulong(as_uchar8(x).s76543210)
+

 #define JHXOR \
    h0h ^= input[0]; \
@ -1552,6 +1021,7 @@ __kernel void Skein(__global ulong *states, __global uint *BranchBuf, __global u
    h7h ^= input[6]; \
    h7l ^= input[7]

+
 __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
    const uint idx = get_global_id(0) - get_global_offset(0);
@ -1597,8 +1067,10 @@ __kernel void JH(__global ulong *states, __global uint *BranchBuf, __global uint
    }
 }

+
 #define SWAP4(x)    as_uint(as_uchar4(x).s3210)

+
 __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global uint *output, ulong Target, uint Threads)
 {
    const uint idx = get_global_id(0) - get_global_offset(0);
@ -1697,6 +1169,7 @@ __kernel void Blake(__global ulong *states, __global uint *BranchBuf, __global u
    }
 }

+
 #undef SWAP4


@ -1796,5 +1269,3 @@ __kernel void Groestl(__global ulong *states, __global uint *BranchBuf, __global
        }
    }
 }
-
-)==="
--- a/src/backend/opencl/cl/cn/cryptonight_cl.h
+++ b/src/backend/opencl/cl/cn/cryptonight_cl.h
--- a/src/backend/opencl/cl/cn/fast_div_heavy.cl
+++ b/src/backend/opencl/cl/cn/fast_div_heavy.cl
@ -1,4 +1,3 @@
-R"===(
 #ifndef FAST_DIV_HEAVY_CL
 #define FAST_DIV_HEAVY_CL

@ -26,4 +25,3 @@ inline long fast_div_heavy(long _a, int _b)
 }

 #endif
-)==="
--- a/src/backend/opencl/cl/cn/fast_int_math_v2.cl
+++ b/src/backend/opencl/cl/cn/fast_int_math_v2.cl
@ -1,4 +1,3 @@
-R"===(
 /*
 * @author SChernykh
 */
@ -53,5 +52,3 @@ inline uint fast_sqrt_v2(const ulong n1)

    return result;
 }
-
-)==="
--- a/src/backend/opencl/cl/cn/groestl256.cl
+++ b/src/backend/opencl/cl/cn/groestl256.cl
@ -1,4 +1,3 @@
-R"===(
 /* $Id: groestl.c 260 2011-07-21 01:02:38Z tp $ */
 /*
 * Groestl256
@ -124,9 +123,6 @@ static const __constant ulong T0_G[] =
 	0x7bcbf646cb463d7bUL, 0xa8fc4b1ffc1fb7a8UL, 0x6dd6da61d6610c6dUL, 0x2c3a584e3a4e622cUL
 };

-)==="
-R"===(
-
 static const __constant ulong T4_G[] =
 {
 	0xA5F432C6C6A597F4UL, 0x84976FF8F884EB97UL, 0x99B05EEEEE99C7B0UL, 0x8D8C7AF6F68DF78CUL, 
@ -291,5 +287,3 @@ static const __constant ulong T4_G[] =
 			ROUND_SMALL_Q(a, r); \
 		} while (0)

-)==="
-		
--- a/src/backend/opencl/cl/cn/jh.cl
+++ b/src/backend/opencl/cl/cn/jh.cl
@ -1,4 +1,3 @@
-R"===(
 /* $Id: jh.c 255 2011-06-07 19:50:20Z tp $ */
 /*
 * JH implementation.
@ -270,5 +269,3 @@ static const __constant ulong C[] =
  } while (0)

 #endif
-
-)==="
--- a/src/backend/opencl/cl/cn/wolf-aes.cl
+++ b/src/backend/opencl/cl/cn/wolf-aes.cl
@ -1,4 +1,3 @@
-R"===(
 #ifndef WOLF_AES_CL
 #define WOLF_AES_CL

@ -149,5 +148,3 @@ uint4 AES_Round_Two_Tables(const __local uint *AES0, const __local uint *AES1, c
 }

 #endif
-
-)==="
--- a/src/backend/opencl/cl/cn/wolf-skein.cl
+++ b/src/backend/opencl/cl/cn/wolf-skein.cl
@ -1,4 +1,3 @@
-R"===(
 #ifndef WOLF_SKEIN_CL
 #define WOLF_SKEIN_CL

@ -137,5 +136,3 @@ ulong8 Skein512Block(ulong8 p, ulong8 h, ulong h8, const ulong *t)
 }

 #endif
-
-)==="
--- a/src/crypto/common/Algorithm.h
+++ b/src/crypto/common/Algorithm.h
@ -52,30 +52,18 @@ public:
        CN_RWZ,        // "cn/rwz"           CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft).
        CN_ZLS,        // "cn/zls"           CryptoNight variant 2 with 3/4 iterations (Zelerius).
        CN_DOUBLE,     // "cn/double"        CryptoNight variant 2 with double iterations (X-CASH).
-#       ifdef XMRIG_ALGO_CN_GPU
        CN_GPU,        // "cn/gpu"           CryptoNight-GPU (Ryo).
-#       endif
-#       ifdef XMRIG_ALGO_CN_LITE
        CN_LITE_0,     // "cn-lite/0"        CryptoNight-Lite variant 0.
        CN_LITE_1,     // "cn-lite/1"        CryptoNight-Lite variant 1.
-#       endif
-#       ifdef XMRIG_ALGO_CN_HEAVY
        CN_HEAVY_0,    // "cn-heavy/0"       CryptoNight-Heavy (4 MB).
        CN_HEAVY_TUBE, // "cn-heavy/tube"    CryptoNight-Heavy (modified, TUBE only).
        CN_HEAVY_XHV,  // "cn-heavy/xhv"     CryptoNight-Heavy (modified, Haven Protocol only).
-#       endif
-#       ifdef XMRIG_ALGO_CN_PICO
        CN_PICO_0,     // "cn-pico"          CryptoNight Turtle (TRTL)
-#       endif
-#       ifdef XMRIG_ALGO_RANDOMX
        RX_0,          // "rx/0"             RandomX (reference configuration).
        RX_WOW,        // "rx/wow"           RandomWOW (Wownero).
        RX_LOKI,       // "rx/loki"          RandomXL (Loki).
-#       endif
-#       ifdef XMRIG_ALGO_ARGON2
-        AR2_CHUKWA,    // "argon2/chukwa"
-        AR2_WRKZ,      // "argon2/wrkz"
-#       endif
+        AR2_CHUKWA,    // "argon2/chukwa"    Argon2id (Chukwa).
+        AR2_WRKZ,      // "argon2/wrkz"      Argon2id (WRKZ)
        MAX
    };