Browse Source

Added generic Argon2 implementation (conflicts with RandomX).

pull/1142/head
XMRig 5 years ago
parent
commit
0c25424a3e
  1. 80
      CMakeLists.txt
  2. 13
      cmake/argon2.cmake
  3. 58
      cmake/randomx.cmake
  4. 33
      src/3rdparty/argon2.h
  5. 95
      src/3rdparty/argon2/CMakeLists.txt
  6. 21
      src/3rdparty/argon2/LICENSE
  7. 58
      src/3rdparty/argon2/README.md
  8. 20
      src/3rdparty/argon2/arch/generic/lib/argon2-arch.c
  9. 41
      src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c
  10. 343
      src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c
  11. 11
      src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h
  12. 328
      src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c
  13. 11
      src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h
  14. 124
      src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c
  15. 11
      src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h
  16. 136
      src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c
  17. 11
      src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h
  18. 164
      src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h
  19. 124
      src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c
  20. 11
      src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h
  21. 91
      src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.c
  22. 12
      src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.h
  23. 8
      src/3rdparty/argon2/arch/x86_64/src/test-feature-avx2.c
  24. 8
      src/3rdparty/argon2/arch/x86_64/src/test-feature-avx512f.c
  25. 8
      src/3rdparty/argon2/arch/x86_64/src/test-feature-sse2.c
  26. 8
      src/3rdparty/argon2/arch/x86_64/src/test-feature-ssse3.c
  27. 8
      src/3rdparty/argon2/arch/x86_64/src/test-feature-xop.c
  28. 455
      src/3rdparty/argon2/include/argon2.h
  29. 193
      src/3rdparty/argon2/lib/argon2-template-64.h
  30. 476
      src/3rdparty/argon2/lib/argon2.c
  31. 90
      src/3rdparty/argon2/lib/blake2/blake2-impl.h
  32. 225
      src/3rdparty/argon2/lib/blake2/blake2.c
  33. 30
      src/3rdparty/argon2/lib/blake2/blake2.h
  34. 633
      src/3rdparty/argon2/lib/core.c
  35. 226
      src/3rdparty/argon2/lib/core.h
  36. 432
      src/3rdparty/argon2/lib/encoding.c
  37. 40
      src/3rdparty/argon2/lib/encoding.h
  38. 117
      src/3rdparty/argon2/lib/genkat.c
  39. 47
      src/3rdparty/argon2/lib/genkat.h
  40. 120
      src/3rdparty/argon2/lib/impl-select.c
  41. 23
      src/3rdparty/argon2/lib/impl-select.h
  42. 36
      src/3rdparty/argon2/lib/thread.c
  43. 47
      src/3rdparty/argon2/lib/thread.h
  44. 2
      src/backend/common/Workers.cpp
  45. 7
      src/backend/cpu/CpuWorker.cpp
  46. 67
      src/crypto/argon2/Argon2.h
  47. 12
      src/crypto/cn/CnAlgo.h
  48. 12
      src/crypto/cn/CnHash.cpp
  49. 33
      src/crypto/cn/CryptoNight_test.h
  50. 74
      src/crypto/common/Algorithm.cpp
  51. 9
      src/crypto/common/Algorithm.h
  52. 20
      src/crypto/rx/RxAlgo.cpp
  53. 4
      src/crypto/rx/RxAlgo.h

80
CMakeLists.txt

@ -1,14 +1,15 @@
cmake_minimum_required(VERSION 2.8)
project(xmrig)
option(WITH_LIBCPUID "Use Libcpuid" ON)
option(WITH_HWLOC "Use hwloc" ON)
option(WITH_CN_LITE "CryptoNight-Lite support" ON)
option(WITH_CN_HEAVY "CryptoNight-Heavy support" ON)
option(WITH_CN_PICO "CryptoNight-Pico support" ON)
option(WITH_CN_GPU "CryptoNight-GPU support" ON)
option(WITH_RANDOMX "RandomX support" ON)
option(WITH_HTTP "HTTP protocol support (client/server)" ON)
option(WITH_LIBCPUID "Enable libcpuid support" ON)
option(WITH_HWLOC "Enable hwloc support" ON)
option(WITH_CN_LITE "Enable CryptoNight-Lite algorithms family" ON)
option(WITH_CN_HEAVY "Enable CryptoNight-Heavy algorithms family" ON)
option(WITH_CN_PICO "Enable CryptoNight-Pico algorithm" ON)
option(WITH_CN_GPU "Enable CryptoNight-GPU algorithm" ON)
option(WITH_RANDOMX "Enable RandomX algorithms family" ON)
option(WITH_ARGON2 "Enable Argon2 algorithms family" ON)
option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
option(WITH_TLS "Enable OpenSSL support" ON)
option(WITH_ASM "Enable ASM PoW implementations" ON)
@ -151,66 +152,9 @@ add_definitions(/DUNICODE)
find_package(UV REQUIRED)
if (WITH_RANDOMX)
include_directories(src/crypto/randomx)
add_definitions(/DXMRIG_ALGO_RANDOMX)
set(SOURCES_CRYPTO
"${SOURCES_CRYPTO}"
src/crypto/randomx/aes_hash.cpp
src/crypto/randomx/allocator.cpp
src/crypto/randomx/argon2_core.c
src/crypto/randomx/argon2_ref.c
src/crypto/randomx/blake2_generator.cpp
src/crypto/randomx/blake2/blake2b.c
src/crypto/randomx/bytecode_machine.cpp
src/crypto/randomx/dataset.cpp
src/crypto/randomx/instructions_portable.cpp
src/crypto/randomx/randomx.cpp
src/crypto/randomx/reciprocal.c
src/crypto/randomx/soft_aes.cpp
src/crypto/randomx/superscalar.cpp
src/crypto/randomx/virtual_machine.cpp
src/crypto/randomx/virtual_memory.cpp
src/crypto/randomx/vm_compiled_light.cpp
src/crypto/randomx/vm_compiled.cpp
src/crypto/randomx/vm_interpreted_light.cpp
src/crypto/randomx/vm_interpreted.cpp
src/crypto/rx/Rx.cpp
src/crypto/rx/Rx.h
src/crypto/rx/RxAlgo.cpp
src/crypto/rx/RxAlgo.h
src/crypto/rx/RxCache.cpp
src/crypto/rx/RxCache.h
src/crypto/rx/RxConfig.cpp
src/crypto/rx/RxConfig.h
src/crypto/rx/RxDataset.cpp
src/crypto/rx/RxDataset.h
src/crypto/rx/RxVm.cpp
src/crypto/rx/RxVm.h
)
if (NOT ARCH_ID)
set(ARCH_ID ${CMAKE_HOST_SYSTEM_PROCESSOR})
endif()
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
enable_language(ASM_MASM)
list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_x86_static.asm
src/crypto/randomx/jit_compiler_x86.cpp
)
elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_x86_static.S
src/crypto/randomx/jit_compiler_x86.cpp
)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/crypto/randomx/jit_compiler_x86_static.S PROPERTY LANGUAGE C)
endif()
else()
remove_definitions(/DXMRIG_ALGO_RANDOMX)
endif()
include(cmake/flags.cmake)
include(cmake/randomx.cmake)
include(cmake/argon2.cmake)
include(cmake/OpenSSL.cmake)
include(cmake/asm.cmake)
include(cmake/cn-gpu.cmake)
@ -244,4 +188,4 @@ if (WITH_DEBUG_LOG)
endif()
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${SOURCES_CPUID} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES} ${CN_GPU_SOURCES})
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB})
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY})

13
cmake/argon2.cmake

@ -0,0 +1,13 @@
if (WITH_ARGON2)
add_definitions(/DXMRIG_ALGO_ARGON2)
list(APPEND HEADERS_CRYPTO
src/crypto/argon2/Argon2.h
)
add_subdirectory(src/3rdparty/argon2)
set(ARGON2_LIBRARY argon2)
else()
remove_definitions(/DXMRIG_ALGO_ARGON2)
set(ARGON2_LIBRARY "")
endif()

58
cmake/randomx.cmake

@ -0,0 +1,58 @@
if (WITH_RANDOMX)
include_directories(src/crypto/randomx)
add_definitions(/DXMRIG_ALGO_RANDOMX)
list(APPEND HEADERS_CRYPTO
src/crypto/rx/Rx.h
src/crypto/rx/RxAlgo.h
src/crypto/rx/RxCache.h
src/crypto/rx/RxConfig.h
src/crypto/rx/RxDataset.h
src/crypto/rx/RxVm.h
)
list(APPEND SOURCES_CRYPTO
src/crypto/randomx/aes_hash.cpp
src/crypto/randomx/allocator.cpp
src/crypto/randomx/argon2_core.c
src/crypto/randomx/argon2_ref.c
src/crypto/randomx/blake2_generator.cpp
src/crypto/randomx/blake2/blake2b.c
src/crypto/randomx/bytecode_machine.cpp
src/crypto/randomx/dataset.cpp
src/crypto/randomx/instructions_portable.cpp
src/crypto/randomx/randomx.cpp
src/crypto/randomx/reciprocal.c
src/crypto/randomx/soft_aes.cpp
src/crypto/randomx/superscalar.cpp
src/crypto/randomx/virtual_machine.cpp
src/crypto/randomx/virtual_memory.cpp
src/crypto/randomx/vm_compiled_light.cpp
src/crypto/randomx/vm_compiled.cpp
src/crypto/randomx/vm_interpreted_light.cpp
src/crypto/randomx/vm_interpreted.cpp
src/crypto/rx/Rx.cpp
src/crypto/rx/RxAlgo.cpp
src/crypto/rx/RxCache.cpp
src/crypto/rx/RxConfig.cpp
src/crypto/rx/RxDataset.cpp
src/crypto/rx/RxVm.cpp
)
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
enable_language(ASM_MASM)
list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_x86_static.asm
src/crypto/randomx/jit_compiler_x86.cpp
)
elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_x86_static.S
src/crypto/randomx/jit_compiler_x86.cpp
)
# cheat because cmake and ccache hate each other
set_property(SOURCE src/crypto/randomx/jit_compiler_x86_static.S PROPERTY LANGUAGE C)
endif()
else()
remove_definitions(/DXMRIG_ALGO_RANDOMX)
endif()

33
src/3rdparty/argon2.h

@ -0,0 +1,33 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_3RDPARTY_ARGON2_H
#define XMRIG_3RDPARTY_ARGON2_H
#include "3rdparty/argon2/include/argon2.h"
#endif /* XMRIG_3RDPARTY_ARGON2_H */

95
src/3rdparty/argon2/CMakeLists.txt

@ -0,0 +1,95 @@
cmake_minimum_required(VERSION 2.6)
project(Argon2 C)
set(ARGON2_VERSION 1.0)
set(CMAKE_C_STANDARD 90)
set(CMAKE_C_STANDARD_REQUIRED ON)
include(CheckCSourceCompiles)
find_package(Threads REQUIRED)
add_library(argon2-interface INTERFACE)
target_include_directories(argon2-interface INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>
)
add_library(argon2-internal INTERFACE)
target_include_directories(argon2-internal INTERFACE lib lib/blake2)
target_link_libraries(argon2-internal INTERFACE argon2-interface)
add_library(argon2 STATIC
lib/argon2.c
lib/core.c
lib/encoding.c
lib/genkat.c
lib/impl-select.c
lib/thread.c
lib/blake2/blake2.c
)
target_link_libraries(argon2
PUBLIC argon2-interface ${CMAKE_THREAD_LIBS_INIT}
PRIVATE argon2-internal
)
set_property(TARGET argon2 PROPERTY VERSION ${Upstream_VERSION})
set_property(TARGET argon2 PROPERTY SOVERSION 1)
set_property(TARGET argon2 PROPERTY INTERFACE_ARGON2_MAJOR_VERSION 1)
set_property(TARGET argon2 APPEND PROPERTY
COMPATIBLE_INTERFACE_STRING ARGON2_MAJOR_VERSION
)
#if (CMAKE_C_COMPILER_ID MATCHES MSVC)
#elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
# function(add_feature_impl FEATURE GCC_FLAG DEF)
# add_library(argon2-${FEATURE} STATIC
# arch/x86_64/lib/argon2-${FEATURE}.c
# )
# target_link_libraries(argon2-${FEATURE} PRIVATE argon2-internal)
# set_target_properties(argon2-${FEATURE}
# PROPERTIES POSITION_INDEPENDENT_CODE True
# )
# message("-- Detecting feature '${FEATURE}'...")
# file(READ arch/x86_64/src/test-feature-${FEATURE}.c SOURCE_CODE)
# # try without flag:
# check_c_source_compiles("${SOURCE_CODE}" FEATURE_${FEATURE}_NOFLAG)
# set(HAS_FEATURE ${FEATURE_${FEATURE}_NOFLAG})
# if(NOT "${HAS_FEATURE}")
# # try with -m<feature> flag:
# set(CMAKE_REQUIRED_FLAGS ${GCC_FLAG})
# check_c_source_compiles("${SOURCE_CODE}" FEATURE_${FEATURE}_FLAG)
# set(CMAKE_REQUIRED_FLAGS "")
# set(HAS_FEATURE ${FEATURE_${FEATURE}_FLAG})
# if(${HAS_FEATURE})
# target_compile_options(argon2-${FEATURE} PRIVATE ${GCC_FLAG})
# endif()
# endif()
# if(${HAS_FEATURE})
# message("-- Feature '${FEATURE}' detected!")
# target_compile_definitions(argon2-${FEATURE} PRIVATE ${DEF})
# endif()
# target_link_libraries(argon2 PUBLIC argon2-${FEATURE})
# endfunction()
# target_include_directories(argon2-internal INTERFACE arch/x86_64/lib)
# add_feature_impl(sse2 -msse2 HAVE_SSE2)
# add_feature_impl(ssse3 -mssse3 HAVE_SSSE3)
# add_feature_impl(xop -mxop HAVE_XOP)
# add_feature_impl(avx2 -mavx2 HAVE_AVX2)
# add_feature_impl(avx512f -mavx512f HAVE_AVX512F)
# target_sources(argon2 PRIVATE
# arch/x86_64/lib/argon2-arch.c
# arch/x86_64/lib/cpu-flags.c
# )
#else()
target_sources(argon2 PRIVATE
arch/generic/lib/argon2-arch.c
)
#endif()

21
src/3rdparty/argon2/LICENSE

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2016 Ondrej Mosnáček
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

58
src/3rdparty/argon2/README.md

@ -0,0 +1,58 @@
# Argon2 [![Build Status](https://travis-ci.org/WOnder93/argon2.svg?branch=master)](https://travis-ci.org/WOnder93/argon2)
A multi-arch library implementing the Argon2 password hashing algorithm.
This project is based on the [original source code](https://github.com/P-H-C/phc-winner-argon2) by the Argon2 authors. The goal of this project is to provide efficient Argon2 implementations for various HW architectures (x86, SSE, ARM, PowerPC, ...).
For the x86_64 architecture, the library implements a simple CPU dispatch which automatically selects the best implementation based on CPU flags and quick benchmarks.
# Building
## Using GNU autotools
To prepare the build environment, run:
```bash
autoreconf -i
./configure
```
After that, just run `make` to build the library.
### Running tests
After configuring the build environment, run `make check` to run the tests.
### Architecture options
You can specify the target architecture by passing the `--host=...` flag to `./configure`.
Supported architectures:
* `x86_64` &ndash; 64-bit x86 architecture
* `generic` &ndash; use generic C impementation
## Using CMake
To prepare the build environment, run:
```bash
cmake -DCMAKE_BUILD_TYPE=Release .
```
Then you can run `make` to build the library.
## Using QMake/Qt Creator
A [QMake](http://doc.qt.io/qt-4.8/qmake-manual.html) project is also available in the `qmake` directory. You can open it in the [Qt Creator IDE](http://wiki.qt.io/Category:Tools::QtCreator) or build it from terminal:
```bash
cd qmake
# see table below for the list of possible ARCH and CONFIG values
qmake ARCH=... CONFIG+=...
make
```
### Architecture options
For QMake builds you can configure support for different architectures. Use the `ARCH` variable to choose the architecture and the `CONFIG` variable to set additional options.
Supported architectures:
* `x86_64` &ndash; 64-bit x86 architecture
* QMake config flags:
* `USE_SSE2` &ndash; use SSE2 instructions
* `USE_SSSE3` &ndash; use SSSE3 instructions
* `USE_XOP` &ndash; use XOP instructions
* `USE_AVX2` &ndash; use AVX2 instructions
* `USE_AVX512F` &ndash; use AVX-512F instructions
* `generic` &ndash; use generic C impementation

20
src/3rdparty/argon2/arch/generic/lib/argon2-arch.c

@ -0,0 +1,20 @@
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "impl-select.h"
#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#include "argon2-template-64.h"
void fill_segment_default(const argon2_instance_t *instance,
argon2_position_t position)
{
fill_segment_64(instance, position);
}
void argon2_get_impl_list(argon2_impl_list *list)
{
list->count = 0;
}

41
src/3rdparty/argon2/arch/x86_64/lib/argon2-arch.c

@ -0,0 +1,41 @@
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "impl-select.h"
#include "cpu-flags.h"
#include "argon2-sse2.h"
#include "argon2-ssse3.h"
#include "argon2-xop.h"
#include "argon2-avx2.h"
#include "argon2-avx512f.h"
/* NOTE: there is no portable intrinsic for 64-bit rotate, but any
* sane compiler should be able to compile this into a ROR instruction: */
#define rotr64(x, n) ((x) >> (n)) | ((x) << (64 - (n)))
#include "argon2-template-64.h"
void fill_segment_default(const argon2_instance_t *instance,
argon2_position_t position)
{
fill_segment_64(instance, position);
}
void argon2_get_impl_list(argon2_impl_list *list)
{
static const argon2_impl IMPLS[] = {
{ "x86_64", NULL, fill_segment_default },
{ "SSE2", check_sse2, fill_segment_sse2 },
{ "SSSE3", check_ssse3, fill_segment_ssse3 },
{ "XOP", check_xop, fill_segment_xop },
{ "AVX2", check_avx2, fill_segment_avx2 },
{ "AVX-512F", check_avx512f, fill_segment_avx512f },
};
cpu_flags_get();
list->count = sizeof(IMPLS) / sizeof(IMPLS[0]);
list->entries = IMPLS;
}

343
src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.c

@ -0,0 +1,343 @@
#include "argon2-avx2.h"
#ifdef HAVE_AVX2
#include <string.h>
#include <x86intrin.h>
#include "cpu-flags.h"
#define r16 (_mm256_setr_epi8( \
2, 3, 4, 5, 6, 7, 0, 1, \
10, 11, 12, 13, 14, 15, 8, 9, \
18, 19, 20, 21, 22, 23, 16, 17, \
26, 27, 28, 29, 30, 31, 24, 25))
#define r24 (_mm256_setr_epi8( \
3, 4, 5, 6, 7, 0, 1, 2, \
11, 12, 13, 14, 15, 8, 9, 10, \
19, 20, 21, 22, 23, 16, 17, 18, \
27, 28, 29, 30, 31, 24, 25, 26))
#define ror64_16(x) _mm256_shuffle_epi8((x), r16)
#define ror64_24(x) _mm256_shuffle_epi8((x), r24)
#define ror64_32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
#define ror64_63(x) \
_mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
static __m256i f(__m256i x, __m256i y)
{
__m256i z = _mm256_mul_epu32(x, y);
return _mm256_add_epi64(_mm256_add_epi64(x, y), _mm256_add_epi64(z, z));
}
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm256_xor_si256(D0, A0); \
D1 = _mm256_xor_si256(D1, A1); \
\
D0 = ror64_32(D0); \
D1 = ror64_32(D1); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm256_xor_si256(B0, C0); \
B1 = _mm256_xor_si256(B1, C1); \
\
B0 = ror64_24(B0); \
B1 = ror64_24(B1); \
} while ((void)0, 0)
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm256_xor_si256(D0, A0); \
D1 = _mm256_xor_si256(D1, A1); \
\
D0 = ror64_16(D0); \
D1 = ror64_16(D1); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm256_xor_si256(B0, C0); \
B1 = _mm256_xor_si256(B1, C1); \
\
B0 = ror64_63(B0); \
B1 = ror64_63(B1); \
} while ((void)0, 0)
#define DIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
\
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
} while ((void)0, 0)
#define UNDIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
\
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
} while ((void)0, 0)
#define DIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m256i tmp1, tmp2; \
tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
\
tmp1 = C0; \
C0 = C1; \
C1 = tmp1; \
\
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
} while ((void)0, 0)
#define UNDIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m256i tmp1, tmp2; \
tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
\
tmp1 = C0; \
C0 = C1; \
C1 = tmp1; \
\
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
D1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
D0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
} while ((void)0, 0)
#define BLAKE2_ROUND1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
UNDIAGONALIZE1(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#define BLAKE2_ROUND2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
UNDIAGONALIZE2(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
enum {
ARGON2_HWORDS_IN_BLOCK = ARGON2_OWORDS_IN_BLOCK / 2,
};
static void fill_block(__m256i *s, const block *ref_block, block *next_block,
int with_xor)
{
__m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
unsigned int i;
if (with_xor) {
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
s[i] =_mm256_xor_si256(
s[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
block_XY[i] = _mm256_xor_si256(
s[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
}
} else {
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
block_XY[i] = s[i] =_mm256_xor_si256(
s[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
}
}
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND1(
s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3],
s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]);
}
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND2(
s[4 * 0 + i], s[4 * 1 + i], s[4 * 2 + i], s[4 * 3 + i],
s[4 * 4 + i], s[4 * 5 + i], s[4 * 6 + i], s[4 * 7 + i]);
}
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
s[i] = _mm256_xor_si256(s[i], block_XY[i]);
_mm256_storeu_si256((__m256i *)next_block->v + i, s[i]);
}
}
static void next_addresses(block *address_block, block *input_block)
{
/*Temporary zero-initialized blocks*/
__m256i zero_block[ARGON2_HWORDS_IN_BLOCK];
__m256i zero2_block[ARGON2_HWORDS_IN_BLOCK];
memset(zero_block, 0, sizeof(zero_block));
memset(zero2_block, 0, sizeof(zero2_block));
/*Increasing index counter*/
input_block->v[6]++;
/*First iteration of G*/
fill_block(zero_block, input_block, address_block, 0);
/*Second iteration of G*/
fill_block(zero2_block, address_block, address_block, 0);
}
void fill_segment_avx2(const argon2_instance_t *instance,
argon2_position_t position)
{
block *ref_block = NULL, *curr_block = NULL;
block address_block, input_block;
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index, i;
__m256i state[ARGON2_HWORDS_IN_BLOCK];
int data_independent_addressing;
if (instance == NULL) {
return;
}
data_independent_addressing = (instance->type == Argon2_i) ||
(instance->type == Argon2_id && (position.pass == 0) &&
(position.slice < ARGON2_SYNC_POINTS / 2));
if (data_independent_addressing) {
init_block_value(&input_block, 0);
input_block.v[0] = position.pass;
input_block.v[1] = position.lane;
input_block.v[2] = position.slice;
input_block.v[3] = instance->memory_blocks;
input_block.v[4] = instance->passes;
input_block.v[5] = instance->type;
}
starting_index = 0;
if ((0 == position.pass) && (0 == position.slice)) {
starting_index = 2; /* we have already generated the first two blocks */
/* Don't forget to generate the first block of addresses: */
if (data_independent_addressing) {
next_addresses(&address_block, &input_block);
}
}
/* Offset of the current block */
curr_offset = position.lane * instance->lane_length +
position.slice * instance->segment_length + starting_index;
if (0 == curr_offset % instance->lane_length) {
/* Last block in this lane */
prev_offset = curr_offset + instance->lane_length - 1;
} else {
/* Previous block */
prev_offset = curr_offset - 1;
}
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
for (i = starting_index; i < instance->segment_length;
++i, ++curr_offset, ++prev_offset) {
/*1.1 Rotating prev_offset if needed */
if (curr_offset % instance->lane_length == 1) {
prev_offset = curr_offset - 1;
}
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
if (data_independent_addressing) {
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
next_addresses(&address_block, &input_block);
}
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
} else {
pseudo_rand = instance->memory[prev_offset].v[0];
}
/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
if ((position.pass == 0) && (position.slice == 0)) {
/* Can not reference other lanes yet */
ref_lane = position.lane;
}
/* 1.2.3 Computing the number of possible reference block within the
* lane.
*/
position.index = i;
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
ref_lane == position.lane);
/* 2 Creating a new block */
ref_block =
instance->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->memory + curr_offset;
/* version 1.2.1 and earlier: overwrite, not XOR */
if (0 == position.pass || ARGON2_VERSION_10 == instance->version) {
fill_block(state, ref_block, curr_block, 0);
} else {
fill_block(state, ref_block, curr_block, 1);
}
}
}
int check_avx2(void)
{
return cpu_flags_have_avx2();
}
#else
void fill_segment_avx2(const argon2_instance_t *instance,
argon2_position_t position)
{
}
int check_avx2(void)
{
return 0;
}
#endif

11
src/3rdparty/argon2/arch/x86_64/lib/argon2-avx2.h

@ -0,0 +1,11 @@
#ifndef ARGON2_AVX2_H
#define ARGON2_AVX2_H
#include "core.h"
void fill_segment_avx2(const argon2_instance_t *instance,
argon2_position_t position);
int check_avx2(void);
#endif // ARGON2_AVX2_H

328
src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.c

@ -0,0 +1,328 @@
#include "argon2-avx512f.h"
#ifdef HAVE_AVX512F
#include <stdint.h>
#include <string.h>
#include <x86intrin.h>
#include "cpu-flags.h"
#define ror64(x, n) _mm512_ror_epi64((x), (n))
static __m512i f(__m512i x, __m512i y)
{
__m512i z = _mm512_mul_epu32(x, y);
return _mm512_add_epi64(_mm512_add_epi64(x, y), _mm512_add_epi64(z, z));
}
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 32); \
D1 = ror64(D1, 32); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 24); \
B1 = ror64(B1, 24); \
} while ((void)0, 0)
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm512_xor_si512(D0, A0); \
D1 = _mm512_xor_si512(D1, A1); \
\
D0 = ror64(D0, 16); \
D1 = ror64(D1, 16); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm512_xor_si512(B0, C0); \
B1 = _mm512_xor_si512(B1, C1); \
\
B0 = ror64(B0, 63); \
B1 = ror64(B1, 63); \
} while ((void)0, 0)
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
\
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
} while ((void)0, 0)
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
B0 = _mm512_permutex_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
B1 = _mm512_permutex_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
\
C0 = _mm512_permutex_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
C1 = _mm512_permutex_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
\
D0 = _mm512_permutex_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
D1 = _mm512_permutex_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
} while ((void)0, 0)
#define BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#define SWAP_HALVES(A0, A1) \
do { \
__m512i t0, t1; \
t0 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(1, 0, 1, 0)); \
t1 = _mm512_shuffle_i64x2(A0, A1, _MM_SHUFFLE(3, 2, 3, 2)); \
A0 = t0; \
A1 = t1; \
} while((void)0, 0)
#define SWAP_QUARTERS(A0, A1) \
do { \
SWAP_HALVES(A0, A1); \
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
} while((void)0, 0)
#define UNSWAP_QUARTERS(A0, A1) \
do { \
A0 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A0); \
A1 = _mm512_permutexvar_epi64(_mm512_setr_epi64(0, 1, 4, 5, 2, 3, 6, 7), A1); \
SWAP_HALVES(A0, A1); \
} while((void)0, 0)
#define BLAKE2_ROUND1(A0, C0, B0, D0, A1, C1, B1, D1) \
do { \
SWAP_HALVES(A0, B0); \
SWAP_HALVES(C0, D0); \
SWAP_HALVES(A1, B1); \
SWAP_HALVES(C1, D1); \
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
SWAP_HALVES(A0, B0); \
SWAP_HALVES(C0, D0); \
SWAP_HALVES(A1, B1); \
SWAP_HALVES(C1, D1); \
} while ((void)0, 0)
#define BLAKE2_ROUND2(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
SWAP_QUARTERS(A0, A1); \
SWAP_QUARTERS(B0, B1); \
SWAP_QUARTERS(C0, C1); \
SWAP_QUARTERS(D0, D1); \
BLAKE2_ROUND(A0, B0, C0, D0, A1, B1, C1, D1); \
UNSWAP_QUARTERS(A0, A1); \
UNSWAP_QUARTERS(B0, B1); \
UNSWAP_QUARTERS(C0, C1); \
UNSWAP_QUARTERS(D0, D1); \
} while ((void)0, 0)
enum {
ARGON2_VECS_IN_BLOCK = ARGON2_OWORDS_IN_BLOCK / 4,
};
static void fill_block(__m512i *s, const block *ref_block, block *next_block,
int with_xor)
{
__m512i block_XY[ARGON2_VECS_IN_BLOCK];
unsigned int i;
if (with_xor) {
for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) {
s[i] =_mm512_xor_si512(
s[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
block_XY[i] = _mm512_xor_si512(
s[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
}
} else {
for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) {
block_XY[i] = s[i] =_mm512_xor_si512(
s[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
}
}
for (i = 0; i < 2; ++i) {
BLAKE2_ROUND1(
s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3],
s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]);
}
for (i = 0; i < 2; ++i) {
BLAKE2_ROUND2(
s[2 * 0 + i], s[2 * 1 + i], s[2 * 2 + i], s[2 * 3 + i],
s[2 * 4 + i], s[2 * 5 + i], s[2 * 6 + i], s[2 * 7 + i]);
}
for (i = 0; i < ARGON2_VECS_IN_BLOCK; i++) {
s[i] = _mm512_xor_si512(s[i], block_XY[i]);
_mm512_storeu_si512((__m512i *)next_block->v + i, s[i]);
}
}
static void next_addresses(block *address_block, block *input_block)
{
/*Temporary zero-initialized blocks*/
__m512i zero_block[ARGON2_VECS_IN_BLOCK];
__m512i zero2_block[ARGON2_VECS_IN_BLOCK];
memset(zero_block, 0, sizeof(zero_block));
memset(zero2_block, 0, sizeof(zero2_block));
/*Increasing index counter*/
input_block->v[6]++;
/*First iteration of G*/
fill_block(zero_block, input_block, address_block, 0);
/*Second iteration of G*/
fill_block(zero2_block, address_block, address_block, 0);
}
void fill_segment_avx512f(const argon2_instance_t *instance,
argon2_position_t position)
{
block *ref_block = NULL, *curr_block = NULL;
block address_block, input_block;
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index, i;
__m512i state[ARGON2_VECS_IN_BLOCK];
int data_independent_addressing;
if (instance == NULL) {
return;
}
data_independent_addressing = (instance->type == Argon2_i) ||
(instance->type == Argon2_id && (position.pass == 0) &&
(position.slice < ARGON2_SYNC_POINTS / 2));
if (data_independent_addressing) {
init_block_value(&input_block, 0);
input_block.v[0] = position.pass;
input_block.v[1] = position.lane;
input_block.v[2] = position.slice;
input_block.v[3] = instance->memory_blocks;
input_block.v[4] = instance->passes;
input_block.v[5] = instance->type;
}
starting_index = 0;
if ((0 == position.pass) && (0 == position.slice)) {
starting_index = 2; /* we have already generated the first two blocks */
/* Don't forget to generate the first block of addresses: */
if (data_independent_addressing) {
next_addresses(&address_block, &input_block);
}
}
/* Offset of the current block */
curr_offset = position.lane * instance->lane_length +
position.slice * instance->segment_length + starting_index;
if (0 == curr_offset % instance->lane_length) {
/* Last block in this lane */
prev_offset = curr_offset + instance->lane_length - 1;
} else {
/* Previous block */
prev_offset = curr_offset - 1;
}
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
for (i = starting_index; i < instance->segment_length;
++i, ++curr_offset, ++prev_offset) {
/*1.1 Rotating prev_offset if needed */
if (curr_offset % instance->lane_length == 1) {
prev_offset = curr_offset - 1;
}
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
if (data_independent_addressing) {
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
next_addresses(&address_block, &input_block);
}
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
} else {
pseudo_rand = instance->memory[prev_offset].v[0];
}
/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
if ((position.pass == 0) && (position.slice == 0)) {
/* Can not reference other lanes yet */
ref_lane = position.lane;
}
/* 1.2.3 Computing the number of possible reference block within the
* lane.
*/
position.index = i;
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
ref_lane == position.lane);
/* 2 Creating a new block */
ref_block =
instance->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->memory + curr_offset;
/* version 1.2.1 and earlier: overwrite, not XOR */
if (0 == position.pass || ARGON2_VERSION_10 == instance->version) {
fill_block(state, ref_block, curr_block, 0);
} else {
fill_block(state, ref_block, curr_block, 1);
}
}
}
int check_avx512f(void)
{
return cpu_flags_have_avx512f();
}
#else
void fill_segment_avx512f(const argon2_instance_t *instance,
argon2_position_t position)
{
}
int check_avx512f(void)
{
return 0;
}
#endif

11
src/3rdparty/argon2/arch/x86_64/lib/argon2-avx512f.h

@ -0,0 +1,11 @@
#ifndef ARGON2_AVX512F_H
#define ARGON2_AVX512F_H
#include "core.h"
void fill_segment_avx512f(const argon2_instance_t *instance,
argon2_position_t position);
int check_avx512f(void);
#endif // ARGON2_AVX512F_H

124
src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.c

@ -0,0 +1,124 @@
#include "argon2-sse2.h"
#ifdef HAVE_SSE2
#include <x86intrin.h>
#include "cpu-flags.h"
#define ror64_16(x) \
_mm_shufflehi_epi16( \
_mm_shufflelo_epi16((x), _MM_SHUFFLE(0, 3, 2, 1)), \
_MM_SHUFFLE(0, 3, 2, 1))
#define ror64_24(x) \
_mm_xor_si128(_mm_srli_epi64((x), 24), _mm_slli_epi64((x), 40))
#define ror64_32(x) _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
#define ror64_63(x) \
_mm_xor_si128(_mm_srli_epi64((x), 63), _mm_add_epi64((x), (x)))
static __m128i f(__m128i x, __m128i y)
{
__m128i z = _mm_mul_epu32(x, y);
return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
}
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = ror64_32(D0); \
D1 = ror64_32(D1); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = ror64_24(B0); \
B1 = ror64_24(B1); \
} while ((void)0, 0)
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = ror64_16(D0); \
D1 = ror64_16(D1); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = ror64_63(B0); \
B1 = ror64_63(B1); \
} while ((void)0, 0)
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = D0; \
__m128i t1 = B0; \
D0 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t0, t0)); \
D1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(D1, D1)); \
B0 = _mm_unpackhi_epi64(B0, _mm_unpacklo_epi64(B1, B1)); \
B1 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(t1, t1)); \
} while ((void)0, 0)
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = B0; \
__m128i t1 = D0; \
B0 = _mm_unpackhi_epi64(B1, _mm_unpacklo_epi64(B0, B0)); \
B1 = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(B1, B1)); \
D0 = _mm_unpackhi_epi64(D0, _mm_unpacklo_epi64(D1, D1)); \
D1 = _mm_unpackhi_epi64(D1, _mm_unpacklo_epi64(t1, t1)); \
} while ((void)0, 0)
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C1, D0, A1, B1, C0, D1); \
G2(A0, B0, C1, D0, A1, B1, C0, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#include "argon2-template-128.h"
void fill_segment_sse2(const argon2_instance_t *instance,
argon2_position_t position)
{
fill_segment_128(instance, position);
}
int check_sse2(void)
{
return cpu_flags_have_sse2();
}
#else
void fill_segment_sse2(const argon2_instance_t *instance,
argon2_position_t position)
{
}
int check_sse2(void)
{
return 0;
}
#endif

11
src/3rdparty/argon2/arch/x86_64/lib/argon2-sse2.h

@ -0,0 +1,11 @@
#ifndef ARGON2_SSE2_H
#define ARGON2_SSE2_H
#include "core.h"
void fill_segment_sse2(const argon2_instance_t *instance,
argon2_position_t position);
int check_sse2(void);
#endif // ARGON2_SSE2_H

136
src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.c

@ -0,0 +1,136 @@
#include "argon2-ssse3.h"
#ifdef HAVE_SSSE3
#include <string.h>
#include <x86intrin.h>
#include "cpu-flags.h"
#define r16 (_mm_setr_epi8( \
2, 3, 4, 5, 6, 7, 0, 1, \
10, 11, 12, 13, 14, 15, 8, 9))
#define r24 (_mm_setr_epi8( \
3, 4, 5, 6, 7, 0, 1, 2, \
11, 12, 13, 14, 15, 8, 9, 10))
#define ror64_16(x) _mm_shuffle_epi8((x), r16)
#define ror64_24(x) _mm_shuffle_epi8((x), r24)
#define ror64_32(x) _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
#define ror64_63(x) \
_mm_xor_si128(_mm_srli_epi64((x), 63), _mm_add_epi64((x), (x)))
static __m128i f(__m128i x, __m128i y)
{
__m128i z = _mm_mul_epu32(x, y);
return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
}
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = ror64_32(D0); \
D1 = ror64_32(D1); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = ror64_24(B0); \
B1 = ror64_24(B1); \
} while ((void)0, 0)
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = ror64_16(D0); \
D1 = ror64_16(D1); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = ror64_63(B0); \
B1 = ror64_63(B1); \
} while ((void)0, 0)
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
__m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
B0 = t0; \
B1 = t1; \
\
t0 = _mm_alignr_epi8(D1, D0, 8); \
t1 = _mm_alignr_epi8(D0, D1, 8); \
D0 = t1; \
D1 = t0; \
} while ((void)0, 0)
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
__m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
B0 = t0; \
B1 = t1; \
\
t0 = _mm_alignr_epi8(D0, D1, 8); \
t1 = _mm_alignr_epi8(D1, D0, 8); \
D0 = t1; \
D1 = t0; \
} while ((void)0, 0)
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C1, D0, A1, B1, C0, D1); \
G2(A0, B0, C1, D0, A1, B1, C0, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#include "argon2-template-128.h"
void fill_segment_ssse3(const argon2_instance_t *instance,
argon2_position_t position)
{
fill_segment_128(instance, position);
}
int check_ssse3(void)
{
return cpu_flags_have_ssse3();
}
#else
void fill_segment_ssse3(const argon2_instance_t *instance,
argon2_position_t position)
{
}
int check_ssse3(void)
{
return 0;
}
#endif

11
src/3rdparty/argon2/arch/x86_64/lib/argon2-ssse3.h

@ -0,0 +1,11 @@
#ifndef ARGON2_SSSE3_H
#define ARGON2_SSSE3_H
#include "core.h"
void fill_segment_ssse3(const argon2_instance_t *instance,
argon2_position_t position);
int check_ssse3(void);
#endif // ARGON2_SSSE3_H

164
src/3rdparty/argon2/arch/x86_64/lib/argon2-template-128.h

@ -0,0 +1,164 @@
#include <string.h>
#include <x86intrin.h>
#include "core.h"
static void fill_block(__m128i *s, const block *ref_block, block *next_block,
int with_xor)
{
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
unsigned int i;
if (with_xor) {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
s[i] = _mm_xor_si128(
s[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
block_XY[i] = _mm_xor_si128(
s[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
}
} else {
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
block_XY[i] = s[i] = _mm_xor_si128(
s[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
}
}
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND(
s[8 * i + 0], s[8 * i + 1], s[8 * i + 2], s[8 * i + 3],
s[8 * i + 4], s[8 * i + 5], s[8 * i + 6], s[8 * i + 7]);
}
for (i = 0; i < 8; ++i) {
BLAKE2_ROUND(
s[8 * 0 + i], s[8 * 1 + i], s[8 * 2 + i], s[8 * 3 + i],
s[8 * 4 + i], s[8 * 5 + i], s[8 * 6 + i], s[8 * 7 + i]);
}
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
s[i] = _mm_xor_si128(s[i], block_XY[i]);
_mm_storeu_si128((__m128i *)next_block->v + i, s[i]);
}
}
static void next_addresses(block *address_block, block *input_block)
{
/*Temporary zero-initialized blocks*/
__m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
__m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
memset(zero_block, 0, sizeof(zero_block));
memset(zero2_block, 0, sizeof(zero2_block));
/*Increasing index counter*/
input_block->v[6]++;
/*First iteration of G*/
fill_block(zero_block, input_block, address_block, 0);
/*Second iteration of G*/
fill_block(zero2_block, address_block, address_block, 0);
}
static void fill_segment_128(const argon2_instance_t *instance,
argon2_position_t position)
{
block *ref_block = NULL, *curr_block = NULL;
block address_block, input_block;
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index, i;
__m128i state[ARGON2_OWORDS_IN_BLOCK];
int data_independent_addressing;
if (instance == NULL) {
return;
}
data_independent_addressing = (instance->type == Argon2_i) ||
(instance->type == Argon2_id && (position.pass == 0) &&
(position.slice < ARGON2_SYNC_POINTS / 2));
if (data_independent_addressing) {
init_block_value(&input_block, 0);
input_block.v[0] = position.pass;
input_block.v[1] = position.lane;
input_block.v[2] = position.slice;
input_block.v[3] = instance->memory_blocks;
input_block.v[4] = instance->passes;
input_block.v[5] = instance->type;
}
starting_index = 0;
if ((0 == position.pass) && (0 == position.slice)) {
starting_index = 2; /* we have already generated the first two blocks */
/* Don't forget to generate the first block of addresses: */
if (data_independent_addressing) {
next_addresses(&address_block, &input_block);
}
}
/* Offset of the current block */
curr_offset = position.lane * instance->lane_length +
position.slice * instance->segment_length + starting_index;
if (0 == curr_offset % instance->lane_length) {
/* Last block in this lane */
prev_offset = curr_offset + instance->lane_length - 1;
} else {
/* Previous block */
prev_offset = curr_offset - 1;
}
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
for (i = starting_index; i < instance->segment_length;
++i, ++curr_offset, ++prev_offset) {
/*1.1 Rotating prev_offset if needed */
if (curr_offset % instance->lane_length == 1) {
prev_offset = curr_offset - 1;
}
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
if (data_independent_addressing) {
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
next_addresses(&address_block, &input_block);
}
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
} else {
pseudo_rand = instance->memory[prev_offset].v[0];
}
/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
if ((position.pass == 0) && (position.slice == 0)) {
/* Can not reference other lanes yet */
ref_lane = position.lane;
}
/* 1.2.3 Computing the number of possible reference block within the
* lane.
*/
position.index = i;
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
ref_lane == position.lane);
/* 2 Creating a new block */
ref_block =
instance->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->memory + curr_offset;
/* version 1.2.1 and earlier: overwrite, not XOR */
if (0 == position.pass || ARGON2_VERSION_10 == instance->version) {
fill_block(state, ref_block, curr_block, 0);
} else {
fill_block(state, ref_block, curr_block, 1);
}
}
}

124
src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.c

@ -0,0 +1,124 @@
#include "argon2-xop.h"
#ifdef HAVE_XOP
#include <string.h>
#include <x86intrin.h>
#include "cpu-flags.h"
#define ror64(x, c) _mm_roti_epi64((x), -(c))
static __m128i f(__m128i x, __m128i y)
{
__m128i z = _mm_mul_epu32(x, y);
return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
}
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = ror64(D0, 32); \
D1 = ror64(D1, 32); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = ror64(B0, 24); \
B1 = ror64(B1, 24); \
} while ((void)0, 0)
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
A0 = f(A0, B0); \
A1 = f(A1, B1); \
\
D0 = _mm_xor_si128(D0, A0); \
D1 = _mm_xor_si128(D1, A1); \
\
D0 = ror64(D0, 16); \
D1 = ror64(D1, 16); \
\
C0 = f(C0, D0); \
C1 = f(C1, D1); \
\
B0 = _mm_xor_si128(B0, C0); \
B1 = _mm_xor_si128(B1, C1); \
\
B0 = ror64(B0, 63); \
B1 = ror64(B1, 63); \
} while ((void)0, 0)
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
__m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
B0 = t0; \
B1 = t1; \
\
t0 = _mm_alignr_epi8(D1, D0, 8); \
t1 = _mm_alignr_epi8(D0, D1, 8); \
D0 = t1; \
D1 = t0; \
} while ((void)0, 0)
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
do { \
__m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
__m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
B0 = t0; \
B1 = t1; \
\
t0 = _mm_alignr_epi8(D0, D1, 8); \
t1 = _mm_alignr_epi8(D1, D0, 8); \
D0 = t1; \
D1 = t0; \
} while ((void)0, 0)
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
do { \
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
\
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
\
G1(A0, B0, C1, D0, A1, B1, C0, D1); \
G2(A0, B0, C1, D0, A1, B1, C0, D1); \
\
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
} while ((void)0, 0)
#include "argon2-template-128.h"
void fill_segment_xop(const argon2_instance_t *instance,
argon2_position_t position)
{
fill_segment_128(instance, position);
}
int check_xop(void)
{
return cpu_flags_have_xop();
}
#else
void fill_segment_xop(const argon2_instance_t *instance,
argon2_position_t position)
{
}
int check_xop(void)
{
return 0;
}
#endif

11
src/3rdparty/argon2/arch/x86_64/lib/argon2-xop.h

@ -0,0 +1,11 @@
#ifndef ARGON2_XOP_H
#define ARGON2_XOP_H
#include "core.h"
void fill_segment_xop(const argon2_instance_t *instance,
argon2_position_t position);
int check_xop(void);
#endif // ARGON2_XOP_H

91
src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.c

@ -0,0 +1,91 @@
#include "cpu-flags.h"
#include <cpuid.h>
enum {
X86_64_FEATURE_SSE2 = (1 << 0),
X86_64_FEATURE_SSSE3 = (1 << 1),
X86_64_FEATURE_XOP = (1 << 2),
X86_64_FEATURE_AVX2 = (1 << 3),
X86_64_FEATURE_AVX512F = (1 << 4),
};
static unsigned int cpu_flags;
static unsigned int get_cpuid(int ext, unsigned int level, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
unsigned int eax;
__cpuid(ext ? (0x80000000 | level) : level,
eax, *ebx, *ecx, *edx);
return eax;
}
static unsigned int get_cpuid_count(int ext, unsigned int level,
unsigned int count, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
unsigned int eax;
__cpuid_count(ext ? (0x80000000 | level) : level,
count, eax, *ebx, *ecx, *edx);
return 1;
}
void cpu_flags_get(void)
{
unsigned int ebx, ecx, edx;
unsigned int level, level_ext;
cpu_flags = 0;
level = get_cpuid(0, 0, &ebx, &ecx, &edx);
level_ext = get_cpuid(1, 0, &ebx, &ecx, &edx);
if (level >= 1 && get_cpuid(0, 1, &ebx, &ecx, &edx)) {
if (edx & (1 << 26)) {
cpu_flags |= X86_64_FEATURE_SSE2;
}
if (ecx & (1 << 9)) {
cpu_flags |= X86_64_FEATURE_SSSE3;
}
}
if (level >= 7 && get_cpuid_count(0, 7, 0, &ebx, &ecx, &edx)) {
if (ebx & (1 << 5)) {
cpu_flags |= X86_64_FEATURE_AVX2;
}
if (ebx & (1 << 16)) {
cpu_flags |= X86_64_FEATURE_AVX512F;
}
}
if (level_ext >= 1 && get_cpuid(1, 1, &ebx, &ecx, &edx)) {
if (ecx & (1 << 11)) {
cpu_flags |= X86_64_FEATURE_XOP;
}
}
/* FIXME: check also OS support! */
}
int cpu_flags_have_sse2(void)
{
return cpu_flags & X86_64_FEATURE_SSE2;
}
int cpu_flags_have_ssse3(void)
{
return cpu_flags & X86_64_FEATURE_SSSE3;
}
int cpu_flags_have_xop(void)
{
return cpu_flags & X86_64_FEATURE_XOP;
}
int cpu_flags_have_avx2(void)
{
return cpu_flags & X86_64_FEATURE_AVX2;
}
int cpu_flags_have_avx512f(void)
{
return cpu_flags & X86_64_FEATURE_AVX512F;
}

12
src/3rdparty/argon2/arch/x86_64/lib/cpu-flags.h

@ -0,0 +1,12 @@
#ifndef ARGON2_CPU_FLAGS_H
#define ARGON2_CPU_FLAGS_H
void cpu_flags_get(void);
int cpu_flags_have_sse2(void);
int cpu_flags_have_ssse3(void);
int cpu_flags_have_xop(void);
int cpu_flags_have_avx2(void);
int cpu_flags_have_avx512f(void);
#endif // ARGON2_CPU_FLAGS_H

8
src/3rdparty/argon2/arch/x86_64/src/test-feature-avx2.c

@ -0,0 +1,8 @@
#include <x86intrin.h>
void function_avx2(__m256i *dst, const __m256i *a, const __m256i *b)
{
*dst = _mm256_xor_si256(*a, *b);
}
int main(void) { return 0; }

8
src/3rdparty/argon2/arch/x86_64/src/test-feature-avx512f.c

@ -0,0 +1,8 @@
#include <x86intrin.h>
void function_avx512f(__m512i *dst, const __m512i *a)
{
*dst = _mm512_ror_epi64(*a, 57);
}
int main(void) { return 0; }

8
src/3rdparty/argon2/arch/x86_64/src/test-feature-sse2.c

@ -0,0 +1,8 @@
#include <x86intrin.h>
void function_sse2(__m128i *dst, const __m128i *a, const __m128i *b)
{
*dst = _mm_xor_si128(*a, *b);
}
int main(void) { return 0; }

8
src/3rdparty/argon2/arch/x86_64/src/test-feature-ssse3.c

@ -0,0 +1,8 @@
#include <x86intrin.h>
void function_ssse3(__m128i *dst, const __m128i *a, const __m128i *b)
{
*dst = _mm_shuffle_epi8(*a, *b);
}
int main(void) { return 0; }

8
src/3rdparty/argon2/arch/x86_64/src/test-feature-xop.c

@ -0,0 +1,8 @@
#include <x86intrin.h>
void function_xop(__m128i *dst, const __m128i *a, int b)
{
*dst = _mm_roti_epi64(*a, b);
}
int main(void) { return 0; }

455
src/3rdparty/argon2/include/argon2.h

@ -0,0 +1,455 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication
* along with this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef ARGON2_H
#define ARGON2_H
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <limits.h>
/* Symbols visibility control */
#define ARGON2_PUBLIC
#if defined(__cplusplus)
extern "C" {
#endif
/*
* Argon2 input parameter restrictions
*/
/* Minimum and maximum number of lanes (degree of parallelism) */
#define ARGON2_MIN_LANES UINT32_C(1)
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
/* Minimum and maximum number of threads */
#define ARGON2_MIN_THREADS UINT32_C(1)
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
/* Number of synchronization points between lanes per pass */
#define ARGON2_SYNC_POINTS UINT32_C(4)
/* Minimum and maximum digest size in bytes */
#define ARGON2_MIN_OUTLEN UINT32_C(4)
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */
#define ARGON2_MAX_MEMORY_BITS \
ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
#define ARGON2_MAX_MEMORY \
ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
/* Minimum and maximum number of passes */
#define ARGON2_MIN_TIME UINT32_C(1)
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
/* Minimum and maximum password length in bytes */
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum associated data length in bytes */
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum salt length in bytes */
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum key length in bytes */
#define ARGON2_MIN_SECRET UINT32_C(0)
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
/* Flags to determine which fields are securely wiped (default = no wipe). */
#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
#define ARGON2_FLAG_GENKAT (UINT32_C(1) << 3)
/* Global flag to determine if we are wiping internal memory buffers. This flag
* is defined in core.c and deafults to 1 (wipe internal memory). */
extern int FLAG_clear_internal_memory;
/* Error codes */
typedef enum Argon2_ErrorCodes {
ARGON2_OK = 0,
ARGON2_OUTPUT_PTR_NULL = -1,
ARGON2_OUTPUT_TOO_SHORT = -2,
ARGON2_OUTPUT_TOO_LONG = -3,
ARGON2_PWD_TOO_SHORT = -4,
ARGON2_PWD_TOO_LONG = -5,
ARGON2_SALT_TOO_SHORT = -6,
ARGON2_SALT_TOO_LONG = -7,
ARGON2_AD_TOO_SHORT = -8,
ARGON2_AD_TOO_LONG = -9,
ARGON2_SECRET_TOO_SHORT = -10,
ARGON2_SECRET_TOO_LONG = -11,
ARGON2_TIME_TOO_SMALL = -12,
ARGON2_TIME_TOO_LARGE = -13,
ARGON2_MEMORY_TOO_LITTLE = -14,
ARGON2_MEMORY_TOO_MUCH = -15,
ARGON2_LANES_TOO_FEW = -16,
ARGON2_LANES_TOO_MANY = -17,
ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */
ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */
ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */
ARGON2_MEMORY_ALLOCATION_ERROR = -22,
ARGON2_FREE_MEMORY_CBK_NULL = -23,
ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
ARGON2_INCORRECT_PARAMETER = -25,
ARGON2_INCORRECT_TYPE = -26,
ARGON2_OUT_PTR_MISMATCH = -27,
ARGON2_THREADS_TOO_FEW = -28,
ARGON2_THREADS_TOO_MANY = -29,
ARGON2_MISSING_ARGS = -30,
ARGON2_ENCODING_FAIL = -31,
ARGON2_DECODING_FAIL = -32,
ARGON2_THREAD_FAIL = -33,
ARGON2_DECODING_LENGTH_FAIL = -34,
ARGON2_VERIFY_MISMATCH = -35
} argon2_error_codes;
/* Memory allocator types --- for external allocation */
typedef int (*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
typedef void (*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
/* Argon2 external data structures */
/*
*****
* Context: structure to hold Argon2 inputs:
* output array and its length,
* password and its length,
* salt and its length,
* secret and its length,
* associated data and its length,
* number of passes, amount of used memory (in KBytes, can be rounded up a bit)
* number of parallel threads that will be run.
* All the parameters above affect the output hash value.
* Additionally, two function pointers can be provided to allocate and
* deallocate the memory (if NULL, memory will be allocated internally).
* Also, three flags indicate whether to erase password, secret as soon as they
* are pre-hashed (and thus not needed anymore), and the entire memory
*****
* Simplest situation: you have output array out[8], password is stored in
* pwd[32], salt is stored in salt[16], you do not have keys nor associated
* data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with
* 4 parallel lanes.
* You want to erase the password, but you're OK with last pass not being
* erased. You want to use the default memory allocator.
* Then you initialize:
Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false)
*/
typedef struct Argon2_Context {
uint8_t *out; /* output array */
uint32_t outlen; /* digest length */
uint8_t *pwd; /* password array */
uint32_t pwdlen; /* password length */
uint8_t *salt; /* salt array */
uint32_t saltlen; /* salt length */
uint8_t *secret; /* key array */
uint32_t secretlen; /* key length */
uint8_t *ad; /* associated data array */
uint32_t adlen; /* associated data length */
uint32_t t_cost; /* number of passes */
uint32_t m_cost; /* amount of memory requested (KB) */
uint32_t lanes; /* number of lanes */
uint32_t threads; /* maximum number of threads */
uint32_t version; /* version number */
allocate_fptr allocate_cbk; /* pointer to memory allocator */
deallocate_fptr free_cbk; /* pointer to memory deallocator */
uint32_t flags; /* array of bool options */
} argon2_context;
/* Argon2 primitive type */
typedef enum Argon2_type {
Argon2_d = 0,
Argon2_i = 1,
Argon2_id = 2
} argon2_type;
/* Version of the algorithm */
typedef enum Argon2_version {
ARGON2_VERSION_10 = 0x10,
ARGON2_VERSION_13 = 0x13,
ARGON2_VERSION_NUMBER = ARGON2_VERSION_13
} argon2_version;
/*
* Function that gives the string representation of an argon2_type.
* @param type The argon2_type that we want the string for
* @param uppercase Whether the string should have the first letter uppercase
* @return NULL if invalid type, otherwise the string representation.
*/
ARGON2_PUBLIC const char *argon2_type2string(argon2_type type, int uppercase);
/*
* Function that performs memory-hard hashing with certain degree of parallelism
* @param context Pointer to the Argon2 internal structure
* @return Error code if smth is wrong, ARGON2_OK otherwise
*/
ARGON2_PUBLIC int argon2_ctx(argon2_context *context, argon2_type type);
/**
* Hashes a password with Argon2i, producing an encoded hash
* @param t_cost Number of iterations
* @param m_cost Sets memory usage to m_cost kibibytes
* @param parallelism Number of threads and compute lanes
* @param pwd Pointer to password
* @param pwdlen Password size in bytes
* @param salt Pointer to salt
* @param saltlen Salt size in bytes
* @param hashlen Desired length of the hash in bytes
* @param encoded Buffer where to write the encoded hash
* @param encodedlen Size of the buffer (thus max size of the encoded hash)
* @pre Different parallelism levels will give different results
* @pre Returns ARGON2_OK if successful
*/
ARGON2_PUBLIC int argon2i_hash_encoded(const uint32_t t_cost,
const uint32_t m_cost,
const uint32_t parallelism,
const void *pwd, const size_t pwdlen,
const void *salt, const size_t saltlen,
const size_t hashlen, char *encoded,
const size_t encodedlen);
/**
* Hashes a password with Argon2i, producing a raw hash by allocating memory at
* @hash
* @param t_cost Number of iterations
* @param m_cost Sets memory usage to m_cost kibibytes
* @param parallelism Number of threads and compute lanes
* @param pwd Pointer to password
* @param pwdlen Password size in bytes
* @param salt Pointer to salt
* @param saltlen Salt size in bytes
* @param hash Buffer where to write the raw hash - updated by the function
* @param hashlen Desired length of the hash in bytes
* @pre Different parallelism levels will give different results
* @pre Returns ARGON2_OK if successful
*/
ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash,
const size_t hashlen);
ARGON2_PUBLIC int argon2d_hash_encoded(const uint32_t t_cost,
const uint32_t m_cost,
const uint32_t parallelism,
const void *pwd, const size_t pwdlen,
const void *salt, const size_t saltlen,
const size_t hashlen, char *encoded,
const size_t encodedlen);
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost,
const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash,
const size_t hashlen);
ARGON2_PUBLIC int argon2id_hash_encoded(const uint32_t t_cost,
const uint32_t m_cost,
const uint32_t parallelism,
const void *pwd, const size_t pwdlen,
const void *salt, const size_t saltlen,
const size_t hashlen, char *encoded,
const size_t encodedlen);
ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost,
const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash,
const size_t hashlen);
/* generic function underlying the above ones */
ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash,
const size_t hashlen, char *encoded,
const size_t encodedlen, argon2_type type,
const uint32_t version);
/**
* Verifies a password against an encoded string
* Encoded string is restricted as in validate_inputs()
* @param encoded String encoding parameters, salt, hash
* @param pwd Pointer to password
* @pre Returns ARGON2_OK if successful
*/
ARGON2_PUBLIC int argon2i_verify(const char *encoded, const void *pwd,
const size_t pwdlen);
ARGON2_PUBLIC int argon2d_verify(const char *encoded, const void *pwd,
const size_t pwdlen);
ARGON2_PUBLIC int argon2id_verify(const char *encoded, const void *pwd,
const size_t pwdlen);
/* generic function underlying the above ones */
ARGON2_PUBLIC int argon2_verify(const char *encoded, const void *pwd,
const size_t pwdlen, argon2_type type);
/**
* Argon2d: Version of Argon2 that picks memory blocks depending
* on the password and salt. Only for side-channel-free
* environment!!
*****
* @param context Pointer to current Argon2 context
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2d_ctx(argon2_context *context);
/**
* Argon2i: Version of Argon2 that picks memory blocks
* independent on the password and salt. Good for side-channels,
* but worse w.r.t. tradeoff attacks if only one pass is used.
*****
* @param context Pointer to current Argon2 context
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2i_ctx(argon2_context *context);
/**
* Argon2id: Version of Argon2 where the first half-pass over memory is
* password-independent, the rest are password-dependent (on the password and
* salt). OK against side channels (they reduce to 1/2-pass Argon2i), and
* better with w.r.t. tradeoff attacks (similar to Argon2d).
*****
* @param context Pointer to current Argon2 context
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2id_ctx(argon2_context *context);
/**
* Verify if a given password is correct for Argon2d hashing
* @param context Pointer to current Argon2 context
* @param hash The password hash to verify. The length of the hash is
* specified by the context outlen member
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2d_verify_ctx(argon2_context *context, const char *hash);
/**
* Verify if a given password is correct for Argon2i hashing
* @param context Pointer to current Argon2 context
* @param hash The password hash to verify. The length of the hash is
* specified by the context outlen member
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2i_verify_ctx(argon2_context *context, const char *hash);
/**
* Verify if a given password is correct for Argon2id hashing
* @param context Pointer to current Argon2 context
* @param hash The password hash to verify. The length of the hash is
* specified by the context outlen member
* @return Zero if successful, a non zero error code otherwise
*/
ARGON2_PUBLIC int argon2id_verify_ctx(argon2_context *context,
const char *hash);
/* generic function underlying the above ones */
ARGON2_PUBLIC int argon2_verify_ctx(argon2_context *context, const char *hash,
argon2_type type);
/**
* Get the associated error message for given error code
* @return The error message associated with the given error code
*/
ARGON2_PUBLIC const char *argon2_error_message(int error_code);
/**
* Returns the encoded hash length for the given input parameters
* @param t_cost Number of iterations
* @param m_cost Memory usage in kibibytes
* @param parallelism Number of threads; used to compute lanes
* @param saltlen Salt size in bytes
* @param hashlen Hash size in bytes
* @param type The argon2_type that we want the encoded length for
* @return The encoded hash length in bytes
*/
ARGON2_PUBLIC size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost,
uint32_t parallelism, uint32_t saltlen,
uint32_t hashlen, argon2_type type);
/* signals availability of argon2_select_impl: */
#define ARGON2_SELECTABLE_IMPL
/**
* Selects the fastest available optimized implementation.
* @param out The file for debug output (e. g. stderr; pass NULL for no
* debug output)
* @param prefix What to print before each line; NULL is equivalent to empty
* string
*/
ARGON2_PUBLIC void argon2_select_impl(FILE *out, const char *prefix);
/* signals support for passing preallocated memory: */
#define ARGON2_PREALLOCATED_MEMORY
ARGON2_PUBLIC size_t argon2_memory_size(uint32_t m_cost, uint32_t parallelism);
/**
* Function that performs memory-hard hashing with certain degree of parallelism
* @param context Pointer to the Argon2 internal structure
* @param type The Argon2 type
* @param memory Preallocated memory for blocks (or NULL)
* @param memory_size The size of preallocated memory
* @return Error code if smth is wrong, ARGON2_OK otherwise
*/
ARGON2_PUBLIC int argon2_ctx_mem(argon2_context *context, argon2_type type,
void *memory, size_t memory_size);
#if defined(__cplusplus)
}
#endif
#endif

193
src/3rdparty/argon2/lib/argon2-template-64.h

@ -0,0 +1,193 @@
#include <string.h>
#include "core.h"
#define MASK_32 UINT64_C(0xFFFFFFFF)
#define F(x, y) ((x) + (y) + 2 * ((x) & MASK_32) * ((y) & MASK_32))
#define G(a, b, c, d) \
do { \
a = F(a, b); \
d = rotr64(d ^ a, 32); \
c = F(c, d); \
b = rotr64(b ^ c, 24); \
a = F(a, b); \
d = rotr64(d ^ a, 16); \
c = F(c, d); \
b = rotr64(b ^ c, 63); \
} while ((void)0, 0)
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, \
v8, v9, v10, v11, v12, v13, v14, v15) \
do { \
G(v0, v4, v8, v12); \
G(v1, v5, v9, v13); \
G(v2, v6, v10, v14); \
G(v3, v7, v11, v15); \
G(v0, v5, v10, v15); \
G(v1, v6, v11, v12); \
G(v2, v7, v8, v13); \
G(v3, v4, v9, v14); \
} while ((void)0, 0)
#define BLAKE2_ROUND_NOMSG1(v) \
BLAKE2_ROUND_NOMSG( \
(v)[ 0], (v)[ 1], (v)[ 2], (v)[ 3], \
(v)[ 4], (v)[ 5], (v)[ 6], (v)[ 7], \
(v)[ 8], (v)[ 9], (v)[10], (v)[11], \
(v)[12], (v)[13], (v)[14], (v)[15])
#define BLAKE2_ROUND_NOMSG2(v) \
BLAKE2_ROUND_NOMSG( \
(v)[ 0], (v)[ 1], (v)[ 16], (v)[ 17], \
(v)[ 32], (v)[ 33], (v)[ 48], (v)[ 49], \
(v)[ 64], (v)[ 65], (v)[ 80], (v)[ 81], \
(v)[ 96], (v)[ 97], (v)[112], (v)[113])
static void fill_block(const block *prev_block, const block *ref_block,
block *next_block, int with_xor)
{
block blockR, block_tmp;
copy_block(&blockR, ref_block);
xor_block(&blockR, prev_block);
copy_block(&block_tmp, &blockR);
if (with_xor) {
xor_block(&block_tmp, next_block);
}
/* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
(16,17,..31)... finally (112,113,...127) */
BLAKE2_ROUND_NOMSG1(blockR.v + 0 * 16);
BLAKE2_ROUND_NOMSG1(blockR.v + 1 * 16);
BLAKE2_ROUND_NOMSG1(blockR.v + 2 * 16);
BLAKE2_ROUND_NOMSG1(blockR.v + 3 * 16);
BLAKE2_ROUND_NOMSG1(blockR.v + 4 * 16);
BLAKE2_ROUND_NOMSG1(blockR.v + 5 * 16);
BLAKE2_ROUND_NOMSG1(blockR.v + 6 * 16);
BLAKE2_ROUND_NOMSG1(blockR.v + 7 * 16);
/* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
(2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
BLAKE2_ROUND_NOMSG2(blockR.v + 0 * 2);
BLAKE2_ROUND_NOMSG2(blockR.v + 1 * 2);
BLAKE2_ROUND_NOMSG2(blockR.v + 2 * 2);
BLAKE2_ROUND_NOMSG2(blockR.v + 3 * 2);
BLAKE2_ROUND_NOMSG2(blockR.v + 4 * 2);
BLAKE2_ROUND_NOMSG2(blockR.v + 5 * 2);
BLAKE2_ROUND_NOMSG2(blockR.v + 6 * 2);
BLAKE2_ROUND_NOMSG2(blockR.v + 7 * 2);
copy_block(next_block, &block_tmp);
xor_block(next_block, &blockR);
}
static void next_addresses(block *address_block, block *input_block,
const block *zero_block)
{
input_block->v[6]++;
fill_block(zero_block, input_block, address_block, 0);
fill_block(zero_block, address_block, address_block, 0);
}
static void fill_segment_64(const argon2_instance_t *instance,
argon2_position_t position)
{
block *ref_block, *curr_block, *prev_block;
block address_block, input_block, zero_block;
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index, i;
int data_independent_addressing;
if (instance == NULL) {
return;
}
data_independent_addressing = (instance->type == Argon2_i) ||
(instance->type == Argon2_id && (position.pass == 0) &&
(position.slice < ARGON2_SYNC_POINTS / 2));
if (data_independent_addressing) {
init_block_value(&zero_block, 0);
init_block_value(&input_block, 0);
input_block.v[0] = position.pass;
input_block.v[1] = position.lane;
input_block.v[2] = position.slice;
input_block.v[3] = instance->memory_blocks;
input_block.v[4] = instance->passes;
input_block.v[5] = instance->type;
}
starting_index = 0;
if ((0 == position.pass) && (0 == position.slice)) {
starting_index = 2; /* we have already generated the first two blocks */
/* Don't forget to generate the first block of addresses: */
if (data_independent_addressing) {
next_addresses(&address_block, &input_block, &zero_block);
}
}
/* Offset of the current block */
curr_offset = position.lane * instance->lane_length +
position.slice * instance->segment_length + starting_index;
if (0 == curr_offset % instance->lane_length) {
/* Last block in this lane */
prev_offset = curr_offset + instance->lane_length - 1;
} else {
/* Previous block */
prev_offset = curr_offset - 1;
}
for (i = starting_index; i < instance->segment_length;
++i, ++curr_offset, ++prev_offset) {
/*1.1 Rotating prev_offset if needed */
if (curr_offset % instance->lane_length == 1) {
prev_offset = curr_offset - 1;
}
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
if (data_independent_addressing) {
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
next_addresses(&address_block, &input_block, &zero_block);
}
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
} else {
pseudo_rand = instance->memory[prev_offset].v[0];
}
/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
if ((position.pass == 0) && (position.slice == 0)) {
/* Can not reference other lanes yet */
ref_lane = position.lane;
}
/* 1.2.3 Computing the number of possible reference block within the
* lane.
*/
position.index = i;
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
ref_lane == position.lane);
/* 2 Creating a new block */
ref_block =
instance->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->memory + curr_offset;
prev_block = instance->memory + prev_offset;
/* version 1.2.1 and earlier: overwrite, not XOR */
if (0 == position.pass || ARGON2_VERSION_10 == instance->version) {
fill_block(prev_block, ref_block, curr_block, 0);
} else {
fill_block(prev_block, ref_block, curr_block, 1);
}
}
}

476
src/3rdparty/argon2/lib/argon2.c

@ -0,0 +1,476 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "argon2.h"
#include "encoding.h"
#include "core.h"
const char *argon2_type2string(argon2_type type, int uppercase) {
switch (type) {
case Argon2_d:
return uppercase ? "Argon2d" : "argon2d";
case Argon2_i:
return uppercase ? "Argon2i" : "argon2i";
case Argon2_id:
return uppercase ? "Argon2id" : "argon2id";
}
return NULL;
}
static void argon2_compute_memory_blocks(uint32_t *memory_blocks,
uint32_t *segment_length,
uint32_t m_cost, uint32_t lanes)
{
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
*memory_blocks = m_cost;
if (*memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes) {
*memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes;
}
*segment_length = *memory_blocks / (lanes * ARGON2_SYNC_POINTS);
/* Ensure that all segments have equal length */
*memory_blocks = *segment_length * (lanes * ARGON2_SYNC_POINTS);
}
size_t argon2_memory_size(uint32_t m_cost, uint32_t parallelism) {
uint32_t memory_blocks, segment_length;
argon2_compute_memory_blocks(&memory_blocks, &segment_length, m_cost,
parallelism);
return memory_blocks * ARGON2_BLOCK_SIZE;
}
int argon2_ctx_mem(argon2_context *context, argon2_type type, void *memory,
size_t memory_size) {
/* 1. Validate all inputs */
int result = validate_inputs(context);
uint32_t memory_blocks, segment_length;
argon2_instance_t instance;
if (ARGON2_OK != result) {
return result;
}
if (Argon2_d != type && Argon2_i != type && Argon2_id != type) {
return ARGON2_INCORRECT_TYPE;
}
/* 2. Align memory size */
argon2_compute_memory_blocks(&memory_blocks, &segment_length,
context->m_cost, context->lanes);
/* check for sufficient memory size: */
if (memory != NULL && (memory_size % ARGON2_BLOCK_SIZE != 0 ||
memory_size / ARGON2_BLOCK_SIZE < memory_blocks)) {
return ARGON2_MEMORY_ALLOCATION_ERROR;
}
instance.version = context->version;
instance.memory = (block *)memory;
instance.passes = context->t_cost;
instance.memory_blocks = memory_blocks;
instance.segment_length = segment_length;
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
instance.lanes = context->lanes;
instance.threads = context->threads;
instance.type = type;
instance.print_internals = !!(context->flags & ARGON2_FLAG_GENKAT);
instance.keep_memory = memory != NULL;
if (instance.threads > instance.lanes) {
instance.threads = instance.lanes;
}
/* 3. Initialization: Hashing inputs, allocating memory, filling first
* blocks
*/
result = initialize(&instance, context);
if (ARGON2_OK != result) {
return result;
}
/* 4. Filling memory */
result = fill_memory_blocks(&instance);
if (ARGON2_OK != result) {
return result;
}
/* 5. Finalization */
finalize(context, &instance);
return ARGON2_OK;
}
int argon2_ctx(argon2_context *context, argon2_type type) {
return argon2_ctx_mem(context, type, NULL, 0);
}
int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt, const size_t saltlen,
void *hash, const size_t hashlen, char *encoded,
const size_t encodedlen, argon2_type type,
const uint32_t version){
argon2_context context;
int result;
uint8_t *out;
if (pwdlen > ARGON2_MAX_PWD_LENGTH) {
return ARGON2_PWD_TOO_LONG;
}
if (saltlen > ARGON2_MAX_SALT_LENGTH) {
return ARGON2_SALT_TOO_LONG;
}
if (hashlen > ARGON2_MAX_OUTLEN) {
return ARGON2_OUTPUT_TOO_LONG;
}
if (hashlen < ARGON2_MIN_OUTLEN) {
return ARGON2_OUTPUT_TOO_SHORT;
}
out = malloc(hashlen);
if (!out) {
return ARGON2_MEMORY_ALLOCATION_ERROR;
}
context.out = (uint8_t *)out;
context.outlen = (uint32_t)hashlen;
context.pwd = CONST_CAST(uint8_t *)pwd;
context.pwdlen = (uint32_t)pwdlen;
context.salt = CONST_CAST(uint8_t *)salt;
context.saltlen = (uint32_t)saltlen;
context.secret = NULL;
context.secretlen = 0;
context.ad = NULL;
context.adlen = 0;
context.t_cost = t_cost;
context.m_cost = m_cost;
context.lanes = parallelism;
context.threads = parallelism;
context.allocate_cbk = NULL;
context.free_cbk = NULL;
context.flags = ARGON2_DEFAULT_FLAGS;
context.version = version;
result = argon2_ctx(&context, type);
if (result != ARGON2_OK) {
clear_internal_memory(out, hashlen);
free(out);
return result;
}
/* if raw hash requested, write it */
if (hash) {
memcpy(hash, out, hashlen);
}
/* if encoding requested, write it */
if (encoded && encodedlen) {
if (encode_string(encoded, encodedlen, &context, type) != ARGON2_OK) {
clear_internal_memory(out, hashlen); /* wipe buffers if error */
clear_internal_memory(encoded, encodedlen);
free(out);
return ARGON2_ENCODING_FAIL;
}
}
clear_internal_memory(out, hashlen);
free(out);
return ARGON2_OK;
}
int argon2i_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, const size_t hashlen,
char *encoded, const size_t encodedlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
NULL, hashlen, encoded, encodedlen, Argon2_i,
ARGON2_VERSION_NUMBER);
}
int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash, const size_t hashlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
hash, hashlen, NULL, 0, Argon2_i, ARGON2_VERSION_NUMBER);
}
int argon2d_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, const size_t hashlen,
char *encoded, const size_t encodedlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
NULL, hashlen, encoded, encodedlen, Argon2_d,
ARGON2_VERSION_NUMBER);
}
int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash, const size_t hashlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
hash, hashlen, NULL, 0, Argon2_d, ARGON2_VERSION_NUMBER);
}
int argon2id_hash_encoded(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, const size_t hashlen,
char *encoded, const size_t encodedlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
NULL, hashlen, encoded, encodedlen, Argon2_id,
ARGON2_VERSION_NUMBER);
}
int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
const uint32_t parallelism, const void *pwd,
const size_t pwdlen, const void *salt,
const size_t saltlen, void *hash, const size_t hashlen) {
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
hash, hashlen, NULL, 0, Argon2_id,
ARGON2_VERSION_NUMBER);
}
static int argon2_compare(const uint8_t *b1, const uint8_t *b2, size_t len) {
size_t i;
uint8_t d = 0U;
for (i = 0U; i < len; i++) {
d |= b1[i] ^ b2[i];
}
return (int)((1 & ((d - 1) >> 8)) - 1);
}
int argon2_verify(const char *encoded, const void *pwd, const size_t pwdlen,
argon2_type type) {
argon2_context ctx;
uint8_t *desired_result = NULL;
int ret = ARGON2_OK;
size_t encoded_len;
uint32_t max_field_len;
if (pwdlen > ARGON2_MAX_PWD_LENGTH) {
return ARGON2_PWD_TOO_LONG;
}
if (encoded == NULL) {
return ARGON2_DECODING_FAIL;
}
encoded_len = strlen(encoded);
if (encoded_len > UINT32_MAX) {
return ARGON2_DECODING_FAIL;
}
/* No field can be longer than the encoded length */
max_field_len = (uint32_t)encoded_len;
ctx.saltlen = max_field_len;
ctx.outlen = max_field_len;
ctx.salt = malloc(ctx.saltlen);
ctx.out = malloc(ctx.outlen);
if (!ctx.salt || !ctx.out) {
ret = ARGON2_MEMORY_ALLOCATION_ERROR;
goto fail;
}
ctx.pwd = (uint8_t *)pwd;
ctx.pwdlen = (uint32_t)pwdlen;
ret = decode_string(&ctx, encoded, type);
if (ret != ARGON2_OK) {
goto fail;
}
/* Set aside the desired result, and get a new buffer. */
desired_result = ctx.out;
ctx.out = malloc(ctx.outlen);
if (!ctx.out) {
ret = ARGON2_MEMORY_ALLOCATION_ERROR;
goto fail;
}
ret = argon2_verify_ctx(&ctx, (char *)desired_result, type);
if (ret != ARGON2_OK) {
goto fail;
}
fail:
free(ctx.salt);
free(ctx.out);
free(desired_result);
return ret;
}
int argon2i_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
return argon2_verify(encoded, pwd, pwdlen, Argon2_i);
}
int argon2d_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
return argon2_verify(encoded, pwd, pwdlen, Argon2_d);
}
int argon2id_verify(const char *encoded, const void *pwd, const size_t pwdlen) {
return argon2_verify(encoded, pwd, pwdlen, Argon2_id);
}
int argon2d_ctx(argon2_context *context) {
return argon2_ctx(context, Argon2_d);
}
int argon2i_ctx(argon2_context *context) {
return argon2_ctx(context, Argon2_i);
}
int argon2id_ctx(argon2_context *context) {
return argon2_ctx(context, Argon2_id);
}
int argon2_verify_ctx(argon2_context *context, const char *hash,
argon2_type type) {
int ret = argon2_ctx(context, type);
if (ret != ARGON2_OK) {
return ret;
}
if (argon2_compare((uint8_t *)hash, context->out, context->outlen)) {
return ARGON2_VERIFY_MISMATCH;
}
return ARGON2_OK;
}
int argon2d_verify_ctx(argon2_context *context, const char *hash) {
return argon2_verify_ctx(context, hash, Argon2_d);
}
int argon2i_verify_ctx(argon2_context *context, const char *hash) {
return argon2_verify_ctx(context, hash, Argon2_i);
}
int argon2id_verify_ctx(argon2_context *context, const char *hash) {
return argon2_verify_ctx(context, hash, Argon2_id);
}
const char *argon2_error_message(int error_code) {
switch (error_code) {
case ARGON2_OK:
return "OK";
case ARGON2_OUTPUT_PTR_NULL:
return "Output pointer is NULL";
case ARGON2_OUTPUT_TOO_SHORT:
return "Output is too short";
case ARGON2_OUTPUT_TOO_LONG:
return "Output is too long";
case ARGON2_PWD_TOO_SHORT:
return "Password is too short";
case ARGON2_PWD_TOO_LONG:
return "Password is too long";
case ARGON2_SALT_TOO_SHORT:
return "Salt is too short";
case ARGON2_SALT_TOO_LONG:
return "Salt is too long";
case ARGON2_AD_TOO_SHORT:
return "Associated data is too short";
case ARGON2_AD_TOO_LONG:
return "Associated data is too long";
case ARGON2_SECRET_TOO_SHORT:
return "Secret is too short";
case ARGON2_SECRET_TOO_LONG:
return "Secret is too long";
case ARGON2_TIME_TOO_SMALL:
return "Time cost is too small";
case ARGON2_TIME_TOO_LARGE:
return "Time cost is too large";
case ARGON2_MEMORY_TOO_LITTLE:
return "Memory cost is too small";
case ARGON2_MEMORY_TOO_MUCH:
return "Memory cost is too large";
case ARGON2_LANES_TOO_FEW:
return "Too few lanes";
case ARGON2_LANES_TOO_MANY:
return "Too many lanes";
case ARGON2_PWD_PTR_MISMATCH:
return "Password pointer is NULL, but password length is not 0";
case ARGON2_SALT_PTR_MISMATCH:
return "Salt pointer is NULL, but salt length is not 0";
case ARGON2_SECRET_PTR_MISMATCH:
return "Secret pointer is NULL, but secret length is not 0";
case ARGON2_AD_PTR_MISMATCH:
return "Associated data pointer is NULL, but ad length is not 0";
case ARGON2_MEMORY_ALLOCATION_ERROR:
return "Memory allocation error";
case ARGON2_FREE_MEMORY_CBK_NULL:
return "The free memory callback is NULL";
case ARGON2_ALLOCATE_MEMORY_CBK_NULL:
return "The allocate memory callback is NULL";
case ARGON2_INCORRECT_PARAMETER:
return "Argon2_Context context is NULL";
case ARGON2_INCORRECT_TYPE:
return "There is no such version of Argon2";
case ARGON2_OUT_PTR_MISMATCH:
return "Output pointer mismatch";
case ARGON2_THREADS_TOO_FEW:
return "Not enough threads";
case ARGON2_THREADS_TOO_MANY:
return "Too many threads";
case ARGON2_MISSING_ARGS:
return "Missing arguments";
case ARGON2_ENCODING_FAIL:
return "Encoding failed";
case ARGON2_DECODING_FAIL:
return "Decoding failed";
case ARGON2_THREAD_FAIL:
return "Threading failure";
case ARGON2_DECODING_LENGTH_FAIL:
return "Some of encoded parameters are too long or too short";
case ARGON2_VERIFY_MISMATCH:
return "The password does not match the supplied hash";
default:
return "Unknown error code";
}
}
size_t argon2_encodedlen(uint32_t t_cost, uint32_t m_cost, uint32_t parallelism,
uint32_t saltlen, uint32_t hashlen, argon2_type type) {
return strlen("$$v=$m=,t=,p=$$") + strlen(argon2_type2string(type, 0)) +
numlen(t_cost) + numlen(m_cost) + numlen(parallelism) +
b64len(saltlen) + b64len(hashlen) + numlen(ARGON2_VERSION_NUMBER) +
1;
}

90
src/3rdparty/argon2/lib/blake2/blake2-impl.h

@ -0,0 +1,90 @@
#ifndef ARGON2_BLAKE2_IMPL_H
#define ARGON2_BLAKE2_IMPL_H
#include <stdint.h>
/* Argon2 Team - Begin Code */
/*
Not an exhaustive list, but should cover the majority of modern platforms
Additionally, the code will always be correct---this is only a performance
tweak.
*/
#if (defined(__BYTE_ORDER__) && \
(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
defined(_M_ARM)
#define NATIVE_LITTLE_ENDIAN
#endif
/* Argon2 Team - End Code */
static inline uint32_t load32(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
return *(const uint32_t *)src;
#else
const uint8_t *p = (const uint8_t *)src;
uint32_t w = *p++;
w |= (uint32_t)(*p++) << 8;
w |= (uint32_t)(*p++) << 16;
w |= (uint32_t)(*p++) << 24;
return w;
#endif
}
static inline uint64_t load64(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
return *(const uint64_t *)src;
#else
const uint8_t *p = (const uint8_t *)src;
uint64_t w = *p++;
w |= (uint64_t)(*p++) << 8;
w |= (uint64_t)(*p++) << 16;
w |= (uint64_t)(*p++) << 24;
w |= (uint64_t)(*p++) << 32;
w |= (uint64_t)(*p++) << 40;
w |= (uint64_t)(*p++) << 48;
w |= (uint64_t)(*p++) << 56;
return w;
#endif
}
static inline void store32(void *dst, uint32_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
*(uint32_t *)dst = w;
#else
uint8_t *p = (uint8_t *)dst;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
#endif
}
static inline void store64(void *dst, uint64_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
*(uint64_t *)dst = w;
#else
uint8_t *p = (uint8_t *)dst;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
w >>= 8;
*p++ = (uint8_t)w;
#endif
}
#endif // ARGON2_BLAKE2_IMPL_H

225
src/3rdparty/argon2/lib/blake2/blake2.c

@ -0,0 +1,225 @@
#include <string.h>
#include "blake2/blake2.h"
#include "blake2/blake2-impl.h"
#include "core.h"
static const uint64_t blake2b_IV[8] = {
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179)
};
#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
static const unsigned int blake2b_sigma[12][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
};
#define G(m, r, i, a, b, c, d) \
do { \
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
d = rotr64(d ^ a, 32); \
c = c + d; \
b = rotr64(b ^ c, 24); \
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
d = rotr64(d ^ a, 16); \
c = c + d; \
b = rotr64(b ^ c, 63); \
} while ((void)0, 0)
#define ROUND(m, v, r) \
do { \
G(m, r, 0, v[0], v[4], v[ 8], v[12]); \
G(m, r, 1, v[1], v[5], v[ 9], v[13]); \
G(m, r, 2, v[2], v[6], v[10], v[14]); \
G(m, r, 3, v[3], v[7], v[11], v[15]); \
G(m, r, 4, v[0], v[5], v[10], v[15]); \
G(m, r, 5, v[1], v[6], v[11], v[12]); \
G(m, r, 6, v[2], v[7], v[ 8], v[13]); \
G(m, r, 7, v[3], v[4], v[ 9], v[14]); \
} while ((void)0, 0)
void blake2b_compress(blake2b_state *S, const void *block, uint64_t f0)
{
uint64_t m[16];
uint64_t v[16];
m[ 0] = load64((const uint64_t *)block + 0);
m[ 1] = load64((const uint64_t *)block + 1);
m[ 2] = load64((const uint64_t *)block + 2);
m[ 3] = load64((const uint64_t *)block + 3);
m[ 4] = load64((const uint64_t *)block + 4);
m[ 5] = load64((const uint64_t *)block + 5);
m[ 6] = load64((const uint64_t *)block + 6);
m[ 7] = load64((const uint64_t *)block + 7);
m[ 8] = load64((const uint64_t *)block + 8);
m[ 9] = load64((const uint64_t *)block + 9);
m[10] = load64((const uint64_t *)block + 10);
m[11] = load64((const uint64_t *)block + 11);
m[12] = load64((const uint64_t *)block + 12);
m[13] = load64((const uint64_t *)block + 13);
m[14] = load64((const uint64_t *)block + 14);
m[15] = load64((const uint64_t *)block + 15);
v[ 0] = S->h[0];
v[ 1] = S->h[1];
v[ 2] = S->h[2];
v[ 3] = S->h[3];
v[ 4] = S->h[4];
v[ 5] = S->h[5];
v[ 6] = S->h[6];
v[ 7] = S->h[7];
v[ 8] = blake2b_IV[0];
v[ 9] = blake2b_IV[1];
v[10] = blake2b_IV[2];
v[11] = blake2b_IV[3];
v[12] = blake2b_IV[4] ^ S->t[0];
v[13] = blake2b_IV[5] ^ S->t[1];
v[14] = blake2b_IV[6] ^ f0;
v[15] = blake2b_IV[7];
ROUND(m, v, 0);
ROUND(m, v, 1);
ROUND(m, v, 2);
ROUND(m, v, 3);
ROUND(m, v, 4);
ROUND(m, v, 5);
ROUND(m, v, 6);
ROUND(m, v, 7);
ROUND(m, v, 8);
ROUND(m, v, 9);
ROUND(m, v, 10);
ROUND(m, v, 11);
S->h[0] ^= v[0] ^ v[ 8];
S->h[1] ^= v[1] ^ v[ 9];
S->h[2] ^= v[2] ^ v[10];
S->h[3] ^= v[3] ^ v[11];
S->h[4] ^= v[4] ^ v[12];
S->h[5] ^= v[5] ^ v[13];
S->h[6] ^= v[6] ^ v[14];
S->h[7] ^= v[7] ^ v[15];
}
static void blake2b_increment_counter(blake2b_state *S, uint64_t inc)
{
S->t[0] += inc;
S->t[1] += (S->t[0] < inc);
}
static void blake2b_init_state(blake2b_state *S)
{
memcpy(S->h, blake2b_IV, sizeof(S->h));
S->t[1] = S->t[0] = 0;
S->buflen = 0;
}
void blake2b_init(blake2b_state *S, size_t outlen)
{
blake2b_init_state(S);
/* XOR initial state with param block: */
S->h[0] ^= (uint64_t)outlen | (UINT64_C(1) << 16) | (UINT64_C(1) << 24);
}
void blake2b_update(blake2b_state *S, const void *in, size_t inlen)
{
const uint8_t *pin = (const uint8_t *)in;
if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) {
size_t left = S->buflen;
size_t fill = BLAKE2B_BLOCKBYTES - left;
memcpy(&S->buf[left], pin, fill);
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
blake2b_compress(S, S->buf, 0);
S->buflen = 0;
inlen -= fill;
pin += fill;
/* Avoid buffer copies when possible */
while (inlen > BLAKE2B_BLOCKBYTES) {
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
blake2b_compress(S, pin, 0);
inlen -= BLAKE2B_BLOCKBYTES;
pin += BLAKE2B_BLOCKBYTES;
}
}
memcpy(&S->buf[S->buflen], pin, inlen);
S->buflen += inlen;
}
void blake2b_final(blake2b_state *S, void *out, size_t outlen)
{
uint8_t buffer[BLAKE2B_OUTBYTES] = {0};
unsigned int i;
blake2b_increment_counter(S, S->buflen);
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
blake2b_compress(S, S->buf, UINT64_C(0xFFFFFFFFFFFFFFFF));
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
store64(buffer + i * sizeof(uint64_t), S->h[i]);
}
memcpy(out, buffer, outlen);
clear_internal_memory(buffer, sizeof(buffer));
clear_internal_memory(S->buf, sizeof(S->buf));
clear_internal_memory(S->h, sizeof(S->h));
}
void blake2b_long(void *out, size_t outlen, const void *in, size_t inlen)
{
uint8_t *pout = (uint8_t *)out;
blake2b_state blake_state;
uint8_t outlen_bytes[sizeof(uint32_t)] = {0};
store32(outlen_bytes, (uint32_t)outlen);
if (outlen <= BLAKE2B_OUTBYTES) {
blake2b_init(&blake_state, outlen);
blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes));
blake2b_update(&blake_state, in, inlen);
blake2b_final(&blake_state, pout, outlen);
} else {
uint32_t toproduce;
uint8_t out_buffer[BLAKE2B_OUTBYTES];
blake2b_init(&blake_state, BLAKE2B_OUTBYTES);
blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes));
blake2b_update(&blake_state, in, inlen);
blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES);
memcpy(pout, out_buffer, BLAKE2B_OUTBYTES / 2);
pout += BLAKE2B_OUTBYTES / 2;
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
while (toproduce > BLAKE2B_OUTBYTES) {
blake2b_init(&blake_state, BLAKE2B_OUTBYTES);
blake2b_update(&blake_state, out_buffer, BLAKE2B_OUTBYTES);
blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES);
memcpy(pout, out_buffer, BLAKE2B_OUTBYTES / 2);
pout += BLAKE2B_OUTBYTES / 2;
toproduce -= BLAKE2B_OUTBYTES / 2;
}
blake2b_init(&blake_state, toproduce);
blake2b_update(&blake_state, out_buffer, BLAKE2B_OUTBYTES);
blake2b_final(&blake_state, out_buffer, toproduce);
memcpy(pout, out_buffer, toproduce);
clear_internal_memory(out_buffer, sizeof(out_buffer));
}
}

30
src/3rdparty/argon2/lib/blake2/blake2.h

@ -0,0 +1,30 @@
#ifndef ARGON2_BLAKE2_H
#define ARGON2_BLAKE2_H
#include <stddef.h>
#include <stdint.h>
enum blake2b_constant {
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
typedef struct __blake2b_state {
uint64_t h[8];
uint64_t t[2];
uint8_t buf[BLAKE2B_BLOCKBYTES];
size_t buflen;
} blake2b_state;
/* Streaming API */
void blake2b_init(blake2b_state *S, size_t outlen);
void blake2b_update(blake2b_state *S, const void *in, size_t inlen);
void blake2b_final(blake2b_state *S, void *out, size_t outlen);
void blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
#endif // ARGON2_BLAKE2_H

633
src/3rdparty/argon2/lib/core.c

@ -0,0 +1,633 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
/*For memory wiping*/
#ifdef _MSC_VER
#include <windows.h>
#include <winbase.h> /* For SecureZeroMemory */
#endif
#if defined __STDC_LIB_EXT1__
#define __STDC_WANT_LIB_EXT1__ 1
#endif
#define VC_GE_2005(version) (version >= 1400)
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "core.h"
#include "thread.h"
#include "blake2/blake2.h"
#include "blake2/blake2-impl.h"
#include "genkat.h"
#if defined(__clang__)
#if __has_attribute(optnone)
#define NOT_OPTIMIZED __attribute__((optnone))
#endif
#elif defined(__GNUC__)
#define GCC_VERSION \
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#if GCC_VERSION >= 40400
#define NOT_OPTIMIZED __attribute__((optimize("O0")))
#endif
#endif
#ifndef NOT_OPTIMIZED
#define NOT_OPTIMIZED
#endif
/***************Instance and Position constructors**********/
void init_block_value(block *b, uint8_t in) { memset(b->v, in, sizeof(b->v)); }
void copy_block(block *dst, const block *src) {
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
}
void xor_block(block *dst, const block *src) {
int i;
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
dst->v[i] ^= src->v[i];
}
}
static void load_block(block *dst, const void *input) {
unsigned i;
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i]));
}
}
static void store_block(void *output, const block *src) {
unsigned i;
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]);
}
}
/***************Memory functions*****************/
int allocate_memory(const argon2_context *context,
argon2_instance_t *instance) {
size_t blocks = instance->memory_blocks;
size_t memory_size = blocks * ARGON2_BLOCK_SIZE;
/* 0. Check for memory supplied by user: */
/* NOTE: Sufficient memory size is already checked in argon2_ctx_mem() */
if (instance->memory != NULL) {
return ARGON2_OK;
}
/* 1. Check for multiplication overflow */
if (blocks != 0 && memory_size / ARGON2_BLOCK_SIZE != blocks) {
return ARGON2_MEMORY_ALLOCATION_ERROR;
}
/* 2. Try to allocate with appropriate allocator */
if (context->allocate_cbk) {
(context->allocate_cbk)((uint8_t **)&instance->memory, memory_size);
} else {
instance->memory = malloc(memory_size);
}
if (instance->memory == NULL) {
return ARGON2_MEMORY_ALLOCATION_ERROR;
}
return ARGON2_OK;
}
void free_memory(const argon2_context *context,
const argon2_instance_t *instance) {
size_t memory_size = instance->memory_blocks * ARGON2_BLOCK_SIZE;
clear_internal_memory(instance->memory, memory_size);
if (instance->keep_memory) {
/* user-supplied memory -- do not free */
return;
}
if (context->free_cbk) {
(context->free_cbk)((uint8_t *)instance->memory, memory_size);
} else {
free(instance->memory);
}
}
void NOT_OPTIMIZED secure_wipe_memory(void *v, size_t n) {
#if defined(_MSC_VER) && VC_GE_2005(_MSC_VER)
SecureZeroMemory(v, n);
#elif defined memset_s
memset_s(v, n, 0, n);
#elif defined(__OpenBSD__)
explicit_bzero(v, n);
#else
static void *(*const volatile memset_sec)(void *, int, size_t) = &memset;
memset_sec(v, 0, n);
#endif
}
/* Memory clear flag defaults to true. */
int FLAG_clear_internal_memory = 1;
void clear_internal_memory(void *v, size_t n) {
if (FLAG_clear_internal_memory && v) {
secure_wipe_memory(v, n);
}
}
void finalize(const argon2_context *context, argon2_instance_t *instance) {
if (context != NULL && instance != NULL) {
block blockhash;
uint32_t l;
copy_block(&blockhash, instance->memory + instance->lane_length - 1);
/* XOR the last blocks */
for (l = 1; l < instance->lanes; ++l) {
uint32_t last_block_in_lane =
l * instance->lane_length + (instance->lane_length - 1);
xor_block(&blockhash, instance->memory + last_block_in_lane);
}
/* Hash the result */
{
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
store_block(blockhash_bytes, &blockhash);
blake2b_long(context->out, context->outlen, blockhash_bytes,
ARGON2_BLOCK_SIZE);
/* clear blockhash and blockhash_bytes */
clear_internal_memory(blockhash.v, ARGON2_BLOCK_SIZE);
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
}
if (instance->print_internals) {
print_tag(context->out, context->outlen);
}
free_memory(context, instance);
}
}
uint32_t index_alpha(const argon2_instance_t *instance,
const argon2_position_t *position, uint32_t pseudo_rand,
int same_lane) {
/*
* Pass 0:
* This lane : all already finished segments plus already constructed
* blocks in this segment
* Other lanes : all already finished segments
* Pass 1+:
* This lane : (SYNC_POINTS - 1) last segments plus already constructed
* blocks in this segment
* Other lanes : (SYNC_POINTS - 1) last segments
*/
uint32_t reference_area_size;
uint64_t relative_position;
uint32_t start_position, absolute_position;
if (0 == position->pass) {
/* First pass */
if (0 == position->slice) {
/* First slice */
reference_area_size =
position->index - 1; /* all but the previous */
} else {
if (same_lane) {
/* The same lane => add current segment */
reference_area_size =
position->slice * instance->segment_length +
position->index - 1;
} else {
reference_area_size =
position->slice * instance->segment_length +
((position->index == 0) ? (-1) : 0);
}
}
} else {
/* Second pass */
if (same_lane) {
reference_area_size = instance->lane_length -
instance->segment_length + position->index -
1;
} else {
reference_area_size = instance->lane_length -
instance->segment_length +
((position->index == 0) ? (-1) : 0);
}
}
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
* relative position */
relative_position = pseudo_rand;
relative_position = relative_position * relative_position >> 32;
relative_position = reference_area_size - 1 -
(reference_area_size * relative_position >> 32);
/* 1.2.5 Computing starting position */
start_position = 0;
if (0 != position->pass) {
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)
? 0
: (position->slice + 1) * instance->segment_length;
}
/* 1.2.6. Computing absolute position */
absolute_position = (start_position + relative_position) %
instance->lane_length; /* absolute position */
return absolute_position;
}
#ifdef _WIN32
static unsigned __stdcall fill_segment_thr(void *thread_data)
#else
static void *fill_segment_thr(void *thread_data)
#endif
{
argon2_thread_data *my_data = thread_data;
fill_segment(my_data->instance_ptr, my_data->pos);
argon2_thread_exit();
return 0;
}
/* Single-threaded version for p=1 case */
static int fill_memory_blocks_st(argon2_instance_t *instance) {
uint32_t r, s, l;
for (r = 0; r < instance->passes; ++r) {
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
for (l = 0; l < instance->lanes; ++l) {
argon2_position_t position = { r, l, (uint8_t)s, 0 };
fill_segment(instance, position);
}
}
if (instance->print_internals) {
internal_kat(instance, r); /* Print all memory blocks */
}
}
return ARGON2_OK;
}
/* Multi-threaded version for p > 1 case */
static int fill_memory_blocks_mt(argon2_instance_t *instance) {
uint32_t r, s;
argon2_thread_handle_t *thread = NULL;
argon2_thread_data *thr_data = NULL;
int rc = ARGON2_OK;
/* 1. Allocating space for threads */
thread = calloc(instance->lanes, sizeof(argon2_thread_handle_t));
if (thread == NULL) {
rc = ARGON2_MEMORY_ALLOCATION_ERROR;
goto fail;
}
thr_data = calloc(instance->lanes, sizeof(argon2_thread_data));
if (thr_data == NULL) {
rc = ARGON2_MEMORY_ALLOCATION_ERROR;
goto fail;
}
for (r = 0; r < instance->passes; ++r) {
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
uint32_t l;
/* 2. Calling threads */
for (l = 0; l < instance->lanes; ++l) {
argon2_position_t position;
/* 2.1 Join a thread if limit is exceeded */
if (l >= instance->threads) {
if (argon2_thread_join(thread[l - instance->threads])) {
rc = ARGON2_THREAD_FAIL;
goto fail;
}
}
/* 2.2 Create thread */
position.pass = r;
position.lane = l;
position.slice = (uint8_t)s;
position.index = 0;
thr_data[l].instance_ptr =
instance; /* preparing the thread input */
memcpy(&(thr_data[l].pos), &position,
sizeof(argon2_position_t));
if (argon2_thread_create(&thread[l], &fill_segment_thr,
(void *)&thr_data[l])) {
rc = ARGON2_THREAD_FAIL;
goto fail;
}
/* fill_segment(instance, position); */
/*Non-thread equivalent of the lines above */
}
/* 3. Joining remaining threads */
for (l = instance->lanes - instance->threads; l < instance->lanes;
++l) {
if (argon2_thread_join(thread[l])) {
rc = ARGON2_THREAD_FAIL;
goto fail;
}
}
}
if (instance->print_internals) {
internal_kat(instance, r); /* Print all memory blocks */
}
}
fail:
if (thread != NULL) {
free(thread);
}
if (thr_data != NULL) {
free(thr_data);
}
return rc;
}
int fill_memory_blocks(argon2_instance_t *instance) {
if (instance == NULL || instance->lanes == 0) {
return ARGON2_INCORRECT_PARAMETER;
}
return instance->threads == 1 ?
fill_memory_blocks_st(instance) : fill_memory_blocks_mt(instance);
}
int validate_inputs(const argon2_context *context) {
if (NULL == context) {
return ARGON2_INCORRECT_PARAMETER;
}
if (NULL == context->out) {
return ARGON2_OUTPUT_PTR_NULL;
}
/* Validate output length */
if (ARGON2_MIN_OUTLEN > context->outlen) {
return ARGON2_OUTPUT_TOO_SHORT;
}
if (ARGON2_MAX_OUTLEN < context->outlen) {
return ARGON2_OUTPUT_TOO_LONG;
}
/* Validate password (required param) */
if (NULL == context->pwd) {
if (0 != context->pwdlen) {
return ARGON2_PWD_PTR_MISMATCH;
}
}
if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) {
return ARGON2_PWD_TOO_SHORT;
}
if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) {
return ARGON2_PWD_TOO_LONG;
}
/* Validate salt (required param) */
if (NULL == context->salt) {
if (0 != context->saltlen) {
return ARGON2_SALT_PTR_MISMATCH;
}
}
if (ARGON2_MIN_SALT_LENGTH > context->saltlen) {
return ARGON2_SALT_TOO_SHORT;
}
if (ARGON2_MAX_SALT_LENGTH < context->saltlen) {
return ARGON2_SALT_TOO_LONG;
}
/* Validate secret (optional param) */
if (NULL == context->secret) {
if (0 != context->secretlen) {
return ARGON2_SECRET_PTR_MISMATCH;
}
} else {
if (ARGON2_MIN_SECRET > context->secretlen) {
return ARGON2_SECRET_TOO_SHORT;
}
if (ARGON2_MAX_SECRET < context->secretlen) {
return ARGON2_SECRET_TOO_LONG;
}
}
/* Validate associated data (optional param) */
if (NULL == context->ad) {
if (0 != context->adlen) {
return ARGON2_AD_PTR_MISMATCH;
}
} else {
if (ARGON2_MIN_AD_LENGTH > context->adlen) {
return ARGON2_AD_TOO_SHORT;
}
if (ARGON2_MAX_AD_LENGTH < context->adlen) {
return ARGON2_AD_TOO_LONG;
}
}
/* Validate memory cost */
if (ARGON2_MIN_MEMORY > context->m_cost) {
return ARGON2_MEMORY_TOO_LITTLE;
}
if (ARGON2_MAX_MEMORY < context->m_cost) {
return ARGON2_MEMORY_TOO_MUCH;
}
if (context->m_cost < 8 * context->lanes) {
return ARGON2_MEMORY_TOO_LITTLE;
}
/* Validate time cost */
if (ARGON2_MIN_TIME > context->t_cost) {
return ARGON2_TIME_TOO_SMALL;
}
if (ARGON2_MAX_TIME < context->t_cost) {
return ARGON2_TIME_TOO_LARGE;
}
/* Validate lanes */
if (ARGON2_MIN_LANES > context->lanes) {
return ARGON2_LANES_TOO_FEW;
}
if (ARGON2_MAX_LANES < context->lanes) {
return ARGON2_LANES_TOO_MANY;
}
/* Validate threads */
if (ARGON2_MIN_THREADS > context->threads) {
return ARGON2_THREADS_TOO_FEW;
}
if (ARGON2_MAX_THREADS < context->threads) {
return ARGON2_THREADS_TOO_MANY;
}
if (NULL != context->allocate_cbk && NULL == context->free_cbk) {
return ARGON2_FREE_MEMORY_CBK_NULL;
}
if (NULL == context->allocate_cbk && NULL != context->free_cbk) {
return ARGON2_ALLOCATE_MEMORY_CBK_NULL;
}
return ARGON2_OK;
}
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
uint32_t l;
/* Make the first and second block in each lane as G(H0||0||i) or
G(H0||1||i) */
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE];
for (l = 0; l < instance->lanes; ++l) {
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
ARGON2_PREHASH_SEED_LENGTH);
load_block(&instance->memory[l * instance->lane_length + 0],
blockhash_bytes);
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,
ARGON2_PREHASH_SEED_LENGTH);
load_block(&instance->memory[l * instance->lane_length + 1],
blockhash_bytes);
}
clear_internal_memory(blockhash_bytes, ARGON2_BLOCK_SIZE);
}
void initial_hash(uint8_t *blockhash, argon2_context *context,
argon2_type type) {
blake2b_state BlakeHash;
uint8_t value[sizeof(uint32_t)];
if (NULL == context || NULL == blockhash) {
return;
}
blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
store32(&value, context->lanes);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->outlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->m_cost);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->t_cost);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->version);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, (uint32_t)type);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->pwdlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->pwd != NULL) {
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
context->pwdlen);
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
secure_wipe_memory(context->pwd, context->pwdlen);
context->pwdlen = 0;
}
}
store32(&value, context->saltlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->salt != NULL) {
blake2b_update(&BlakeHash, (const uint8_t *)context->salt,
context->saltlen);
}
store32(&value, context->secretlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->secret != NULL) {
blake2b_update(&BlakeHash, (const uint8_t *)context->secret,
context->secretlen);
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
secure_wipe_memory(context->secret, context->secretlen);
context->secretlen = 0;
}
}
store32(&value, context->adlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->ad != NULL) {
blake2b_update(&BlakeHash, (const uint8_t *)context->ad,
context->adlen);
}
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
}
int initialize(argon2_instance_t *instance, argon2_context *context) {
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
int result = ARGON2_OK;
if (instance == NULL || context == NULL)
return ARGON2_INCORRECT_PARAMETER;
instance->context_ptr = context;
/* 1. Memory allocation */
result = allocate_memory(context, instance);
if (result != ARGON2_OK) {
return result;
}
/* 2. Initial hashing */
/* H_0 + 8 extra bytes to produce the first blocks */
/* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */
/* Hashing all inputs */
initial_hash(blockhash, context, instance->type);
/* Zeroing 8 extra bytes */
clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
ARGON2_PREHASH_SEED_LENGTH -
ARGON2_PREHASH_DIGEST_LENGTH);
if (instance->print_internals) {
initial_kat(blockhash, context, instance->type);
}
/* 3. Creating first blocks, we always have at least two blocks in a slice
*/
fill_first_blocks(blockhash, instance);
/* Clearing the hash */
clear_internal_memory(blockhash, ARGON2_PREHASH_SEED_LENGTH);
return ARGON2_OK;
}

226
src/3rdparty/argon2/lib/core.h

@ -0,0 +1,226 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef ARGON2_CORE_H
#define ARGON2_CORE_H
#include "argon2.h"
#if defined(_MSC_VER)
#define ALIGN(n) __declspec(align(16))
#elif defined(__GNUC__) || defined(__clang)
#define ALIGN(x) __attribute__((__aligned__(x)))
#else
#define ALIGN(x)
#endif
#define CONST_CAST(x) (x)(uintptr_t)
/**********************Argon2 internal constants*******************************/
enum argon2_core_constants {
/* Memory block size in bytes */
ARGON2_BLOCK_SIZE = 1024,
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
/* Number of pseudo-random values generated by one call to Blake in Argon2i
to
generate reference block positions */
ARGON2_ADDRESSES_IN_BLOCK = 128,
/* Pre-hashing digest length and its extension*/
ARGON2_PREHASH_DIGEST_LENGTH = 64,
ARGON2_PREHASH_SEED_LENGTH = 72
};
/*************************Argon2 internal data types***********************/
/*
* Structure for the (1KB) memory block implemented as 128 64-bit words.
* Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
* bounds checking).
*/
typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
/*****************Functions that work with the block******************/
/* Initialize each byte of the block with @in */
void init_block_value(block *b, uint8_t in);
/* Copy block @src to block @dst */
void copy_block(block *dst, const block *src);
/* XOR @src onto @dst bytewise */
void xor_block(block *dst, const block *src);
/*
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
* and derived values.
* Used to evaluate the number and location of blocks to construct in each
* thread
*/
typedef struct Argon2_instance_t {
block *memory; /* Memory pointer */
uint32_t version;
uint32_t passes; /* Number of passes */
uint32_t memory_blocks; /* Number of blocks in memory */
uint32_t segment_length;
uint32_t lane_length;
uint32_t lanes;
uint32_t threads;
argon2_type type;
int print_internals; /* whether to print the memory blocks */
int keep_memory;
argon2_context *context_ptr; /* points back to original context */
} argon2_instance_t;
/*
* Argon2 position: where we construct the block right now. Used to distribute
* work between threads.
*/
typedef struct Argon2_position_t {
uint32_t pass;
uint32_t lane;
uint8_t slice;
uint32_t index;
} argon2_position_t;
/*Struct that holds the inputs for thread handling FillSegment*/
typedef struct Argon2_thread_data {
argon2_instance_t *instance_ptr;
argon2_position_t pos;
} argon2_thread_data;
/*************************Argon2 core functions********************************/
/* Allocates memory to the given pointer, uses the appropriate allocator as
* specified in the context. Total allocated memory is num*size.
* @param context argon2_context which specifies the allocator
* @param instance the Argon2 instance
* @return ARGON2_OK if memory is allocated successfully
*/
int allocate_memory(const argon2_context *context,
argon2_instance_t *instance);
/*
* Frees memory at the given pointer, uses the appropriate deallocator as
* specified in the context. Also cleans the memory using clear_internal_memory.
* @param context argon2_context which specifies the deallocator
* @param instance the Argon2 instance
*/
void free_memory(const argon2_context *context,
const argon2_instance_t *instance);
/* Function that securely cleans the memory. This ignores any flags set
* regarding clearing memory. Usually one just calls clear_internal_memory.
* @param mem Pointer to the memory
* @param s Memory size in bytes
*/
void secure_wipe_memory(void *v, size_t n);
/* Function that securely clears the memory if FLAG_clear_internal_memory is
* set. If the flag isn't set, this function does nothing.
* @param mem Pointer to the memory
* @param s Memory size in bytes
*/
ARGON2_PUBLIC void clear_internal_memory(void *v, size_t n);
/*
* Computes absolute position of reference block in the lane following a skewed
* distribution and using a pseudo-random value as input
* @param instance Pointer to the current instance
* @param position Pointer to the current position
* @param pseudo_rand 32-bit pseudo-random value used to determine the position
* @param same_lane Indicates if the block will be taken from the current lane.
* If so we can reference the current segment
* @pre All pointers must be valid
*/
uint32_t index_alpha(const argon2_instance_t *instance,
const argon2_position_t *position, uint32_t pseudo_rand,
int same_lane);
/*
* Function that validates all inputs against predefined restrictions and return
* an error code
* @param context Pointer to current Argon2 context
* @return ARGON2_OK if everything is all right, otherwise one of error codes
* (all defined in <argon2.h>
*/
int validate_inputs(const argon2_context *context);
/*
* Hashes all the inputs into @a blockhash[PREHASH_DIGEST_LENGTH], clears
* password and secret if needed
* @param context Pointer to the Argon2 internal structure containing memory
* pointer, and parameters for time and space requirements.
* @param blockhash Buffer for pre-hashing digest
* @param type Argon2 type
* @pre @a blockhash must have at least @a PREHASH_DIGEST_LENGTH bytes
* allocated
*/
void initial_hash(uint8_t *blockhash, argon2_context *context,
argon2_type type);
/*
* Function creates first 2 blocks per lane
* @param instance Pointer to the current instance
* @param blockhash Pointer to the pre-hashing digest
* @pre blockhash must point to @a PREHASH_SEED_LENGTH allocated values
*/
void fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance);
/*
* Function allocates memory, hashes the inputs with Blake, and creates first
* two blocks. Returns the pointer to the main memory with 2 blocks per lane
* initialized
* @param context Pointer to the Argon2 internal structure containing memory
* pointer, and parameters for time and space requirements.
* @param instance Current Argon2 instance
* @return Zero if successful, -1 if memory failed to allocate. @context->state
* will be modified if successful.
*/
int initialize(argon2_instance_t *instance, argon2_context *context);
/*
* XORing the last block of each lane, hashing it, making the tag. Deallocates
* the memory.
* @param context Pointer to current Argon2 context (use only the out parameters
* from it)
* @param instance Pointer to current instance of Argon2
* @pre instance->state must point to necessary amount of memory
* @pre context->out must point to outlen bytes of memory
* @pre if context->free_cbk is not NULL, it should point to a function that
* deallocates memory
*/
void finalize(const argon2_context *context, argon2_instance_t *instance);
/*
* Function that fills the segment using previous segments also from other
* threads
* @param instance Pointer to the current instance
* @param position Current position
* @pre all block pointers must be valid
*/
void fill_segment(const argon2_instance_t *instance,
argon2_position_t position);
/*
* Function that fills the entire memory t_cost times based on the first two
* blocks in each lane
* @param instance Pointer to the current instance
* @return ARGON2_OK if successful, @context->state
*/
int fill_memory_blocks(argon2_instance_t *instance);
#endif

432
src/3rdparty/argon2/lib/encoding.c

@ -0,0 +1,432 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "encoding.h"
#include "core.h"
/*
* Example code for a decoder and encoder of "hash strings", with Argon2
* parameters.
*
* This code comprises three sections:
*
* -- The first section contains generic Base64 encoding and decoding
* functions. It is conceptually applicable to any hash function
* implementation that uses Base64 to encode and decode parameters,
* salts and outputs. It could be made into a library, provided that
* the relevant functions are made public (non-static) and be given
* reasonable names to avoid collisions with other functions.
*
* -- The second section is specific to Argon2. It encodes and decodes
* the parameters, salts and outputs. It does not compute the hash
* itself.
*
* The code was originally written by Thomas Pornin <pornin@bolet.org>,
* to whom comments and remarks may be sent. It is released under what
* should amount to Public Domain or its closest equivalent; the
* following mantra is supposed to incarnate that fact with all the
* proper legal rituals:
*
* ---------------------------------------------------------------------
* This file is provided under the terms of Creative Commons CC0 1.0
* Public Domain Dedication. To the extent possible under law, the
* author (Thomas Pornin) has waived all copyright and related or
* neighboring rights to this file. This work is published from: Canada.
* ---------------------------------------------------------------------
*
* Copyright (c) 2015 Thomas Pornin
*/
/* ==================================================================== */
/*
* Common code; could be shared between different hash functions.
*
* Note: the Base64 functions below assume that uppercase letters (resp.
* lowercase letters) have consecutive numerical codes, that fit on 8
* bits. All modern systems use ASCII-compatible charsets, where these
* properties are true. If you are stuck with a dinosaur of a system
* that still defaults to EBCDIC then you already have much bigger
* interoperability issues to deal with.
*/
/*
* Some macros for constant-time comparisons. These work over values in
* the 0..255 range. Returned value is 0x00 on "false", 0xFF on "true".
*/
#define EQ(x, y) ((((0U - ((unsigned)(x) ^ (unsigned)(y))) >> 8) & 0xFF) ^ 0xFF)
#define GT(x, y) ((((unsigned)(y) - (unsigned)(x)) >> 8) & 0xFF)
#define GE(x, y) (GT(y, x) ^ 0xFF)
#define LT(x, y) GT(y, x)
#define LE(x, y) GE(y, x)
/*
* Convert value x (0..63) to corresponding Base64 character.
*/
static int b64_byte_to_char(unsigned x) {
return (LT(x, 26) & (x + 'A')) |
(GE(x, 26) & LT(x, 52) & (x + ('a' - 26))) |
(GE(x, 52) & LT(x, 62) & (x + ('0' - 52))) | (EQ(x, 62) & '+') |
(EQ(x, 63) & '/');
}
/*
* Convert character c to the corresponding 6-bit value. If character c
* is not a Base64 character, then 0xFF (255) is returned.
*/
static unsigned b64_char_to_byte(int c) {
unsigned x;
x = (GE(c, 'A') & LE(c, 'Z') & (c - 'A')) |
(GE(c, 'a') & LE(c, 'z') & (c - ('a' - 26))) |
(GE(c, '0') & LE(c, '9') & (c - ('0' - 52))) | (EQ(c, '+') & 62) |
(EQ(c, '/') & 63);
return x | (EQ(x, 0) & (EQ(c, 'A') ^ 0xFF));
}
/*
* Convert some bytes to Base64. 'dst_len' is the length (in characters)
* of the output buffer 'dst'; if that buffer is not large enough to
* receive the result (including the terminating 0), then (size_t)-1
* is returned. Otherwise, the zero-terminated Base64 string is written
* in the buffer, and the output length (counted WITHOUT the terminating
* zero) is returned.
*/
static size_t to_base64(char *dst, size_t dst_len, const void *src,
size_t src_len) {
size_t olen;
const unsigned char *buf;
unsigned acc, acc_len;
olen = (src_len / 3) << 2;
switch (src_len % 3) {
case 2:
olen++;
/* fall through */
case 1:
olen += 2;
break;
}
if (dst_len <= olen) {
return (size_t)-1;
}
acc = 0;
acc_len = 0;
buf = (const unsigned char *)src;
while (src_len-- > 0) {
acc = (acc << 8) + (*buf++);
acc_len += 8;
while (acc_len >= 6) {
acc_len -= 6;
*dst++ = (char)b64_byte_to_char((acc >> acc_len) & 0x3F);
}
}
if (acc_len > 0) {
*dst++ = (char)b64_byte_to_char((acc << (6 - acc_len)) & 0x3F);
}
*dst++ = 0;
return olen;
}
/*
* Decode Base64 chars into bytes. The '*dst_len' value must initially
* contain the length of the output buffer '*dst'; when the decoding
* ends, the actual number of decoded bytes is written back in
* '*dst_len'.
*
* Decoding stops when a non-Base64 character is encountered, or when
* the output buffer capacity is exceeded. If an error occurred (output
* buffer is too small, invalid last characters leading to unprocessed
* buffered bits), then NULL is returned; otherwise, the returned value
* points to the first non-Base64 character in the source stream, which
* may be the terminating zero.
*/
static const char *from_base64(void *dst, size_t *dst_len, const char *src) {
size_t len;
unsigned char *buf;
unsigned acc, acc_len;
buf = (unsigned char *)dst;
len = 0;
acc = 0;
acc_len = 0;
for (;;) {
unsigned d;
d = b64_char_to_byte(*src);
if (d == 0xFF) {
break;
}
src++;
acc = (acc << 6) + d;
acc_len += 6;
if (acc_len >= 8) {
acc_len -= 8;
if ((len++) >= *dst_len) {
return NULL;
}
*buf++ = (acc >> acc_len) & 0xFF;
}
}
/*
* If the input length is equal to 1 modulo 4 (which is
* invalid), then there will remain 6 unprocessed bits;
* otherwise, only 0, 2 or 4 bits are buffered. The buffered
* bits must also all be zero.
*/
if (acc_len > 4 || (acc & (((unsigned)1 << acc_len) - 1)) != 0) {
return NULL;
}
*dst_len = len;
return src;
}
/*
* Decode decimal integer from 'str'; the value is written in '*v'.
* Returned value is a pointer to the next non-decimal character in the
* string. If there is no digit at all, or the value encoding is not
* minimal (extra leading zeros), or the value does not fit in an
* 'unsigned long', then NULL is returned.
*/
static const char *decode_decimal(const char *str, unsigned long *v) {
const char *orig;
unsigned long acc;
acc = 0;
for (orig = str;; str++) {
int c;
c = *str;
if (c < '0' || c > '9') {
break;
}
c -= '0';
if (acc > (ULONG_MAX / 10)) {
return NULL;
}
acc *= 10;
if ((unsigned long)c > (ULONG_MAX - acc)) {
return NULL;
}
acc += (unsigned long)c;
}
if (str == orig || (*orig == '0' && str != (orig + 1))) {
return NULL;
}
*v = acc;
return str;
}
/* ==================================================================== */
/*
* Code specific to Argon2.
*
* The code below applies the following format:
*
* $argon2<T>[$v=<num>]$m=<num>,t=<num>,p=<num>$<bin>$<bin>
*
* where <T> is either 'd', 'id', or 'i', <num> is a decimal integer (positive,
* fits in an 'unsigned long'), and <bin> is Base64-encoded data (no '=' padding
* characters, no newline or whitespace).
*
* The last two binary chunks (encoded in Base64) are, in that order,
* the salt and the output. Both are required. The binary salt length and the
* output length must be in the allowed ranges defined in argon2.h.
*
* The ctx struct must contain buffers large enough to hold the salt and pwd
* when it is fed into decode_string.
*/
int decode_string(argon2_context *ctx, const char *str, argon2_type type) {
/* check for prefix */
#define CC(prefix) \
do { \
size_t cc_len = strlen(prefix); \
if (strncmp(str, prefix, cc_len) != 0) { \
return ARGON2_DECODING_FAIL; \
} \
str += cc_len; \
} while ((void)0, 0)
/* optional prefix checking with supplied code */
#define CC_opt(prefix, code) \
do { \
size_t cc_len = strlen(prefix); \
if (strncmp(str, prefix, cc_len) == 0) { \
str += cc_len; \
{ code; } \
} \
} while ((void)0, 0)
/* Decoding prefix into uint32_t decimal */
#define DECIMAL_U32(x) \
do { \
unsigned long dec_x; \
str = decode_decimal(str, &dec_x); \
if (str == NULL || dec_x > UINT32_MAX) { \
return ARGON2_DECODING_FAIL; \
} \
(x) = (uint32_t)dec_x; \
} while ((void)0, 0)
/* Decoding base64 into a binary buffer */
#define BIN(buf, max_len, len) \
do { \
size_t bin_len = (max_len); \
str = from_base64(buf, &bin_len, str); \
if (str == NULL || bin_len > UINT32_MAX) { \
return ARGON2_DECODING_FAIL; \
} \
(len) = (uint32_t)bin_len; \
} while ((void)0, 0)
size_t maxsaltlen = ctx->saltlen;
size_t maxoutlen = ctx->outlen;
int validation_result;
const char* type_string;
/* We should start with the argon2_type we are using */
type_string = argon2_type2string(type, 0);
if (!type_string) {
return ARGON2_INCORRECT_TYPE;
}
CC("$");
CC(type_string);
/* Reading the version number if the default is suppressed */
ctx->version = ARGON2_VERSION_10;
CC_opt("$v=", DECIMAL_U32(ctx->version));
CC("$m=");
DECIMAL_U32(ctx->m_cost);
CC(",t=");
DECIMAL_U32(ctx->t_cost);
CC(",p=");
DECIMAL_U32(ctx->lanes);
ctx->threads = ctx->lanes;
CC("$");
BIN(ctx->salt, maxsaltlen, ctx->saltlen);
CC("$");
BIN(ctx->out, maxoutlen, ctx->outlen);
/* The rest of the fields get the default values */
ctx->secret = NULL;
ctx->secretlen = 0;
ctx->ad = NULL;
ctx->adlen = 0;
ctx->allocate_cbk = NULL;
ctx->free_cbk = NULL;
ctx->flags = ARGON2_DEFAULT_FLAGS;
/* On return, must have valid context */
validation_result = validate_inputs(ctx);
if (validation_result != ARGON2_OK) {
return validation_result;
}
/* Can't have any additional characters */
if (*str == 0) {
return ARGON2_OK;
} else {
return ARGON2_DECODING_FAIL;
}
#undef CC
#undef CC_opt
#undef DECIMAL_U32
#undef BIN
}
int encode_string(char *dst, size_t dst_len, argon2_context *ctx,
argon2_type type) {
#define SS(str) \
do { \
size_t pp_len = strlen(str); \
if (pp_len >= dst_len) { \
return ARGON2_ENCODING_FAIL; \
} \
memcpy(dst, str, pp_len + 1); \
dst += pp_len; \
dst_len -= pp_len; \
} while ((void)0, 0)
#define SX(x) \
do { \
char tmp[30]; \
sprintf(tmp, "%lu", (unsigned long)(x)); \
SS(tmp); \
} while ((void)0, 0)
#define SB(buf, len) \
do { \
size_t sb_len = to_base64(dst, dst_len, buf, len); \
if (sb_len == (size_t)-1) { \
return ARGON2_ENCODING_FAIL; \
} \
dst += sb_len; \
dst_len -= sb_len; \
} while ((void)0, 0)
const char* type_string = argon2_type2string(type, 0);
int validation_result = validate_inputs(ctx);
if (!type_string) {
return ARGON2_ENCODING_FAIL;
}
if (validation_result != ARGON2_OK) {
return validation_result;
}
SS("$");
SS(type_string);
SS("$v=");
SX(ctx->version);
SS("$m=");
SX(ctx->m_cost);
SS(",t=");
SX(ctx->t_cost);
SS(",p=");
SX(ctx->lanes);
SS("$");
SB(ctx->salt, ctx->saltlen);
SS("$");
SB(ctx->out, ctx->outlen);
return ARGON2_OK;
#undef SS
#undef SX
#undef SB
}
size_t b64len(uint32_t len) {
size_t olen = ((size_t)len / 3) << 2;
switch (len % 3) {
case 2:
olen++;
/* fall through */
case 1:
olen += 2;
break;
}
return olen;
}
size_t numlen(uint32_t num) {
size_t len = 1;
while (num >= 10) {
++len;
num = num / 10;
}
return len;
}

40
src/3rdparty/argon2/lib/encoding.h

@ -0,0 +1,40 @@
#ifndef ENCODING_H
#define ENCODING_H
#include "argon2.h"
#define ARGON2_MAX_DECODED_LANES UINT32_C(255)
#define ARGON2_MIN_DECODED_SALT_LEN UINT32_C(8)
#define ARGON2_MIN_DECODED_OUT_LEN UINT32_C(12)
/*
* encode an Argon2 hash string into the provided buffer. 'dst_len'
* contains the size, in characters, of the 'dst' buffer; if 'dst_len'
* is less than the number of required characters (including the
* terminating 0), then this function returns ARGON2_ENCODING_ERROR.
*
* on success, ARGON2_OK is returned.
*/
int encode_string(char *dst, size_t dst_len, argon2_context *ctx,
argon2_type type);
/*
* Decodes an Argon2 hash string into the provided structure 'ctx'.
* The only fields that must be set prior to this call are ctx.saltlen and
* ctx.outlen (which must be the maximal salt and out length values that are
* allowed), ctx.salt and ctx.out (which must be buffers of the specified
* length), and ctx.pwd and ctx.pwdlen which must hold a valid password.
*
* Invalid input string causes an error. On success, the ctx is valid and all
* fields have been initialized.
*
* Returned value is ARGON2_OK on success, other ARGON2_ codes on error.
*/
int decode_string(argon2_context *ctx, const char *str, argon2_type type);
/* Returns the length of the encoded byte stream with length len */
size_t b64len(uint32_t len);
/* Returns the length of the encoded number num */
size_t numlen(uint32_t num);
#endif

117
src/3rdparty/argon2/lib/genkat.c

@ -0,0 +1,117 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <inttypes.h>
#include <stdio.h>
#include "genkat.h"
void initial_kat(const uint8_t *blockhash, const argon2_context *context,
argon2_type type) {
unsigned i;
if (blockhash != NULL && context != NULL) {
printf("=======================================\n");
printf("%s version number %d\n", argon2_type2string(type, 1),
context->version);
printf("=======================================\n");
printf("Memory: %u KiB, Iterations: %u, Parallelism: %u lanes, Tag "
"length: %u bytes\n",
context->m_cost, context->t_cost, context->lanes,
context->outlen);
printf("Password[%u]: ", context->pwdlen);
if (context->flags & ARGON2_FLAG_CLEAR_PASSWORD) {
printf("CLEARED\n");
} else {
for (i = 0; i < context->pwdlen; ++i) {
printf("%2.2x ", ((unsigned char *)context->pwd)[i]);
}
printf("\n");
}
printf("Salt[%u]: ", context->saltlen);
for (i = 0; i < context->saltlen; ++i) {
printf("%2.2x ", ((unsigned char *)context->salt)[i]);
}
printf("\n");
printf("Secret[%u]: ", context->secretlen);
if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
printf("CLEARED\n");
} else {
for (i = 0; i < context->secretlen; ++i) {
printf("%2.2x ", ((unsigned char *)context->secret)[i]);
}
printf("\n");
}
printf("Associated data[%u]: ", context->adlen);
for (i = 0; i < context->adlen; ++i) {
printf("%2.2x ", ((unsigned char *)context->ad)[i]);
}
printf("\n");
printf("Pre-hashing digest: ");
for (i = 0; i < ARGON2_PREHASH_DIGEST_LENGTH; ++i) {
printf("%2.2x ", ((unsigned char *)blockhash)[i]);
}
printf("\n");
}
}
void print_tag(const void *out, uint32_t outlen) {
unsigned i;
if (out != NULL) {
printf("Tag: ");
for (i = 0; i < outlen; ++i) {
printf("%2.2x ", ((uint8_t *)out)[i]);
}
printf("\n");
}
}
void internal_kat(const argon2_instance_t *instance, uint32_t pass) {
if (instance != NULL) {
uint32_t i, j;
printf("\n After pass %u:\n", pass);
for (i = 0; i < instance->memory_blocks; ++i) {
uint32_t how_many_words =
(instance->memory_blocks > ARGON2_QWORDS_IN_BLOCK)
? 1
: ARGON2_QWORDS_IN_BLOCK;
for (j = 0; j < how_many_words; ++j)
printf("Block %.4u [%3u]: %016" PRIx64 "\n", i, j,
instance->memory[i].v[j]);
}
}
}

47
src/3rdparty/argon2/lib/genkat.h

@ -0,0 +1,47 @@
/*
* Argon2 source code package
*
* Written by Daniel Dinu and Dmitry Khovratovich, 2015
*
* This work is licensed under a Creative Commons CC0 1.0 License/Waiver.
*
* You should have received a copy of the CC0 Public Domain Dedication along
* with
* this software. If not, see
* <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#ifndef ARGON2_KAT_H
#define ARGON2_KAT_H
#include "core.h"
/*
* Initial KAT function that prints the inputs to the file
* @param blockhash Array that contains pre-hashing digest
* @param context Holds inputs
* @param type Argon2 type
* @pre blockhash must point to INPUT_INITIAL_HASH_LENGTH bytes
* @pre context member pointers must point to allocated memory of size according
* to the length values
*/
void initial_kat(const uint8_t *blockhash, const argon2_context *context,
argon2_type type);
/*
* Function that prints the output tag
* @param out output array pointer
* @param outlen digest length
* @pre out must point to @a outlen bytes
**/
void print_tag(const void *out, uint32_t outlen);
/*
* Function that prints the internal state at given moment
* @param instance pointer to the current instance
* @param pass current pass number
* @pre instance must have necessary memory allocated
**/
void internal_kat(const argon2_instance_t *instance, uint32_t pass);
#endif

120
src/3rdparty/argon2/lib/impl-select.c

@ -0,0 +1,120 @@
#include <time.h>
#include <string.h>
#include "impl-select.h"
#include "argon2.h"
#define log_maybe(file, ...) \
do { \
if (file) { \
fprintf(file, __VA_ARGS__); \
} \
} while((void)0, 0)
#define BENCH_SAMPLES 512
#define BENCH_MEM_BLOCKS 512
static argon2_impl selected_argon_impl = {
"(default)", NULL, fill_segment_default
};
/* the benchmark routine is not thread-safe, so we can use a global var here: */
static block memory[BENCH_MEM_BLOCKS];
static uint64_t benchmark_impl(const argon2_impl *impl) {
clock_t time;
unsigned int i;
uint64_t bench;
argon2_instance_t instance;
argon2_position_t pos;
memset(memory, 0, sizeof(memory));
instance.version = ARGON2_VERSION_NUMBER;
instance.memory = memory;
instance.passes = 1;
instance.memory_blocks = BENCH_MEM_BLOCKS;
instance.segment_length = BENCH_MEM_BLOCKS / ARGON2_SYNC_POINTS;
instance.lane_length = instance.segment_length * ARGON2_SYNC_POINTS;
instance.lanes = 1;
instance.threads = 1;
instance.type = Argon2_i;
pos.lane = 0;
pos.pass = 0;
pos.slice = 0;
pos.index = 0;
/* warm-up cache: */
impl->fill_segment(&instance, pos);
/* OK, now measure: */
bench = 0;
time = clock();
for (i = 0; i < BENCH_SAMPLES; i++) {
impl->fill_segment(&instance, pos);
}
time = clock() - time;
bench = (uint64_t)time;
return bench;
}
static void select_impl(FILE *out, const char *prefix)
{
argon2_impl_list impls;
unsigned int i;
const argon2_impl *best_impl = NULL;
uint64_t best_bench = UINT_MAX;
log_maybe(out, "%sSelecting best fill_segment implementation...\n", prefix);
argon2_get_impl_list(&impls);
for (i = 0; i < impls.count; i++) {
const argon2_impl *impl = &impls.entries[i];
uint64_t bench;
log_maybe(out, "%s%s: Checking availability... ", prefix, impl->name);
if (impl->check != NULL && !impl->check()) {
log_maybe(out, "FAILED!\n");
continue;
}
log_maybe(out, "OK!\n");
log_maybe(out, "%s%s: Benchmarking...\n", prefix, impl->name);
bench = benchmark_impl(impl);
log_maybe(out, "%s%s: Benchmark result: %llu\n", prefix, impl->name,
(unsigned long long)bench);
if (bench < best_bench) {
best_bench = bench;
best_impl = impl;
}
}
if (best_impl != NULL) {
log_maybe(out,
"%sBest implementation: '%s' (bench %llu)\n", prefix,
best_impl->name, (unsigned long long)best_bench);
selected_argon_impl = *best_impl;
} else {
log_maybe(out,
"%sNo optimized implementation available, using default!\n",
prefix);
}
}
void fill_segment(const argon2_instance_t *instance, argon2_position_t position)
{
selected_argon_impl.fill_segment(instance, position);
}
void argon2_select_impl(FILE *out, const char *prefix)
{
if (prefix == NULL) {
prefix = "";
}
select_impl(out, prefix);
}

23
src/3rdparty/argon2/lib/impl-select.h

@ -0,0 +1,23 @@
#ifndef ARGON2_IMPL_SELECT_H
#define ARGON2_IMPL_SELECT_H
#include "core.h"
typedef struct Argon2_impl {
const char *name;
int (*check)(void);
void (*fill_segment)(const argon2_instance_t *instance,
argon2_position_t position);
} argon2_impl;
typedef struct Argon2_impl_list {
const argon2_impl *entries;
size_t count;
} argon2_impl_list;
void argon2_get_impl_list(argon2_impl_list *list);
void fill_segment_default(const argon2_instance_t *instance,
argon2_position_t position);
#endif // ARGON2_IMPL_SELECT_H

36
src/3rdparty/argon2/lib/thread.c

@ -0,0 +1,36 @@
#include "thread.h"
#if defined(_WIN32)
#include <windows.h>
#endif
int argon2_thread_create(argon2_thread_handle_t *handle,
argon2_thread_func_t func, void *args) {
if (NULL == handle || func == NULL) {
return -1;
}
#if defined(_WIN32)
*handle = _beginthreadex(NULL, 0, func, args, 0, NULL);
return *handle != 0 ? 0 : -1;
#else
return pthread_create(handle, NULL, func, args);
#endif
}
int argon2_thread_join(argon2_thread_handle_t handle) {
#if defined(_WIN32)
if (WaitForSingleObject((HANDLE)handle, INFINITE) == WAIT_OBJECT_0) {
return CloseHandle((HANDLE)handle) != 0 ? 0 : -1;
}
return -1;
#else
return pthread_join(handle, NULL);
#endif
}
void argon2_thread_exit(void) {
#if defined(_WIN32)
_endthreadex(0);
#else
pthread_exit(NULL);
#endif
}

47
src/3rdparty/argon2/lib/thread.h

@ -0,0 +1,47 @@
#ifndef ARGON2_THREAD_H
#define ARGON2_THREAD_H
/*
Here we implement an abstraction layer for the simpĺe requirements
of the Argon2 code. We only require 3 primitives---thread creation,
joining, and termination---so full emulation of the pthreads API
is unwarranted. Currently we wrap pthreads and Win32 threads.
The API defines 2 types: the function pointer type,
argon2_thread_func_t,
and the type of the thread handle---argon2_thread_handle_t.
*/
#if defined(_WIN32)
#include <process.h>
#include <stdint.h>
typedef unsigned(__stdcall *argon2_thread_func_t)(void *);
typedef uintptr_t argon2_thread_handle_t;
#else
#include <pthread.h>
typedef void *(*argon2_thread_func_t)(void *);
typedef pthread_t argon2_thread_handle_t;
#endif
/* Creates a thread
* @param handle pointer to a thread handle, which is the output of this
* function. Must not be NULL.
* @param func A function pointer for the thread's entry point. Must not be
* NULL.
* @param args Pointer that is passed as an argument to @func. May be NULL.
* @return 0 if @handle and @func are valid pointers and a thread is successfuly
* created.
*/
int argon2_thread_create(argon2_thread_handle_t *handle,
argon2_thread_func_t func, void *args);
/* Waits for a thread to terminate
* @param handle Handle to a thread created with argon2_thread_create.
* @return 0 if @handle is a valid handle, and joining completed successfully.
*/
int argon2_thread_join(argon2_thread_handle_t handle);
/* Terminate the current thread. Must be run inside a thread created by
* argon2_thread_create.
*/
void argon2_thread_exit(void);
#endif

2
src/backend/common/Workers.cpp

@ -166,7 +166,7 @@ namespace xmrig {
template<>
xmrig::IWorker *xmrig::Workers<CpuLaunchData>::create(Thread<CpuLaunchData> *handle)
{
const int intensity = handle->config().intensity;
const uint32_t intensity = static_cast<uint32_t>(handle->config().intensity);
# if defined(XMRIG_ALGO_RANDOMX) || defined(XMRIG_ALGO_CN_GPU)
if (intensity > handle->config().algorithm.maxIntensity()) {

7
src/backend/cpu/CpuWorker.cpp

@ -156,6 +156,13 @@ bool xmrig::CpuWorker<N>::selfTest()
}
# endif
# ifdef XMRIG_ALGO_ARGON2
if (m_algorithm.family() == Algorithm::ARGON2) {
return verify(Algorithm::AR2_CHUKWA, argon2_chukwa_test_out) &&
verify(Algorithm::AR2_WRKZ, argon2_wrkz_test_out);
}
# endif
return false;
}

67
src/crypto/argon2/Argon2.h

@ -0,0 +1,67 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_ARGON2_H
#define XMRIG_ARGON2_H
#include "3rdparty/argon2.h"
#include "crypto/common/Algorithm.h"
struct cryptonight_ctx;
namespace xmrig {
template<Algorithm::Id ALGO>
inline void argon2_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__, uint64_t)
{
// static bool argon_optimization_selected = false;
// if (!argon_optimization_selected) {
// argon2_select_impl(stdout, nullptr);
// argon_optimization_selected = true;
// }
uint8_t salt[16];
memcpy(salt, input, sizeof(salt));
if (ALGO == Algorithm::AR2_CHUKWA) {
argon2id_hash_raw(3, 512, 1, input, size, salt, 16, output, 32);
}
else if (ALGO == Algorithm::AR2_WRKZ) {
argon2id_hash_raw(4, 256, 1, input, size, salt, 16, output, 32);
}
}
} // namespace xmrig
#endif /* XMRIG_ARGON2_H */

12
src/crypto/cn/CnAlgo.h

@ -132,6 +132,10 @@ private:
0, // RX_0
0, // RX_WOW
0, // RX_LOKI
# endif
# ifdef XMRIG_ALGO_ARGON2
0, // AR2_CHUKWA
0, // AR2_WRKZ
# endif
};
@ -167,6 +171,10 @@ private:
0, // RX_0
0, // RX_WOW
0, // RX_LOKI
# endif
# ifdef XMRIG_ALGO_ARGON2
0, // AR2_CHUKWA
0, // AR2_WRKZ
# endif
};
@ -202,6 +210,10 @@ private:
Algorithm::INVALID, // RX_0
Algorithm::INVALID, // RX_WOW
Algorithm::INVALID, // RX_LOKI
# endif
# ifdef XMRIG_ALGO_ARGON2
Algorithm::INVALID, // AR2_CHUKWA
Algorithm::INVALID, // AR2_WRKZ
# endif
};
};

12
src/crypto/cn/CnHash.cpp

@ -38,6 +38,11 @@
#endif
#ifdef XMRIG_ALGO_ARGON2
# include "crypto/argon2/Argon2.h"
#endif
#define ADD_FN(algo) \
m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false>; \
m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true>; \
@ -249,6 +254,13 @@ xmrig::CnHash::CnHash()
ADD_FN_ASM(Algorithm::CN_PICO_0);
# endif
# ifdef XMRIG_ALGO_ARGON2
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE][Assembly::NONE] = argon2_single_hash<Algorithm::AR2_CHUKWA>;
m_map[Algorithm::AR2_CHUKWA][AV_SINGLE_SOFT][Assembly::NONE] = argon2_single_hash<Algorithm::AR2_CHUKWA>;
m_map[Algorithm::AR2_WRKZ][AV_SINGLE][Assembly::NONE] = argon2_single_hash<Algorithm::AR2_WRKZ>;
m_map[Algorithm::AR2_WRKZ][AV_SINGLE_SOFT][Assembly::NONE] = argon2_single_hash<Algorithm::AR2_WRKZ>;
# endif
# ifdef XMRIG_FEATURE_ASM
patchAsmVariants();
# endif

33
src/crypto/cn/CryptoNight_test.h

@ -30,6 +30,9 @@
#include <stdint.h>
namespace xmrig {
const static uint8_t test_input[380] = {
0x03, 0x05, 0xA0, 0xDB, 0xD6, 0xBF, 0x05, 0xCF, 0x16, 0xE5, 0x03, 0xF3, 0xA6, 0x6F, 0x78, 0x00,
0x7C, 0xBF, 0x34, 0x14, 0x43, 0x32, 0xEC, 0xBF, 0xC2, 0x2E, 0xD9, 0x5C, 0x87, 0x00, 0x38, 0x3B,
@ -355,19 +358,29 @@ const static uint8_t test_output_pico_trtl[160] = {
#ifdef XMRIG_ALGO_CN_GPU
// "cn/gpu"
const static uint8_t test_output_gpu[160] = {
const static uint8_t test_output_gpu[32] = {
0xE5, 0x5C, 0xB2, 0x3E, 0x51, 0x64, 0x9A, 0x59, 0xB1, 0x27, 0xB9, 0x6B, 0x51, 0x5F, 0x2B, 0xF7,
0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xBF, 0xEA, 0x19, 0x97, 0x41, 0xA0, 0x21, 0x6C, 0xF8, 0x38, 0xDE, 0xD0, 0x6E, 0xFF, 0x82, 0xDF
};
#endif
#ifdef XMRIG_ALGO_ARGON2
// "argon2/chukwa"
const static uint8_t argon2_chukwa_test_out[32] = {
0xC1, 0x58, 0xA1, 0x05, 0xAE, 0x75, 0xC7, 0x56, 0x1C, 0xFD, 0x02, 0x90, 0x83, 0xA4, 0x7A, 0x87,
0x65, 0x3D, 0x51, 0xF9, 0x14, 0x12, 0x8E, 0x21, 0xC1, 0x97, 0x1D, 0x8B, 0x10, 0xC4, 0x90, 0x34
};
// "argon2/wrkz"
const static uint8_t argon2_wrkz_test_out[32] = {
0x35, 0xE0, 0x83, 0xD4, 0xB9, 0xC6, 0x4C, 0x2A, 0x68, 0x82, 0x0A, 0x43, 0x1F, 0x61, 0x31, 0x19,
0x98, 0xA8, 0xCD, 0x18, 0x64, 0xDB, 0xA4, 0x07, 0x7E, 0x25, 0xB7, 0xF1, 0x21, 0xD5, 0x4B, 0xD1,
};
#endif
} // namespace xmrig
#endif /* XMRIG_CRYPTONIGHT_TEST_H */

74
src/crypto/common/Algorithm.cpp

@ -116,30 +116,17 @@ static AlgoName const algorithm_names[] = {
{ "randomx/loki", "rx/loki", Algorithm::RX_LOKI },
{ "RandomXL", nullptr, Algorithm::RX_LOKI },
# endif
# ifdef XMRIG_ALGO_ARGON2
{ "argon2/chukwa", nullptr, Algorithm::AR2_CHUKWA },
{ "chukwa", nullptr, Algorithm::AR2_CHUKWA },
{ "argon2/wrkz", nullptr, Algorithm::AR2_WRKZ },
# endif
};
} /* namespace xmrig */
int xmrig::Algorithm::maxIntensity() const
{
# ifdef XMRIG_ALGO_RANDOMX
if (family() == RANDOM_X) {
return 1;
}
# endif
# ifdef XMRIG_ALGO_CN_GPU
if (m_id == CN_GPU) {
return 1;
}
# endif
return 5;
}
rapidjson::Value xmrig::Algorithm::toJSON() const
{
using namespace rapidjson;
@ -170,6 +157,8 @@ size_t xmrig::Algorithm::l2() const
size_t xmrig::Algorithm::l3() const
{
constexpr size_t oneMiB = 0x100000;
const Family f = family();
assert(f != UNKNOWN);
@ -179,8 +168,6 @@ size_t xmrig::Algorithm::l3() const
# ifdef XMRIG_ALGO_RANDOMX
if (f == RANDOM_X) {
constexpr size_t oneMiB = 0x100000;
switch (m_id) {
case RX_0:
case RX_LOKI:
@ -195,10 +182,49 @@ size_t xmrig::Algorithm::l3() const
}
# endif
# ifdef XMRIG_ALGO_ARGON2
if (f == ARGON2) {
switch (m_id) {
case AR2_CHUKWA:
return oneMiB / 2;
case AR2_WRKZ:
return oneMiB / 4;
default:
break;
}
}
# endif
return 0;
}
uint32_t xmrig::Algorithm::maxIntensity() const
{
# ifdef XMRIG_ALGO_RANDOMX
if (family() == RANDOM_X) {
return 1;
}
# endif
# ifdef XMRIG_ALGO_ARGON2
if (family() == ARGON2) {
return 1;
}
# endif
# ifdef XMRIG_ALGO_CN_GPU
if (m_id == CN_GPU) {
return 1;
}
# endif
return 5;
}
xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
{
switch (id) {
@ -244,6 +270,12 @@ xmrig::Algorithm::Family xmrig::Algorithm::family(Id id)
return RANDOM_X;
# endif
# ifdef XMRIG_ALGO_ARGON2
case AR2_CHUKWA:
case AR2_WRKZ:
return ARGON2;
# endif
case INVALID:
case MAX:
return UNKNOWN;
@ -273,7 +305,7 @@ const char *xmrig::Algorithm::name(bool shortName) const
{
for (size_t i = 0; i < ARRAY_SIZE(algorithm_names); i++) {
if (algorithm_names[i].id == m_id) {
return shortName ? algorithm_names[i].shortName : algorithm_names[i].name;
return (shortName && algorithm_names[i].shortName) ? algorithm_names[i].shortName : algorithm_names[i].name;
}
}

9
src/crypto/common/Algorithm.h

@ -72,6 +72,10 @@ public:
RX_0, // "rx/0" RandomX (reference configuration).
RX_WOW, // "rx/wow" RandomWOW (Wownero).
RX_LOKI, // "rx/loki" RandomXL (Loki).
# endif
# ifdef XMRIG_ALGO_ARGON2
AR2_CHUKWA, // "argon2/chukwa"
AR2_WRKZ, // "argon2/wrkz"
# endif
MAX
};
@ -82,7 +86,8 @@ public:
CN_LITE,
CN_HEAVY,
CN_PICO,
RANDOM_X
RANDOM_X,
ARGON2
};
inline Algorithm() {}
@ -102,10 +107,10 @@ public:
inline bool operator==(const Algorithm &other) const { return isEqual(other); }
inline operator Algorithm::Id() const { return m_id; }
int maxIntensity() const;
rapidjson::Value toJSON() const;
size_t l2() const;
size_t l3() const;
uint32_t maxIntensity() const;
static Family family(Id id);
static Id parse(const char *name);

20
src/crypto/rx/RxAlgo.cpp

@ -47,23 +47,3 @@ xmrig::Algorithm::Id xmrig::RxAlgo::apply(Algorithm::Id algorithm)
return algorithm;
}
size_t xmrig::RxAlgo::l3(Algorithm::Id algorithm)
{
switch (algorithm) {
case Algorithm::RX_0:
return RandomX_MoneroConfig.ScratchpadL3_Size;
case Algorithm::RX_WOW:
return RandomX_WowneroConfig.ScratchpadL3_Size;
case Algorithm::RX_LOKI:
return RandomX_LokiConfig.ScratchpadL3_Size;
default:
break;
}
return 0;
}

4
src/crypto/rx/RxAlgo.h

@ -35,9 +35,6 @@
#include "crypto/common/Algorithm.h"
struct RandomX_ConfigurationBase;
namespace xmrig
{
@ -46,7 +43,6 @@ class RxAlgo
{
public:
static Algorithm::Id apply(Algorithm::Id algorithm);
static size_t l3(Algorithm::Id algorithm);
};

Loading…
Cancel
Save