SChernykh
2 years ago
14 changed files with 752 additions and 29 deletions
@ -0,0 +1,121 @@ |
|||
Creative Commons Legal Code |
|||
|
|||
CC0 1.0 Universal |
|||
|
|||
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE |
|||
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN |
|||
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS |
|||
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES |
|||
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS |
|||
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM |
|||
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED |
|||
HEREUNDER. |
|||
|
|||
Statement of Purpose |
|||
|
|||
The laws of most jurisdictions throughout the world automatically confer |
|||
exclusive Copyright and Related Rights (defined below) upon the creator |
|||
and subsequent owner(s) (each and all, an "owner") of an original work of |
|||
authorship and/or a database (each, a "Work"). |
|||
|
|||
Certain owners wish to permanently relinquish those rights to a Work for |
|||
the purpose of contributing to a commons of creative, cultural and |
|||
scientific works ("Commons") that the public can reliably and without fear |
|||
of later claims of infringement build upon, modify, incorporate in other |
|||
works, reuse and redistribute as freely as possible in any form whatsoever |
|||
and for any purposes, including without limitation commercial purposes. |
|||
These owners may contribute to the Commons to promote the ideal of a free |
|||
culture and the further production of creative, cultural and scientific |
|||
works, or to gain reputation or greater distribution for their Work in |
|||
part through the use and efforts of others. |
|||
|
|||
For these and/or other purposes and motivations, and without any |
|||
expectation of additional consideration or compensation, the person |
|||
associating CC0 with a Work (the "Affirmer"), to the extent that he or she |
|||
is an owner of Copyright and Related Rights in the Work, voluntarily |
|||
elects to apply CC0 to the Work and publicly distribute the Work under its |
|||
terms, with knowledge of his or her Copyright and Related Rights in the |
|||
Work and the meaning and intended legal effect of CC0 on those rights. |
|||
|
|||
1. Copyright and Related Rights. A Work made available under CC0 may be |
|||
protected by copyright and related or neighboring rights ("Copyright and |
|||
Related Rights"). Copyright and Related Rights include, but are not |
|||
limited to, the following: |
|||
|
|||
i. the right to reproduce, adapt, distribute, perform, display, |
|||
communicate, and translate a Work; |
|||
ii. moral rights retained by the original author(s) and/or performer(s); |
|||
iii. publicity and privacy rights pertaining to a person's image or |
|||
likeness depicted in a Work; |
|||
iv. rights protecting against unfair competition in regards to a Work, |
|||
subject to the limitations in paragraph 4(a), below; |
|||
v. rights protecting the extraction, dissemination, use and reuse of data |
|||
in a Work; |
|||
vi. database rights (such as those arising under Directive 96/9/EC of the |
|||
European Parliament and of the Council of 11 March 1996 on the legal |
|||
protection of databases, and under any national implementation |
|||
thereof, including any amended or successor version of such |
|||
directive); and |
|||
vii. other similar, equivalent or corresponding rights throughout the |
|||
world based on applicable law or treaty, and any national |
|||
implementations thereof. |
|||
|
|||
2. Waiver. To the greatest extent permitted by, but not in contravention |
|||
of, applicable law, Affirmer hereby overtly, fully, permanently, |
|||
irrevocably and unconditionally waives, abandons, and surrenders all of |
|||
Affirmer's Copyright and Related Rights and associated claims and causes |
|||
of action, whether now known or unknown (including existing as well as |
|||
future claims and causes of action), in the Work (i) in all territories |
|||
worldwide, (ii) for the maximum duration provided by applicable law or |
|||
treaty (including future time extensions), (iii) in any current or future |
|||
medium and for any number of copies, and (iv) for any purpose whatsoever, |
|||
including without limitation commercial, advertising or promotional |
|||
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each |
|||
member of the public at large and to the detriment of Affirmer's heirs and |
|||
successors, fully intending that such Waiver shall not be subject to |
|||
revocation, rescission, cancellation, termination, or any other legal or |
|||
equitable action to disrupt the quiet enjoyment of the Work by the public |
|||
as contemplated by Affirmer's express Statement of Purpose. |
|||
|
|||
3. Public License Fallback. Should any part of the Waiver for any reason |
|||
be judged legally invalid or ineffective under applicable law, then the |
|||
Waiver shall be preserved to the maximum extent permitted taking into |
|||
account Affirmer's express Statement of Purpose. In addition, to the |
|||
extent the Waiver is so judged Affirmer hereby grants to each affected |
|||
person a royalty-free, non transferable, non sublicensable, non exclusive, |
|||
irrevocable and unconditional license to exercise Affirmer's Copyright and |
|||
Related Rights in the Work (i) in all territories worldwide, (ii) for the |
|||
maximum duration provided by applicable law or treaty (including future |
|||
time extensions), (iii) in any current or future medium and for any number |
|||
of copies, and (iv) for any purpose whatsoever, including without |
|||
limitation commercial, advertising or promotional purposes (the |
|||
"License"). The License shall be deemed effective as of the date CC0 was |
|||
applied by Affirmer to the Work. Should any part of the License for any |
|||
reason be judged legally invalid or ineffective under applicable law, such |
|||
partial invalidity or ineffectiveness shall not invalidate the remainder |
|||
of the License, and in such case Affirmer hereby affirms that he or she |
|||
will not (i) exercise any of his or her remaining Copyright and Related |
|||
Rights in the Work or (ii) assert any associated claims and causes of |
|||
action with respect to the Work, in either case contrary to Affirmer's |
|||
express Statement of Purpose. |
|||
|
|||
4. Limitations and Disclaimers. |
|||
|
|||
a. No trademark or patent rights held by Affirmer are waived, abandoned, |
|||
surrendered, licensed or otherwise affected by this document. |
|||
b. Affirmer offers the Work as-is and makes no representations or |
|||
warranties of any kind concerning the Work, express, implied, |
|||
statutory or otherwise, including without limitation warranties of |
|||
title, merchantability, fitness for a particular purpose, non |
|||
infringement, or the absence of latent or other defects, accuracy, or |
|||
the present or absence of errors, whether or not discoverable, all to |
|||
the greatest extent permissible under applicable law. |
|||
c. Affirmer disclaims responsibility for clearing rights of other persons |
|||
that may apply to the Work or any use thereof, including without |
|||
limitation any person's Copyright and Related Rights in the Work. |
|||
Further, Affirmer disclaims responsibility for obtaining any necessary |
|||
consents, permissions or other rights required for any use of the |
|||
Work. |
|||
d. Affirmer understands and acknowledges that Creative Commons is not a |
|||
party to this document and has no duty or obligation with respect to |
|||
this CC0 or use of the Work. |
@ -0,0 +1,38 @@ |
|||
#ifndef BLAKE2_AVX2_BLAKE2_H |
|||
#define BLAKE2_AVX2_BLAKE2_H |
|||
|
|||
/*
 * INLINE expands to whichever spelling of `inline` the compiler accepts:
 * the plain keyword for C++ and for C99 or later, a vendor-specific keyword
 * for pre-C99 MSVC and GCC-compatible compilers, and nothing otherwise.
 */
#if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
#  define INLINE inline
#elif defined(_MSC_VER)
#  define INLINE __inline
#elif defined(__GNUC__)
#  define INLINE __inline__
#else
#  define INLINE
#endif
|||
|
|||
/*
 * ALIGN(x) prefixes a declaration with the compiler-specific attribute that
 * requests x-byte alignment: __declspec(align) on MSVC, the GNU `aligned`
 * attribute on every other compiler.
 */
#if !defined(_MSC_VER)
#  define ALIGN(x) __attribute__((aligned(x)))
#else
#  define ALIGN(x) __declspec(align(x))
#endif
|||
|
|||
/* Byte-size constants for BLAKE2s. */
enum blake2s_constant {
  BLAKE2S_BLOCKBYTES    = 64, /* block size */
  BLAKE2S_OUTBYTES      = 32, /* output size */
  BLAKE2S_KEYBYTES      = 32, /* key size */
  BLAKE2S_SALTBYTES     = 8,  /* salt size */
  BLAKE2S_PERSONALBYTES = 8   /* personalization size */
};
|||
|
|||
/* Byte-size constants for BLAKE2b. */
enum blake2b_constant {
  BLAKE2B_BLOCKBYTES    = 128, /* block size */
  BLAKE2B_OUTBYTES      = 64,  /* output size */
  BLAKE2B_KEYBYTES      = 64,  /* key size */
  BLAKE2B_SALTBYTES     = 16,  /* salt size */
  BLAKE2B_PERSONALBYTES = 16   /* personalization size */
};
|||
|
|||
#endif |
@ -0,0 +1,48 @@ |
|||
#ifndef BLAKE2_AVX2_BLAKE2B_COMMON_H |
|||
#define BLAKE2_AVX2_BLAKE2B_COMMON_H |
|||
|
|||
#include <stddef.h> |
|||
#include <stdint.h> |
|||
#include <string.h> |
|||
|
|||
#include <immintrin.h> |
|||
|
|||
#include "blake2.h" |
|||
|
|||
/*
 * Thin wrappers around the SSE2/AVX load and store intrinsics.
 *
 *   LOAD128 / STORE128   - 128-bit access, p must be 16-byte aligned
 *   LOADU128 / STOREU128 - 128-bit access, no alignment requirement
 *   LOAD / STORE         - 256-bit access, p must be 32-byte aligned
 *   LOADU / STOREU       - 256-bit access, no alignment requirement
 *
 * The load macros cast to pointer-to-const so they can be applied to const
 * data (the IV table, the caller's message) without discarding the
 * qualifier; the intrinsics themselves take a pointer to const.
 */
#define LOAD128(p)       _mm_load_si128((const __m128i *)(p))
#define STORE128(p, r)   _mm_store_si128((__m128i *)(p), (r))

#define LOADU128(p)      _mm_loadu_si128((const __m128i *)(p))
#define STOREU128(p, r)  _mm_storeu_si128((__m128i *)(p), (r))

#define LOAD(p)          _mm256_load_si256((const __m256i *)(p))
#define STORE(p, r)      _mm256_store_si256((__m256i *)(p), (r))

#define LOADU(p)         _mm256_loadu_si256((const __m256i *)(p))
#define STOREU(p, r)     _mm256_storeu_si256((__m256i *)(p), (r))
|||
|
|||
/*
 * Reads a 64-bit value from p in host byte order. The bytes are copied with
 * memcpy, so p does not have to be aligned for uint64_t.
 */
static INLINE uint64_t LOADU64(void const * p) {
  uint64_t word;
  memcpy(&word, p, sizeof(uint64_t));
  return word;
}
|||
|
|||
/*
 * Byte-shuffle control vectors for _mm256_shuffle_epi8 (one 8-byte group per
 * row below). Within every 64-bit lane, ROTATE16 selects source byte
 * (i + 2) mod 8 for destination byte i and ROTATE24 selects (i + 3) mod 8,
 * which is a right-rotation of the lane by 16 and 24 bits respectively.
 */
#define ROTATE16 _mm256_setr_epi8( \
     2,  3,  4,  5,  6,  7,  0,  1, \
    10, 11, 12, 13, 14, 15,  8,  9, \
     2,  3,  4,  5,  6,  7,  0,  1, \
    10, 11, 12, 13, 14, 15,  8,  9)

#define ROTATE24 _mm256_setr_epi8( \
     3,  4,  5,  6,  7,  0,  1,  2, \
    11, 12, 13, 14, 15,  8,  9, 10, \
     3,  4,  5,  6,  7,  0,  1,  2, \
    11, 12, 13, 14, 15,  8,  9, 10)
|||
|
|||
/* Lane-wise 64-bit arithmetic on 256-bit vectors. */
#define ADD(a, b) _mm256_add_epi64((a), (b))
#define SUB(a, b) _mm256_sub_epi64((a), (b))

/* Bitwise operations on 256-bit vectors. */
#define XOR(a, b) _mm256_xor_si256((a), (b))
#define AND(a, b) _mm256_and_si256((a), (b))
#define OR(a, b)  _mm256_or_si256((a), (b))

/*
 * Right-rotations of every 64-bit lane by 32, 24, 16 and 63 bits.
 * ROT32 swaps the two 32-bit halves of each lane, ROT24/ROT16 use a byte
 * shuffle, and ROT63 is built as (x >> 63) | (x + x), i.e. a left-rotation
 * by one bit. Note that ROT63 evaluates its argument more than once.
 */
#define ROT32(x) _mm256_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1))
#define ROT24(x) _mm256_shuffle_epi8((x), ROTATE24)
#define ROT16(x) _mm256_shuffle_epi8((x), ROTATE16)
#define ROT63(x) OR(_mm256_srli_epi64((x), 63), ADD((x), (x)))
|||
|
|||
#endif |
@ -0,0 +1,340 @@ |
|||
#ifndef BLAKE2_AVX2_BLAKE2B_LOAD_AVX2_H |
|||
#define BLAKE2_AVX2_BLAKE2B_LOAD_AVX2_H |
|||
|
|||
/*
 * Round 0 message gathers. BLAKE2B_LOAD_MSG_<r>_<k>(b0) builds the message
 * operand for the k-th G step of round r: the low 128 bits of b0 come from
 * t0 and the high 128 bits from t1. The macros expect the message vectors
 * m0..m7 and the scratch vectors t0/t1 to be in scope where they expand.
 */
#define BLAKE2B_LOAD_MSG_0_1(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m0, m1); \
    t1 = _mm256_unpacklo_epi64(m2, m3); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_0_2(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m0, m1); \
    t1 = _mm256_unpackhi_epi64(m2, m3); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_0_3(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m7, m4); \
    t1 = _mm256_unpacklo_epi64(m5, m6); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_0_4(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m7, m4); \
    t1 = _mm256_unpackhi_epi64(m5, m6); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 1 message gathers (operands for the four G steps of the round).
 * Expects m0..m7 and scratch t0/t1 in scope at the expansion site.
 */
#define BLAKE2B_LOAD_MSG_1_1(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m7, m2); \
    t1 = _mm256_unpackhi_epi64(m4, m6); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_1_2(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m5, m4); \
    t1 = _mm256_alignr_epi8(m3, m7, 8); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_1_3(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m2, m0); \
    t1 = _mm256_blend_epi32(m5, m0, 0x33); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_1_4(b0) \
  do { \
    t0 = _mm256_alignr_epi8(m6, m1, 8); \
    t1 = _mm256_blend_epi32(m3, m1, 0x33); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 2 message gathers (operands for the four G steps of the round).
 * Expects m0..m7 and scratch t0/t1 in scope at the expansion site.
 */
#define BLAKE2B_LOAD_MSG_2_1(b0) \
  do { \
    t0 = _mm256_alignr_epi8(m6, m5, 8); \
    t1 = _mm256_unpackhi_epi64(m2, m7); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_2_2(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m4, m0); \
    t1 = _mm256_blend_epi32(m6, m1, 0x33); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_2_3(b0) \
  do { \
    t0 = _mm256_alignr_epi8(m5, m4, 8); \
    t1 = _mm256_unpackhi_epi64(m1, m3); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_2_4(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m2, m7); \
    t1 = _mm256_blend_epi32(m0, m3, 0x33); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 3 message gathers (operands for the four G steps of the round).
 * Expects m0..m7 and scratch t0/t1 in scope at the expansion site.
 */
#define BLAKE2B_LOAD_MSG_3_1(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m3, m1); \
    t1 = _mm256_unpackhi_epi64(m6, m5); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_3_2(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m4, m0); \
    t1 = _mm256_unpacklo_epi64(m6, m7); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_3_3(b0) \
  do { \
    t0 = _mm256_alignr_epi8(m1, m7, 8); \
    t1 = _mm256_shuffle_epi32(m2, _MM_SHUFFLE(1, 0, 3, 2)); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_3_4(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m4, m3); \
    t1 = _mm256_unpacklo_epi64(m5, m0); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 4 message gathers (operands for the four G steps of the round).
 * Expects m0..m7 and scratch t0/t1 in scope at the expansion site.
 */
#define BLAKE2B_LOAD_MSG_4_1(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m4, m2); \
    t1 = _mm256_unpacklo_epi64(m1, m5); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_4_2(b0) \
  do { \
    t0 = _mm256_blend_epi32(m3, m0, 0x33); \
    t1 = _mm256_blend_epi32(m7, m2, 0x33); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_4_3(b0) \
  do { \
    t0 = _mm256_alignr_epi8(m7, m1, 8); \
    t1 = _mm256_alignr_epi8(m3, m5, 8); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_4_4(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m6, m0); \
    t1 = _mm256_unpacklo_epi64(m6, m4); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 5 message gathers (operands for the four G steps of the round).
 * Expects m0..m7 and scratch t0/t1 in scope at the expansion site.
 */
#define BLAKE2B_LOAD_MSG_5_1(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m1, m3); \
    t1 = _mm256_unpacklo_epi64(m0, m4); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_5_2(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m6, m5); \
    t1 = _mm256_unpackhi_epi64(m5, m1); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_5_3(b0) \
  do { \
    t0 = _mm256_alignr_epi8(m2, m0, 8); \
    t1 = _mm256_unpackhi_epi64(m3, m7); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_5_4(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m4, m6); \
    t1 = _mm256_alignr_epi8(m7, m2, 8); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 6 message gathers (operands for the four G steps of the round).
 * Expects m0..m7 and scratch t0/t1 in scope at the expansion site.
 */
#define BLAKE2B_LOAD_MSG_6_1(b0) \
  do { \
    t0 = _mm256_blend_epi32(m0, m6, 0x33); \
    t1 = _mm256_unpacklo_epi64(m7, m2); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_6_2(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m2, m7); \
    t1 = _mm256_alignr_epi8(m5, m6, 8); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_6_3(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m4, m0); \
    t1 = _mm256_blend_epi32(m4, m3, 0x33); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_6_4(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m5, m3); \
    t1 = _mm256_shuffle_epi32(m1, _MM_SHUFFLE(1, 0, 3, 2)); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 7 message gathers (operands for the four G steps of the round).
 * Expects m0..m7 and scratch t0/t1 in scope at the expansion site.
 */
#define BLAKE2B_LOAD_MSG_7_1(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m6, m3); \
    t1 = _mm256_blend_epi32(m1, m6, 0x33); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_7_2(b0) \
  do { \
    t0 = _mm256_alignr_epi8(m7, m5, 8); \
    t1 = _mm256_unpackhi_epi64(m0, m4); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_7_3(b0) \
  do { \
    t0 = _mm256_blend_epi32(m2, m1, 0x33); \
    t1 = _mm256_alignr_epi8(m4, m7, 8); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_7_4(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m5, m0); \
    t1 = _mm256_unpacklo_epi64(m2, m3); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 8 message gathers (operands for the four G steps of the round).
 * Expects m0..m7 and scratch t0/t1 in scope at the expansion site.
 */
#define BLAKE2B_LOAD_MSG_8_1(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m3, m7); \
    t1 = _mm256_alignr_epi8(m0, m5, 8); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_8_2(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m7, m4); \
    t1 = _mm256_alignr_epi8(m4, m1, 8); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_8_3(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m5, m6); \
    t1 = _mm256_unpackhi_epi64(m6, m0); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_8_4(b0) \
  do { \
    t0 = _mm256_alignr_epi8(m1, m2, 8); \
    t1 = _mm256_alignr_epi8(m2, m3, 8); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 9 message gathers (operands for the four G steps of the round).
 * Expects m0..m7 and scratch t0/t1 in scope at the expansion site.
 */
#define BLAKE2B_LOAD_MSG_9_1(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m5, m4); \
    t1 = _mm256_unpackhi_epi64(m3, m0); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_9_2(b0) \
  do { \
    t0 = _mm256_unpacklo_epi64(m1, m2); \
    t1 = _mm256_blend_epi32(m2, m3, 0x33); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_9_3(b0) \
  do { \
    t0 = _mm256_unpackhi_epi64(m6, m7); \
    t1 = _mm256_unpackhi_epi64(m4, m1); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)

#define BLAKE2B_LOAD_MSG_9_4(b0) \
  do { \
    t0 = _mm256_blend_epi32(m5, m0, 0x33); \
    t1 = _mm256_unpacklo_epi64(m7, m6); \
    (b0) = _mm256_blend_epi32(t0, t1, 0xF0); \
  } while (0)
|||
|
|||
/*
 * Round 10 message gathers. They perform exactly the same gathers as the
 * round 0 macros, so each one simply forwards to its round 0 counterpart.
 */
#define BLAKE2B_LOAD_MSG_10_1(b0) BLAKE2B_LOAD_MSG_0_1(b0)
#define BLAKE2B_LOAD_MSG_10_2(b0) BLAKE2B_LOAD_MSG_0_2(b0)
#define BLAKE2B_LOAD_MSG_10_3(b0) BLAKE2B_LOAD_MSG_0_3(b0)
#define BLAKE2B_LOAD_MSG_10_4(b0) BLAKE2B_LOAD_MSG_0_4(b0)
|||
|
|||
/*
 * Round 11 message gathers. They perform exactly the same gathers as the
 * round 1 macros, so each one simply forwards to its round 1 counterpart.
 */
#define BLAKE2B_LOAD_MSG_11_1(b0) BLAKE2B_LOAD_MSG_1_1(b0)
#define BLAKE2B_LOAD_MSG_11_2(b0) BLAKE2B_LOAD_MSG_1_2(b0)
#define BLAKE2B_LOAD_MSG_11_3(b0) BLAKE2B_LOAD_MSG_1_3(b0)
#define BLAKE2B_LOAD_MSG_11_4(b0) BLAKE2B_LOAD_MSG_1_4(b0)
|||
|
|||
#endif |
|||
|
@ -0,0 +1,16 @@ |
|||
#ifndef BLAKE2_AVX2_BLAKE2B_H |
|||
#define BLAKE2_AVX2_BLAKE2B_H |
|||
|
|||
#include <stddef.h> |
|||
|
|||
#ifdef __cplusplus
extern "C" {
#endif

/**
 * One-shot BLAKE2b hash computed with AVX2 instructions.
 *
 * @param out    destination buffer; receives outlen bytes of digest
 * @param outlen number of digest bytes to produce
 * @param in     message to hash
 * @param inlen  length of the message in bytes
 * @return 0 on success
 */
int blake2b_avx2(void* out, size_t outlen, const void* in, size_t inlen);

#ifdef __cplusplus
}
#endif
|||
|
|||
#endif |
@ -0,0 +1,141 @@ |
|||
#include <stddef.h> |
|||
#include <stdint.h> |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
|
|||
#include "blake2.h" |
|||
#include "blake2b.h" |
|||
#include "blake2b-common.h" |
|||
|
|||
/*
 * BLAKE2b initialization vector. The table is 64-byte aligned so that both
 * four-word halves (&blake2b_IV[0] and &blake2b_IV[4]) can be fetched with
 * aligned 256-bit loads.
 */
ALIGN(64) static const uint64_t blake2b_IV[8] = {
  UINT64_C(0x6A09E667F3BCC908), /* IV[0] */
  UINT64_C(0xBB67AE8584CAA73B), /* IV[1] */
  UINT64_C(0x3C6EF372FE94F82B), /* IV[2] */
  UINT64_C(0xA54FF53A5F1D36F1), /* IV[3] */
  UINT64_C(0x510E527FADE682D1), /* IV[4] */
  UINT64_C(0x9B05688C2B3E6C1F), /* IV[5] */
  UINT64_C(0x1F83D9ABFB41BD6B), /* IV[6] */
  UINT64_C(0x5BE0CD19137E2179)  /* IV[7] */
};
|||
|
|||
/*
 * First half of the G mixing step, applied to four columns at once:
 * a += m + b; d = (d ^ a) >>> 32; c += d; b = (b ^ c) >>> 24.
 * All four state arguments are updated in place.
 */
#define BLAKE2B_G1_V1(a, b, c, d, m) \
  do { \
    a = ADD(ADD(a, m), b); \
    d = ROT32(XOR(d, a)); \
    c = ADD(c, d); \
    b = ROT24(XOR(b, c)); \
  } while (0)

/*
 * Second half of the G mixing step:
 * a += m + b; d = (d ^ a) >>> 16; c += d; b = (b ^ c) >>> 63.
 * The XOR feeding ROT63 is stored first because ROT63 evaluates its
 * argument more than once.
 */
#define BLAKE2B_G2_V1(a, b, c, d, m) \
  do { \
    a = ADD(ADD(a, m), b); \
    d = ROT16(XOR(d, a)); \
    c = ADD(c, d); \
    b = XOR(b, c); \
    b = ROT63(b); \
  } while (0)
|||
|
|||
/*
 * Permutes the four 64-bit lanes of a, c and d (b is left untouched) between
 * the first and second pair of G steps of a round.
 */
#define BLAKE2B_DIAG_V1(a, b, c, d) \
  do { \
    a = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(2, 1, 0, 3)); \
    c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(0, 3, 2, 1)); \
    d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(1, 0, 3, 2)); \
  } while (0)

/*
 * Applies the inverse lane permutations of BLAKE2B_DIAG_V1, restoring the
 * original lane order of a, c and d at the end of a round.
 */
#define BLAKE2B_UNDIAG_V1(a, b, c, d) \
  do { \
    a = _mm256_permute4x64_epi64(a, _MM_SHUFFLE(0, 3, 2, 1)); \
    c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(2, 1, 0, 3)); \
    d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(1, 0, 3, 2)); \
  } while (0)
|||
|
|||
#include "blake2b-load-avx2.h" |
|||
|
|||
/*
 * One full round r (a literal 0..11, pasted into the BLAKE2B_LOAD_MSG_r_k
 * macro names): two G steps, lane permutation, two more G steps, inverse
 * permutation. The m argument is accepted for symmetry but not used; the
 * message words are taken from m0..m7 at the expansion site.
 */
#define BLAKE2B_ROUND_V1(a, b, c, d, r, m) \
  do { \
    __m256i msg; \
    BLAKE2B_LOAD_MSG_ ##r ##_1(msg); \
    BLAKE2B_G1_V1(a, b, c, d, msg); \
    BLAKE2B_LOAD_MSG_ ##r ##_2(msg); \
    BLAKE2B_G2_V1(a, b, c, d, msg); \
    BLAKE2B_DIAG_V1(a, b, c, d); \
    BLAKE2B_LOAD_MSG_ ##r ##_3(msg); \
    BLAKE2B_G1_V1(a, b, c, d, msg); \
    BLAKE2B_LOAD_MSG_ ##r ##_4(msg); \
    BLAKE2B_G2_V1(a, b, c, d, msg); \
    BLAKE2B_UNDIAG_V1(a, b, c, d); \
  } while (0)

/* Runs the twelve rounds 0..11 in order on the state (a, b, c, d). */
#define BLAKE2B_ROUNDS_V1(a, b, c, d, m) \
  do { \
    BLAKE2B_ROUND_V1(a, b, c, d,  0, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d,  1, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d,  2, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d,  3, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d,  4, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d,  5, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d,  6, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d,  7, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d,  8, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d,  9, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d, 10, (m)); \
    BLAKE2B_ROUND_V1(a, b, c, d, 11, (m)); \
  } while (0)
|||
|
|||
/*
 * Declares the locals used by the BLAKE2B_LOAD_MSG_* macros. m must be a
 * byte pointer to one 128-byte block; m<k> holds the 16 bytes at offset
 * 16 * k, duplicated into both 128-bit halves of the vector. The loads are
 * unaligned, so m needs no particular alignment. t0 and t1 are scratch.
 */
#define DECLARE_MESSAGE_WORDS(m) \
  const __m256i m0 = _mm256_broadcastsi128_si256(LOADU128((m) +   0)); \
  const __m256i m1 = _mm256_broadcastsi128_si256(LOADU128((m) +  16)); \
  const __m256i m2 = _mm256_broadcastsi128_si256(LOADU128((m) +  32)); \
  const __m256i m3 = _mm256_broadcastsi128_si256(LOADU128((m) +  48)); \
  const __m256i m4 = _mm256_broadcastsi128_si256(LOADU128((m) +  64)); \
  const __m256i m5 = _mm256_broadcastsi128_si256(LOADU128((m) +  80)); \
  const __m256i m6 = _mm256_broadcastsi128_si256(LOADU128((m) +  96)); \
  const __m256i m7 = _mm256_broadcastsi128_si256(LOADU128((m) + 112)); \
  __m256i t0; \
  __m256i t1;
|||
|
|||
/*
 * Compresses one 128-byte block into the chaining value (a, b), in place.
 *
 *   a, b            - low and high half of the chaining value (updated)
 *   m               - byte pointer to the block
 *   ctr_lo, ctr_hi  - low and high 64 bits of the byte counter
 *   f0, f1          - finalization flag words
 *
 * The working state is (a, b, IV[0..3], IV[4..7] ^ {ctr_lo, ctr_hi, f0, f1});
 * after the rounds the new chaining value is old ^ a ^ c and old ^ b ^ d.
 */
#define BLAKE2B_COMPRESS_V1(a, b, m, ctr_lo, ctr_hi, f0, f1) \
  do { \
    DECLARE_MESSAGE_WORDS(m) \
    const __m256i iv0 = a; \
    const __m256i iv1 = b; \
    __m256i c = LOAD(&blake2b_IV[0]); \
    __m256i d = XOR(LOAD(&blake2b_IV[4]), \
                    _mm256_set_epi64x(f1, f0, ctr_hi, ctr_lo)); \
    BLAKE2B_ROUNDS_V1(a, b, c, d, m); \
    a = XOR(XOR(a, c), iv0); \
    b = XOR(XOR(b, d), iv1); \
  } while (0)
|||
|
|||
int blake2b_avx2(void* out_ptr, size_t outlen, const void* in_ptr, size_t inlen) { |
|||
const __m256i parameter_block = _mm256_set_epi64x(0, 0, 0, 0x01010000UL | (uint32_t)outlen); |
|||
ALIGN(64) uint8_t buffer[BLAKE2B_BLOCKBYTES]; |
|||
__m256i a = XOR(LOAD(&blake2b_IV[0]), parameter_block); |
|||
__m256i b = LOAD(&blake2b_IV[4]); |
|||
uint64_t counter = 0; |
|||
const uint8_t* in = (const uint8_t*)in_ptr; |
|||
do { |
|||
const uint64_t flag = (inlen <= BLAKE2B_BLOCKBYTES) ? -1 : 0; |
|||
size_t block_size = BLAKE2B_BLOCKBYTES; |
|||
if(inlen < BLAKE2B_BLOCKBYTES) { |
|||
memcpy(buffer, in, inlen); |
|||
memset(buffer + inlen, 0, BLAKE2B_BLOCKBYTES - inlen); |
|||
block_size = inlen; |
|||
in = buffer; |
|||
} |
|||
counter += block_size; |
|||
BLAKE2B_COMPRESS_V1(a, b, in, counter, 0, flag, 0); |
|||
inlen -= block_size; |
|||
in += block_size; |
|||
} while(inlen > 0); |
|||
|
|||
uint8_t* out = (uint8_t*)out_ptr; |
|||
|
|||
switch (outlen) { |
|||
case 64: |
|||
STOREU(out + 32, b); |
|||
// Fall through
|
|||
|
|||
case 32: |
|||
STOREU(out, a); |
|||
break; |
|||
|
|||
default: |
|||
STOREU(buffer, a); |
|||
STOREU(buffer + 32, b); |
|||
memcpy(out, buffer, outlen); |
|||
break; |
|||
} |
|||
|
|||
_mm256_zeroupper(); |
|||
return 0; |
|||
} |
Loading…
Reference in new issue