Browse Source

Refactor simd_opt code

pull/2/head
Cesar Eduardo Barros 9 years ago
parent
commit
464c01e282
  1. 15
      src/simd_opt/mod.rs
  2. 75
      src/simd_opt/u32x4.rs
  3. 164
      src/simd_opt/u64x4.rs

15
src/simd_opt/mod.rs

@ -24,6 +24,21 @@
// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
/// Reinterprets `$vec` as the vector type `simdty::$lanes`, permutes its
/// lanes with `simdint::$shuffle` using the index array `$idx`, and
/// reinterprets the permuted value as the type expected at the call site.
///
/// The same vector is supplied as both shuffle operands, so `$idx` selects
/// lanes of `$vec` only.
#[cfg(feature = "simd")]
macro_rules! transmute_shuffle {
    ($lanes:ident, $shuffle:ident, $vec:expr, $idx:expr) => {
        // `transmute` is a plain bit reinterpretation between `$vec`'s type,
        // `$lanes`, and the inferred result type; the validity of `$idx` for
        // `$shuffle` is left to the call site.
        unsafe {
            use simdint::$shuffle;
            use simdty::$lanes;
            use std::mem::transmute;

            let reinterpreted: $lanes = transmute($vec);
            let permuted: $lanes = $shuffle(reinterpreted, reinterpreted, $idx);
            transmute(permuted)
        }
    };
}
#[cfg(feature = "simd")] pub mod u32x4;
#[cfg(feature = "simd")] pub mod u64x4;

75
src/simd_opt/u32x4.rs

@ -26,14 +26,6 @@
use simdty::u32x4;
/// Rotates every 32-bit lane of `vec` right by `n` bits using two shifts.
///
/// `n` is reduced modulo 32, so any `u32` is accepted. A rotation that
/// reduces to zero returns `vec` unchanged instead of issuing a left shift
/// by the full lane width, which is an out-of-range vector shift amount.
#[inline(always)]
fn rotate_right_any(vec: u32x4, n: u32) -> u32x4 {
    let r = n % 32;
    if r == 0 {
        return vec;
    }
    let l = 32 - r;
    // The two shifted values occupy disjoint bit ranges, so XOR combines
    // them exactly like OR would.
    (vec >> u32x4::new(r, r, r, r)) ^ (vec << u32x4::new(l, l, l, l))
}
#[cfg(feature = "simd_opt")]
#[inline(always)]
pub fn rotate_right_const(vec: u32x4, n: u32) -> u32x4 {
@ -50,50 +42,47 @@ pub fn rotate_right_const(vec: u32x4, n: u32) -> u32x4 {
rotate_right_any(vec, n)
}
#[cfg(feature = "simd_opt")]
#[cfg(any(target_feature = "sse2", target_feature = "neon"))]
#[inline(always)]
fn rotate_right_16(vec: u32x4) -> u32x4 {
use simdint::simd_shuffle8;
use simdty::u16x8;
use std::mem::transmute;
fn rotate_right_any(vec: u32x4, n: u32) -> u32x4 {
let r = n as u32;
let l = 32 - r;
unsafe {
let tmp: u16x8 = transmute(vec);
let tmp: u16x8 = simd_shuffle8(tmp, tmp,
[1, 0,
3, 2,
5, 4,
7, 6]);
transmute(tmp)
}
(vec >> u32x4::new(r, r, r, r)) ^ (vec << u32x4::new(l, l, l, l))
}
#[cfg(feature = "simd_opt")]
#[cfg(not(any(target_feature = "sse2", target_feature = "neon")))]
#[inline(always)]
fn rotate_right_16(vec: u32x4) -> u32x4 { rotate_right_any(vec, 16) }
/// Rotates every 32-bit lane of `vec` right by 16 bits.
///
/// The strategy is chosen from the compile-time target features: a byte
/// shuffle with SSSE3, a 16-bit lane swap with SSE2 or NEON, and the generic
/// shift-based rotation otherwise.
fn rotate_right_16(vec: u32x4) -> u32x4 {
    if cfg!(target_feature = "ssse3") {
        // pshufb (SSSE3) / vpshufb (AVX2)
        return transmute_shuffle!(u8x16, simd_shuffle16, vec,
                                  [ 2,  3,  0,  1,
                                    6,  7,  4,  5,
                                   10, 11,  8,  9,
                                   14, 15, 12, 13]);
    }

    if cfg!(any(target_feature = "sse2", target_feature = "neon")) {
        // pshuflw+pshufhw (SSE2) / vrev (NEON)
        return transmute_shuffle!(u16x8, simd_shuffle8, vec,
                                  [1, 0,
                                   3, 2,
                                   5, 4,
                                   7, 6]);
    }

    // No suitable shuffle instruction: fall back to shifts.
    rotate_right_any(vec, 16)
}
#[cfg(feature = "simd_opt")]
#[cfg(target_feature = "ssse3")]
#[inline(always)]
fn rotate_right_8(vec: u32x4) -> u32x4 {
use simdint::simd_shuffle16;
use simdty::u8x16;
use std::mem::transmute;
unsafe {
let tmp: u8x16 = transmute(vec);
let tmp: u8x16 = simd_shuffle16(tmp, tmp,
[ 1, 2, 3, 0,
5, 6, 7, 4,
9, 10, 11, 8,
13, 14, 15, 12]);
transmute(tmp)
if cfg!(target_feature = "ssse3") {
// pshufb (SSSE3) / vpshufb (AVX2)
transmute_shuffle!(u8x16, simd_shuffle16, vec,
[ 1, 2, 3, 0,
5, 6, 7, 4,
9, 10, 11, 8,
13, 14, 15, 12])
} else {
rotate_right_any(vec, 8)
}
}
#[cfg(feature = "simd_opt")]
#[cfg(not(target_feature = "ssse3"))]
#[inline(always)]
fn rotate_right_8(vec: u32x4) -> u32x4 { rotate_right_any(vec, 8) }

164
src/simd_opt/u64x4.rs

@ -26,14 +26,6 @@
use simdty::u64x4;
/// Rotates every 64-bit lane of `vec` right by `n` bits using two shifts.
///
/// `n` is reduced modulo 64, so any `u32` is accepted. A rotation that
/// reduces to zero returns `vec` unchanged instead of issuing a left shift
/// by the full lane width, which is an out-of-range vector shift amount.
#[inline(always)]
fn rotate_right_any(vec: u64x4, n: u32) -> u64x4 {
    let r = u64::from(n % 64);
    if r == 0 {
        return vec;
    }
    let l = 64 - r;
    // The two shifted values occupy disjoint bit ranges, so XOR combines
    // them exactly like OR would.
    (vec >> u64x4::new(r, r, r, r)) ^ (vec << u64x4::new(l, l, l, l))
}
#[cfg(feature = "simd_opt")]
#[inline(always)]
pub fn rotate_right_const(vec: u64x4, n: u32) -> u64x4 {
@ -51,108 +43,84 @@ pub fn rotate_right_const(vec: u64x4, n: u32) -> u64x4 {
rotate_right_any(vec, n)
}
#[cfg(feature = "simd_opt")]
#[cfg(any(target_feature = "sse2", target_feature = "neon"))]
#[inline(always)]
fn rotate_right_32(vec: u64x4) -> u64x4 {
use simdint::simd_shuffle8;
use simdty::u32x8;
use std::mem::transmute;
fn rotate_right_any(vec: u64x4, n: u32) -> u64x4 {
let r = n as u64;
let l = 64 - r;
unsafe {
let tmp: u32x8 = transmute(vec);
let tmp: u32x8 = simd_shuffle8(tmp, tmp,
[1, 0,
3, 2,
5, 4,
7, 6]);
transmute(tmp)
}
(vec >> u64x4::new(r, r, r, r)) ^ (vec << u64x4::new(l, l, l, l))
}
#[cfg(feature = "simd_opt")]
#[cfg(not(any(target_feature = "sse2", target_feature = "neon")))]
#[inline(always)]
fn rotate_right_32(vec: u64x4) -> u64x4 { rotate_right_any(vec, 32) }
#[cfg(feature = "simd_opt")]
#[cfg(target_feature = "ssse3")]
#[inline(always)]
fn rotate_right_24(vec: u64x4) -> u64x4 {
use simdint::simd_shuffle32;
use simdty::u8x32;
use std::mem::transmute;
unsafe {
let tmp: u8x32 = transmute(vec);
let tmp: u8x32 = simd_shuffle32(tmp, tmp,
[ 3, 4, 5, 6, 7, 0, 1, 2,
11, 12, 13, 14, 15, 8, 9, 10,
19, 20, 21, 22, 23, 16, 17, 18,
27, 28, 29, 30, 31, 24, 25, 26]);
transmute(tmp)
/// Rotates every 64-bit lane of `vec` right by 32 bits.
///
/// With SSE2 or NEON enabled at compile time this swaps the two 32-bit
/// halves of each lane with a shuffle; otherwise it uses the generic
/// shift-based rotation.
fn rotate_right_32(vec: u64x4) -> u64x4 {
    if !cfg!(any(target_feature = "sse2", target_feature = "neon")) {
        return rotate_right_any(vec, 32);
    }

    // 2 x pshufd (SSE2) / vpshufd (AVX2) / 2 x vrev (NEON)
    transmute_shuffle!(u32x8, simd_shuffle8, vec,
                       [1, 0,
                        3, 2,
                        5, 4,
                        7, 6])
}
#[cfg(feature = "simd_asm")]
#[cfg(target_feature = "neon")]
#[cfg(target_arch = "arm")]
#[cfg(feature = "simd_opt")]
#[inline(always)]
fn rotate_right_24(vec: u64x4) -> u64x4 {
rotate_right_u8(vec, 3)
if cfg!(all(feature = "simd_asm",
target_feature = "neon",
target_arch = "arm")) {
// 4 x vext (NEON)
rotate_right_vext(vec, 3)
} else if cfg!(target_feature = "ssse3") {
// 2 x pshufb (SSSE3) / vpshufb (AVX2)
transmute_shuffle!(u8x32, simd_shuffle32, vec,
[ 3, 4, 5, 6, 7, 0, 1, 2,
11, 12, 13, 14, 15, 8, 9, 10,
19, 20, 21, 22, 23, 16, 17, 18,
27, 28, 29, 30, 31, 24, 25, 26])
} else {
rotate_right_any(vec, 24)
}
}
#[cfg(feature = "simd_opt")]
#[cfg(not(any(target_feature = "ssse3",
all(feature = "simd_asm",
target_feature = "neon",
target_arch = "arm"))))]
#[inline(always)]
fn rotate_right_24(vec: u64x4) -> u64x4 { rotate_right_any(vec, 24) }
#[cfg(feature = "simd_opt")]
#[cfg(target_feature = "sse2")]
#[inline(always)]
fn rotate_right_16(vec: u64x4) -> u64x4 {
use simdint::simd_shuffle16;
use simdty::u16x16;
use std::mem::transmute;
unsafe {
let tmp: u16x16 = transmute(vec);
let tmp: u16x16 = simd_shuffle16(tmp, tmp,
[ 1, 2, 3, 0,
5, 6, 7, 4,
9, 10, 11, 8,
13, 14, 15, 12]);
transmute(tmp)
if cfg!(all(feature = "simd_asm",
target_feature = "neon",
target_arch = "arm")) {
// 4 x vext (NEON)
rotate_right_vext(vec, 2)
} else if cfg!(target_feature = "ssse3") {
// 2 x pshufb (SSSE3) / vpshufb (AVX2)
transmute_shuffle!(u8x32, simd_shuffle32, vec,
[ 2, 3, 4, 5, 6, 7, 0, 1,
10, 11, 12, 13, 14, 15, 8, 9,
18, 19, 20, 21, 22, 23, 16, 17,
26, 27, 28, 29, 30, 31, 24, 25])
} else if cfg!(target_feature = "sse2") {
// 2 x pshuflw+pshufhw (SSE2)
transmute_shuffle!(u16x16, simd_shuffle16, vec,
[ 1, 2, 3, 0,
5, 6, 7, 4,
9, 10, 11, 8,
13, 14, 15, 12])
} else {
rotate_right_any(vec, 16)
}
}
#[cfg(feature = "simd_asm")]
#[cfg(target_feature = "neon")]
#[cfg(target_arch = "arm")]
#[inline(always)]
fn rotate_right_16(vec: u64x4) -> u64x4 {
rotate_right_u8(vec, 2)
}
#[cfg(feature = "simd_opt")]
#[cfg(not(any(target_feature = "sse2",
all(feature = "simd_asm",
target_feature = "neon",
target_arch = "arm"))))]
#[inline(always)]
fn rotate_right_16(vec: u64x4) -> u64x4 { rotate_right_any(vec, 16) }
#[cfg(feature = "simd_asm")]
#[cfg(target_feature = "neon")]
#[cfg(target_arch = "arm")]
#[cfg(all(feature = "simd_asm",
target_feature = "neon",
target_arch = "arm"))]
mod simd_asm_neon_arm {
use simdty::u64x2;
use simdty::{u64x2, u64x4};
#[inline(always)]
fn vext_u64_u8(vec: u64x2, b: u8) -> u64x2 {
fn vext_u64(vec: u64x2, b: u8) -> u64x2 {
unsafe {
let result: u64x2;
asm!("vext.8 ${0:e}, ${1:e}, ${1:e}, $2\nvext.8 ${0:f}, ${1:f}, ${1:f}, $2"
@ -163,18 +131,24 @@ mod simd_asm_neon_arm {
}
#[inline(always)]
pub fn rotate_right_u8(vec: u64x4, n: u8) -> u64x4 {
pub fn rotate_right_vext(vec: u64x4, b: u8) -> u64x4 {
use simdint::{simd_shuffle2, simd_shuffle4};
unsafe {
let tmp0 = vext_u64_u8(simd_shuffle2(vec, vec, [0, 1]), n);
let tmp1 = vext_u64_u8(simd_shuffle2(vec, vec, [2, 3]), n);
let tmp0 = vext_u64(simd_shuffle2(vec, vec, [0, 1]), b);
let tmp1 = vext_u64(simd_shuffle2(vec, vec, [2, 3]), b);
simd_shuffle4(tmp0, tmp1, [0, 1, 2, 3])
}
}
}
#[cfg(feature = "simd_asm")]
#[cfg(target_feature = "neon")]
#[cfg(target_arch = "arm")]
use simd_asm_neon_arm::*;
#[cfg(all(feature = "simd_asm",
target_feature = "neon",
target_arch = "arm"))]
use self::simd_asm_neon_arm::rotate_right_vext;
/// Stand-in for the NEON `vext`-based rotation on builds where the inline
/// assembly version is not compiled in.
///
/// It exists so that code selecting the NEON path through a `cfg!(...)`
/// condition still type-checks on other configurations; calling it panics.
#[cfg(all(feature = "simd_opt",
          not(all(feature = "simd_asm",
                  target_feature = "neon",
                  target_arch = "arm"))))]
fn rotate_right_vext(_vec: u64x4, _n: u8) -> u64x4 {
    unreachable!()
}

Loading…
Cancel
Save