Hush fork of xmrig
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

54 lines
1.7 KiB

/*
* @author SChernykh
*/
/*
 * Builds a 32-bit integer reciprocal estimate of `a` from a hardware float
 * reciprocal plus one correction term.
 *
 * a: the value to invert.
 *    NOTE(review): the a_hi bit trick only reproduces the top 24 bits of `a`
 *    when bit 31 of `a` is set (the carry out of the mantissa field then bumps
 *    the exponent from 157 to 158) - confirm that callers guarantee this.
 *
 * Returns the bit pattern of the float reciprocal shifted left by 9 (which
 * drops the sign/exponent bits and leaves the mantissa in the top of the
 * word), minus a rounded correction. Presumably this approximates
 * 2^64 / a - 2^32 - TODO confirm against the reference implementation.
 */
inline uint get_reciprocal(uint a)
{
// Float whose bit pattern is (a >> 8) added onto an exponent field of 157.
const float a_hi = as_float((a >> 8) + ((126U + 31U) << 23));
// The low 8 bits of `a` that did not fit into a_hi, as a float.
const float a_lo = convert_float_rte(a & 0xFF);
// Hardware reciprocal of the high part only (native_* built-in).
const float r = native_recip(a_hi);
// Same mantissa as r with 64 added to the exponent field, i.e. r * 2^64.
const float r_scaled = as_float(as_uint(r) + (64U << 23));
// Residual (a_hi + a_lo) * r - 1, evaluated with two fused multiply-adds.
const float h = fma(a_lo, r, fma(a_hi, r, -1.0f));
// Subtract the residual, scaled by r * 2^64 and rounded to nearest even,
// from the shifted mantissa bits of r.
return (as_uint(r) << 9) - convert_int_rte(h * r_scaled);
}
/*
 * Divides the 64-bit value `a` by the 32-bit value `b` using the reciprocal
 * estimate from get_reciprocal() followed by a single +/-1 correction step.
 *
 * a: dividend.
 * b: divisor (passed unchanged to get_reciprocal(); see its notes).
 *
 * Returns a uint2 whose first component (.s0) is the corrected quotient
 * estimate and whose second component (.s1) is the matching remainder, both
 * truncated to 32 bits.
 *
 * NOTE(review): the .s0/.s1 accesses and the ((int*)&tmp)[1] cast treat
 * element 1 as the upper 32 bits of a 64-bit value, which presumes a
 * little-endian device - confirm for all targets.
 */
inline uint2 fast_div_v2(ulong a, uint b)
{
const uint r = get_reciprocal(b);
// k = high32(a.lo * r) + r * a.hi + a, evaluated modulo 2^64.
const ulong k = mul_hi(as_uint2(a).s0, r) + ((ulong)(r) * as_uint2(a).s1) + a;
// The upper 32 bits of k are the quotient estimate.
const uint q = as_uint2(k).s1;
// Signed remainder candidate for that estimate.
long tmp = a - ((ulong)(q) * b);
// If the sum for k wrapped (its high word ended up below a's high word), q is
// missing a 2^32 term; compensate by subtracting b from the high word of tmp.
((int*)&tmp)[1] -= (as_uint2(k).s1 < as_uint2(a).s1) ? b : 0;
// -1 when tmp is negative (estimate too large), otherwise 0.
const int overshoot = ((int*)&tmp)[1] >> 31;
// -1 when (b - 1) - tmp is negative, i.e. tmp >= b (estimate too small), otherwise 0.
const int undershoot = as_int2(as_uint(b - 1) - tmp).s1 >> 31;
// Move the quotient by at most one in either direction and shift the
// remainder by +b / -b to match.
return (uint2)(q + overshoot - undershoot, as_uint2(tmp).s0 + (as_uint(overshoot) & b) - (as_uint(undershoot) & b));
}
/*
 * Integer square-root style routine: seeds the result from the hardware
 * native_sqrt/native_rsqrt, refines it with one correction term and then
 * applies two integer checks that can move the result by one.
 *
 * n1: 64-bit input.
 *
 * Returns a 32-bit result. Presumably this equals
 * floor(2 * sqrt(2^64 + n1) - 2^33) - TODO confirm against the reference
 * implementation; the code below only shows the bit-level steps.
 */
inline uint fast_sqrt_v2(const ulong n1)
{
// Float with exponent field 191 (64 + 127) whose mantissa is the top 23 bits
// of n1, i.e. roughly 2^64 + n1 at float precision.
float x = as_float((as_uint2(n1).s1 >> 9) + ((64U + 127U) << 23));
// Hardware reciprocal square root and square root of the same value.
float x1 = native_rsqrt(x);
x = native_sqrt(x);
// The following line does x1 *= 4294967296.0f;
x1 = as_float(as_uint(x1) + (32U << 23));
// Strip the exponent field from sqrt(x), leaving its mantissa as an integer.
// NOTE(review): presumably 24 bits wide with the implicit leading one
// (exponent field 159 minus 158), which the mul24 below relies on - confirm.
const uint x0 = as_uint(x) - (158U << 23);
// Difference between n1 and the 64-bit product x0 * x0 (low half from mul24,
// high half from mul_hi) shifted left by 18, evaluated modulo 2^64.
const long delta0 = n1 - (as_ulong((uint2)(mul24(x0, x0), mul_hi(x0, x0))) << 18);
// Correction term: signed upper 32 bits of the difference times the scaled
// reciprocal square root.
const float delta = convert_float_rte(as_int2(delta0).s1) * x1;
// Rescale the seed and add the correction, rounded to nearest even.
uint result = (x0 << 10) + convert_int_rte(delta);
// Split the estimate into its halved value and its lowest bit.
const uint s = result >> 1;
const uint b = result & 1;
// Error term used by both checks below; it is computed once from the
// estimate before either adjustment is applied.
const ulong x2 = (ulong)(s) * (s + b) + ((ulong)(result) << 32) - n1;
// as_int(b - 1) is -1 when b == 0 and 0 when b == 1.
if ((long)(x2 + as_int(b - 1)) >= 0) --result;
if ((long)(x2 + 0x100000000UL + s) < 0) ++result;
return result;
}