You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.7 KiB
54 lines
1.7 KiB
/*
 * @author SChernykh
 */
|
|
|
|
/*
 * Computes a 32-bit fixed-point reciprocal estimate of `a` from a native
 * float reciprocal plus one correction step.
 *
 * The input is split into its upper 24 bits (placed into the mantissa of a
 * float) and its lower 8 bits (converted to float separately). The residual
 * (hi + lo) * recip - 1 is then used to correct the mantissa of the
 * native reciprocal.
 *
 * NOTE(review): the bit pattern built for `hi` only equals `a` with its low
 * 8 bits cleared when bit 31 of `a` is set - confirm callers guarantee that.
 */
inline uint get_reciprocal(uint a)
{
    // Upper 24 bits of `a` are added onto a float bit pattern whose exponent field is 126 + 31.
    const uint hi_bits = (a >> 8) + ((126U + 31U) << 23);
    const float hi = as_float(hi_bits);
    // Remaining low 8 bits of `a`, as a float.
    const float lo = convert_float_rte(a & 0xFF);

    const float recip = native_recip(hi);
    const uint recip_bits = as_uint(recip);
    // Adding 64 to the exponent field scales the reciprocal by 2^64.
    const float recip_scaled = as_float(recip_bits + (64U << 23));

    // Residual of the estimate: hi * recip - 1, then + lo * recip.
    const float hi_err = fma(hi, recip, -1.0f);
    const float err = fma(lo, recip, hi_err);

    // "<< 9" drops the sign and exponent bits, leaving the mantissa at the top of the word.
    return (recip_bits << 9) - convert_int_rte(err * recip_scaled);
}
|
|
|
|
/*
 * Divides the 64-bit value `a` by the 32-bit value `b` using the reciprocal
 * estimate from get_reciprocal() instead of a hardware division.
 *
 * Returns a uint2 whose first component is the quotient estimate after a
 * +/-1 correction and whose second component is the low word of
 * a - q * b after the matching -/+ b correction.
 *
 * NOTE(review): only a single +/-1 correction step is applied, so the result
 * relies on the estimate being off by at most one - confirm against the
 * reference implementation and the input ranges callers use.
 */
inline uint2 fast_div_v2(ulong a, uint b)
{
    const uint recip = get_reciprocal(b);
    const uint2 a_words = as_uint2(a);

    // The quotient estimate is the .s1 word of: mul_hi(a.s0, recip) + recip * a.s1 + a
    const ulong scaled = mul_hi(a_words.s0, recip) + ((ulong)(recip) * a_words.s1) + a;
    const uint q = as_uint2(scaled).s1;

    // Signed remainder candidate for the estimated quotient.
    long rem = a - ((ulong)(q) * b);
    int *const rem_words = (int*)&rem;

    // Presumably compensates for the 64-bit sum above wrapping around (its .s1 word
    // ending up below a.s1) - verify against the reference implementation.
    if (q < a_words.s1) rem_words[1] -= b;

    // All-ones (-1) when the remainder candidate is negative, i.e. q is too large.
    const int too_big = rem_words[1] >> 31;

    // All-ones (-1) when (b - 1) - rem is negative, i.e. rem >= b and q is too small.
    const long slack = as_uint(b - 1) - rem;
    const int too_small = as_int2(slack).s1 >> 31;

    // Apply the +/-1 fix to the quotient and the matching -/+ b fix to the remainder.
    const uint q_fixed = q + too_big - too_small;
    const uint r_fixed = as_uint2(rem).s0 + (as_uint(too_big) & b) - (as_uint(too_small) & b);
    return (uint2)(q_fixed, r_fixed);
}
|
|
|
|
/*
 * Computes a 32-bit integer square-root style result from the 64-bit input
 * `n1`: a float estimate (native_sqrt / native_rsqrt) is refined with one
 * integer correction term and then adjusted by at most one in each direction.
 *
 * NOTE(review): the exact quantity being approximated is not stated in this
 * file (the exponent bias 64 + 127 suggests the root of 2^64 plus the input)
 * - confirm against the reference implementation.
 */
inline uint fast_sqrt_v2(const ulong n1)
{
    // Float seed: the top 23 bits of n1's .s1 word become the mantissa, the exponent field is 64 + 127.
    const uint seed_bits = (as_uint2(n1).s1 >> 9) + ((64U + 127U) << 23);
    const float seed = as_float(seed_bits);

    const float inv_root = native_rsqrt(seed);
    const float root = native_sqrt(seed);

    // Adding 32 to the exponent field is the same as multiplying by 4294967296.0f
    const float inv_root_scaled = as_float(as_uint(inv_root) + (32U << 23));

    // Root estimate as an integer: the float's bit pattern with 158 removed from its exponent field.
    const uint x0 = as_uint(root) - (158U << 23);

    // 64-bit square of x0, assembled from the low (mul24) and high (mul_hi) 32-bit words.
    // NOTE(review): mul24 is only exact while x0 fits in 24 bits - confirm for this input range.
    const ulong x0_sq = as_ulong((uint2)(mul24(x0, x0), mul_hi(x0, x0)));

    // Difference between the input and the shifted square, turned into a correction via the reciprocal root.
    const long delta0 = n1 - (x0_sq << 18);
    const float delta = convert_float_rte(as_int2(delta0).s1) * inv_root_scaled;

    uint result = (x0 << 10) + convert_int_rte(delta);
    const uint halved = result >> 1;
    const uint odd = result & 1;

    // Exact integer check value for the candidate result; its sign drives the final adjustment.
    const ulong x2 = (ulong)(halved) * (halved + odd) + ((ulong)(result) << 32) - n1;
    if ((long)(x2 + as_int(odd - 1)) >= 0) --result;
    if ((long)(x2 + 0x100000000UL + halved) < 0) ++result;

    return result;
}
|
|
|