Dark Bit Factory & Gravity
PROGRAMMING => C / C++ /C# => Topic started by: rain_storm on October 05, 2014
-
Here are some nifty bithacks for the IEEE floating point format
The infamous Quakes fast inverse square root...
float invSqrt(float x)
{
float xhalf = 0.5f*x;
union
{
float x;
int i;
} u;
u.x = x;
u.i = 0x5f3759df - (u.i >> 1);
/* The next line can be repeated any number of times to increase accuracy */
u.x = u.x * (1.5f - xhalf * u.x * u.x);
return u.x;
}
// Quakes version
float Q_rsqrt( float number )
{
int i;
float x2, y;
const float threehalfs = 1.5f;
x2 = number * 0.5f;
y = number;
i = *( int* )&y; // evil floating point bit level hacking
i = 0x5F3759DF - ( i >> 1 ); // what the f***?
y = *( float* )&i;
y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration
// y = y * ( threehalfs - ( x2 * y * y ) ); // 2nd iteration, this can be removed
return y;
}
// Lomonts version
float InvSqrt( float x )
{
float xhalf = 0.5f*x;
int i = *( int* )&x; // get bits for floating value
i = 0x5F375A86 - ( i >> 1 ); // gives initial guess y0
x = *( float* )&i; // convert bits back to float
x = x*(1.5f - xhalf*x*x ); // Newton step, repeating increases accuracy
return x;
}
rsqrt:
_asm {
rsqrtps xmm0, xmmword ptr [ rcx ] ;
movaps xmmword ptr [ rcx ], xmm0 ;
ret ;
}
sqrthw:
_asm {
rsqrtps xmm0, xmmword ptr [ rcx ] ;
rcpps xmm0, xmm0 ;
movaps xmmword ptr [ rcx ], xmm0 ;
ret ;
}
Approximate square root (software solution is faster than hardware)
float fastsqrt( float n )
{
int i = *( int* )&n;
i -= 1 << 23;
i >>= 1;
i += 1 << 29;
return *( float* )&i;
}
Newton raphson
float sqrtNewRapf( float x )
{
float y = sqrtf( x ); // Get approx
// y = ( y*y + x ) / ( 2*y ); // Newton-Raphson as many times as needed
return ( y*y + x ) / ( 2*y );
}
OneAsIntShr1 dd 4 dup ( 0x1FC00000 ) ; OneAsInt >> 1
fastsqrt:
_asm {
movdqa xmm0, xmmword ptr [ rcx ] ;
psrld xmm0, 1 ;
paddd xmm0, xmmword ptr [ OneAsIntShr1 ] ;
movdqa xmmword ptr [ rcx ], xmm0 ;
ret ;
}
OneAsIntShr1 dd 4 dup ( 0x1FC00000 ) ; OneAsInt >> 1
fastsqrt:
_asm {
movdqa xmm0, xmmword ptr [ rcx ] ;
psrld xmm0, 1 ;
paddd xmm0, xmmword ptr [ OneAsIntShr1 ] ;
; Newton Raphson
movaps xmm1, xmm0 ; Place y ( our guess ) into xmm1 for doubling
addps xmm1, xmm1 ; double y
mulps xmm0, xmm0 ; square y
addps xmm0, xmmword ptr [ rcx ] ; add original x
rcpps xmm1, xmm1 ; Divide by our 2y
mulps xmm0, xmm1 ;
movaps xmmword ptr [ rcx ], xmm0 ; Store result
ret ;
}
Obviously this one simply flips the sign bit
_inline float neg( float f )
{
return AsFloat( AsInt( f ) ^ 0x80000000 );
}
fneg:
_asm {
pcmpeqd xmm1, xmm1 ; Get 0x80000000, -0.0 int xmm1
pslld xmm1, 31 ;
; loop here down as necessary ;
movdqa xmm0, xmmword ptr [ rcx ] ;
pxor xmm0, xmm1 ;
movdqa xmmword ptr [ rcx ], xmm0 ;
ret ;
}
Log2 can be done in a similar fashion
const unsigned int OneAsInt = 0x3F800000; // 1.0f as int
const float ScaleUp = float( 0x00800000 ); // 8388608.0, 2^23
const float ScaleDown = 1.0f / ScaleUp;
float Log2( float x )
{
return float ( AsInt( x )-OneAsInt )*ScaleDown;
}
OneAsInt dd 4 dup ( 0x3F800000 ) ; 1.0 bit pattern as int
ScaleDown dd 4 dup ( 0x34000000 ) ; 4 copies of 1.0 / ScaleUp
Log2f:
asm {
movdqa xmm0, xmmword ptr [ rcx ] ;
psubd xmm0, xmmword ptr [ OneAsInt ] ;
cvtdq2ps xmm0, xmm0 ; cast to singles
mulps xmm0, xmmword ptr [ ScaleDown ] ;
movdqa xmmword ptr [ rcx ], xmm0 ;
ret ;
}
Exponent2 as well
float Exp2( float x )
{
return AsFloat( int( x*ScaleUp ) + OneAsInt );
}
OneAsInt dd 4 dup ( 0x3F800000 ) ; 1.0 bit pattern as int
Pow2f:
_asm {
movdqa xmm0, xmmword ptr [ rcx ] ;
movaps xmm1, xmmword ptr [ rdx ] ;
psubd xmm0, xmmword ptr [ OneAsInt ] ;
cvtdq2ps xmm0, xmm0 ;
mulps xmm0, xmm1 ;
cvtps2dq xmm0, xmm0 ;
paddd xmm0, xmmword ptr [ OneAsInt ] ;
movaps xmmword ptr [ rcx ], xmm0 ;
ret ;
}
Faster fmod1
#include <math.h>
#include <time.h>
#include <stdio.h>
static volatile float f2 = 0.0f;
float fmod1_fast( const float f )
{
const int iBits = *( reinterpret_cast< const int* >( &f ) );
// extract exponent
const int iExponent = ( ( iBits & 0x7F800000 ) - 0x3F800000 ) >> 23;
if( iExponent > 23 )
{
return 0.0f;
}
if( iExponent < 0 )
{
return f;
}
const int iSignificand = iBits & 0x007FFFFF;
if( iSignificand == 0 )
{
return 0.0f;
}
// find the most significant bit of the significand - this can be done by binary search
/*
0111 1111 1111 1111 1111 1111
0111 1111 1111 0000 0000 0000
7 F F 0 0 0
0111 1100 0000 1111 1100 0000
7 C 0 F C 0
0110 0011 1000 1110 0011 1000
A 3 8 E 3 8
0001 0010 0100 1001 0010 0100
1 2 4 9 2 4
0100 1001 0010 0100 1001 0010
4 9 2 4 9 2
*/
int iMSB = ( iSignificand & 0x007FF000 ) ? 11 : 0;
iMSB += ( iSignificand & 0x007C0FC0 ) ? 6 : 0;
iMSB += ( iSignificand & 0x00A38E38 ) ? 3 : 0;
iMSB += ( iSignificand & 0x00124924 ) ? 2 : 0;
iMSB += ( iSignificand & 0x00492492 ) ? 1 : 0;
iMSB = 23 - iMSB;
const int iNewBits = ( ( iSignificand << ( iMSB + iExponent ) ) & 0x007FFFFF ) | ( 0x3F800000 - ( ( ( iMSB ) - iExponent ) << 23 ) );
const float fPreMod = *reinterpret_cast< const float* >( &iNewBits );
const float fMod = ( ( iBits & 0x80000000 ) ? ( 1.0f - fPreMod ) : fPreMod ) - static_cast< float > ( 1 << ( iExponent ) );
const int iNewNewBits = *reinterpret_cast< const int* >( &fMod );
const int iNewNewNewBits = iNewNewBits - ( ( iExponent << 23 ) & 0x3F800000 );
const float fMaybeSignFlippedMod = *reinterpret_cast< const float* >( &iNewNewNewBits );
return ( fMaybeSignFlippedMod > 0.0f ) ? fMaybeSignFlippedMod : ( 1.0f - -fMaybeSignFlippedMod );
}