Dark Bit Factory & Gravity

PROGRAMMING => C / C++ /C# => Topic started by: rain_storm on October 05, 2014

Title: Floating Point Hacks
Post by: rain_storm on October 05, 2014: Here are some nifty bithacks for the IEEE floating point format

The infamous Quakes fast inverse square root...
Code: [Select]
float invSqrt(float x) { float xhalf = 0.5f*x; union { float x; int i; } u; u.x = x; u.i = 0x5f3759df - (u.i >> 1); /* The next line can be repeated any number of times to increase accuracy */ u.x = u.x * (1.5f - xhalf * u.x * u.x); return u.x; } // Quakes version float Q_rsqrt( float number ) { int i; float x2, y; const float threehalfs = 1.5f; x2 = number * 0.5f; y = number; i = *( int* )&y; // evil floating point bit level hacking i = 0x5F3759DF - ( i >> 1 ); // what the f***? y = *( float* )&i; y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration // y = y * ( threehalfs - ( x2 * y * y ) ); // 2nd iteration, this can be removed return y; } // Lomonts version float InvSqrt( float x ) { float xhalf = 0.5f*x; int i = *( int* )&x; // get bits for floating value i = 0x5F375A86 - ( i >> 1 ); // gives initial guess y0 x = *( float* )&i; // convert bits back to float x = x*(1.5f - xhalf*x*x ); // Newton step, repeating increases accuracy return x; } rsqrt: _asm { rsqrtps xmm0, xmmword ptr [ rcx ] ; movaps xmmword ptr [ rcx ], xmm0 ; ret ; } sqrthw: _asm { rsqrtps xmm0, xmmword ptr [ rcx ] ; rcpps xmm0, xmm0 ; movaps xmmword ptr [ rcx ], xmm0 ; ret ; }

Approximate square root (software solution is faster than hardware)
Code: [Select]
float fastsqrt( float n ) { int i = *( int* )&n; i -= 1 << 23; i >>= 1; i += 1 << 29; return *( float* )&i; }

Newton raphson
Code: [Select]
float sqrtNewRapf( float x ) { float y = sqrtf( x ); // Get approx // y = ( y*y + x ) / ( 2*y ); // Newton-Raphson as many times as needed return ( y*y + x ) / ( 2*y ); } OneAsIntShr1 dd 4 dup ( 0x1FC00000 ) ; OneAsInt >> 1 fastsqrt: _asm { movdqa xmm0, xmmword ptr [ rcx ] ; psrld xmm0, 1 ; paddd xmm0, xmmword ptr [ OneAsIntShr1 ] ; movdqa xmmword ptr [ rcx ], xmm0 ; ret ; } OneAsIntShr1 dd 4 dup ( 0x1FC00000 ) ; OneAsInt >> 1 fastsqrt: _asm { movdqa xmm0, xmmword ptr [ rcx ] ; psrld xmm0, 1 ; paddd xmm0, xmmword ptr [ OneAsIntShr1 ] ; ; Newton Raphson movaps xmm1, xmm0 ; Place y ( our guess ) into xmm1 for doubling addps xmm1, xmm1 ; double y mulps xmm0, xmm0 ; square y addps xmm0, xmmword ptr [ rcx ] ; add original x rcpps xmm1, xmm1 ; Divide by our 2y mulps xmm0, xmm1 ; movaps xmmword ptr [ rcx ], xmm0 ; Store result ret ; }

Obviously this one simply flips the sign bit
Code: [Select]
_inline float neg( float f ) { return AsFloat( AsInt( f ) ^ 0x80000000 ); } fneg: _asm { pcmpeqd xmm1, xmm1 ; Get 0x80000000, -0.0 int xmm1 pslld xmm1, 31 ; ; loop here down as necessary ; movdqa xmm0, xmmword ptr [ rcx ] ; pxor xmm0, xmm1 ; movdqa xmmword ptr [ rcx ], xmm0 ; ret ; }

Log2 can be done in a similar fashion
Code: [Select]
const unsigned int OneAsInt = 0x3F800000; // 1.0f as int const float ScaleUp = float( 0x00800000 ); // 8388608.0, 2^23 const float ScaleDown = 1.0f / ScaleUp; float Log2( float x ) { return float ( AsInt( x )-OneAsInt )*ScaleDown; } OneAsInt dd 4 dup ( 0x3F800000 ) ; 1.0 bit pattern as int ScaleDown dd 4 dup ( 0x34000000 ) ; 4 copies of 1.0 / ScaleUp Log2f: asm { movdqa xmm0, xmmword ptr [ rcx ] ; psubd xmm0, xmmword ptr [ OneAsInt ] ; cvtdq2ps xmm0, xmm0 ; cast to singles mulps xmm0, xmmword ptr [ ScaleDown ] ; movdqa xmmword ptr [ rcx ], xmm0 ; ret ; }

Exponent2 as well
Code: [Select]
float Exp2( float x ) { return AsFloat( int( x*ScaleUp ) + OneAsInt ); } OneAsInt dd 4 dup ( 0x3F800000 ) ; 1.0 bit pattern as int Pow2f: _asm { movdqa xmm0, xmmword ptr [ rcx ] ; movaps xmm1, xmmword ptr [ rdx ] ; psubd xmm0, xmmword ptr [ OneAsInt ] ; cvtdq2ps xmm0, xmm0 ; mulps xmm0, xmm1 ; cvtps2dq xmm0, xmm0 ; paddd xmm0, xmmword ptr [ OneAsInt ] ; movaps xmmword ptr [ rcx ], xmm0 ; ret ; }

Faster fmod1
Code: [Select]
#include <math.h> #include <time.h> #include <stdio.h> static volatile float f2 = 0.0f; float fmod1_fast( const float f ) { const int iBits = *( reinterpret_cast< const int* >( &f ) ); // extract exponent const int iExponent = ( ( iBits & 0x7F800000 ) - 0x3F800000 ) >> 23; if( iExponent > 23 ) { return 0.0f; } if( iExponent < 0 ) { return f; } const int iSignificand = iBits & 0x007FFFFF; if( iSignificand == 0 ) { return 0.0f; } // find the most significant bit of the significand - this can be done by binary search /* 0111 1111 1111 1111 1111 1111 0111 1111 1111 0000 0000 0000 7 F F 0 0 0 0111 1100 0000 1111 1100 0000 7 C 0 F C 0 0110 0011 1000 1110 0011 1000 A 3 8 E 3 8 0001 0010 0100 1001 0010 0100 1 2 4 9 2 4 0100 1001 0010 0100 1001 0010 4 9 2 4 9 2 */ int iMSB = ( iSignificand & 0x007FF000 ) ? 11 : 0; iMSB += ( iSignificand & 0x007C0FC0 ) ? 6 : 0; iMSB += ( iSignificand & 0x00A38E38 ) ? 3 : 0; iMSB += ( iSignificand & 0x00124924 ) ? 2 : 0; iMSB += ( iSignificand & 0x00492492 ) ? 1 : 0; iMSB = 23 - iMSB; const int iNewBits = ( ( iSignificand << ( iMSB + iExponent ) ) & 0x007FFFFF ) | ( 0x3F800000 - ( ( ( iMSB ) - iExponent ) << 23 ) ); const float fPreMod = *reinterpret_cast< const float* >( &iNewBits ); const float fMod = ( ( iBits & 0x80000000 ) ? ( 1.0f - fPreMod ) : fPreMod ) - static_cast< float > ( 1 << ( iExponent ) ); const int iNewNewBits = *reinterpret_cast< const int* >( &fMod ); const int iNewNewNewBits = iNewNewBits - ( ( iExponent << 23 ) & 0x3F800000 ); const float fMaybeSignFlippedMod = *reinterpret_cast< const float* >( &iNewNewNewBits ); return ( fMaybeSignFlippedMod > 0.0f ) ? fMaybeSignFlippedMod : ( 1.0f - -fMaybeSignFlippedMod ); }