### Author Topic: Floating Point Hacks  (Read 1429 times)

0 Members and 1 Guest are viewing this topic.

#### rain_storm

• Here comes the Rain
• Posts: 3088
• Karma: 182
• Rain never hurt nobody
##### Floating Point Hacks
« on: October 05, 2014 »
Here are some nifty bithacks for the IEEE floating point format

The infamous Quakes fast inverse square root...
Code: [Select]
`float invSqrt(float x){        float xhalf = 0.5f*x;        union        {          float x;            int i;        } u;        u.x = x;        u.i = 0x5f3759df - (u.i >> 1);        /* The next line can be repeated any number of times to increase accuracy */        u.x = u.x * (1.5f - xhalf * u.x * u.x);        return u.x;}// Quakes versionfloat Q_rsqrt( float number ){ int i; float x2, y; const float threehalfs = 1.5f; x2 = number * 0.5f; y = number; i = *( int* )&y;                         // evil floating point bit level hacking i = 0x5F3759DF - ( i >> 1 );             // what the f***? y = *( float* )&i; y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration// y = y * ( threehalfs - ( x2 * y * y ) ); // 2nd iteration, this can be removed return y;}// Lomonts versionfloat InvSqrt( float x ){ float xhalf = 0.5f*x; int i = *( int* )&x;         // get bits for floating value i = 0x5F375A86 - ( i >> 1 ); // gives initial guess y0 x = *( float* )&i;           // convert bits back to float x = x*(1.5f - xhalf*x*x );   // Newton step, repeating increases accuracy return x;} rsqrt:_asm { rsqrtps xmm0, xmmword ptr [ rcx ] ; movaps  xmmword ptr [ rcx ], xmm0 ; ret                               ;}sqrthw:_asm { rsqrtps xmm0, xmmword ptr [ rcx ] ; rcpps   xmm0, xmm0                ; movaps  xmmword ptr [ rcx ], xmm0 ; ret                               ;}`

Approximate square root (software solution is faster than hardware)
Code: [Select]
`float fastsqrt( float n ){ int i = *( int* )&n; i -= 1 << 23; i >>= 1; i += 1 << 29; return *( float* )&i;}`

Newton raphson
Code: [Select]
`float sqrtNewRapf( float x ){ float y = sqrtf( x ); // Get approx// y = ( y*y + x ) / ( 2*y ); // Newton-Raphson as many times as needed return ( y*y + x ) / ( 2*y );}OneAsIntShr1 dd 4 dup ( 0x1FC00000 ) ; OneAsInt >> 1fastsqrt:_asm { movdqa xmm0, xmmword ptr [ rcx ]          ; psrld  xmm0, 1                            ; paddd  xmm0, xmmword ptr [ OneAsIntShr1 ] ; movdqa xmmword ptr [ rcx ], xmm0          ; ret                                       ;}OneAsIntShr1 dd 4 dup ( 0x1FC00000 ) ; OneAsInt >> 1fastsqrt:_asm { movdqa xmm0, xmmword ptr [ rcx ]          ; psrld  xmm0, 1                            ; paddd  xmm0, xmmword ptr [ OneAsIntShr1 ] ; ; Newton Raphson movaps xmm1, xmm0                         ; Place y ( our guess ) into xmm1 for doubling addps  xmm1, xmm1                         ; double y mulps  xmm0, xmm0                         ; square y addps  xmm0, xmmword ptr [ rcx ]          ; add original x rcpps  xmm1, xmm1                         ; Divide by our 2y mulps  xmm0, xmm1                         ; movaps xmmword ptr [ rcx ], xmm0          ; Store result ret                                       ;}`

Obviously this one simply flips the sign bit
Code: [Select]
`_inline float neg( float f ){ return AsFloat( AsInt( f ) ^ 0x80000000 );}fneg:_asm { pcmpeqd xmm1, xmm1                ; Get 0x80000000, -0.0 int xmm1 pslld   xmm1, 31                  ; ; loop here down as necessary     ; movdqa  xmm0, xmmword ptr [ rcx ] ; pxor    xmm0, xmm1                ; movdqa  xmmword ptr [ rcx ], xmm0 ; ret                               ;}`

Log2 can be done in a similar fashion
Code: [Select]
`const unsigned int OneAsInt = 0x3F800000;  // 1.0f as intconst float ScaleUp = float( 0x00800000 ); // 8388608.0, 2^23const float ScaleDown = 1.0f / ScaleUp;float Log2( float x ){ return float ( AsInt( x )-OneAsInt )*ScaleDown;}OneAsInt  dd 4 dup ( 0x3F800000 ) ; 1.0 bit pattern as intScaleDown dd 4 dup ( 0x34000000 ) ; 4 copies of 1.0 / ScaleUpLog2f:asm { movdqa   xmm0, xmmword ptr [ rcx ]       ; psubd    xmm0, xmmword ptr [ OneAsInt ]  ; cvtdq2ps xmm0, xmm0                      ; cast to singles mulps    xmm0, xmmword ptr [ ScaleDown ] ; movdqa   xmmword ptr [ rcx ], xmm0       ; ret                                      ;}`

Exponent2 as well
Code: [Select]
`float Exp2( float x ){ return AsFloat( int( x*ScaleUp ) + OneAsInt );}OneAsInt  dd 4 dup ( 0x3F800000 ) ; 1.0 bit pattern as intPow2f:_asm { movdqa   xmm0, xmmword ptr [ rcx ]      ; movaps   xmm1, xmmword ptr [ rdx ]      ; psubd    xmm0, xmmword ptr [ OneAsInt ] ; cvtdq2ps xmm0, xmm0                     ; mulps    xmm0, xmm1                     ; cvtps2dq xmm0, xmm0                     ; paddd    xmm0, xmmword ptr [ OneAsInt ] ; movaps   xmmword ptr [ rcx ], xmm0      ; ret                                     ;}`

Faster fmod1
Code: [Select]
`#include <math.h>#include <time.h>#include <stdio.h>static volatile float f2 = 0.0f;float fmod1_fast( const float f ){    const int iBits = *( reinterpret_cast< const int* >( &f ) );    // extract exponent    const int iExponent = ( ( iBits & 0x7F800000 ) - 0x3F800000 ) >> 23;    if( iExponent > 23 )    {        return 0.0f;    }    if( iExponent < 0 )    {        return f;    }    const int iSignificand = iBits & 0x007FFFFF;    if( iSignificand == 0 )    {        return 0.0f;    }    // find the most significant bit of the significand - this can be done by binary search    /*    0111 1111 1111 1111 1111 1111    0111 1111 1111 0000 0000 0000    7    F    F    0    0    0    0111 1100 0000 1111 1100 0000    7    C    0    F    C    0    0110 0011 1000 1110 0011 1000    A    3    8    E    3    8    0001 0010 0100 1001 0010 0100    1    2    4    9    2    4    0100 1001 0010 0100 1001 0010    4    9    2    4    9    2    */    int iMSB = ( iSignificand & 0x007FF000 ) ? 11 : 0;    iMSB += ( iSignificand & 0x007C0FC0 ) ? 6 : 0;    iMSB += ( iSignificand & 0x00A38E38 ) ? 3 : 0;    iMSB += ( iSignificand & 0x00124924 ) ? 2 : 0;    iMSB += ( iSignificand & 0x00492492 ) ? 1 : 0;    iMSB = 23 - iMSB;    const int iNewBits = ( ( iSignificand << ( iMSB + iExponent ) ) & 0x007FFFFF ) | ( 0x3F800000 - ( ( ( iMSB ) - iExponent ) << 23 ) );    const float fPreMod = *reinterpret_cast< const float* >( &iNewBits );    const float fMod = ( ( iBits & 0x80000000 ) ? ( 1.0f - fPreMod ) : fPreMod ) - static_cast< float > ( 1 << ( iExponent ) );    const int iNewNewBits = *reinterpret_cast< const int* >( &fMod );    const int iNewNewNewBits = iNewNewBits - ( ( iExponent << 23 ) & 0x3F800000 );    const float fMaybeSignFlippedMod =  *reinterpret_cast< const float* >( &iNewNewNewBits );    return ( fMaybeSignFlippedMod > 0.0f ) ? fMaybeSignFlippedMod : ( 1.0f - -fMaybeSignFlippedMod );}`

Challenge Trophies Won: