Author Topic: Floating Point Hacks  (Read 1395 times)

0 Members and 1 Guest are viewing this topic.

Offline rain_storm

  • Here comes the Rain
  • DBF Aficionado
  • ******
  • Posts: 3088
  • Karma: 182
  • Rain never hurt nobody
    • View Profile
    • org_100h
Floating Point Hacks
« on: October 05, 2014 »
Here are some nifty bithacks for the IEEE floating point format


The infamous Quakes fast inverse square root...
Code: [Select]
float invSqrt(float x)
{
        float xhalf = 0.5f*x;
        union
        {
          float x;
            int i;
        } u;
        u.x = x;
        u.i = 0x5f3759df - (u.i >> 1);
        /* The next line can be repeated any number of times to increase accuracy */
        u.x = u.x * (1.5f - xhalf * u.x * u.x);
        return u.x;
}

// Quakes version
float Q_rsqrt( float number )
{
int i;
float x2, y;
const float threehalfs = 1.5f;

x2 = number * 0.5f;
y = number;
i = *( int* )&y;                         // evil floating point bit level hacking
i = 0x5F3759DF - ( i >> 1 );             // what the f***?

y = *( float* )&i;
y = y * ( threehalfs - ( x2 * y * y ) ); // 1st iteration
// y = y * ( threehalfs - ( x2 * y * y ) ); // 2nd iteration, this can be removed
return y;
}

// Lomonts version
float InvSqrt( float x )
{
float xhalf = 0.5f*x;
int i = *( int* )&x;         // get bits for floating value
i = 0x5F375A86 - ( i >> 1 ); // gives initial guess y0
x = *( float* )&i;           // convert bits back to float
x = x*(1.5f - xhalf*x*x );   // Newton step, repeating increases accuracy
return x;
}
 
rsqrt:
_asm {
rsqrtps xmm0, xmmword ptr [ rcx ] ;
movaps  xmmword ptr [ rcx ], xmm0 ;
ret                               ;
}

sqrthw:
_asm {
rsqrtps xmm0, xmmword ptr [ rcx ] ;
rcpps   xmm0, xmm0                ;
movaps  xmmword ptr [ rcx ], xmm0 ;
ret                               ;
}


Approximate square root (software solution is faster than hardware)
Code: [Select]
float fastsqrt( float n )
{
int i = *( int* )&n;
i -= 1 << 23;
i >>= 1;
i += 1 << 29;
return *( float* )&i;
}


Newton raphson
Code: [Select]
float sqrtNewRapf( float x )
{
float y = sqrtf( x ); // Get approx
// y = ( y*y + x ) / ( 2*y ); // Newton-Raphson as many times as needed
return ( y*y + x ) / ( 2*y );
}

OneAsIntShr1 dd 4 dup ( 0x1FC00000 ) ; OneAsInt >> 1
fastsqrt:
_asm {
movdqa xmm0, xmmword ptr [ rcx ]          ;
psrld  xmm0, 1                            ;
paddd  xmm0, xmmword ptr [ OneAsIntShr1 ] ;
movdqa xmmword ptr [ rcx ], xmm0          ;
ret                                       ;
}

OneAsIntShr1 dd 4 dup ( 0x1FC00000 ) ; OneAsInt >> 1
fastsqrt:
_asm {
movdqa xmm0, xmmword ptr [ rcx ]          ;
psrld  xmm0, 1                            ;
paddd  xmm0, xmmword ptr [ OneAsIntShr1 ] ;

; Newton Raphson
movaps xmm1, xmm0                         ; Place y ( our guess ) into xmm1 for doubling
addps  xmm1, xmm1                         ; double y
mulps  xmm0, xmm0                         ; square y
addps  xmm0, xmmword ptr [ rcx ]          ; add original x
rcpps  xmm1, xmm1                         ; Divide by our 2y
mulps  xmm0, xmm1                         ;

movaps xmmword ptr [ rcx ], xmm0          ; Store result
ret                                       ;
}


Obviously this one simply flips the sign bit
Code: [Select]
_inline float neg( float f )
{
return AsFloat( AsInt( f ) ^ 0x80000000 );
}

fneg:
_asm {
pcmpeqd xmm1, xmm1                ; Get 0x80000000, -0.0 int xmm1
pslld   xmm1, 31                  ;
; loop here down as necessary     ;
movdqa  xmm0, xmmword ptr [ rcx ] ;
pxor    xmm0, xmm1                ;
movdqa  xmmword ptr [ rcx ], xmm0 ;
ret                               ;
}


Log2 can be done in a similar fashion
Code: [Select]
const unsigned int OneAsInt = 0x3F800000;  // 1.0f as int
const float ScaleUp = float( 0x00800000 ); // 8388608.0, 2^23
const float ScaleDown = 1.0f / ScaleUp;

float Log2( float x )
{
return float ( AsInt( x )-OneAsInt )*ScaleDown;
}


OneAsInt  dd 4 dup ( 0x3F800000 ) ; 1.0 bit pattern as int
ScaleDown dd 4 dup ( 0x34000000 ) ; 4 copies of 1.0 / ScaleUp
Log2f:
asm {
movdqa   xmm0, xmmword ptr [ rcx ]       ;
psubd    xmm0, xmmword ptr [ OneAsInt ]  ;
cvtdq2ps xmm0, xmm0                      ; cast to singles
mulps    xmm0, xmmword ptr [ ScaleDown ] ;
movdqa   xmmword ptr [ rcx ], xmm0       ;
ret                                      ;
}


Exponent2 as well
Code: [Select]
float Exp2( float x )
{
return AsFloat( int( x*ScaleUp ) + OneAsInt );
}

OneAsInt  dd 4 dup ( 0x3F800000 ) ; 1.0 bit pattern as int

Pow2f:
_asm {
movdqa   xmm0, xmmword ptr [ rcx ]      ;
movaps   xmm1, xmmword ptr [ rdx ]      ;
psubd    xmm0, xmmword ptr [ OneAsInt ] ;
cvtdq2ps xmm0, xmm0                     ;
mulps    xmm0, xmm1                     ;
cvtps2dq xmm0, xmm0                     ;
paddd    xmm0, xmmword ptr [ OneAsInt ] ;
movaps   xmmword ptr [ rcx ], xmm0      ;
ret                                     ;
}


Faster fmod1
Code: [Select]
#include <math.h>
#include <time.h>
#include <stdio.h>

static volatile float f2 = 0.0f;

float fmod1_fast( const float f )
{
    const int iBits = *( reinterpret_cast< const int* >( &f ) );

    // extract exponent
    const int iExponent = ( ( iBits & 0x7F800000 ) - 0x3F800000 ) >> 23;

    if( iExponent > 23 )
    {
        return 0.0f;
    }

    if( iExponent < 0 )
    {
        return f;
    }

    const int iSignificand = iBits & 0x007FFFFF;

    if( iSignificand == 0 )
    {
        return 0.0f;
    }

    // find the most significant bit of the significand - this can be done by binary search

    /*
    0111 1111 1111 1111 1111 1111

    0111 1111 1111 0000 0000 0000
    7    F    F    0    0    0
    0111 1100 0000 1111 1100 0000
    7    C    0    F    C    0
    0110 0011 1000 1110 0011 1000
    A    3    8    E    3    8
    0001 0010 0100 1001 0010 0100
    1    2    4    9    2    4

    0100 1001 0010 0100 1001 0010
    4    9    2    4    9    2
    */

    int iMSB = ( iSignificand & 0x007FF000 ) ? 11 : 0;
    iMSB += ( iSignificand & 0x007C0FC0 ) ? 6 : 0;
    iMSB += ( iSignificand & 0x00A38E38 ) ? 3 : 0;
    iMSB += ( iSignificand & 0x00124924 ) ? 2 : 0;
    iMSB += ( iSignificand & 0x00492492 ) ? 1 : 0;

    iMSB = 23 - iMSB;

    const int iNewBits = ( ( iSignificand << ( iMSB + iExponent ) ) & 0x007FFFFF ) | ( 0x3F800000 - ( ( ( iMSB ) - iExponent ) << 23 ) );
    const float fPreMod = *reinterpret_cast< const float* >( &iNewBits );
    const float fMod = ( ( iBits & 0x80000000 ) ? ( 1.0f - fPreMod ) : fPreMod ) - static_cast< float > ( 1 << ( iExponent ) );
    const int iNewNewBits = *reinterpret_cast< const int* >( &fMod );
    const int iNewNewNewBits = iNewNewBits - ( ( iExponent << 23 ) & 0x3F800000 );

    const float fMaybeSignFlippedMod =  *reinterpret_cast< const float* >( &iNewNewNewBits );

    return ( fMaybeSignFlippedMod > 0.0f ) ? fMaybeSignFlippedMod : ( 1.0f - -fMaybeSignFlippedMod );
}

Challenge Trophies Won: