I made one small change to the blur code, which knocked a lot of the processing time off it. I don't know why I didn't do that in the first place:
' Demo: blur "testimage.png" with a 9x9 mask, print how long the blur took in
' milliseconds, and show the original and the blurred copy side by side.
Graphics 1024,768
' test image = 256x256 pixels
' first version = 4956ms
' second version = 108ms
Cls
Local pixm:TPixmap = LoadPixmap("testimage.png")
' LoadPixmap returns Null when the file is missing or cannot be decoded; stop
' with a clear message instead of failing later inside gaussianBlur.
If pixm = Null Then RuntimeError "Unable to load testimage.png"
Local time:Int = MilliSecs()
Local pixm2:TPixmap = gaussianBlur(pixm,9,9)
Print MilliSecs()-time
DrawPixmap(pixm,0,0)
' Draw the blurred copy immediately to the right of the original, whatever
' width the test image has (256 for the image used in the timings above).
DrawPixmap(pixm2,PixmapWidth(pixm),0)
Flip
WaitKey()
End
' pixmap gaussian blur function
' based on blitzbasic code by Elias_T
'
' Returns a new pixmap holding a blurred copy of pMap; pMap itself is not modified.
'
' pMap       - source pixmap. Null yields Null.
' maskWidth  - width of the convolution mask in pixels.
' maskHeight - height of the convolution mask in pixels.
'
' maskWidth & maskHeight should always be uneven numbers [3,5,7,9] so that the
' mask has a centre pixel. Values below 1 are treated as 1 (a 1x1 mask is a plain copy).
'
' Each mask weight is Exp(-0.35 * d*d), where d is the distance from the mask
' centre, normalised so that all weights sum to 1. Pixels that fall outside the
' image contribute 0, so the result darkens towards the image border.
' The alpha value of every pixel is carried over unchanged from the source.
Function gaussianBlur:TPixmap(pMap:TPixmap,maskWidth:Int,maskHeight:Int)
	If pMap = Null Then Return Null

	' An empty mask has no weights to normalise (1.0 / 0.0); fall back to 1x1.
	maskWidth = Max(maskWidth,1)
	maskHeight = Max(maskHeight,1)

	Local tmp:TPixmap = CopyPixmap(pMap)
	Local width:Int = pMap.width
	Local height:Int = pMap.height
	Local x:Int,y:Int,ym:Int,xm:Int,yy:Int,xx:Int
	Local rgb:Int

	' Unpack the source once into float colour channels (0 = red, 1 = green, 2 = blue)
	' so the convolution below does not call ReadPixel for every mask tap.
	Local texel:Float[width,height,3]
	For x = 0 To width-1
		For y = 0 To height-1
			rgb = ReadPixel(pMap,x,y)
			texel[x,y,0] = (rgb Shr 16) & 255
			texel[x,y,1] = (rgb Shr 8) & 255
			texel[x,y,2] = rgb & 255
		Next
	Next

	' The mask holds maskWidth*maskHeight weights. It must be sized from the mask,
	' not from the image, otherwise an image smaller than the mask overruns it.
	Local maskData:Float[maskWidth*maskHeight]
	Local centreX:Float = (maskWidth-1) / 2.0
	Local centreY:Float = (maskHeight-1) / 2.0
	Local cy:Float,cx:Float,rt:Float
	Local mult:Float = 0.0

	' First pass: sum of the raw weights, used to normalise the mask.
	For ym = 0 To maskHeight-1
		For xm = 0 To maskWidth-1
			cx = xm - centreX
			cy = ym - centreY
			rt = cx*cx + cy*cy
			mult :+ Exp(-0.35 * rt)
		Next
	Next
	mult = 1.0 / mult

	' Second pass: store the normalised weights row by row.
	For ym = 0 To maskHeight-1
		For xm = 0 To maskWidth-1
			cx = xm - centreX
			cy = ym - centreY
			rt = cx*cx + cy*cy
			maskData[ym * maskWidth + xm] = mult * Exp(-0.35 * rt)
		Next
	Next

	' Offset from a mask cell to the source pixel it samples.
	Local mhhalf:Int = maskHeight / 2
	Local mwhalf:Int = maskWidth / 2
	Local sx:Int,sy:Int
	Local weight:Float
	Local rr:Float,gg:Float,bb:Float
	Local alpha:Int

	For ym = 0 To height-1
		For xm = 0 To width-1
			rr = 0.0
			gg = 0.0
			bb = 0.0
			For yy = 0 To maskHeight-1
				sy = ym + yy - mhhalf
				' Rows outside the image contribute nothing.
				If sy < 0 Or sy > height-1 Then Continue
				For xx = 0 To maskWidth-1
					sx = xm + xx - mwhalf
					' Columns outside the image contribute nothing.
					If sx < 0 Or sx > width-1 Then Continue
					weight = maskData[xx + yy * maskWidth]
					rr :+ texel[sx,sy,0] * weight
					gg :+ texel[sx,sy,1] * weight
					bb :+ texel[sx,sy,2] * weight
				Next
			Next
			' tmp is still an untouched copy of the source at this pixel, so its
			' alpha byte is the original one; keep it instead of writing alpha 0.
			alpha = ReadPixel(tmp,xm,ym) & $FF000000
			WritePixel(tmp,xm,ym,alpha | (Int(rr) Shl 16) | (Int(gg) Shl 8) | Int(bb))
		Next
	Next
	Return tmp
End Function
The difference is that I now calculate the half-mask value (the division by 2.0) once before the loop instead of each time within the loop. It went from almost 5000ms to about 110ms, so that's a big saving for such a small change.