And for anyone interested, here's what I've come up with for fullscreen antialiasing: it renders into a buffer 1.5*1.5 times the size of the display and shrinks it down. That is far fewer pixels to fill than 2*2 AA (2.25 versus 4 times the display's pixel count, i.e. just over half), but still an improvement over not using anything.
/*
 * Blend one output pixel of the 3:2 shrink from four source pixels using the
 * weights corner*4 + edge_a*2 + edge_b*2 + centre*1 (total weight 9).
 *
 * The division by 9 is approximated by *28 >> 8 (28/256), so a fully
 * saturated channel comes out as 251 rather than 255.  Pixels are treated as
 * 0xAARRGGBB words; the top byte of the result is always 0.
 *
 * All arithmetic is done on unsigned values: shifting a signed pixel whose
 * top bit is set would be undefined behaviour.
 */
static unsigned int shrink_3_to_2_blend(unsigned int corner, unsigned int edge_a,
                                        unsigned int edge_b, unsigned int centre)
{
    /* Red and blue are summed together in one word.  Each weighted sum is at
       most 9*255 = 2295 (12 bits) and at most 64260 after *28, so neither
       channel ever carries out of its own 16-bit lane. */
    unsigned int rb = ((corner & 0x00ff00ffu) << 2)
                    + (((edge_a & 0x00ff00ffu) + (edge_b & 0x00ff00ffu)) << 1)
                    + (centre & 0x00ff00ffu);
    /* Green sits in bits 8..15, so it gets a word of its own. */
    unsigned int g = ((corner & 0x0000ff00u) << 2)
                   + (((edge_a & 0x0000ff00u) + (edge_b & 0x0000ff00u)) << 1)
                   + (centre & 0x0000ff00u);

    /* Keep the high byte of each scaled lane, then move it back into place. */
    return (((rb * 28u) & 0xff00ff00u) | ((g * 28u) & 0x00ff0000u)) >> 8;
}

/*
 * Shrink an image by a factor of 3:2 in both directions.
 *
 * source_i and target_i are really image_struct pointers passed as int*.
 * Every 3x3 group of source pixels
 *
 *     p00 p01 p02
 *     p10 p11 p12
 *     p20 p21 p22
 *
 * produces a 2x2 group of target pixels.  Each target pixel blends the
 * nearest corner (weight 4), the two adjacent edge pixels (weight 2 each)
 * and the shared centre p11 (weight 1).
 *
 * The loop bounds come from target->wwidth and target->height; wwidth is used
 * both as the pixel count of a row and as the row stride (in ints) of each
 * image.  The source must provide 3 columns/rows for every 2 target
 * columns/rows; that is not checked here.  Source rows are stepped by the
 * source's own stride, so the source stride does not have to be exactly
 * 1.5 times the target stride.
 *
 * If the target width or height is odd, the last column/row is produced from
 * the pixels that exist; nothing is written outside the target row or past
 * the last target row, and the source pixels that would only feed those
 * missing outputs are not read.
 */
void shrink_by_3_to_2(int *source_i,int *target_i)
{
    image_struct *source = (image_struct*)source_i;
    image_struct *target = (image_struct*)target_i;
    const int *src = (int*)source->argb;
    int *dst = (int*)target->argb;
    int sw = source->wwidth;
    int tw = target->wwidth;
    long src_row = 0;   /* offset of the current group of 3 source rows */
    long dst_row = 0;   /* offset of the current group of 2 target rows */
    int rows_left;
    int cols_left;

    for (rows_left = target->height; rows_left > 0; rows_left -= 2) {
        int has_bottom = rows_left > 1;
        long sx = 0;
        long dx = 0;

        for (cols_left = tw; cols_left > 0; cols_left -= 2) {
            int has_right = cols_left > 1;
            const int *s = src + src_row + sx;
            int *d = dst + dst_row + dx;
            unsigned int p01 = (unsigned int)s[1];
            unsigned int p10 = (unsigned int)s[sw];
            unsigned int p11 = (unsigned int)s[sw + 1];
            unsigned int p12 = 0;
            unsigned int p21 = 0;

            d[0] = (int)shrink_3_to_2_blend((unsigned int)s[0], p01, p10, p11);
            if (has_right) {
                p12 = (unsigned int)s[sw + 2];
                d[1] = (int)shrink_3_to_2_blend((unsigned int)s[2], p01, p12, p11);
            }
            if (has_bottom) {
                p21 = (unsigned int)s[2 * sw + 1];
                d[tw] = (int)shrink_3_to_2_blend((unsigned int)s[2 * sw], p21, p10, p11);
                if (has_right) {
                    d[tw + 1] = (int)shrink_3_to_2_blend((unsigned int)s[2 * sw + 2],
                                                         p21, p12, p11);
                }
            }

            sx += 3;
            dx += 2;
        }

        /* Step from the start of this row group, not from where the column
           loop stopped, so the result does not depend on the two strides
           being in an exact 3:2 ratio. */
        src_row += 3L * sw;
        dst_row += 2L * tw;
    }
}