Intel官方网站有一个ablend_565的快速汇编算法,理论上是把一块32bit RGBA渲染到16bit的buffer上。我的机器是PIII800,函数在system memory中进行,640*480的256级alpha blending,达到100fps,我想可以满足绝大部分的要求了。在这里,我提供了这个算法的应用,希望可以对大家有所帮助。
ablend_565函数,源代码可以直接编译使用,无需其他库函数,感谢intel提供这么好的东西。
首先,我提供一些本人编写的把32bit tga文件读入pRGBABuffer的函数
文件尺寸保存在 width,height
//-----------------------------------------------------------------------
// Name: LoadTgaFile( TCHAR* strPathname, DWORD** pRGBABuffer, long* width, long* height )
// Desc: 读取32bit tga文件到DWORD缓冲里,返回其尺寸
// Time: 2002.06.22 00:36
// Author: RealRender
// Para:
// Return:
// Note: 这段代码来自directx 7.0 sample中的d3dtextr.cpp,我把他提取了出来
// 方便使用
//-----------------------------------------------------------------------
BOOL LoadTgaFile( TCHAR* strPathname, DWORD** pRGBABuffer, long* width, long* height )
{
FILE* file = fopen( strPathname, "rb" );
if( NULL == file )
return false;
struct TargaHeader
{
BYTE IDLength;
BYTE ColormapType;
BYTE ImageType;
BYTE ColormapSpecification[5];
WORD XOrigin;
WORD YOrigin;
WORD ImageWidth;
WORD ImageHeight;
BYTE PixelDepth;
BYTE ImageDescriptor;
} tga;
fread( &tga, sizeof(TargaHeader), 1, file );
// Only true color, non-mapped images are supported
if( ( 0 != tga.ColormapType ) ||
( tga.ImageType != 10 && tga.ImageType != 2 ) )
{
fclose( file );
return false;
}
// Skip the ID field. The first byte of the header is the length of this field
if( tga.IDLength )
fseek( file, tga.IDLength, SEEK_CUR );
DWORD m_dwWidth = tga.ImageWidth;
DWORD m_dwHeight = tga.ImageHeight;
DWORD m_dwBPP = tga.PixelDepth;
DWORD *m_pRGBAData = new DWORD[m_dwWidth*m_dwHeight];
if( m_pRGBAData == NULL )
{
fclose(file);
return false;
}
for( DWORD y=0; y
{
DWORD dwOffset = y*m_dwWidth;
if( 0 == ( tga.ImageDescriptor & 0x0010 ) )
dwOffset = (m_dwHeight-y-1)*m_dwWidth;
for( DWORD x=0; x
{
if( tga.ImageType == 10 )
{
BYTE PacketInfo = getc( file );
WORD PacketType = 0x80 & PacketInfo;
WORD PixelCount = ( 0x007f & PacketInfo ) + 1;
if( PacketType )
{
DWORD b = getc( file );
DWORD g = getc( file );
DWORD r = getc( file );
DWORD a = 0xff;
if( m_dwBPP == 32 )
a = getc( file );
while( PixelCount-- )
{
m_pRGBAData[dwOffset+x] = (r
x++;
}
}
else
{
while( PixelCount-- )
{
BYTE b = getc( file );
BYTE g = getc( file );
BYTE r = getc( file );
BYTE a = 0xff;
if( m_dwBPP == 32 )
a = getc( file );
m_pRGBAData[dwOffset+x] = (r
x++;
}
}
}
else
{
BYTE b = getc( file );
BYTE g = getc( file );
BYTE r = getc( file );
BYTE a = 0xff;
if( m_dwBPP == 32 )
a = getc( file );
m_pRGBAData[dwOffset+x] = (r
x++;
}
}
}
fclose( file );
// Check for alpha content
for( DWORD i=0; i
{
if( m_pRGBAData[i] & 0x000000ff != 0xff )
{
//m_bHasAlpha = TRUE;
break;
}
}
*pRGBABuffer = m_pRGBAData;
*width = m_dwWidth;
*height = m_dwHeight;
return true;
}
把32bit buffer分割为rgb和alpha的代码。
注意,分割后的pBitmap一定要是8字节对齐,这是优化的一个重要条件,所以,我的算法中:
BYTE* p = new BYTE[lSize*2+8];
BYTE* pOrig = p;
p += (DWORD)p%8;
WORD* color = (WORD*)p;
这是不规范的写法,把指针强行调整为8字节对齐。实际使用的时候要记住,需要释放的原始指针不是p,而是pOrig。在这里,我没有释放分配的内存,请谅解。
//-----------------------------------------------------------------------
// Name: SplitRGBA( DWORD* pRGBABuffer, LPBYTE* pAlpha, LPWORD* pBitmap, long lWidth, long lHeight )
// Desc: Splits a 32-bit pixel buffer into an 8-bit alpha plane and a
//       16-bit colour plane produced by RGBTo16().
// Time: 2002.06.22 00:36
// Author: RealRender
// Para: pRGBABuffer      - lWidth*lHeight pixels, each packed as
//                          (r<<24)|(g<<16)|(b<<8)|a
//       pAlpha           - receives the alpha plane, one byte per pixel
//                          (allocated with new[])
//       pBitmap          - receives the colour plane, one WORD per pixel,
//                          starting on an 8-byte boundary
//       lWidth, lHeight  - image size in pixels
// Return: none
// Note: The colour plane is carved out of a larger new[] allocation and
//       *pBitmap may point a few bytes into it. The allocation's own
//       pointer is not kept, so *pBitmap must not be passed to delete[].
//-----------------------------------------------------------------------
void SplitRGBA( DWORD* pRGBABuffer, LPBYTE* pAlpha, LPWORD* pBitmap, long lWidth, long lHeight )
{
    long lSize = lWidth*lHeight;

    BYTE* alpha = new BYTE[lSize];

    // Over-allocate by 8 bytes so the start can be moved forward to the
    // next 8-byte boundary without losing room for lSize WORDs.
    BYTE* p = new BYTE[lSize*2+8];
    size_t misalign = (size_t)p % 8;
    if( misalign )
        p += 8 - misalign;
    WORD* color = (WORD*)p;

    for( long i = 0; i<lSize; i++ )
    {
        DWORD dwPixel = pRGBABuffer[i];
        DWORD r = (dwPixel>>24) & 0x000000ff;
        DWORD g = (dwPixel>>16) & 0x000000ff;
        DWORD b = (dwPixel>> 8) & 0x000000ff;
        DWORD a = (dwPixel>> 0) & 0x000000ff;

        alpha[i] = (BYTE)a;
        // Reduce the 8-bit channels to a 16-bit pixel.
        color[i] = RGBTo16( r, g, b );
    }

    *pAlpha  = alpha;
    *pBitmap = color;
}
//
这个是intel官方提供的函数。函数的描述,用我的话来说,就是把一个带有256级alpha通道的565颜色数据绘制到16位目标页面。
函数说明:
unsigned char *lpAlpha, // 256 级alpha通道
unsigned int iAlpPitch, // alpha通道的pitch
unsigned char *lpSrc, // 原色彩缓冲
unsigned int iSrcX, //
unsigned int iSrcY, // 原色彩位置
unsigned int iSrcPitch, // 原色彩pitch
unsigned char *lpDst, // 目标缓冲
unsigned int iDstX,
unsigned int iDstY, // 目标位置
unsigned int iDstW,
unsigned int iDstH, // 目标缓冲的尺寸
unsigned int iDstPitch // 目标缓冲的pitch
void ablend_565(unsigned char *lpAlpha,unsigned int iAlpPitch,
unsigned char *lpSrc,unsigned int iSrcX, unsigned int iSrcY,
unsigned int iSrcPitch, unsigned char *lpDst,
unsigned int iDstX, unsigned int iDstY,
unsigned int iDstW, unsigned int iDstH,
unsigned int iDstPitch)
{
//Mask for isolating the red,green, and blue components
static __int64 MASKB=0x001F001F001F001F;
static __int64 MASKG=0x07E007E007E007E0;
static __int64 MASKSHIFTG=0x03F003F003F003F0;
static __int64 MASKR=0xF800F800F800F800;
//constants used by the integer alpha blending equation
static __int64 SIXTEEN=0x0010001000100010;
static __int64 FIVETWELVE=0x0200020002000200;
static __int64 SIXONES=0x003F003F003F003F;
unsigned char *lpLinearDstBp=(iDstX
unsigned char *lpLinearSrcBp=(iSrcX
unsigned char *lpLinearAlpBp=iSrcX+(iSrcY*iAlpPitch)+lpAlpha; //base pointer for linear alpha
_asm{
mov esi,lpLinearSrcBp; //src
mov edi,lpLinearDstBp; //dst
mov eax,lpLinearAlpBp; //alpha
mov ecx,iDstH; //ecx=number of lines to copy
mov ebx,iDstW; //ebx=span width to copy
test esi,6; //check if source address is qword aligned
//since addr coming in is always word aligned(16bit)
jnz done; //if not qword a