1 程序由五个模块组成。
(1) lzw.h 定义了一些基本的数据结构、常量,以及变量的初始化等。
#ifndef __LZW_H__
#define __LZW_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#include <memory.h>
//------------------------------------------------------------------------------
#define LZW_BASE 0x102// The code base
#define CODE_LEN 12 // Max code length
#define TABLE_LEN 4099 // It must be prime number and bigger than 2^CODE_LEN=4096.
// Such as 5051 is also ok.
#define BUFFERSIZE 1024
//------------------------------------------------------------------------------
// Working state shared by the encoder and the decoder: the source and
// destination file handles, the three parallel tables (code / prefix / suffix,
// TABLE_LEN entries each) and the (prefix, suffix) pair currently being processed.
typedef struct
{
HANDLE h_sour; // Source file handle (stored by lzw_create, read when refilling input).
HANDLE h_dest; // Destination file handle (written by empty_buffer).
HANDLE h_suffix; // Suffix table memory handle.
HANDLE h_prefix; // Prefix table memory handle.
HANDLE h_code; // Code table memory handle.
LPWORD lp_prefix; // Locked pointer to the prefix table.
LPBYTE lp_suffix; // Locked pointer to the suffix table.
LPWORD lp_code; // Locked pointer to the code table; 0xFFFF marks an unused hash slot.
WORD code; // Next code to assign; starts at LZW_BASE.
WORD prefix; // Prefix of the current (prefix, suffix) pair.
BYTE suffix; // Suffix byte of the current (prefix, suffix) pair.
BYTE cur_code_len; // Current code length in bits, starting at 9 (dynamic code length mode).
}LZW_DATA,*PLZW_DATA;
// A BUFFERSIZE-byte file buffer combined with a 32-bit bit accumulator, used
// both for byte-wise file I/O and for packing/unpacking variable-length codes.
typedef struct
{
WORD top; // Byte count of the last transfer (bytes read by load_buffer / written by empty_buffer).
WORD index; // Next position to read from or write to in lp_buffer.
LPBYTE lp_buffer; // Locked pointer to the byte buffer.
HANDLE h_buffer; // Memory handle of the byte buffer.
BYTE by_left; // Number of bits currently pending in dw_buffer.
DWORD dw_buffer; // Bit accumulator; pending bits are kept at the most significant end.
BOOL end_flag; // When TRUE, empty_buffer also emits the trailing partial byte.
}BUFFER_DATA,*PBUFFER_DATA;
// Byte stack used by the decoder: out_code pushes the bytes of a code's string
// in reverse order and then pops them to the output buffer.
typedef struct //Stack used in decode
{
WORD index; // Number of bytes currently on the stack (also the next push position).
HANDLE h_stack; // Memory handle of the stack storage.
LPBYTE lp_stack; // Locked pointer to the stack storage.
}STACK_DATA,*PSTACK_DATA;
//------------------------------------------------------------------------------
// Allocate and lock the decoder stack (TABLE_LEN bytes) and mark it empty.
// The allocation result is not checked here.
VOID stack_create( PSTACK_DATA stack )
{
    HANDLE h_mem = GlobalAlloc( GHND, TABLE_LEN * sizeof(BYTE) );

    stack->h_stack  = h_mem;
    stack->lp_stack = GlobalLock( h_mem );
    stack->index    = 0;
}
//------------------------------------------------------------------------------
// Unlock and release the memory obtained by stack_create.
VOID stack_destory( PSTACK_DATA stack )
{
    HANDLE h_mem = stack->h_stack;

    GlobalUnlock( h_mem );
    GlobalFree( h_mem );
}
//------------------------------------------------------------------------------
// Allocate and lock a BUFFERSIZE-byte I/O buffer and reset all bookkeeping
// (positions, pending-bit accumulator, end flag). The allocation result is
// not checked here.
VOID buffer_create( PBUFFER_DATA buffer )
{
    HANDLE h_mem = GlobalAlloc( GHND, BUFFERSIZE * sizeof(BYTE) );

    buffer->h_buffer  = h_mem;
    buffer->lp_buffer = GlobalLock( h_mem );

    // Nothing loaded or written yet.
    buffer->top   = 0;
    buffer->index = 0;

    // Bit accumulator starts empty.
    buffer->by_left   = 0;
    buffer->dw_buffer = 0;

    buffer->end_flag = FALSE;
}
//------------------------------------------------------------------------------
// Unlock and release the memory obtained by buffer_create.
VOID buffer_destory( PBUFFER_DATA buffer )
{
    HANDLE h_mem = buffer->h_buffer;

    GlobalUnlock( h_mem );
    GlobalFree( h_mem );
}
//------------------------------------------------------------------------------
// Reset the coder once the code table has reached its top: every code-table
// slot is marked unused (0xFFFF), the next code goes back to LZW_BASE and the
// code length back to 9 bits. The prefix and suffix tables are not cleared.
VOID re_init_lzw( PLZW_DATA lzw )
{
    // memset fills byte-wise, so a fill byte of 0xFF leaves each WORD as 0xFFFF.
    memset( lzw->lp_code, 0xFF, TABLE_LEN * sizeof(WORD) );

    lzw->cur_code_len = 9;
    lzw->code         = LZW_BASE;
}
//------------------------------------------------------------------------------
// Set up an LZW_DATA for a new encode or decode run.
//   lzw    - state block to initialise.
//   h_sour - source file handle, stored for later reads.
//   h_dest - destination file handle, stored for later writes.
// Allocates and locks the code, prefix and suffix tables (TABLE_LEN entries
// each), marks every code slot unused (0xFFFF) and starts coding at LZW_BASE
// with 9-bit codes. Allocation results are not checked here.
VOID lzw_create(PLZW_DATA lzw, HANDLE h_sour, HANDLE h_dest)
{
    lzw->h_code   = GlobalAlloc( GHND, TABLE_LEN * sizeof(WORD) );
    lzw->h_prefix = GlobalAlloc( GHND, TABLE_LEN * sizeof(WORD) );
    lzw->h_suffix = GlobalAlloc( GHND, TABLE_LEN * sizeof(BYTE) );

    lzw->lp_code   = GlobalLock( lzw->h_code );
    lzw->lp_prefix = GlobalLock( lzw->h_prefix );
    lzw->lp_suffix = GlobalLock( lzw->h_suffix );

    lzw->code         = LZW_BASE;
    lzw->cur_code_len = 9;

    // Give the current pair a defined value so nothing downstream ever reads
    // indeterminate stack contents from the caller's LZW_DATA.
    lzw->prefix = 0;
    lzw->suffix = 0;

    lzw->h_sour = h_sour;
    lzw->h_dest = h_dest;

    // memset fills byte-wise, so a fill byte of 0xFF leaves each WORD as 0xFFFF.
    memset( lzw->lp_code, 0xFF, TABLE_LEN * sizeof(WORD) );
}
//------------------------------------------------------------------------------
// Unlock and release the three tables allocated by lzw_create.
// The file handles are left open for the caller to close.
VOID lzw_destory(PLZW_DATA lzw)
{
    // Unlock everything first, then free, in the same order as before.
    GlobalUnlock( lzw->h_code );
    GlobalUnlock( lzw->h_prefix );
    GlobalUnlock( lzw->h_suffix );

    GlobalFree( lzw->h_code );
    GlobalFree( lzw->h_prefix );
    GlobalFree( lzw->h_suffix );
}
//------------------------------------------------------------------------------
#endif
(2) fileio.h 定义了一些文件操作
#ifndef __FILEIO_H__
#define __FILEIO_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
// Open file_name for reading and writing, creating it if it does not exist
// (OPEN_ALWAYS), and allow other openers to read and write it concurrently.
// Returns the handle from CreateFileA; on failure that is
// INVALID_HANDLE_VALUE, which the caller must check.
HANDLE file_handle(CHAR* file_name)
{
    // The path is a narrow CHAR string, so call the ANSI entry point explicitly.
    // The generic CreateFile macro maps to CreateFileW in UNICODE builds and
    // would then receive a pointer of the wrong type.
    return CreateFileA( file_name,
                        GENERIC_READ | GENERIC_WRITE,
                        FILE_SHARE_READ | FILE_SHARE_WRITE,
                        NULL,
                        OPEN_ALWAYS,
                        0,
                        NULL );
}
//------------------------------------------------------------------------------
// Fill the buffer with up to BUFFERSIZE bytes from h_sour.
// Resets buffer->index to 0 and sets buffer->top to the number of bytes read.
// Returns that byte count; 0 means end of file or a failed read.
WORD load_buffer(HANDLE h_sour, PBUFFER_DATA buffer) // Load file to buffer
{
    DWORD bytes_read = 0;

    if( !ReadFile( h_sour, buffer->lp_buffer, BUFFERSIZE, &bytes_read, NULL ) )
    {
        // A failed read is reported to callers the same way as end of file.
        bytes_read = 0;
    }

    buffer->index = 0;
    buffer->top   = (WORD)bytes_read;
    return (WORD)bytes_read;
}
//------------------------------------------------------------------------------
// Write the bytes held in the buffer (buffer->index of them) to lzw->h_dest.
// When buffer->end_flag is set and bits are still pending in the accumulator,
// they are first appended as one final byte, left-aligned and zero-padded.
// Afterwards buffer->index is reset to 0 and buffer->top holds the number of
// bytes written. Returns that byte count (a failed write counts as 0 bytes).
WORD empty_buffer( PLZW_DATA lzw, PBUFFER_DATA buffer)// Output buffer to file
{
    DWORD written = 0;
    DWORD total   = 0;

    if( buffer->end_flag && buffer->by_left )
    {
        // The buffer may already be completely full at this point; flush it
        // first so the trailing byte is not stored one past the end.
        if( buffer->index >= BUFFERSIZE )
        {
            if( WriteFile( lzw->h_dest, buffer->lp_buffer, buffer->index, &written, NULL ) )
            {
                total += written;
            }
            buffer->index = 0;
        }

        // Take the by_left pending bits from the top of the accumulator and
        // place them at the top of the final byte.
        buffer->lp_buffer[ buffer->index++ ] =
            (BYTE)( ( buffer->dw_buffer >> ( 32 - buffer->by_left ) ) << ( 8 - buffer->by_left ) );

        // The pending bits are consumed; a repeated call must not emit them again.
        buffer->dw_buffer = 0;
        buffer->by_left   = 0;
    }

    written = 0;
    if( WriteFile( lzw->h_dest, buffer->lp_buffer, buffer->index, &written, NULL ) )
    {
        total += written;
    }

    buffer->index = 0;
    buffer->top   = (WORD)total;
    return (WORD)total;
}
//------------------------------------------------------------------------------
#endif
(3) hash.h 定义了压缩时所用的码表操作函数;为了快速查找使用了 hash 算法,并提供了处理 hash 冲突的函数。
#ifndef __HASH_H__
#define __HASH_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
#define DIV TABLE_LEN
#define HASHSTEP 13 // It should bigger than 0.
//------------------------------------------------------------------------------
// Compute the initial hash slot for the current (prefix, suffix) pair:
// the key (prefix << 8 | suffix) reduced modulo DIV.
WORD get_hash_index( PLZW_DATA lzw )
{
    DWORD key = ( (DWORD)lzw->prefix << 8 ) | (DWORD)lzw->suffix;

    return (WORD)( key % DIV );
}
//------------------------------------------------------------------------------
// Return the next slot to probe after a hash collision: step forward by
// HASHSTEP and wrap modulo DIV.
WORD re_hash_index( WORD hash )
{
    // The sum is narrowed to WORD before the modulo, as in the original.
    WORD stepped = (WORD)( hash + HASHSTEP );

    return (WORD)( stepped % DIV );
}
//------------------------------------------------------------------------------
// Report whether the current (prefix, suffix) pair is already in the code table.
// Probing starts at the pair's hash slot and follows re_hash_index until a
// matching entry (TRUE) or an unused slot, marked 0xFFFF, is reached (FALSE).
BOOL in_table( PLZW_DATA lzw )
{
    WORD slot = get_hash_index( lzw );

    while( lzw->lp_code[ slot ] != 0xFFFF )
    {
        if( lzw->lp_prefix[ slot ] == lzw->prefix &&
            lzw->lp_suffix[ slot ] == lzw->suffix )
        {
            return TRUE;
        }
        slot = re_hash_index( slot );
    }

    return FALSE;
}
//------------------------------------------------------------------------------
// Return the code stored for the current (prefix, suffix) pair.
// The probe loop only stops on a matching prefix/suffix entry and does not
// look at the 0xFFFF empty marker; do_encode calls it only after in_table
// has returned TRUE.
WORD get_code( PLZW_DATA lzw )
{
    WORD slot = get_hash_index( lzw );

    while( !( lzw->lp_prefix[ slot ] == lzw->prefix &&
              lzw->lp_suffix[ slot ] == lzw->suffix ) )
    {
        slot = re_hash_index( slot );
    }

    return lzw->lp_code[ slot ];
}
//------------------------------------------------------------------------------
// Store the current (prefix, suffix) pair together with lzw->code in the first
// unused slot (code == 0xFFFF) of the pair's probe sequence.
// lzw->code itself is not advanced here.
VOID insert_table( PLZW_DATA lzw )
{
    WORD slot = get_hash_index( lzw );

    // Skip over occupied slots.
    while( lzw->lp_code[ slot ] != 0xFFFF )
    {
        slot = re_hash_index( slot );
    }

    lzw->lp_prefix[ slot ] = lzw->prefix;
    lzw->lp_suffix[ slot ] = lzw->suffix;
    lzw->lp_code[ slot ]   = lzw->code;
}
//------------------------------------------------------------------------------
#endif
(4) encode.h 压缩程序主函数
#ifndef __ENCODE_H__
#define __ENCODE_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
// Append one code of lzw->cur_code_len bits to the output bit stream.
// The code is placed directly below the bits already pending in the
// accumulator; every complete byte is then moved into the byte buffer,
// which is written out through empty_buffer whenever it is full.
VOID output_code( DWORD code ,PBUFFER_DATA out, PLZW_DATA lzw)
{
    const BYTE width = lzw->cur_code_len;

    out->dw_buffer |= code << ( 32 - out->by_left - width );
    out->by_left   += width;

    for( ; out->by_left >= 8; out->by_left -= 8 )
    {
        if( out->index == BUFFERSIZE )
        {
            empty_buffer( lzw, out );
        }
        // The oldest pending byte is the top byte of the accumulator.
        out->lp_buffer[ out->index++ ] = (BYTE)( out->dw_buffer >> 24 );
        out->dw_buffer <<= 8;
    }
}
//------------------------------------------------------------------------------
// Encode the bytes remaining in the input buffer.
// Each pass looks up the current (prefix, suffix) pair:
//   - known pair: its code becomes the new prefix and nothing is output;
//   - new pair:   it is added to the table, the old prefix is output and the
//                 suffix byte becomes the new prefix.
// The next input byte then becomes the new suffix. The pair left pending when
// the buffer runs out stays in lzw for the caller.
VOID do_encode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw)
{
    while( in->index != in->top )
    {
        WORD next_prefix;

        if( in_table( lzw ) )
        {
            next_prefix = get_code( lzw );
        }
        else
        {
            insert_table( lzw );
            next_prefix = lzw->suffix;
            output_code( lzw->prefix, out, lzw );

            lzw->code++;
            if( lzw->code == (WORD)1 << lzw->cur_code_len )
            {
                // All codes of the current length are used: widen by one bit,
                // or start over once the maximum length would be exceeded.
                if( ++lzw->cur_code_len == CODE_LEN + 1 )
                {
                    re_init_lzw( lzw );
                }
            }
        }

        lzw->prefix = next_prefix;
        lzw->suffix = in->lp_buffer[ in->index++ ];
    }
}
//------------------------------------------------------------------------------
// Compress everything readable from h_sour and write the result to h_dest.
// The first two input bytes seed the (prefix, suffix) pair; the rest is run
// through do_encode buffer by buffer. At end of input the pending prefix and
// suffix are emitted as two separate codes and the output is flushed,
// including the trailing partial byte.
// Inputs shorter than two bytes are handled: an empty input produces no
// codes, a one-byte input produces a single code.
VOID encode(HANDLE h_sour,HANDLE h_dest)
{
    LZW_DATA lzw;
    BUFFER_DATA in ;
    BUFFER_DATA out;
    int seeded = 0; // How many of the two seed bytes have been read (0, 1 or 2).

    lzw_create( &lzw ,h_sour,h_dest );
    buffer_create( &in );
    buffer_create( &out );

    while( load_buffer( h_sour, &in ) )
    {
        // Take each seed byte only if the buffer really holds one, so the
        // read position can never move past the end of the loaded data.
        if( seeded == 0 && in.index != in.top )
        {
            lzw.prefix = in.lp_buffer[ in.index++ ];
            seeded = 1;
        }
        if( seeded == 1 && in.index != in.top )
        {
            lzw.suffix = in.lp_buffer[ in.index++ ];
            seeded = 2;
        }
        if( seeded == 2 )
        {
            do_encode( &in, &out, &lzw );
        }
    }

    // Emit only the parts of the pending pair that actually exist.
    if( seeded >= 1 )
    {
        output_code( lzw.prefix, &out, &lzw );
    }
    if( seeded == 2 )
    {
        output_code( lzw.suffix, &out, &lzw );
    }

    out.end_flag = TRUE;
    empty_buffer( &lzw, &out );

    lzw_destory( &lzw );
    buffer_destory( &in );
    buffer_destory( &out );
}
//------------------------------------------------------------------------------
#endif
(5) decode.h 解压函数主函数
#ifndef __DECODE_H__
#define __DECODE_H__
//------------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
//------------------------------------------------------------------------------
// Expand one code into its byte string and append it to the output buffer.
// Codes below 0x100 stand for a single byte. Any other code is followed
// through the prefix table, pushing each suffix byte on the stack, until the
// chain reaches a value that is not above 0x100; that value is pushed as the
// first byte. Popping the stack then yields the bytes in forward order. The
// output buffer is written out through empty_buffer whenever it is full.
VOID out_code( WORD code ,PBUFFER_DATA buffer,PLZW_DATA lzw,PSTACK_DATA stack)
{
    WORD cur = code;

    if( cur >= 0x100 )
    {
        do
        {
            stack->lp_stack[ stack->index++ ] = lzw->lp_suffix[ cur ];
            cur = lzw->lp_prefix[ cur ];
        } while( cur > 0x100 );
    }
    stack->lp_stack[ stack->index++ ] = (BYTE)cur;

    while( stack->index != 0 )
    {
        if( buffer->index == BUFFERSIZE )
        {
            empty_buffer( lzw, buffer );
        }
        buffer->lp_buffer[ buffer->index++ ] = stack->lp_stack[ --stack->index ];
    }
}
//------------------------------------------------------------------------------
// Decoder-side table insert: record the current (prefix, suffix) pair under
// the next free code (indexed directly, no hashing) and advance the code.
// The code length is widened when the new code reaches (1 << cur_code_len) - 1
// and wraps back to 9 bits once it would exceed CODE_LEN. When the code
// reaches 1 << CODE_LEN the coder state is reset through re_init_lzw.
VOID insert_2_table(PLZW_DATA lzw )
{
    const WORD slot = lzw->code;

    lzw->lp_code[ slot ]   = slot;
    lzw->lp_prefix[ slot ] = lzw->prefix;
    lzw->lp_suffix[ slot ] = lzw->suffix;
    lzw->code = (WORD)( slot + 1 );

    if( lzw->code == ( (WORD)1 << lzw->cur_code_len ) - 1 )
    {
        if( ++lzw->cur_code_len == CODE_LEN + 1 )
        {
            lzw->cur_code_len = 9;
        }
    }

    if( lzw->code >= 1 << CODE_LEN )
    {
        re_init_lzw( lzw );
    }
}
//------------------------------------------------------------------------------
// Read the next code of lzw->cur_code_len bits from the input bit stream.
// Bytes are shifted into the accumulator below the bits already pending until
// enough bits are available; the buffer is refilled from lzw->h_sour whenever
// all loaded bytes have been consumed.
// If the input ends in the middle of a code, the missing low bits are left as
// zero and the buffer ends up with index == top == 0, so callers that loop on
// "index != top" stop.
WORD get_next_code( PBUFFER_DATA buffer , PLZW_DATA lzw )
{
    BYTE next;
    WORD code;

    while( buffer->by_left < lzw->cur_code_len )
    {
        // Refill on "no loaded bytes left", not only on "buffer size reached":
        // the last buffer of a file is usually shorter than BUFFERSIZE and
        // bytes beyond buffer->top are stale.
        if( buffer->index >= buffer->top )
        {
            if( load_buffer( lzw->h_sour, buffer ) == 0 )
            {
                break; // Input exhausted.
            }
        }
        next = buffer->lp_buffer[ buffer->index++ ];
        buffer->dw_buffer |= (DWORD)next << ( 24 - buffer->by_left );
        buffer->by_left += 8;
    }

    code = (WORD)( buffer->dw_buffer >> ( 32 - lzw->cur_code_len ) );
    buffer->dw_buffer <<= lzw->cur_code_len;

    // Guard the bit count against wrapping below zero on truncated input.
    if( buffer->by_left >= lzw->cur_code_len )
    {
        buffer->by_left -= lzw->cur_code_len;
    }
    else
    {
        buffer->by_left = 0;
    }
    return code;
}
//------------------------------------------------------------------------------
// Decode codes until the loaded input bytes are used up.
// For every code the first byte of its string is determined and stored in
// lzw->suffix, the pair (previous code, that byte) is added to the table, the
// code's string is written out and the code becomes the new prefix.
// The first byte is found by walking the prefix chain from:
//   - the code itself                 when code < 0x100 (a plain byte),
//   - the code's table entry          when code < lzw->code (already known),
//   - the previous code (lzw->prefix) otherwise (code not yet in the table).
VOID do_decode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw, PSTACK_DATA stack)
{
    while( in->index != in->top )
    {
        WORD code = get_next_code( in, lzw );
        WORD walk;

        if( code < 0x100 )
        {
            walk = code;
        }
        else if( code < lzw->code )
        {
            walk = lzw->lp_prefix[ code ];
        }
        else
        {
            walk = lzw->prefix;
        }

        while( walk > 0x100 )
        {
            walk = lzw->lp_prefix[ walk ];
        }
        lzw->suffix = (BYTE)walk;

        // Insert before output: a code that was not yet in the table is
        // expanded from the entry just added.
        insert_2_table( lzw );
        out_code( code, out, lzw, stack );
        lzw->prefix = code;
    }
}
//------------------------------------------------------------------------------
// Decompress everything readable from h_sour and write the result to h_dest.
// The very first code is read and output directly and seeds the
// (prefix, suffix) pair; all further codes go through do_decode. Remaining
// output is flushed at the end and all working memory is released.
VOID decode( HANDLE h_sour, HANDLE h_dest )
{
    LZW_DATA lzw;
    BUFFER_DATA in ;
    BUFFER_DATA out;
    STACK_DATA stack;
    BOOL seeded = FALSE;

    lzw_create( &lzw, h_sour, h_dest );
    buffer_create( &in );
    buffer_create( &out );
    stack_create( &stack );

    while( load_buffer( h_sour, &in ) != 0 )
    {
        if( !seeded )
        {
            WORD first = get_next_code( &in, &lzw );

            lzw.prefix = first;
            lzw.suffix = (BYTE)first;
            out_code( first, &out, &lzw, &stack );
            seeded = TRUE;
        }
        do_decode( &in, &out, &lzw, &stack );
    }

    empty_buffer( &lzw, &out );

    lzw_destory( &lzw );
    buffer_destory( &in );
    buffer_destory( &out );
    stack_destory( &stack );
}
#endif
2 下面给出一个应用上面模块的简单例子
#include <stdio.h>
#include <stdlib.h>
//------------------------------------------------------------------------------
#include "lzw.h"
#include "hash.h"
#include "fileio.h"
#include "encode.h"
#include "decode.h"
//------------------------------------------------------------------------------
// File handles and hard-coded paths used by the sample main() below.
HANDLE h_file_sour; // Handle opened on file_name_in (input of encode).
HANDLE h_file_dest; // Handle opened on file_name_out (output of encode).
HANDLE h_file; // Handle opened on file_name (target of the disabled decode call).
CHAR* file_name_in = "d:\\code.c";
CHAR* file_name_out= "d:\\encode.e";
CHAR* file_name = "d:\\decode.d";
//------------------------------------------------------------------------------
int main(int argc, char *argv[])
{
h_file_sour = file_handle(file_name_in);
h_file_dest = file_handle(file_name_out);
h_file = file_handle(file_name);
encode(h_file_sour, h_file_dest);
// decode(h_file_dest,h_file);
CloseHandle(h_file_sour);
CloseHandle(h_file_dest);
CloseHandle(h_file);
return 0;
}
3 后语
之前研究 GIF 文件格式时偶然接触了 LZW 压缩算法,于是就想自己动手实现。从一开始看人家的源码,然后跟着模仿,到现在用自己的语言表达出来,从理解原理到代码的实现花费了不少时间与精力,但是真正的快乐也就在这里。现在把她拿出来跟大家分享,也就是分享快乐。