霍夫曼树(HuffmAn Tree,以下简称ht)压缩法:
(据我猜测,RAR使用的就是这种算法)
数据结构:
二叉树(如果不懂什么是二叉树,找一本大学的《数据结构》课本
看看,否则下面的很多名词都会看不懂)
由于VB现在还不支持链表,所以记录二叉树有一点麻烦。建议使用
以下的数据结构:
Public Type Node
DAtA As DAtAType '记录数据
Left As Integer '左子树的下标
Right As Integer '右子树的下标
End Type
Public HuffmAnTree() As Node
如果有必要,还可以在Node中定义一个FAther As Integer来记录
父节点。
===========
这只是压缩的,解压的还没写好,写好了再贴上来:)
我只加了时间的优化,没有加空间的优化,所以是严格按照标准Huffman Tree做的,压缩比不太高,但速度很快
老规矩,没写注释^_^
注:其中Progress是TfrmMain上的一个ProgressBar,Status是一个StatusBar
const
FileHead: string[8]='Huffman'#0;
HeadSize=8;
BufCount=$FFFF;
type
TCode=array[0..255]of Byte;
TNodeCode=record
Ascii: Byte;
Code: TCode;
end;
procedure TfrmMain.Compress (SName, TName: string);
type
PNode=^TNode;
TNode=record
Ascii, Code: Byte;
Num: Integer;
Left, Right, Father: PNode;
CodeStr: TCode;
end;
var
SFile, TFile: file;
Buf: array[1..BufCount]of Byte;
Size, Wrote: Integer;
Appears: array[0..255]of Integer;
NodeNum: SmallInt;
Nodes: array[1..256]of PNode;
CodeNum: SmallInt;
Codes: array[1..256]of TNodeCode;
AscCodes: array[0..255]of TCode;
I, J, ReadByte: Integer;
P: PNode;
{Varibles below are used for WriteBit}
Bits, CurByte: Byte;
OutBuf: array[1..BufCount]of Byte;
BitsSize: Word;
procedure BuildCode (P: PNode);
begin
if P=nil then Exit;
with P^ do
begin
CodeStr:= Father^.CodeStr;
Inc (CodeStr[0]);
CodeStr[CodeStr[0]]:= Code;
end;
if P^.Left=nil then
begin
Inc (CodeNum);
Codes[CodeNum].Code:= P^.CodeStr;
Codes[CodeNum].Ascii:= P^.Ascii;
Exit;
end;
BuildCode (P^.Left);
BuildCode (P^.Right);
end;
procedure FreeTree (P: PNode);
var
R: PNode;
begin
if P=nil then Exit;
R:= P^.Left;
FreeTree (R);
R:= P^.Right;
FreeTree (R);
Dispose (P);
end;
procedure WriteBit (Bit: Byte);
var
Temp: Byte;
begin
Dec (Bits);
Temp:= Bit shl Bits;
CurByte:= CurByte or Temp;
if Bits=0 then
begin
Bits:= 8;
Inc (BitsSize);
OutBuf[BitsSize]:= CurByte;
CurByte:= 0;
if BitsSize=BufCount then
begin
BlockWrite (TFile, OutBuf, BitsSize);
BitsSize:= 0;
FillChar (OutBuf, SizeOf(OutBuf), 0);
end;
end;
end;
procedure FlushBit;
begin
if (Bits=8) and (BitsSize=0) then Exit;
if Bits<>8 then
begin
Inc (BitsSize);
OutBuf[BitsSize]:= CurByte;
end;
BlockWrite (TFile, OutBuf, BitsSize);
Bits:= 8;
CurByte:= 0;
BitsSize:= 0;
FillChar (OutBuf, SizeOf(OutBuf), 0);
end;
begin
Canceled:= False;
Bits:= 8;
CurByte:= 0;
BitsSize:= 0;
FillChar (OutBuf, SizeOf(OutBuf), 0);
btnCancel.Enabled:= True;
AssignFile (SFile, SName);
AssignFile (TFile, TName);
Status.SimpleText:= '正在扫描输入文件...';
Reset (SFile, 1);
FillChar (Appears, SizeOf(Appears), 0);
while not Eof(SFile) do
begin
BlockRead (SFile, Buf, BufCount, ReadByte);
for I:= 1 to ReadByte do Inc (Appears[Buf[I]]);
end;
CloseFile (SFile);
Status.SimpleText:= '正在生成哈夫曼树...';
NodeNum:= 0;
FillChar (Nodes, SizeOf(Nodes), 0);
for I:=0 to 255 do
if Appears[I]>0 then
begin
New (P);
with P^ do
begin
Ascii:= I;
Code:= 2;
Num:= Appears[I];
Left:= nil;
Right:= nil;
Father:= nil;
FillChar (CodeStr, SizeOf(CodeStr), 0);
end;
J:= 1;
while (J<=NodeNum) and (Nodes[J]^.Num>=P^.Num) do Inc (J);
Inc (NodeNum);
Move (Nodes[J], Nodes[J+1], (NodeNum-J)*SizeOf(Nodes[J]));
Nodes[J]:= P;
end;
if NodeNum=1 then Nodes[1]^.Code:=0;
while NodeNum>1 do
begin
New (P);
with P^ do
begin
Num:= 0;
Ascii:= 0;
Code:= 2;
Left:= nil;
Right:= nil;
Father:= nil;
FillChar (CodeStr, SizeOf(CodeStr), 0);
end;
P^.Right:=Nodes[NodeNum];
Nodes[NodeNum]^.Father:= P;
Nodes[NodeNum]^.Code:= 1;
Inc (P^.Num, Nodes[NodeNum]^.Num);
Dec (NodeNum);
P^.Left:=Nodes[NodeNum];
Nodes[NodeNum]^.Father:= P;
Nodes[NodeNum]^.Code:= 0;
Inc (P^.Num, Nodes[NodeNum]^.Num);
J:= NodeNum;
while (J>=2) and (Nodes[J-1]^.Num<=P^.Num) do Dec (J);
Move (Nodes[J], Nodes[J+1], (NodeNum-J)*SizeOf(Nodes[J]));
Nodes[J]:= P;
end;
CodeNum:= 0;
if Nodes[1]<>nil then
if Nodes[1]^.Left=nil
then
begin
CodeNum:= 1;
with Codes[1] do
begin
Ascii:= Nodes[1]^.Ascii;
FillChar (Code, SizeOf(Code), 0);
Code[0]:=1;
end;
end
else
begin
BuildCode (Nodes[1]^.Left);
BuildCode (Nodes[1]^.Right);
end;
FreeTree (Nodes[1]);
FillChar (AscCodes, SizeOf(AscCodes), 0);
for I:= 1 to CodeNum do
with Codes[I] do
AscCodes[Ascii]:= Code;
Status.SimpleText:= '正在写输出文件...';
Reset (SFile, 1);
Rewrite (TFile, 1);
BlockWrite (TFile, FileHead[1], HeadSize);
BlockWrite (TFile, CodeNum, SizeOf(CodeNum));
for I:= 1 to CodeNum do
with Codes[I] do
begin
BlockWrite (TFile, Ascii, SizeOf(Ascii));
BlockWrite (TFile, Code[0], SizeOf(Code[0]));
for J:= 1 to Code[0] do WriteBit (Code[J]);
FlushBit;
end;
Size:= FileSize(SFile);
BlockWrite (TFile, Size, SizeOf(Size));
Wrote:= 0;
Progress.Min:= 0;
Progress.Max:= Size;
while not Eof(SFile) do
begin
BlockRead (SFile, Buf, BufCount, ReadByte);
for I:= 1 to ReadByte do
for J:= 1 to AscCodes[Buf[I], 0] do
WriteBit (AscCodes[Buf[I], J]);
Inc (Wrote, ReadByte);
Progress.Position:= Wrote;
end;
FlushBit;
CloseFile (TFile);
CloseFile (SFile);
Status.SimpleText:= '完成';
btnCancel.Enabled:= False;
end;
----
Worms Armageddon的武器介绍:
Homing Missile: Every home should have one.
Longbow: Robin Who?
...
算法原理:
ht首先被提出来,是为了解决这样的问题:
对于N种数据(比如5种数据:A、B、C、D、E),在出现的频率已
知的情况下(比如分别出现了3、5、2、6、4次),如何用不等长的01
串来分别表示它们,使01串的总长度最短。
比如原始串:ABADBCBDABEDBDEDCEDE
对于这个问题,首先得到:任何一个01串都不能是其他01串的前缀
。也就是说,如果用“10”来表示A,那么其他01串就不能以“10”开
头。
建立01串的步骤如下:
首先找到出现最少的两个数据(A、C),分别以它们为左右子树,
建立一个二叉树。并将它们出现次数之和作为根节点:
1)
5 5B 6D 4E
/ \
3A 2C
然后从剩下的4个数举重找到两个最小的,做同上的操作,知道只
剩一个数据为止:
2)
5 9 6D
/ \ / \
3A 2C 5B 4E
3)
11 9
/ \ / \
5 6D 5B 4E
/ \
3A 2C
4)
20
/ \
11 9
/ \ / \
5 6D 5B 4E
/ \
3A 2C
最后从根节点开始,每个左子树填0,右子树填1:
ROOT
/ \
0 1
/ \ / \
0 1D 0B 1E
/ \
0A 1C
这样每个数据对应的01串就是从根节点到数据所在的叶子节点的路
径:
A: 000
B: 10
C: 001
D: 01
E: 11
这样原始串ABADBCBDABEDBDEDCEDE就成了:
000100000110001100100010110110011101001110111
压缩:
把256个Ascii码看作256种数据,分析它们出现的次数,建立ht,
得到256个01串。用这256个01串去替换原来的字符并按照二进制处理,
就可以得到压缩后的文件了。