哈夫曼二叉树源码:
给定一个字符串,根据统计字符串中各个字符出现的频率对字符进行哈夫曼编码,然后对原字符串进行编码,并输出编码后的内容
——数据结构
#include <string.h>
#define MAX_NODE 1024
#define MAX_WEIGHT 4096
typedef struct HaffmanTreeNode {
char ch, code[15];
int weight;
int parent, lchild, rchild;
} HTNode, *HaTree;
typedef struct {
HTNode arr[MAX_NODE];
int total;
} HTree;
/* 统计字符出现的频率 */
int statistic_char(char *text, HTree *t){
int i, j;
int text_len = strlen(text);
t->total = 0;
for (i=0; i<text_len; i++) {
for (j=0; j<t->total; j++) if (t->arr[j].ch == text[i]){
t->arr[j].weight ++;
break;
}
if (j==t->total) {
t->arr[t->total].ch = text[i];
t->arr[t->total].weight = 1;
t->total ++;
}
}
printf("char frequence\n");
for (i=0; i<t->total; i++)
printf("'%c' %d\n", t->arr[i].ch, t->arr[i].weight);
printf("\n");
return t->total;
}
int create_htree(HTree *t)
{
int i;
int total_node = t->total * 2 - 1;
int min1, min2; /* 权最小的两个结点 */
int min1_i, min2_i; /* 权最小结点对应的编号 */
int leaves = t->total;
for (i=0; i<leaves; i++) {
t->arr[i].parent = -1;
t->arr[i].rchild = -1;
t->arr[i].lchild = -1;
}
while (t->total < total_node) {
min1 = min2 = MAX_WEIGHT;
for (i=0; i<t->total; i++) { /* 对每一个结点 */
if (t->arr[i].parent == -1 /* 结点没有被合并 */
&& t->arr[i].weight < min2) { /* 结点的权比最小权小 */
if (t->arr[i].weight < min1) { /* 如果它比最小的结点还小 */
min2_i = min1_i; min2 = min1;
min1_i = i; min1 = t->arr[i].weight;
}
else
{
min2_i = i; min2 = t->arr[i].weight;
}
}
}
t->arr[t->total].weight = min1 + min2;
t->arr[t->total].parent = -1;
t->arr[t->total].lchild = min1_i;
t->arr[t->total].rchild = min2_i;
t->arr[min1_i].parent = t->total;
t->arr[min2_i].parent = t->total;
t->arr[t->total].ch = ' ';
t->total ++;
}
return 0;
}
/* 对哈夫曼树进行编码 */
void coding(HTree *t, int head_i, char *code)
{
if ( head_i == -1) return;
if (t->arr[head_i].lchild == -1 && t->arr[head_i].rchild == -1) {
strcpy(t->arr[head_i].code, code);
printf("'%c': %s\n", t->arr[head_i].ch, t->arr[head_i].code);
}
else {
int len = strlen(code);
strcat(code, "0");
coding(t, t->arr[head_i].lchild, code);
code[len] = '1';
coding(t, t->arr[head_i].rchild, code);
code[len] = '\0';
}
}
/* 中序打印树 */
void print_htree_ldr(HTree *t, int head_i, int deep, int* path)
{
int i;
if (head_i == -1) return;
path[deep] = 0;
print_htree_ldr(t, t->arr[head_i].lchild, deep + 1, path);
if (deep) printf(" ");
for (i=1; i<deep; i++) printf("%s", path[i]==path[i-1]?" ":"│ ");
int dir = path[i]==path[i-1];
if ( t->arr[head_i].lchild == -1 && t->arr[head_i].rchild == -1)
printf("%s── %d '%c'\n", dir? "┌":"└",
t->arr[head_i].weight, t->arr[head_i].ch);
else if (head_i != t->total-1)
printf("%s─%02d┤\n", dir? "┌":"└"", t->arr[head_i].weight);
else
printf(" %02d┤\n", t->arr[head_i].weight);
path[deep] = 1;
print_htree_ldr(t, t->arr[head_i].rchild, deep + 1, path);
}
/* 对字符进行编码 */
void code_string(char *text, HTree *t)
{
int i, j, text_len = strlen(text);
int n = 0;
for (i=0; i<text_len; i++) {
char ch = text[i];
for (j=0; j<t->total; j++) if (ch == t->arr[j].ch) {
printf("%s ", t->arr[j].code);
n += strlen(t->arr[j].code);
break;
}
}
printf("\n%d chars, Total len = %d\n", text_len, n);
}
int main(int argc, char* argv[])
{
HTree t;
char text[128]="ABAAAAEEEAAACCCCAAAACCDEA";
char code[128] = "";
int path[16]={0};
statistic_char(text, &t);
create_htree(&t);
print_htree_ldr(&t, t.total-1, 0, path);
coding(&t, t.total-1, code);
code_string(text, &t);
return 0;
}
输出结果:
char frequence
'A' 13
'B' 1
'E' 4
'C' 6
'D' 1
┌── 6 'C'
┌─12┤
│ │ ┌── 1 'B'
│ │ ┌─02┤
│ │ │ └── 1 'D'
│ └─06┤
│ └── 4 'E'
25┤
└── 13 'A'
'C': 00
'B': 0100
'D': 0101
'E': 011
'A': 1
1 0100 1 1 1 1 011 011 011 1 1 1 00 00 00 00 1 1 1 1 00 00 0101 011 1