不错的流式压缩算法 - 王朝网络宽屏版

//#include "vec.h"

#include "def.h"

#include <stdlib.h>

#include <memory.h>

#include <math.h>

#define NEW_PUTNBITS /* putnbits() puts multiple bits at a time, instead of multiple calls to putbit() for each bit */

#ifdef DEB_malloc

void *malloc(size_t tSize)

{

void *vpTmp = NULL;

if(((int)tSize) <= 0)

{

fprintf(stderr, "Trying to allocate %d bytes, but thats Nonsense...", (int)tSize);

fflush(NULL);

return NULL;

}

vpTmp = malloc(tSize);

if(vpTmp == NULL)

{

fprintf(stderr, "Trying to allocate %d bytes, but mem is not available...", (int)tSize);

fflush(NULL);

}

return(vpTmp);

}

#endif

/* Faster way of computing floor log2 */

/* logn = (int) floor(L2 ((double) n)); */

int getlog2(unsigned int n)

{

unsigned int tmp;

/* int logn = 31; */

int logn = (sizeof(unsigned int)*8) - 1;

/*printf("n: %i ", n);*/ /* debug */

/* if most numbers will be small (0-255), this will save some time */

if((n & 0xFF) == n)

{

logn = 7;

n = n << ((sizeof(unsigned int)*8) - 8);

}

for(; logn > 0; logn--) /* find position of leftmost 1 of n */

{

tmp = n << 1;

tmp = tmp >> 1; /* clear MSB */

if(tmp == n) /* if top bit is not a 1 */

n = n << 1;

else

break;

}

/*printf("logn: %i\n",logn);*/ /* debug */

return logn;

}

/* Returns a pointer to the inverted file vector

The vector is read in from the vector file

*don't call initvec before this function, but initialise the

new vector directly e.g. BITVEC vec={NUL,0,0,0,0,0,0}

*resetvec will be called automatically after loading)

int loadvector(long fileoffset, FILE *fpVectors, BITVEC *vp)

{

if (fseek(fpVectors, fileoffset, SEEK_SET) != -1)

return vbytereadvec(vp, fpVectors);

else

return -1;

}

/* Return length of vector */

int veclen(BITVEC *vp)

{

return(vp->len);

}

/* Before a vector can be read (when loaded from file), it must be reset */

void resetvec(BITVEC *vp)

{

vp->last = -1;

vp->pos = vp->bit = 0;

vp->cur = vp->vector[0];

}

void freevec(BITVEC *vp)

{

if(vp != NULL)

{

free(vp->vector);

vp->vector = NULL;

}

vp->size = 0;

vp->last = -1;

vp->bit = vp->pos = vp->len = vp->cur = 0;

}

/* Initialising a vector before insertion of data */

void initvec(BITVEC *vp, int initveclen)

{

vp->vector = malloc(initveclen * sizeof(char));

vp->size = initveclen;

vp->last = -1;

vp->bit = vp->pos = vp->len = vp->cur = 0;

}

/* Write the contents of a vector to file. This version does not use

vbtyewrite for the vector length, so if the vector is to be opened

with loadvector, use vbytedumpvec (below) instead.

Assumes that padvec has been called. */

void dumpvec(BITVEC *vp, FILE *fp)

{

#ifdef DEBUG

int i;

resetvec(vp);

fprintf(stderr,"[", i);

while( (i=getrunlen(vp)) >=0 )

fprintf(stderr," %d", i);

fprintf(stderr,"] {", i);

resetvec(vp);

while( (i=getdelta(vp)) >=0 )

fprintf(stderr," %d", i);

fprintf(stderr,"}", i);

#endif /* DEBUG */

fwrite(&vp->len, sizeof(int), 1, fp);

fwrite(vp->vector, sizeof(char), vp->len/8 + 1, fp);

}

/* This is a different version of dumpvec that uses vbyteread to

get the vector length... this is the operation that is 'undone'

in vbytereadvec (which is called by loadvector)...

Assumes that padvec has been called. */

int vbytedumpvec(BITVEC *vp, FILE *fp)

{

vbytewrite(vp->len, fp);

return(fwrite(vp->vector, sizeof(char), vp->len/8 + 1, fp));

}

/* Read a vector back from file */

/* Returns -1 on error */

int vbytereadvec(BITVEC *vp, FILE *fp)

{

int len;

if ((vp->len = vbyteread(fp)) == -1)

{

fprintf(stderr, "vbytereadvec: Error reading vbyte coded vector length %d\n", vp->len);

return(-1);

}

vp->size = 4 * ( ( vp->len/8 + 8 ) / 4 ); /* word boundaries */

len = vp->len/8 + 1;

vp->vector = malloc(vp->size * sizeof(char));

if (fread(vp->vector, sizeof(char), len, fp) <= 0)

{

fprintf(stderr, "vbytereadvec: Error reading vector (length: %d)\n", len);

return(-1);

}

resetvec(vp);

return(len);

}

/* Read a vector back from file */

/* Returns -1 on error */

int readvec(BITVEC *vp, FILE *fp)

{

int len;

fread(&vp->len, sizeof(int), 1, fp);

vp->size = 4 * ( ( vp->len/8 + 8 ) / 4 ); /* word boundaries */

len = vp->len/8 + 1;

vp->vector = malloc(vp->size * sizeof(char));

if(vp->vector != NULL)

{

fread(vp->vector, sizeof(char), len, fp);

resetvec(vp);

}

else

{

printf("Can not allocate %i bytes...:(\n", vp->size * sizeof(char));

return -1;

}

return(0);

}

/* Read a vector back from file -- used in cafezipread.c */

/* Returns -1 on error */

int readzipvec(BITVEC *vp, FILE *fp)

{

int len;

len = vbyteread(fp);

initvec(vp, len+1);

vp->len = len;

if (vp->len > 11)

{

len = vp->len/8 + 1;

fread(vp->vector, sizeof(char), len, fp);

resetvec(vp);

}

return(vp->len);

}

int roadzipvec(BITVEC *vp, char *buf,int buflen)

{

int len;

len = buflen*10;

initvec(vp, len+1);

vp->len = len;

//if (vp->len > 11)

//{

//len = vp->len/8 + 1;

memcpy(vp->vector,buf,buflen);

//fread(vp->vector, sizeof(char), len, fp);

resetvec(vp);

// }

return(vp->len);

}

int putunary(BITVEC *vp, int n)

{

int ret;

ret = putnbits(vp, 0, n);

/*ret += putbit(vp, 1);*/

ret += putnbits(vp, 1, 1);

return(ret);

}

int getunary(BITVEC *vp)

{

int ret = 0;

for(;getbit(vp) != 1;ret++);

return(ret);

}

void initunarylookup(char *table)

{

/* Must be passed a char array of size [255].

This is initialised with the appropriate values for

the getunary2 lookup */

int d;

for (d = 255; d >= 0; d--)

{

if (d > 127)

table[d] = 0;

else if (d > 63)

table[d] = 1;

else if (d > 31)

table[d] = 2;

else if (d > 15)

table[d] = 3;

else if (d > 7)

table[d] = 4;

else if (d > 3)

table[d] = 5;

else if (d > 1)

table[d] = 6;

else if (d > 0)

table[d] = 7;

else

table[d] = -1;

}

int getunary2(BITVEC *vp, char *table)

{

/* Must use initunarylookup before calling this! */

int temp, ret = 0;

unsigned char b;

while ((b = vp->cur << vp->bit) == 0)

{ /* while (the relevant bits in this byte are all zero) */

ret += 8 - vp->bit; /* Increase ret by the number of zero bits in this byte */

vp->pos++; /* Update bitvec positions to point at start of next byte */

vp->bit = 0;

vp->cur = vp->vector[vp->pos];

}

temp = table[b]; /* look up the unary number in this byte */

vp->bit += temp+1; /* update bitvec position */

return (ret + temp);

}

/* Add the number n to the vector, assuming run-length coding */

int putrunlen(BITVEC *vp, int n)

{

int ret;

if( n <= vp->last )

return(0); /* hack: don't add the same code twice */

ret = putdelta(vp, n - vp->last);

vp->last = n;

return(ret);

}

/* Get a number from the vector, assuming run length coding */

int getrunlen(BITVEC *vp)

{

int ret;

ret = getdelta(vp);

if( ret > 0 )

{

vp->last += ret;

return(vp->last);

}

else

return(-1);

}

/* Standard delta (Elias) */

int putdelta(BITVEC *vp, int n)

{

int mag;

int ret;

// if(n < 1 || n > 33554430) {

// fprintf(stderr, "putdelta: Error, range for delta coding exceeded (%i)\n", n);

// return -1;

// }

mag = getlog2((unsigned int) n);

ret = putgamma(vp, mag+1);

ret += putnbits(vp, n, mag); /* don't output leftmost (ie, top) bit */

return(ret);

}

/* Returns -1 on eof */

int getdelta(BITVEC *vp)

{

int mag, val;

mag = getgamma(vp) - 1;

if( mag < 0 ) return(-1);

val = getnbits(vp, mag);

if( val < 0 ) return(-1);

return( ( 1 << mag ) | val );

}

int scandelta(BITVEC *vp)

{

int mag;

mag = getgamma(vp) - 1;

/* Got the length, now just scan over the rest... */

while (mag > (7 - vp->bit))

{

vp->pos++;

mag -= 8;

}

vp->bit += mag;

vp->cur = vp->vector[vp->pos];

return( 0 );

}

/* Standard gamma (Elias) */

int putgamma(BITVEC *vp, int n)

{

int mag;

int ret;

if(n < 1 || n > 33554430)

{

// fprintf(stderr, "putgamma: Error, range for gamma coding exceeded (%i)!!!\n", n);

return -1;

}

mag = getlog2((unsigned int) n);

ret = putnbits(vp, 0, mag);

ret += putnbits(vp, n, mag+1);

return(ret);

}

/* Returns -1 on eof */

int getgamma(BITVEC *vp)

{

int b, mag, val;

for( mag=0 ; ( b = getbit(vp) ) == 0 ; mag++ );

if( b < 0 ) return(-1);

val = getnbits(vp, mag);

if( val < 0 ) return(-1);

return( ( 1 << mag ) | val );

}

/* Puts integer n into vector using variable-byte coding

* Coding scheme: use first bit of a byte to indicate if more

* bytes follow (1) or not (0). Use other 7 bits of each byte

* to store the integer. E.g.

* 154 = 10011010 in binary

* = 1 0011010 0 0000001 in variable-byte

* where bits 1 and 9 are 'indicator' bits,

* bits 2-8 represent the 7 least-significant bits of the integer, and

* bits 10-16 represent the 7 most-significant bits of the integer

int OLDputvbyte(BITVEC *vp, int n)

{

int ret = 0;

if (n < 0)

{

printf("Error: putvbyte tried to put value %i", n);

exit(1);

}

while (n >= 128)

{

/*ret += putbit(vp, 0x1);*/

ret += putnbits(vp, 0x1, 1);

ret += putnbits(vp, n&0xff, 7);

n >>= 7;

}

/*ret += putbit(vp, 0x0);*/

ret += putnbits(vp, 0x0, 1);

ret += putnbits(vp, n&0xff, 7);

return ret;

}

/* Offsets are on byte boundary, so can get value directly from

vector, which is faster than via putnbits as used by OLDputvbyte. */

int putvbyte(BITVEC *vp, unsigned long n)

{

int ret = 0;

// if (n < 0)

// {

// printf("Error: putvbyte tried to put value %i", n);

// exit(1);

// }

while (n >= 128)

{

if( vp->pos >= vp->size )

expandvec(vp);

vp->vector[vp->pos++] =(unsigned char) (n & 0x7f) | 0x80;

n >>= 7;

ret += 8;

}

if( vp->pos >= vp->size )

expandvec(vp);

vp->vector[vp->pos++] =(unsigned char) (n & 0x7f) & 0x7f;

ret += 8;

return ret;

}

int getvbyte(BITVEC *vp)

{

int ret = 0x0, count = 0, get = 0;

while(((get = getnbits(vp, 8)) & 0x80) == 0x80)

{

/* For each time we get a group of 7 bits, need to left-shift the latest group by

an additional 7 places, since the groups were effectively stored in reverse order.*/

ret |= ((get & 0x7f) << count);

count += 7;

}

/* Now get the final 7 bits, which may again need to be left-shifted by a factor of 7. */

return (ret |= (get & 0x7f) << count);

}

/* getvbyte3: offsets are on byte boundary so can get value directly from

vector, which is faster than via getnbits as used by getvbyte.

This version uses shifting instead of masking as in getvbyte2*/

int getvbyte3(BITVEC *vp)

{

int ret = 0x0, count = 0;

unsigned char get = 0, temp = 0;

get = vp->vector[vp->pos++];

temp = get << 1;

temp = temp >> 1; /* clear MSB */

while(get != temp) /* while top bit is a 1 */

{

ret |= temp << count;

count += 7;

get = vp->vector[vp->pos++];

temp = get << 1;

temp = temp >> 1; /* clear MSB */

}

#ifdef DEBUG_GETVBYTE

ret |= temp << count;

printf("Offset: %i\n",ret);

if (ret < 0)

{

printf("Error: getvbyte() retrieved value %i", ret);

exit(1);

}

#endif

return ret;

}

/* getvbyte2: offsets are on byte boundary so can get value directly from

vector, which is faster than via getnbits as used by getvbyte */

/* This is the fastest */

int getvbyte2(BITVEC *vp)

{

int ret = 0x0, count = 0, get = 0;

while( ((get = vp->vector[vp->pos++]) & 0x80) == 0x80 )

{

/* For each time we get a group of 7 bits, need to left-shift the latest group by

an additional 7 places, since the groups were effectively stored in reverse order.*/

ret |= ((get & 0x7f) << count);

count += 7;

}

/* Now get the final 7 bits, which need to be left-shifted by a factor of 7. */

ret |= (get & 0x7f) << count;

/* vp->cur = vp->vector[vp->pos]; */

#undef DEBUG_GETVBYTE

#ifdef DEBUG_GETVBYTE

printf("Offset: %i\n",ret);

if (ret < 0)

{

printf("Error: getvbyte() retrieved value %i", ret);

exit(1);

}

#endif

return ret;

}

void scanvbyte(BITVEC *vp)

{

/* Use this instead of getvbyte when don't care abour the offset value, but just

want to progress along the vector */

while(((vp->cur >> (7 - vp->bit) ) & 0x1) == 0x1)

{ /* while (the sign-bit in the bitvector is 1) */

vp->pos++; /* jump forward 1 byte in vp */

vp->cur = vp->vector[vp->pos];

}

/* next bit must be a 0 sign-bit, so jump forward another byte to the end of this v-byte */

vp->pos++;

vp->cur = vp->vector[vp->pos];

}

/* scanvbyte2: offsets are on byte boundary */

void scanvbyte2(BITVEC *vp)

{

while( (vp->vector[vp->pos++] & 0x80) == 0x80 )

{ /* while (the sign-bit in the bitvector is 1) */

/* do nothing */

}

vp->cur = vp->vector[vp->pos];

}

void OLDscanvbyte(BITVEC *vp)

{

int get = 0;

while(((get = getnbits(vp, 8)) & 0x80) == 0x80)

{

; /* Do nothing! */

}

int AlignVbyteWrite(BITVEC *vp2)

{

if (vp2->bit != 0) /* start offsets on byte boundary */

{

putnbits(vp2,0,8 - vp2->bit);

}

return 0;

}

int AlignVbyteRead(BITVEC *vp)

{

if (vp->bit != 0) /* freq is on byte boundary */

{

vp->bit = 0;

vp->pos++;

vp->cur = vp->vector[vp->pos];

}

return 0;

}

int OLDgetvbyte(BITVEC *vp)

{ /*no skipping */

int ret = 0x0, count = 0;

printf("NOT skip\n");

fflush(NULL);

while(getbit(vp) == 0x1)

{

/* For each time we get a group of 7 bits, need to left-shift the

latest group by an additional 7 places, since the groups were effectively

stored in reverse order.*/

ret |= ((getnbits(vp, 7) & 0x7f) << count);

count += 7;

}

/* Now get the final 7 bits, which may again need to be left-shifted by

a factor of 7. */

ret |= (getnbits(vp, 7) & 0x7f) << count;

if (ret < 0)

{

printf("Error: getvbyte() retrieved value %i", ret);

exit(1);

}

return (ret);

}

/* -------------- Golomb Coding ---------------- */

/**Constant to signal unitnitialized Hash Table value**/

#define GOLHASH_UNINIT -1

/** This will store the maximum b_offset encountered durring

index-build in putgolomb **/

static int iMaxBOffset = 0;

/* Standard Golomb */

/* x = value to be put in */

/* b = coding parameter */

int putgolomb(BITVEC *vp, int x, int b)

{

int mag, ret, rem, q, d;

#ifdef DEBUG_GOLOMB

if(x<1)

{

printf("vec::putgolomb Coding Error with %i\n", x);

return -1;

}

#endif

if(b > iMaxBOffset) iMaxBOffset = b;

/* work out floor of (x - 1)/b */

mag = (int) floor(((double)(x - 1))/((double)b));

#ifdef DEBUGGOLOMB

printf("-----------\nstoring: %d with b: %d, mag: %d\n", x,b,mag);

#endif

/* Store mag+1 in unary */

/* Think about it: mag=3 means storing 001, hence this DOES work */

ret = putnbits(vp, 0, mag);

/*ret += putbit(vp, 1);*/

ret += putnbits(vp, 1, 1);

#ifdef DEBUGGOLOMB

printf("stored mag of: %d\n", mag+1);

#endif

/* What's the remainder? ie. x - (mag * b) */

rem = x - (mag * b) - 1;

#ifdef DEBUGGOLOMB

printf("rem: %d\n", rem);

#endif

/* Now work out the point at which coding requires floor(log(b))+1 bits */

/* There are b remainders, so 2^k < b <= 2^(k+1) */

/* Therefore, 2^k <= b and then k = log_2 b, so first k remainders */

/* can be represented in floor(log_2 b) bits [phew] */

q = getlog2((unsigned int) b);

/* Now, work out the base value to use when we hit the toggle point (q) */

/* Since there are q+1 bits (max) used to represent a remainder, there */

/* can be 2^(q+1) such values using q+1 bits. Since there are b values */

/* we want to represent, if the remainder is less than 2^(q+1) - b, in other */

/* words if there are "spares", we can use q bits [phew again!]. */

/* Geez, Golomb coding isn't fun. */

/* So, d = 2^(q+1) - b (jz/sara figured this, thanks) */

d = (1 << (q+1)) - b;

#ifdef DEBUGGOLOMB

printf("toggle point q: %d with a d of: %d\n", q, d);

#endif

/* Can this remainder be represented in floor(log_2 b) bits? */

if (rem < d)

/* Store the value of remainder directly */

ret += putnbits(vp, rem, q);

else

{

#ifdef DEBUGGOLOMB

printf("actually storing: %d in toggle+1 (%d) bits\n", d+rem, q+1);

#endif

/* Store the value of (d + remainder) in floor(log_2 b) + 1 bits */

ret += putnbits(vp, d+rem, q+1);

}

#ifdef DEBUGGOLOMB

printf("stored: %d bits\n-----------\n", ret);

#endif

return(ret);

}

/* Constant to signal unitnitialized Hash Table value */

#define GOLHASH_UNINIT -1

/* If hashing is to be used, call initgolhash before using getgolomb() */

int initgolhash(GOLHASH_ROOT *rpGolHashRoot, int iSize)

{

int x;

rpGolHashRoot->iSize = iSize;

rpGolHashRoot->ipGolHashTable = (int *)malloc(sizeof(int) * (iSize + 1));

if(rpGolHashRoot->ipGolHashTable != NULL) /*Initialise hash table*/

{

/* Table initialised to 'empty', values are calculated the first time that we

need them and then stored */

for (x=0;x<=iSize;x++)

rpGolHashRoot->ipGolHashTable[x] = GOLHASH_UNINIT;

/* Or, use this to calculate all values at the beginning

rpGolHashRoot->ipGolHashTable[x] = (int) floor(L2 ((double) x));

}

else

{

printf("Can not allocate Golomb Hash Table (size %i)\n", sizeof(int) * (iSize + 1));

rpGolHashRoot->iSize = 0;

return false;

}

return true;

}

/* Standard Golomb -> b = coding parameter must be >= 1 */

/* If several Golomb codes are to be decompressed that share the same parameter b, more efficient to make use

of getgolomb1 and getgolomb2 below */

int getgolomb(BITVEC *vp, int b, GOLHASH_ROOT *pGolHashRoot)

{

int mag;

int q, d,i;

int rem;

/* Get mag+1 in unary */

/* By setting mag=0, we actually get mag [rather than mag+1, which */

/* is what was stored] [phew!] */

for( mag=0 ; (i = getbit(vp)) == 0 ; mag++ );

if(i<0) return -1;

if(pGolHashRoot != NULL)

if(b < 0 || b > pGolHashRoot->iSize) /*** sanity check ***/

{

printf("b value: %i is outside of TABLESIZE: %i\n", b, pGolHashRoot->iSize);

fflush(NULL);

return -3;

}

q = GOLHASH_UNINIT;

/* Try and look up the log_2(b)... if not there, calculate and store */

if(pGolHashRoot != NULL)

q = pGolHashRoot->ipGolHashTable[b];

if(q == GOLHASH_UNINIT)

{

/* Now work out the toggle point for remainders as before */

q = getlog2((unsigned int) b);

if(pGolHashRoot != NULL)

pGolHashRoot->ipGolHashTable[b] = q;

}

/* Get the first q bits (we may need (q+1) actually) */

if((rem = getnbits(vp, q)) < 0)

return -1;

/* Now, work out the base value to use for the toggle point (q) */

/* It's d=2^(q+1) - b (jz/sara figured this, thanks) */

/* See my notes in putgolomb() */

d = (1 << (q+1)) - b;

/* Do we need to get an extra bit? */

if (rem >= d)

{

/* If so:

(1) shift rem left and OR with the bit we get

(2) subtract d to get remainder */

rem = ((rem << 1) | getbit(vp)) - d;

}

/* The value is the mag * b + remainder + 1 */

return(mag * b + rem + 1);

}

/* Standard Golomb -> b = coding parameter must be >= 1 */

int getgolomb1(int iItem, int iCount, GOLHASH_ROOT *pGolHashRoot, GOLOMBPARAMS *pgPar)

{

int b, q;

/* Compute b value */

if((b = (long)(0.69 * ((double)(iItem) / (double) iCount)))<1) b=1;

pgPar->b = b;

#if defined(CACHE_IN_ADVANCE) || defined(CACHE_IF_NEEDED)

if(pGolHashRoot != NULL)

{

if(b < 0 || b > pGolHashRoot->iSize) /*** sanity check ***/

{

printf("b value: %i is outside of TABLESIZE: %i\n", b, pGolHashRoot->iSize);

fflush(NULL);

return false;

}

q = pGolHashRoot->ipGolHashTable[b];

if(q == GOLHASH_UNINIT)

{

/* Now work out the toggle point for remainders as before */

q = getlog2((unsigned int) b);

if(pGolHashRoot != NULL) pGolHashRoot->ipGolHashTable[b] = q;

}

else /* No caching table */

#endif

q = getlog2((unsigned int) b);

pgPar->q = q; /* Copy parameter to be returned */

/* Now, work out the base value to use for the toggle point (q) */

/* It's d=2^(q+1) - b (jz/sara figured this, thanks) */

/* See my notes in putgolomb() */

pgPar->d = (1 << (q+1)) - b;

return true;

}

/* Standard Golomb -> b = coding parameter must be >= 1 */

int getgolomb2(BITVEC *vp, GOLOMBPARAMS *gPar)

{

int mag, i, rem;

/* Get mag+1 in unary */

/* By setting mag=0, we actually get mag [rather than mag+1, which */

/* is what was stored] [phew!] */

for( mag=0 ; (i = getbit(vp)) == 0 ; mag++ );

/* mag = getunary2(vp, UNARYTABLE); */

#ifdef DEBUG_GOLOMB

if(i<0) return -1;

#endif

/* Get the first q bits (we may need (q+1) actually) */

if((rem = getnbits(vp, gPar->q)) < 0) return -1;

/* Do we need to get an extra bit? */

if (rem >= gPar->d)

{

/* If so:

(1) shift rem left and OR with the bit we get

(2) subtract d to get remainder */

rem = ((rem << 1) | getbit(vp)) - gPar->d;

}

/* The value is the mag * b + remainder + 1 */

return(mag * gPar->b + rem + 1);

}

/* --------------------------- RICE CODING ------------------------- */

/* Standard Rice encoding.

Slightly inefficient: logb should be computed externally and passed in

at each call, to avoid repetition.

int

putrice(BITVEC *vp, int x, int b)

{

int mag, ret, rem, logb;

/* Record largest value of b, used to define hash-table size for getrice. */

if(b > iMaxBOffset) iMaxBOffset = b;

/* work out floor of (x - 1)/b */

/* should be passed in as parameter */

logb = getlog2((unsigned int) b);

/* Ensure that b is a non-negative power of 2 (required for Rice coding) */

b = 1 << logb;

mag = ( x - 1 ) >> logb;

#ifdef DEBUGRICE

printf("-----------\nstoring: %d with b: %d, mag: %d\n", x,b,mag);

#endif

ret = putnbits(vp, 0, mag);

/*ret += putbit(vp, 1);*/

ret += putnbits(vp, 1, 1);

/* What's the remainder? ie. x - (mag * b) */

rem = x - (mag * b) - 1;

#ifdef DEBUGRICE

printf("rem: %d\n", rem);

#endif

ret += putnbits(vp, rem, logb);

#ifdef DEBUGRICE

printf("stored: %d bits\n-----------\n", ret);

#endif

return(ret);

}

/* Standard Rice decoding.

NOTE. b must be a non-negative power of 2 or this will fail.

Slightly inefficient: logb should be computed externally and passed in

at each call, to avoid repetition.

int

getrice(BITVEC *vp, int b, GOLHASH_ROOT *pGolHashRoot)

{

int mag;

int logb;

int rem;

int ret;

for( mag=0 ; getbit(vp) == 0 ; mag++ )

;

if (pGolHashRoot != NULL)

{

if (b < 0 || b > pGolHashRoot->iSize)

{

printf("Warning (getrice): b value %i is outside of TABLESIZE %i\n", b, pGolHashRoot->iSize);

fflush(NULL);

return -3;

}

else

{ /* Try to look up log_2(b)... */

logb = pGolHashRoot->ipGolHashTable[b];

if (logb == GOLHASH_UNINIT)

{ /* it's not in the table yet, so calculate it and then add it to the table */

logb = getlog2((unsigned int) b);

pGolHashRoot->ipGolHashTable[b] = logb;

}

b = 1 << logb;

}

else

{ /* This is only used for the debugging function in merge.c, where it is possible

that CACHE is defined, but pGolHashRoot is still NULL...

In this case, just calculate logb each time. */

logb = getlog2((unsigned int) b);

}

rem = getnbits(vp, logb);

/* The value is the mag * b + remainder + 1 */

ret = mag * b + rem + 1;

return(ret);

}

/* Standard Rice decoding */

/* getrice1 is used to compute the coding parameters b and logb (they are looked up

in a table for efficiency) and stored in prPar */

int old_getrice1(int iItem, int iCount, GOLHASH_ROOT *pGolHashRoot, RICEPARAMS *prPar)

{

int b, logb;

/** Compute b Value for Rice coding parameters **/

if((b = (long)(0.69 * ((double)(iItem) / (double) iCount)))<1) b=1;

if(pGolHashRoot != NULL)

{

if(b < 0 || b > pGolHashRoot->iSize) /*** sanity check ***/

{

printf("b value: %i is outside of TABLESIZE: %i\n", b, pGolHashRoot->iSize);

fflush(NULL);

return false;

}

logb = pGolHashRoot->ipGolHashTable[b];

if(logb == GOLHASH_UNINIT) /* Not in table, so calculate and store */

{

logb = (int) floor(L2 ((double) b));

if(pGolHashRoot != NULL) pGolHashRoot->ipGolHashTable[b] = logb;

}

else /** No caching table, so calculate each time **/

logb = (int) floor(L2 ((double) b));

prPar->logb = logb; /** Copy parameter to be returned **/

/* Ensure that b is a non-negative power of 2, else Rice coding will fail */

prPar->b = 1 << logb;

return true;

}

/* Standard Rice decoding */

/* getrice1 is used to compute the coding parameters b and logb */

int getrice1(int iItem, int iCount, GOLHASH_ROOT *pGolHashRoot, RICEPARAMS *prPar)

{

unsigned int b, logb;

/** Compute b Value for Rice coding parameters **/

if((b = (long)(0.69 * ((double)(iItem) / (double) iCount)))<1) b=1;

logb = getlog2((unsigned int) b);

prPar->logb = logb; /** Copy parameter to be returned **/

/* Ensure that b is a non-negative power of 2, else Rice coding will fail */

prPar->b = 1 << logb;

return true;

}

/* Decode the actual Rice value */

int getrice2(BITVEC *vp, RICEPARAMS *rPar)

{

int mag, rem;

/* Get unary part of code */

for( mag=0 ; getbit(vp) == 0 ; mag++ )

;

/* mag = getunary2(vp, UNARYTABLE); */

rem = getnbits(vp, rPar->logb);

/* The values is the mag * b + remainder + 1 */

return (mag * rPar->b + rem + 1);

}

/* Used where there are Rice codes in a sequential bitvector and these can be

passed over without concern for their values */

int scanrice2(BITVEC *vp, RICEPARAMS *rPar)

{

int mag, n = rPar->logb;

for( mag=0 ; getbit(vp) == 0 ; mag++ )

;

/* getunary2(vp, UNARYTABLE); */

/* Got the unary, now just scan over the rest... */

while (n > (7 - vp->bit))

{

vp->pos++;

n -= 8;

}

vp->bit += n;

vp->cur = vp->vector[vp->pos];

return 0;

}

#undef DEGUG_PUTNBITS

#ifdef NEW_PUTNBITS

/* New putnbits() puts multiple bits at a time, instead of multiple calls to putbit() for each bit */

int putnbits(BITVEC *vp, int number, int bits)

{

unsigned char temp = 0;

unsigned int n;

int b, shift;

b = bits;

n = number;

#ifdef DEGUG_PUTNBITS

// printf("n: %i, bits: %i\n", n, bits);

if(bits > int_size)

printf("putnbits(): number of bits %i > int_size %i\n", bits, int_size);

#endif

/* This is needed because sometimes we are sent numbers that will not fit in the bits specified */

/* e.g. number = 11 (= 1011) and bits = 3 */

/* so need to clear upper bits not being put */

n = n << (int_size - bits);

n = n >> (int_size - bits);

for(shift = 8-vp->bit ; bits >= shift ; bits -= shift, shift = 8-vp->bit)

{

#ifdef DEGUG_PUTNBITS

printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);

#endif

if( vp->pos >= vp->size )

expandvec(vp);

vp->cur = vp->cur | (n >> (bits - shift));

vp->vector[vp->pos] = vp->cur;

#ifdef DEGUG_PUTNBITS

printf("vp->pos = %i\tvp->bit = %i\tvp->vector[%i] = %X\n", vp->pos, vp->bit, vp->pos, vp->vector[vp->pos]);

#endif

n = n << (int_size - (bits - shift)); /* clear bits copied to vector */

n = n >> (int_size - (bits - shift));

vp->bit = 0;

vp->pos++; /* move to next byte */

vp->cur = 0;

/*vp->vector[vp->pos] = 0;*/

}

/* Put remaining bits */

if(bits > 0)

{

#ifdef DEGUG_PUTNBITS

printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);

#endif

vp->cur = vp->cur | (n << (shift - bits));

/*vp->vector[vp->pos] = vp->cur;*/

vp->bit += bits;

#ifdef DEGUG_PUTNBITS

printf("vp->pos = %i\tvp->bit = %i\tvp->vector[%i] = %X\n", vp->pos, vp->bit, vp->pos, vp->vector[vp->pos]);

#endif

}

return(b);

}

#else /* Old putnbits() */

/* Put n bits in vector, starting with nth from right, going to rightmost */

/* This is not efficient. Should be recoded to insert several bits

simultaneously ... but not really worth the effort ... */

int putnbits(BITVEC *vp, int n, int num)

{

int i,ret;

#ifdef DEGUG_PUTNBITS

printf("n: %i, num: %i\n", n, num);

if(num > int_size)

printf("putnbits(): number of bits %i > int_size %i\n", num, int_size);

printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);

#endif

for( ret=0, i=num-1 ; i>=0 ; i-- )

ret += putbit(vp, ( n >> i ) & 0x1 );

#ifdef DEGUG_PUTNBITS

printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);

#endif

return(ret);

}

#endif /* NEW_PUTNBITS */

static int masks[9] = { 0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };

/* New version of getnbits that does not need to call getbit */

int getnbits(BITVEC *vp, int num)

{

int mask, shift, val;

unsigned char temp;

val = 0;

for( shift = 8-vp->bit ; num >= shift ; num -= shift, shift = 8-vp->bit )

/* copy out whole of cur */

{

if( 8*vp->pos + vp->bit >= vp->len)

return(-1);

mask = masks[shift];

val = ( val << shift ) | ( vp->cur & mask );

vp->bit = 0;

vp->pos++;

vp->cur = vp->vector[vp->pos];

}

/* Get any remaining bits */

if ( num > 0 )

{

temp = vp->cur;

temp = temp << vp->bit;

temp = temp >> ( 8 - num );

val = ( val << num ) | temp;

vp->bit += num;

}

#ifdef VECDEBUG

if( val < 0 )

printf("Panic in getnbits: overflow\n");

#endif /* VECDEBUG */

return( ( val<0 ) ? -1 : val );

}

/* Get n bits from vector */

/* Rather than calling getbit for each bit, shifts several bits at

once if this would empty the current byte; only makes it one or

two percent faster. */

int OLDgetnbits(BITVEC *vp, int num)

{

int mask, shift, val, b;

b = val = 0;

#if true

for( shift = 8-vp->bit ; num >= shift ; num -= shift, shift = 8-vp->bit )

/* copy out whole of cur */

{

if( 8*vp->pos + vp->bit >= vp->len)

return(-1);

mask = masks[shift];

val = ( val << shift ) | ( vp->cur & mask );

vp->bit = 0;

vp->pos++;

vp->cur = vp->vector[vp->pos];

}

#endif /* true */

/* Get any remaining bits */

for( ; num>0 && ( b = getbit(vp) ) >= 0 ; num-- )

val = ( val << 1 ) | ( b & 0x1 );

#ifdef VECDEBUG

if( val < 0 )

printf("Panic in getnbits: overflow\n");

if( b < 0 && vp->pos < vp->len/8 )

printf("Early termination in getnbits.\n");

#endif /* VECDEBUG */

return( ( b<0 ) ? -1 : val );

}

#ifdef NEW_PUTNBITS

int putbit(BITVEC *vp, int b)

{

unsigned char temp;

temp = b & 0x1;

temp = temp << (8 - vp->bit - 1);

vp->cur = vp->cur | temp;

vp->bit++;

if( vp->bit == 8 ) /* go to next byte */

{

if( vp->pos >= vp->size )

expandvec(vp);

if (disaster == false)

{

vp->vector[vp->pos] = vp->cur;

vp->cur = 0;

vp->pos++;

vp->bit = 0;

/*vp->vector[vp->pos] = 0;*/

}

return(1);

}

#else /* Old putnbits() */

/* Put a bit into vector; bytes are filled left to right */

/* Because of the way bytes are filled, vectors can't be accessed until

padvec has been called. */

int putbit(BITVEC *vp, int b)

{

vp->cur = ( vp->cur << 1 ) | ( b & 0x1 );

vp->bit++;

if( vp->bit == 8 ) /* go to next byte */

{

#ifdef DEGUG_PUTNBITS

printf("vp->pos = %i\tvp->bit = %i\tvp->cur = %X\n", vp->pos, vp->bit, vp->cur);

#endif

if( vp->pos >= vp->size )

expandvec(vp);

if (disaster == false)

{

vp->vector[vp->pos] = vp->cur & 0xff;

vp->cur = 0;

vp->pos++;

vp->bit = 0;

}

return(1);

}

#endif /* NEW_PUTNBITS */

/* Get a bit from a vector */

int getbit(BITVEC *vp)

{

int b;

#ifdef DEBUGBIT

printf("yep\n");

#endif

if( 8*vp->pos + vp->bit >= vp->len )

return(-1);

b = ( vp->cur >> ( 7 - vp->bit ) ) & 0x1;

vp->bit++;

if( vp->bit == 8 )

{

vp->pos++;

vp->bit = 0;

vp->cur = vp->vector[vp->pos];

}

return(b);

}

#undef DEBUG_PADVEC

#ifdef NEW_PUTNBITS

/* Terminate a vector */

void padvec(BITVEC *vp)

{

if( vp->pos >= vp->size ) /* this is a bit yucky */

expandvec(vp);

if (disaster == false)

{

vp->vector[vp->pos] = vp->cur;

vp->len = 8*vp->pos + vp->bit;

#ifdef DEBUG_PADVEC

printf(" vp->vector[%i] = %X\n", vp->pos, vp->vector[vp->pos]);

printf(" vp->len = %i\n", vp->len);

#endif

}

#else /* Old putnbits() */

/* Terminate a vector */

void padvec(BITVEC *vp)

{

if( vp->pos >= vp->size ) /* this is a bit yucky */

expandvec(vp);

if (disaster == false)

{

vp->vector[vp->pos] = ( vp->cur << ( 8-vp->bit ) ) & 0xff;

vp->len = 8*vp->pos + vp->bit;

#ifdef DEBUG_PADVEC

printf(" vp->vector[%i] = %X\n", vp->pos, vp->vector[vp->pos]);

printf(" vp->len = %i\n", vp->len);

#endif

}

#endif /* NEW_PUTNBITS */

/* Enlarge vector */

void expandvec(BITVEC *vp)

{

char *newvec;

int i;

/* should use realloc here */

if ((newvec = malloc( vp->size*2 * sizeof(char) )) == NULL)

{

fprintf(stderr, "Error in expandvec malloc. Tried to allocate: %d bytes\n", vp->size*2 * sizeof(char));

disaster = true;

}

else

{

for( i=0 ; i<vp->size ; i++ )

newvec[i] = vp->vector[i];

vp->size = vp->size*2;

free(vp->vector);

vp->vector = newvec;

}

/** Copy one vector to another for debugging purposes**/

BITVEC *VecCpy(BITVEC *VecDest, BITVEC *VecSrc)

{

if(VecDest->vector != NULL) free(VecDest->vector);

VecDest->size = VecSrc->size; /* Of vector, in bytes */

VecDest->pos = VecSrc->pos; /* Current byte number */

VecDest->bit = VecSrc->bit; /* Current bit in byte */

VecDest->cur = VecSrc->cur; /* Temporary space for putting, getting bits */

VecDest->len = VecSrc->len; /* Number of bits used */

VecDest->last = VecSrc->last; /* Total of runlengths seen so far */

if((VecDest->vector = malloc(VecSrc->size * sizeof(char)))!=NULL)

memcpy(VecDest->vector, VecSrc->vector, VecSrc->size * sizeof(char));

else

{

VecDest->size = /* Of vector, in bytes */

VecDest->pos = /* Current byte number */

VecDest->bit = /* Current bit in byte */

VecDest->cur = /* Temporary space for putting, getting bits */

VecDest->len = /* Number of bits used */

VecDest->last = 0; /* Total of runlengths seen so far */

}

return(VecDest);

}

/******* Cache Compression Functions ***********************************/

#ifdef SUPPORT_FILEBUF_H_CACHING

#include "FileBuf.h"

/*** Variable byte functions with caching ***/

/* Hugh Williams, August 1995 */

/* Dirk (Reviewed March 2001)*/

#define NORMAL_INTS TRUE

Uncomment this if you want integers to be stored uncompressed

Otherwise, all values will be stored w/ variable bit length compression

/* Variable byte length reads and writes of integers */

/**************

Read integer value as normal Integer

or as an array of four bytes (packed representation)

**************/

int cVbyteRead(FB_ROOT *fp)

{

char tmp = 0x1;

int val = 0;

#ifdef NORMAL_INTS

/* Integers are stored in uncommpressed format */

if(cRead(&val, sizeof(int), 1, fp) == 0)

{

if(cEof(fp)) return(-1);

else

return(0);

}

#else

/* Integers are stored in compressed format */

while((tmp & 0x1) == 0x1)

{

if(cRead(&tmp, sizeof(char), 1, fp) == 0)

{

if(cEof(fp)) return(-1);

else

return(0);

}

val = (val << 7) + ((tmp >> 1) & 127);

}

#endif

return(val);

}

/**************

Write integer value as normal Integer

or as an array of four bytes (packed representation)

**************/

int cVbyteWrite(int number, FB_ROOT *fp /*, char *pcCallerID*/)

{

char bytearray[5]; /**DB: We need five byte to cover the full range of int**/

int x, started = false;

int charswritten = 0;

#ifdef NORMAL_INTS

cWrite(&number, sizeof(int), 1, fp);

#else

/* Compress Integers in 7-Bit format

and use the lowest bit to signal

whether more bytes follow (1) in the

Bitstream or not (0)

Dirk: We assume that number are not greater than 28 bit

Use this when uncertain:

if(number < 0 || number > 268435455)

printf("Vbyte Panic %i %s\n",number, pcCallerID);

for(x=0;x<5;x++)

{

/** select seven bits and transfer them one bit left **/

/** store the selected seven bits in the byte array **/

bytearray[x] = (number%128) << 1;

/** get next seven bits through masking the last seven **/

number = number / 128;

}

/**Write data to disk**/

for(x=4;x>0;x--)

{

if (bytearray[x] != 0 || started == true)

{

started = true;

bytearray[x] |= 0x1;

cWrite(&bytearray[x], sizeof(char), 1, fp);

charswritten++;

}

cWrite(&bytearray[0], sizeof(char), 1, fp);

charswritten++;

#endif

return(charswritten);

}

/* Read a vector back from file */

/* Returns -1 on error */

int cVbyteReadVec(BITVEC *vp, FB_ROOT *fp)

{

int len;

if ((vp->len = cVbyteRead(fp)) == -1)

{

fprintf(stderr, "cVbyteReadVec: Error reading vbyte coded vector length %d\n", vp->len);

return(-1);

}

vp->size = 4 * ( ( vp->len/8 + 8 ) / 4 ); /* word boundaries */

len = vp->len/8 + 1;

vp->vector = malloc(vp->size * sizeof(char));

if (cRead(vp->vector, sizeof(char), len, fp) <= 0)

{

fprintf(stderr, "cVbyteReadVec: Error reading vector (length: %d)\n", len);

return(-1);

}

resetvec(vp);

return(len);

}

/* Write the contents of a vector to file. Assumes that padvec has

been called. */

void cDumpVec(BITVEC *vp, FB_ROOT *fp)

{

cWrite(&vp->len, sizeof(int), 1, fp);

cWrite(vp->vector, sizeof(char), vp->len/8 + 1, fp);

}

/* Read a vector back from file */

/* Returns -1 on error */

int cReadVec(BITVEC *vp, FB_ROOT *fp)

{

int len;

cRead(&vp->len, sizeof(int), 1, fp);

vp->size = 4 * ( ( vp->len/8 + 8 ) / 4 ); /* word boundaries */

len = vp->len/8 + 1;

vp->vector = malloc(vp->size * sizeof(char));

if(vp->vector != NULL)

{

cRead(vp->vector, sizeof(char), len, fp);

resetvec(vp);

}

else

{

printf("Can not allocate %i bytes...:(\n", vp->size * sizeof(char));

return -1;

}

return(0);

}

#endif

如上为第一部分，

#include "def.h"

/* Variable byte length reads and writes of integers */

/* We use a representation in which seven bits in each byte is used to code an

integer, with the least significant bit set to 0 if this is the last byte,

or to 1 if further bytes follow.

In this way, we represent small integers efficiently; for example, we

represent 135 in two bytes, since it lies in the range $[2^7\cdots 2^{14})$,

as 00000011~00001110; this is read as 00000010000111 by removing the least

significant bit from each byte and concatenating the remaining~14~bits. */

int vbyteread(FILE *fp)

{

char tmp = 0x1;

int val = 0;

while((tmp & 0x1) == 0x1)

{

if (fread(&tmp, sizeof(char), 1, fp) == 0)

{

if (feof(fp))

return(-1);

else

return(0);

}

val = (val << 7) + ((tmp >> 1) & 127);

}

return(val);

}

int vbytewrite(int number, FILE *fp)

{

char bytearray[4];

char tmp = 0;

int x, started = FALSE;

int charswritten = 0;

for(x=0;x<4;x++)

{

tmp = (number%128) << 1;

bytearray[x] = tmp;

number /= 128;

}

for(x=3;x>0;x--)

{

if (bytearray[x] != 0 || started == TRUE)

{

started = TRUE;

bytearray[x] |= 0x1;

fwrite(&bytearray[x], sizeof(char), 1, fp);

charswritten++;

}

bytearray[0] |= 0x0;

fwrite(&bytearray[0], sizeof(char), 1, fp);

charswritten++;

return(charswritten);

}

第二部分

#include <stdio.h>

#include <ctype.h>

#include <math.h>

#include <sys/types.h>

//#include <sys/mman.h>

#include <sys/stat.h>

#include <fcntl.h>

/* Miscellaneous Constants */

#define true 1

#define false 0

#define FALSE 0

#define TRUE 1

#define BIGVECLEN 5000000

#define GOLHASHTABLESIZE 102397

/* Bit vector structure used in standard Golomb, Elias Gamma & Elias Delta */

typedef struct bitvecrec

{

unsigned char *vector; /* Sequence of bytes to hold bitvector */

int size; /* Of vector, in bytes */

int pos; /* Current byte number */

int bit; /* Current bit in byte */

unsigned char cur; /* Temporary space for putting, getting bits */

int len; /* Number of bits used */

int last; /* Total of runlengths seen so far */

/* The last field is an oddity; it allows us to have

several bitvectors open at once */

} BITVEC;

typedef struct golombhashstruct

{

int b; /* b value used for lookup */

int q; /* q value that is floor(log_2(b)) */

} GOLHASH;

/* Logarithmic functions:

LOGn(X) = LOG(X) / LOG(N) <--- LOG Base n computed through LOG Base 10

#define L2(f) ( log10((f)) / 0.301029995 /*=log10(2.0)*/ )

#define L4(f) ( log10((f)) / 0.602059999 /*=log10(4.0)*/ )

#define L6(f) ( log10((f)) / 0.778151250 /*=log10(6.0)*/ )

#define L8(f) ( log10((f)) / 0.903089990 /*=log10(8.0)*/ )

#define LN(f) ( log10((f)) / 0.434294480 /*=log10(e)*/ )

void padvec(BITVEC *), expandvec(BITVEC *), initcmp(), initvec(BITVEC *, int),

freevec(BITVEC *), resetvec(BITVEC *), inittree(), cleanuptree(),

dumpvec(BITVEC *,FILE *), dumptree(char *);

int vbyteread(FILE *);

int vbytewrite(int, FILE *);

static int disaster = false;

static int int_size = sizeof(int)*8;

/* =============== Bitvector Functions ============== */

/* Initialising a vector before insertion of data */

void initvec(BITVEC *vp, int initveclen);

/* Return length of vector */

int veclen(BITVEC *vp);

/* Before a vector can be read, it must be reset */

void resetvec(BITVEC *vp);

void freevec(BITVEC *vp);

/* Write the contents of a vector to file. This version does not use

vbtyewrite for the vector length, so if the vector is to be opened

with loadvector, use vbytedumpvec (below) instead.

Assumes that padvec has been called. */

void dumpvec(BITVEC *vp, FILE *fp);

/* This is a different version of dumpvec that uses vbyteread to

get the vector length... this is the operation that is 'undone'

in vbytereadvec (which is called by loadvector)...

Assumes that padvec has been called. */

int vbytedumpvec(BITVEC *vp, FILE *fp);

/* Read a vector back from file */

/* Returns -1 on error */

int vbytereadvec(BITVEC *vp, FILE *fp);

/* Read a vector back from file */

/* Returns -1 on error */

int readvec(BITVEC *vp, FILE *fp);

/* Read a vector back from file -- used in cafezipread.c */

/* Returns -1 on error */

int readzipvec(BITVEC *vp, FILE *fp);

int roadzipvec(BITVEC *vp, char *buf,int buflen);/*把buf装入vec中 add by 方旭光2004-10-11*/

/* =============== End Bitvector Functions ============== */

/* =============== Compression Functions ================== */

int putunary(BITVEC *vp, int n);

int getunary(BITVEC *vp);

int getunary2(BITVEC *vp, char *table);

void initunarylookup(char *table);

/* Add the number n to the vector, assuming run-length coding */

int putrunlen(BITVEC *vp, int n);

/* Get a number from the vector, assuming run length coding */

int getrunlen(BITVEC *vp);

/* Put an int in standard delta (Elias) Coding */

int putdelta(BITVEC *vp, int n);

/* Returns -1 on eof */

int getdelta(BITVEC *vp);

/* Returns 0 on eof */

int scandelta(BITVEC *vp);

/* Standard gamma (Elias) */

int putgamma(BITVEC *vp, int n);

/* Returns -1 on eof */

int getgamma(BITVEC *vp);

/* Variable-byte coding */

//int putvbyte(BITVEC *vp, int n);

int putvbyte(BITVEC *vp, unsigned long n);

/* Get variable-byte coded value from vector */

int getvbyte(BITVEC *vp);

void scanvbyte(BITVEC *vp);

/* Put n bits in vector, starting with nth from right, going to rightmost */

/* This is not efficient. Should be recoded to insert several bits

simultaneously ... but not really worth the effort ... */

int putnbits(BITVEC *vp, int n, int num);

/* Get n bits from vector */

/* Rather than calling getbit for each bit, shifts several bits at

once if this would empty the current byte; only makes it one or

two percent faster. */

int getnbits(BITVEC *vp, int num);

/* Put a bit into vector; bytes are filled left to right */

/* Because of the way bytes are filled, vectors can't be accessed until

padvec has been called. */

int putbit(BITVEC *vp, int b);

/* Get a bit from a vector */

int getbit(BITVEC *vp);

/* Terminate a vector */

void padvec(BITVEC *vp);

/* Enlarge vector */

void expandvec(BITVEC *vp);

/*Load a vector from a file with at a certain offset (don't call initvec,

but initialise vector on creation (eg. BITVEC vec={NULL,0,0,0,0,0,0}),

otherwise the BITVEC will be malloc'd twice)*/

int loadvector(long fileoffset, FILE *fpVectors, BITVEC *vp);

/** Copy one vector to another for debugging purposes**/

BITVEC *VecCpy(BITVEC *VecDest, BITVEC *VecSrc);

/* --------------------- Golomb Code ---------------------- */

/**Compute the golomb parameter incl. check :) **/

#define GolParam(k, x1, x2) if((k=(long)(0.69 * ((double)(x1) / (double) x2)))<1) k=1;

/* structure for b and q values in golomb decoding */

/* This structure is used to cache log base 2

results for faster lookup later on **/

typedef struct golhashroot

{

int iSize;

int *ipGolHashTable;

} GOLHASH_ROOT;

/*- structure for b, q, and d values in golomb decoding

- q and d are funcs of b, so we compute q,d whenever we compute b */

typedef struct golombparamsrec

{

int b, q, d;

} GOLOMBPARAMS;

GetGolomb coding version 1 and 2 are used to compute

Golomb Codes in an accelerated fashion

-> getgolomb1 compute coding parameters: b,d,q

-> getgolomb1 get Golomb code from vector vp

int getgolomb1(int iItem, int iCount, GOLHASH_ROOT *pGolHashRoot, GOLOMBPARAMS *pgPar);

int getgolomb2(BITVEC *vp, GOLOMBPARAMS *gPar);

PutGolomb coding version 1 and 2 are used to compute

Golomb Codes in an accelerated fashion

-> putgolomb1 compute coding parameters: b,d,q

-> putgolomb1 get Golomb code from vector vp

int putgolomb1(int iItem, int iCount, GOLOMBPARAMS *pgPar, GOLHASH_ROOT *pGolHashTable);

int putgolomb2(BITVEC *vp, int x, GOLOMBPARAMS gPar);

int GetMaxB_OffsetFromGolomb();

/* Standard Golomb ** x = value to be put in, b = coding parameter */

int putgolomb(BITVEC *vp, int x, int b);

/**Call this before using getgolomb -> Will Return false if an error occured !!!**/

int initgolhash(GOLHASH_ROOT *rpGolHashRoot, int iSize);

/* Standard Golomb, b = coding parameter */

int getgolomb(BITVEC *vp, int b, GOLHASH_ROOT *pGolHashRoot);

/*<---------------------- End of Golomb Code -------------------->*/

typedef struct riceparamsrec {

int b, logb;

} RICEPARAMS;

/* Standard Rice encoding.

NOTE. b must be a non-negative power of 2 or this will fail. */

int putrice(BITVEC *vp, int x, int b);

/* Standard Rice decoding.

NOTE. b must be a non-negative power of 2 or this will fail. */

int getrice(BITVEC *vp, int b, GOLHASH_ROOT *pGolHashRoot);

int getrice1(int iItem, int iCount, GOLHASH_ROOT *pGolHashTable, RICEPARAMS *prPar);

int getrice2(BITVEC *vp, RICEPARAMS *rPar);

int scanrice2(BITVEC *vp, RICEPARAMS *rPar);

/////////////声明