#include #include #include #include #include "huffman.h" #include "timestamp.h" int verbose = 0; void PrintByte(int byte, int nbits, int justify) { static int pos = 0; int i, power; power = 1L << (nbits - 1); if ( justify > nbits ) { for( i = 0; i < justify - nbits ; i++ ) { fprintf(stderr, " "); if( (++pos % 8) == 0) { fprintf(stderr, " "); } } } for( i = 8 - nbits ; i < 8 ; i++, power >>= 1) { if( byte & power ) { fprintf(stderr, "1"); } else { fprintf(stderr, "0"); } if( justify ) { if( (++pos % 8) == 0 ) { fprintf(stderr, " "); } } else pos = 0; } } int InitHuffmanCodes() { int i; huffman_t *huf; memset( HuffmanCodes, 0, sizeof(HuffmanCodes)); for( i = 0, huf = HuffmanCodes ; i <= MAX_CHARACTERS ; i++, huf++ ) { huf->character = i; } for( i = 0 , huf = HuffmanCodes ; i < MAX_NODES ; i++, huf++ ) { HuffmanSorted[i] = huf; } nHuffmanCodes = MAX_CHARACTERS; } int ReadSymbols( FILE *file) { int n, j, nread; unsigned char c; unsigned char tread[16]; // while( fread(&c, 1, 1, file) == 1 ) for ( n = 0 ; (nread = fread(tread, 1, 16, file)) > 0 ; n+= nread) { for (j = 0 ; j < nread ; j++ ) { c = tread[j]; HuffmanCodes[c].nOccurrences++; if ( verbose && (HuffmanCodes[c].nOccurrences == 1) ) { fprintf(stderr, " Add code ASCII %d (%c) \n", c, c); } } } rewind (file); } static int CompareHuffmanOccurences( const void *a, const void *b) { huffman_t *pa = (huffman_t *)(a); huffman_t *pb = (huffman_t *)(b); return ( pa->nOccurrences - pb->nOccurrences); } static int IndirectCompareHuffmanOccurences( const void *a, const void *b) { huffman_t *pa = *(huffman_t **)a; huffman_t *pb = *(huffman_t **)b; if ( pa == NULL && pb == NULL ) return 0; if ( pa == NULL ) return 1; if ( pb == NULL ) return -1; return CompareHuffmanOccurences(pa,pb); } static int IndirectCompareHuffmanOccurencesInv( const void *a, const void *b) { return IndirectCompareHuffmanOccurences(b,a); } void ComputeHuffmanCodes( huffman_t *node, int code, int level ) { int i; node->code = code; node->nBits = level; if( node->left != NULL ) { ComputeHuffmanCodes(node->left, code * 2, level + 1); } if ( verbose) { for( i = 0 ; i < level ; i++) { fprintf(stderr, "\t"); } if( (node->left == NULL) && (node->right == NULL) ) { if( node->character != '\n' ) { fprintf(stderr, "ASCII %d \'%c\'", node->character, node->character); } else { fprintf(stderr, "ASCII %d ENTER", node->character); } } else { fprintf(stderr, "* "); } fprintf(stderr, "(id=%d code=0x%x %dbits x%d) [", node - HuffmanCodes, node->code, node->nBits, node->nOccurrences); PrintByte(node->code, node->nBits, 0); fprintf(stderr, "]\n\n"); } if( node->right != NULL ) { ComputeHuffmanCodes(node->right, (code * 2) + 1, level + 1); } } int GenerateHuffmanForest() { int n, i = 0; HuffmanRoot = HuffmanCodes + MAX_CHARACTERS; qsort(HuffmanSorted, nHuffmanCodes, sizeof(huffman_t *), IndirectCompareHuffmanOccurences); while( HuffmanSorted[i]->nOccurrences == 0) i++; for( n = nHuffmanCodes - i ; n > 1 ; n--, i += 2, nHuffmanCodes++ ) { qsort(HuffmanSorted + i, n, sizeof(huffman_t *), IndirectCompareHuffmanOccurences); HuffmanRoot=&HuffmanCodes[nHuffmanCodes]; HuffmanRoot->character = 255; HuffmanRoot->left = HuffmanSorted[i]; HuffmanRoot->right = HuffmanSorted[i+1]; HuffmanRoot->nOccurrences = HuffmanSorted[i]->nOccurrences + HuffmanSorted[i+1]->nOccurrences; } } static int CompareHuffmanCodes( const void *a, const void *b) { huffman_t *pa = (huffman_t *)(a); huffman_t *pb = (huffman_t *)(b); if( pa->nOccurrences == 0 ) return -1; if( pb->nOccurrences == 0 ) return 1; if ( pa->code < pb->code ) return -1; if ( pa->code > pb->code ) return 1; return 0; } static int IndirectCompareHuffmanCodes( const void *a, const void *b) { huffman_t *pa = *(huffman_t **)a; huffman_t *pb = *(huffman_t **)b; // fprintf(stderr, "indirect %p %p \n", pa, pb); if ( pa == NULL && pb == NULL ) return 0; if ( pa == NULL ) return -1; if ( pb == NULL ) return 1; return CompareHuffmanCodes(pa,pb); } static int CompareHuffmanCharacters( const void *a, const void *b) { huffman_t *pa = (huffman_t *)(a); huffman_t *pb = (huffman_t *)(b); if ( pa->character < pb->character ) return -1; if ( pa->character > pb->character ) return 1; return 0; } // used by WriteBits() and FinishWriteBits() int _bits_ = 0; unsigned char _idx_ = 0; void WriteBits(unsigned int val, int bits) { unsigned int i; for( i = bits ; i-- ; ) { _idx_ <<= 1; _idx_ |= (val >> i) & 0x1; if( ++_bits_ == WRITE_BLOC ) { fwrite(&_idx_, WRITE_BLOC / 8, 1, stdout); _bits_ = 0; _idx_ = 0; } } } int FinishWriteBits() { WriteBits(0, WRITE_BLOC); WriteBits(0, WRITE_BLOC); return 2 * WRITE_BLOC; } int main( int argc, char *argv[] ) { FILE *file; int i, n, bits; unsigned char c; int nread, j; unsigned char tread[16]; fprintf(stderr, "\nHuffman compression by YLP v%d.%d \n\n", MAJOR_VERSION, MINOR_VERSION); if( (file = fopen(argv[1], "ro")) == NULL ) { fprintf(stderr, "cannot open %s :( \n", argv[1] ); return 0; } start = stampstart(); fprintf(stderr, "Init Huffman Codes :"); InitHuffmanCodes(); stop = stampstop(start); fprintf(stderr, "Read symbols :"); ReadSymbols(file); stop = stampstop(stop); fprintf(stderr, "Generate Huffman codes : "); GenerateHuffmanForest(); ComputeHuffmanCodes(HuffmanRoot, 0, 0); if ( verbose ) { for( i = n = bits = 0 ; i < 256 ; i++ ) { if( HuffmanCodes[i].nOccurrences ) { n += HuffmanCodes[i].nOccurrences; bits += HuffmanCodes[i].nBits * HuffmanCodes[i].nOccurrences; PrintByte(HuffmanCodes[i].code, HuffmanCodes[i].nBits, 8); fprintf(stderr, " (ASCII=%d [%c] code=%d nOccurences=%-2d bits/symbol=%-2d total=%d bits) \n", HuffmanCodes[i].character, HuffmanCodes[i].character, HuffmanCodes[i].code, HuffmanCodes[i].nOccurrences, HuffmanCodes[i].nBits, HuffmanCodes[i].nOccurrences * HuffmanCodes[i].nBits ); } } fprintf(stderr, " [%d/%d = %d bits/symbol] ", bits, n, bits / n ); } stop = stampstop(stop); fprintf(stderr, "Write the compressed data : "); // fwrite(HuffmanCodes, sizeof(huffman_t), 256, stdout); // bits = 256 * 8 * sizeof(huffman_t); bits = 256 * 12 * 8; for ( i = 0 ; i < 256 ; i++ ) { fwrite(&HuffmanCodes[i].code, 4, 1, stdout); fwrite(&HuffmanCodes[i].nBits, 4, 1, stdout); fwrite(&HuffmanCodes[i].nOccurrences, 4, 1, stdout); } /* for( n = 0 ; fread(&c, 1, 1, file) == 1 ; n++ ) { bits += HuffmanCodes[c].nBits; WriteBits(HuffmanCodes[c].code, HuffmanCodes[c].nBits); } */ for ( n = 0 ; (nread = fread(tread, 1, 16, file)) > 0 ; n+= nread) { for (j = 0 ; j < nread ; j++ ) { c = tread[j]; bits += HuffmanCodes[c].nBits; WriteBits(HuffmanCodes[c].code, HuffmanCodes[c].nBits); } } bits += FinishWriteBits(); fclose(file); stop = stampstop(stop); fprintf(stderr, "\n\nCompression : %d/%d = %2.2f%% \n\n", bits/8, n , (float)(12.5f * bits) / (float)(n) ); fprintf(stderr, "Total time : "); stop = stampstop(start); fprintf(stderr, "\n"); return 0; }