#include #include #include #include #include "huffman.h" #include "timestamp.h" int verbose = 0; void PrintByte(int byte, int nbits, int justify) { static int pos = 0; int i, power; power = 1L << (nbits - 1); if ( justify > nbits ) { for( i = 0; i < justify - nbits ; i++ ) { fprintf(stderr, " "); if( (++pos % 8) == 0) { fprintf(stderr, " "); } } } for( i = 8 - nbits ; i < 8 ; i++, power >>= 1) { if( byte & power ) { fprintf(stderr, "1"); } else { fprintf(stderr, "0"); } if( justify ) { if( (++pos % 8) == 0 ) { fprintf(stderr, " "); } } else pos = 0; } } int InitHuffmanCodes() { int i; huffman_t *huf; memset( HuffmanCodes, 0, sizeof(HuffmanCodes)); for( i = 0, huf = HuffmanCodes ; i <= MAX_CHARACTERS ; i++, huf++ ) { huf->character = i; } for( i = 0 , huf = HuffmanCodes ; i < MAX_NODES ; i++, huf++ ) { HuffmanSorted[i] = huf; } nHuffmanCodes = MAX_CHARACTERS; } int ReadSymbols( FILE *file) { int n, j, nread; unsigned char c; unsigned char tread[F_BLOC]; // while( fread(&c, 1, 1, file) == 1 ) for ( n = 0 ; (nread = fread(tread, 1, F_BLOC, file)) > 0 ; n+= nread) { for (j = 0 ; j < nread ; j++ ) { c = tread[j]; HuffmanCodes[c].nOccurrences++; if ( verbose && (HuffmanCodes[c].nOccurrences == 1) ) { fprintf(stderr, " Add code ASCII %d (%c) \n", c, c); } } } rewind (file); return n; } static int CompareHuffmanOccurences( const void *a, const void *b) { huffman_t *pa = (huffman_t *)(a); huffman_t *pb = (huffman_t *)(b); return ( pa->nOccurrences - pb->nOccurrences); } static int IndirectCompareHuffmanOccurences( const void *a, const void *b) { huffman_t *pa = *(huffman_t **)a; huffman_t *pb = *(huffman_t **)b; if ( pa == NULL && pb == NULL ) return 0; if ( pa == NULL ) return 1; if ( pb == NULL ) return -1; return CompareHuffmanOccurences(pa,pb); } static int IndirectCompareHuffmanOccurencesInv( const void *a, const void *b) { return IndirectCompareHuffmanOccurences(b,a); } void ComputeHuffmanCodes( huffman_t *node, int code, int level ) { int i; node->code = code; node->nBits = level; if( node->left != NULL ) { ComputeHuffmanCodes(node->left, code * 2, level + 1); } if ( verbose) { for( i = 0 ; i < level ; i++) { fprintf(stderr, "\t"); } if( (node->left == NULL) && (node->right == NULL) ) { if( node->character != '\n' ) { fprintf(stderr, "ASCII %d \'%c\'", node->character, node->character); } else { fprintf(stderr, "ASCII %d ENTER", node->character); } } else { fprintf(stderr, "* "); } fprintf(stderr, "(id=%d code=0x%x %dbits x%d) [", node - HuffmanCodes, node->code, node->nBits, node->nOccurrences); PrintByte(node->code, node->nBits, 0); fprintf(stderr, "]\n\n"); } if( node->right != NULL ) { ComputeHuffmanCodes(node->right, (code * 2) + 1, level + 1); } } int GenerateHuffmanForest() { int n, i = 0; HuffmanRoot = HuffmanCodes + MAX_CHARACTERS; qsort(HuffmanSorted, nHuffmanCodes, sizeof(huffman_t *), IndirectCompareHuffmanOccurences); while( HuffmanSorted[i]->nOccurrences == 0) i++; for( n = nHuffmanCodes - i ; n > 1 ; n--, i += 2, nHuffmanCodes++ ) { qsort(HuffmanSorted + i, n, sizeof(huffman_t *), IndirectCompareHuffmanOccurences); HuffmanRoot=&HuffmanCodes[nHuffmanCodes]; HuffmanRoot->character = 255; HuffmanRoot->left = HuffmanSorted[i]; HuffmanRoot->right = HuffmanSorted[i+1]; HuffmanRoot->nOccurrences = HuffmanSorted[i]->nOccurrences + HuffmanSorted[i+1]->nOccurrences; } } static int CompareHuffmanCodes( const void *a, const void *b) { huffman_t *pa = (huffman_t *)(a); huffman_t *pb = (huffman_t *)(b); if( pa->nOccurrences == 0 ) return -1; if( pb->nOccurrences == 0 ) return 1; if ( pa->code < pb->code ) return -1; if ( pa->code > pb->code ) return 1; return 0; } static int IndirectCompareHuffmanCodes( const void *a, const void *b) { huffman_t *pa = *(huffman_t **)a; huffman_t *pb = *(huffman_t **)b; // fprintf(stderr, "indirect %p %p \n", pa, pb); if ( pa == NULL && pb == NULL ) return 0; if ( pa == NULL ) return -1; if ( pb == NULL ) return 1; return CompareHuffmanCodes(pa,pb); } static int CompareHuffmanCharacters( const void *a, const void *b) { huffman_t *pa = (huffman_t *)(a); huffman_t *pb = (huffman_t *)(b); if ( pa->character < pb->character ) return -1; if ( pa->character > pb->character ) return 1; return 0; } // used by WriteBits() and FinishWriteBits() int _bits_ = 0; unsigned char _idx_ = 0; void WriteBits(unsigned int val, int bits) { unsigned int i; for( i = bits ; i-- ; ) { _idx_ <<= 1; _idx_ |= (val >> i) & 0x1; if( ++_bits_ == WRITE_BLOC ) { fwrite(&_idx_, WRITE_BLOC / 8, 1, stdout); _bits_ = 0; _idx_ = 0; } } } int FinishWriteBits() { WriteBits(0, WRITE_BLOC); WriteBits(0, WRITE_BLOC); return 2 * WRITE_BLOC; } int main( int argc, char *argv[] ) { FILE *file; int i, n, bits; unsigned char c; int nread, j; unsigned char tread[F_BLOC]; uint32_t t0, t1, t2, t3,t4, t5, t6; start = stampstart(); fprintf(stderr, "\nHuffman compression by YLP v%d.%d \n", MAJOR_VERSION, MINOR_VERSION); fprintf(stderr, "\nOpen the file %s : ", argv[1] ); if( (file = fopen(argv[1], "ro")) == NULL ) { fprintf(stderr, "cannot open %s :( \n", argv[1] ); return 0; } t1 = stampstop(start); fprintf(stderr, "\nInit Huffman Codes : "); InitHuffmanCodes(); t2 = stampstop(t1); fprintf(stderr, "\nRead symbols : "); n = ReadSymbols(file); t3 = stampstop(t2); if ( t3 == t2) t2--; // because if the file is too small this make < 1 ms for to parse the file ... fprintf(stderr, "%d Mbits/s (%d Kbytes) \n", (int)((float)(n)/(float)(t3 - t2)) / 1000, n / 1024); fprintf(stderr, "\nGenerate Huffman Table : "); GenerateHuffmanForest(); t4 = stampstop(t3); if( t4 == t3) t3--; // if the file is too small, this make < 1 ms foot to generate the forest fprintf(stderr, "%d Knodes/s (%d nodes) \n", nHuffmanCodes / (t4-t3), nHuffmanCodes ); fprintf(stderr, "\nAggregate Codes : "); ComputeHuffmanCodes(HuffmanRoot, 0, 0); for( i = n = bits = 0 ; i < 256 ; i++ ) { if( HuffmanCodes[i].nOccurrences ) { n += HuffmanCodes[i].nOccurrences; bits += HuffmanCodes[i].nBits * HuffmanCodes[i].nOccurrences; if ( verbose ) { PrintByte(HuffmanCodes[i].code, HuffmanCodes[i].nBits, 8); fprintf(stderr, " (ASCII=%d [%c] code=%d nOccurences=%-2d bits/symbol=%-2d total=%d bits) \n", HuffmanCodes[i].character, HuffmanCodes[i].character, HuffmanCodes[i].code, HuffmanCodes[i].nOccurrences, HuffmanCodes[i].nBits, HuffmanCodes[i].nOccurrences * HuffmanCodes[i].nBits ); } } } t5 = stampstop(t4); if( t5 == t4) t4--; // "Too speed" cf. always < 1ms fprintf(stderr, "%d Msyms/s ", (int)((float)(n)/(float)(t5 - t4)) / 1000 ); fprintf(stderr, "(%d Kbits / %d Ksyms = %.3f bits/symbol) \n", bits / 1024 , n / 1000, (float)bits / (float)n ); fprintf(stderr, "\nWrite the compressed data : "); // fwrite(HuffmanCodes, sizeof(huffman_t), 256, stdout); // bits = 256 * 8 * sizeof(huffman_t); bits = 256 * 12 * 8; for ( i = 0 ; i < 256 ; i++ ) { fwrite(&HuffmanCodes[i].code, 4, 1, stdout); fwrite(&HuffmanCodes[i].nBits, 4, 1, stdout); fwrite(&HuffmanCodes[i].nOccurrences, 4, 1, stdout); } /* for( n = 0 ; fread(&c, 1, 1, file) == 1 ; n++ ) { bits += HuffmanCodes[c].nBits; WriteBits(HuffmanCodes[c].code, HuffmanCodes[c].nBits); } */ unsigned int val, cnt, k ; for ( n = 0 ; (nread = fread(tread, 1, F_BLOC, file)) > 0 ; n+= nread) { for (j = 0 ; j < nread ; j++ ) { c = tread[j]; bits += HuffmanCodes[c].nBits; WriteBits(HuffmanCodes[c].code, HuffmanCodes[c].nBits); /* cnt = HuffmanCodes[c].nBits; val = HuffmanCodes[c].code; bits += cnt; for( k = cnt ; k ; k-- ) { _idx_ <<= 1; _idx_ |= ( val >> k) & 0x1; if( ++_bits_ == WRITE_BLOC ) { fwrite(&_idx_, WRITE_BLOC / 8, 1, stdout); _bits_ = 0; _idx_ = 0; } } */ } } bits += FinishWriteBits(); fclose(file); stop = stampstop(t5); if( stop == t5) t5--; // because if the file is very small, the compressed file is generated in less than 1 ms fprintf(stderr, "%d Mbits/s (%d Kbytes) \n", (int)((float)(bits)/(float)(stop - t5)) / 1000, bits / 8192); fprintf(stderr, "\nCompression : %d / %d = %2.2f%% \n\n", bits/8, n , (float)(12.5f * bits) / (float) (n) ); fprintf(stderr, "Input : %.3f Mbits/s ", ((float)(n*8) / (float)(stop - start)) / 1000.0f ); fprintf(stderr, "(%d bytes / %d ms) \n", n, stop - start); fprintf(stderr, "Output : %.3f Mbits/s ", ((float)(bits) / (float)(stop - start)) / 1000.0f ); fprintf(stderr, "(%d bytes / %d ms) \n", bits / 8 , stop - start); fprintf(stderr, "\nTotal time : "); stop = stampstop(start); printf("\n"); return 0; }