#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#define MAJOR_VERSION 0
#define MINOR_VERSION 1

#define BLOCK_SIZE 	1024
#define ALPHABET_SIZE 	16384*100

typedef struct
{
	char	*	symbol;
	int 		nb;
	int		code;
	int 		codesize;
}
symbol_t;

typedef struct
{
	unsigned int	tot;
	unsigned int	asize;
	char            alphabet[ALPHABET_SIZE];
	symbol_t	symbols[ALPHABET_SIZE];	
	unsigned int    nbytes;         	
} 
model_t;

model_t model;

int strings[ALPHABET_SIZE];
int  nstrings = 0;

static int SortBySize(const void *a, const void *b)
{

	symbol_t *pa = (symbol_t *)(a);
	symbol_t *pb = (symbol_t *)(b);

	int sa = pa->nb;
	int sb = pb->nb;
	
	int la = strlen(pa->symbol);
	int lb = strlen(pb->symbol);

	if ( sb - sa )
		return sb - sa;

	if( lb - la )
		return lb - la;

	return strcmp(pa->symbol, pb->symbol);
}

			
int update(char *s, model_t *m)
{
	int i;
	int ret = -1;

	m->tot++;

	// fprintf(stderr, "\n\nUpdate(\"%s\", model) [asize=%d] \n", s, m->asize);

	for( i = 0 ; i < m->asize ; i++ )
	{
		if ( (strstr(m->symbols[i].symbol, s ) == m->symbols[i].symbol) && (strlen(s) >= strlen(m->symbols[i].symbol)) )  
		{
			m->symbols[i].nb++;
			ret = i;			
			break;
		}
	}

	if( ret == -1 )
	{
		// fprintf(stderr, "new \n");
		strcpy(m->alphabet + m->nbytes, s);
		m->symbols[m->asize].symbol = m->alphabet + m->nbytes;
		m->nbytes += strlen(s) + 1;
		m->symbols[m->asize].nb = 1;
		ret = m->asize++;
	}
	// else fprintf(stderr, "old\n");

	return ret;
}

int ReadToken(char *src, char *dst)
{
	int i = 0;

	while( isalpha(src[i]) )
		i++;
	
	if( ! isalpha(src[0]) )
		i++;

	strncpy(dst, src, i);
	dst[i] = 0;
	return i;
}

int main(int argc, char *argv[] )
{

	int  i, bsize, count = 0;
	int code = 0, total = 0; 
	char buf[BLOCK_SIZE];
	char buf2[BLOCK_SIZE];
	int first = 1;
	int nlines = 0;
	int repeat = 0;
	
	fprintf(stderr, "Strings encoding v%d.%d \n", MAJOR_VERSION, MINOR_VERSION);

	while ( ! feof(stdin ) )
	{
		if( fgets(buf, BLOCK_SIZE, stdin) )
		{
			nlines++;
			// fprintf(stderr, "line %d : %s (len=%d) \n", nlines++, buf, strlen(buf) ); 
			if( ++repeat == BLOCK_SIZE)
			{
				fprintf(stderr, "lines=%d asize=%d \n", nlines, model.asize);
				repeat = 0;
			}
			
			bsize  =  strlen(buf);

			total += bsize;

			for( i = 0 ; i < bsize ;  i += ReadToken(buf + i, buf2) )
			{
				if( first != 1 )
				{
					strings[nstrings++] = update(buf2, &model);
					// fprintf(stderr, "%s", buf2);
					// fprintf(stderr, "\nnstrings=%d\n", nstrings);
				}
				else
					first = 0;
			}
		}
	}
	fflush(stderr);

	fprintf(stderr, "Generate compressed strings \n");

	for( i = 0 ; i < nstrings ; i++ )
	{
		fprintf(stdout, "%s", model.symbols[ strings[i] ].symbol);
	} 

	fprintf(stderr, "Sort compressed strings \n");

	qsort(model.symbols, model.asize, sizeof(symbol_t), SortBySize);
	for( i = 0 ; i < model.asize ; i++ )
	{	
		// Golomb exponentials codes
		model.symbols[i].code = ++code;
		model.symbols[i].codesize = (int)(log(code) / log(2))*2 + 1; 
				
		fprintf(stderr, "Symbol[%d] = %s (x%d %1.1f%%) code=%d, codesize=%d, compressed=%d bits, reduction=%1.1f [%d bits/%d bits] \n", 
			i, model.symbols[i].symbol, model.symbols[i].nb,
			100.0f * (float)(model.symbols[i].nb) / (float)(model.tot),
			model.symbols[i].code, model.symbols[i].codesize,
			model.symbols[i].nb * model.symbols[i].codesize,
			(float)(model.symbols[i].codesize) / (float)(8 * strlen(model.symbols[i].symbol) ),
			model.symbols[i].codesize * model.symbols[i].nb , 8 * model.symbols[i].nb * strlen(model.symbols[i].symbol)
		);

		count += model.symbols[i].codesize * model.symbols[i].nb;
	}


	fprintf(stderr, "\n\n%d characters in / %d strings out (%d symbols, headersize=%d) \n", 
		total, nstrings, model.asize, model.nbytes);
	
	fprintf(stderr, "\ncompression = %2.2f (%d/%d) \n",
		// 100.0f * (float)(nstrings + model.nbytes) / (float)(total),
		100.0f * (float)(count + model.nbytes*8 ) / (float)(total*8),
		 count/8 + model.nbytes , total); 

	fprintf(stderr, "\ncompression sans header = %2.2f (%d/%d) \n", 
		100.0f * (float)(count) / (float)(total*8),
		 count/8 , total); 

	return 0;
}